Skip to main content

formualizer_eval/engine/
mod.rs

1//! Formualizer Dependency Graph Engine
2//!
3//! Provides incremental formula evaluation with dependency tracking.
4
5pub mod arrow_ingest;
6pub mod effects;
7pub mod eval;
8pub mod eval_delta;
9pub mod graph;
10pub mod ingest;
11pub mod ingest_builder;
12pub mod journal;
13pub mod plan;
14pub mod range_view;
15pub mod row_visibility;
16pub mod scheduler;
17pub mod spill;
18pub mod vertex;
19pub mod virtual_deps;
20
21// New SoA modules
22pub mod csr_edges;
23pub mod debug_views;
24pub mod delta_edges;
25pub mod interval_tree;
26pub mod named_range;
27pub mod sheet_index;
28pub mod sheet_registry;
29pub mod topo;
30pub mod vertex_store;
31
32// Phase 1: Arena modules
33pub mod arena;
34
35// Phase 1: Warmup configuration (kept for compatibility)
36pub mod tuning;
37
38#[cfg(test)]
39mod tests;
40
41pub use eval::{Engine, EngineAction, EvalResult, RecalcPlan, VirtualDepTelemetry};
42pub use eval_delta::{DeltaMode, EvalDelta};
43pub use journal::{ActionJournal, ArrowOp, ArrowUndoBatch, GraphUndoBatch};
44// Use SoA implementation
45pub use graph::snapshot::VertexSnapshot;
46pub use graph::{
47    ChangeEvent, DependencyGraph, DependencyRef, OperationSummary, StripeKey, StripeType,
48    block_index,
49};
50pub use row_visibility::{RowVisibilitySource, VisibilityMaskMode};
51pub use scheduler::{Layer, Schedule, Scheduler};
52pub use vertex::{VertexId, VertexKind};
53
54pub use graph::editor::{
55    DataUpdateSummary, EditorError, MetaUpdateSummary, RangeSummary, ShiftSummary, TransactionId,
56    VertexDataPatch, VertexEditor, VertexMeta, VertexMetaPatch,
57};
58
59pub use graph::editor::change_log::{ChangeLog, ChangeLogger, NullChangeLogger};
60
61// CalcObserver is defined below
62
63use crate::timezone::TimeZoneSpec;
64use crate::traits::EvaluationContext;
65use crate::traits::VolatileLevel;
66use chrono::{DateTime, Utc};
67use formualizer_common::error::{ExcelError, ExcelErrorKind};
68use std::collections::HashMap;
69
70impl<R: EvaluationContext> Engine<R> {
71    pub fn begin_bulk_ingest(&mut self) -> ingest_builder::BulkIngestBuilder<'_> {
72        ingest_builder::BulkIngestBuilder::new(&mut self.graph)
73    }
74}
75
76/// 🔮 Scalability Hook: Performance monitoring trait for calculation observability
77pub trait CalcObserver: Send + Sync {
78    fn on_eval_start(&self, vertex_id: VertexId);
79    fn on_eval_complete(&self, vertex_id: VertexId, duration: std::time::Duration);
80    fn on_cycle_detected(&self, cycle: &[VertexId]);
81    fn on_dirty_propagation(&self, vertex_id: VertexId, affected_count: usize);
82}
83
84/// Default no-op observer
85impl CalcObserver for () {
86    fn on_eval_start(&self, _vertex_id: VertexId) {}
87    fn on_eval_complete(&self, _vertex_id: VertexId, _duration: std::time::Duration) {}
88    fn on_cycle_detected(&self, _cycle: &[VertexId]) {}
89    fn on_dirty_propagation(&self, _vertex_id: VertexId, _affected_count: usize) {}
90}
91
92/// Deterministic evaluation configuration.
93///
94/// When enabled, volatile sources (clock/timezone) are derived solely from this config.
95#[derive(Debug, Clone, PartialEq, Eq)]
96pub enum DeterministicMode {
97    /// Non-deterministic: uses the system clock.
98    Disabled {
99        /// Timezone used by volatile date/time builtins.
100        timezone: TimeZoneSpec,
101    },
102    /// Deterministic: uses a fixed timestamp in the provided timezone.
103    Enabled {
104        /// Fixed timestamp expressed in UTC.
105        timestamp_utc: DateTime<Utc>,
106        /// Timezone used to interpret `timestamp_utc` for NOW()/TODAY().
107        timezone: TimeZoneSpec,
108    },
109}
110
111impl Default for DeterministicMode {
112    fn default() -> Self {
113        Self::Disabled {
114            timezone: TimeZoneSpec::default(),
115        }
116    }
117}
118
119impl DeterministicMode {
120    pub fn is_enabled(&self) -> bool {
121        matches!(self, DeterministicMode::Enabled { .. })
122    }
123
124    pub fn timezone(&self) -> &TimeZoneSpec {
125        match self {
126            DeterministicMode::Disabled { timezone } => timezone,
127            DeterministicMode::Enabled { timezone, .. } => timezone,
128        }
129    }
130
131    pub fn validate(&self) -> Result<(), ExcelError> {
132        if let DeterministicMode::Enabled { timezone, .. } = self {
133            timezone
134                .validate_for_determinism()
135                .map_err(|msg| ExcelError::new(ExcelErrorKind::Value).with_message(msg))?;
136        }
137        Ok(())
138    }
139
140    pub fn build_clock(
141        &self,
142    ) -> Result<std::sync::Arc<dyn crate::timezone::ClockProvider>, ExcelError> {
143        self.validate()?;
144        Ok(match self {
145            DeterministicMode::Disabled { timezone } => {
146                std::sync::Arc::new(crate::timezone::SystemClock::new(timezone.clone()))
147            }
148            DeterministicMode::Enabled {
149                timestamp_utc,
150                timezone,
151            } => std::sync::Arc::new(crate::timezone::FixedClock::new(
152                *timestamp_utc,
153                timezone.clone(),
154            )),
155        })
156    }
157}
158
159/// Policy for handling malformed formulas encountered during workbook ingest.
160#[derive(Debug, Clone, Copy, PartialEq, Eq)]
161pub enum FormulaParsePolicy {
162    /// Reject malformed formulas and fail the load/evaluation path.
163    Strict,
164    /// Convert malformed formulas into literal error formulas (`#ERROR!`).
165    CoerceToError,
166    /// Keep the backend-provided cached value and drop the formula.
167    KeepCachedValue,
168    /// Treat the original formula text as a plain text literal.
169    AsText,
170}
171
172/// Captured diagnostic for a malformed formula encountered during ingest/graph-build.
173#[derive(Debug, Clone, PartialEq, Eq)]
174pub struct FormulaParseDiagnostic {
175    pub sheet: String,
176    pub row: u32,
177    pub col: u32,
178    pub formula: String,
179    pub message: String,
180    pub policy: FormulaParsePolicy,
181}
182
183/// Configuration for the evaluation engine
184#[derive(Debug, Clone)]
185pub struct EvalConfig {
186    pub enable_parallel: bool,
187    pub max_threads: Option<usize>,
188    // 🔮 Scalability Hook: Resource limits (future-proofing)
189    pub max_vertices: Option<usize>,
190    pub max_eval_time: Option<std::time::Duration>,
191    pub max_memory_mb: Option<usize>,
192
193    /// Default sheet name used when no sheet is provided.
194    pub default_sheet_name: String,
195
196    /// When false, resolve defined names case-insensitively (ASCII only).
197    ///
198    /// This matches Excel behavior for defined names.
199    pub case_sensitive_names: bool,
200
201    /// When false, resolve table names case-insensitively (ASCII only).
202    ///
203    /// This matches Excel behavior for native table (ListObject) names.
204    pub case_sensitive_tables: bool,
205
206    /// Stable workbook seed used for deterministic RNG composition
207    pub workbook_seed: u64,
208
209    /// Volatile granularity for RNG seeding and re-evaluation policy
210    pub volatile_level: VolatileLevel,
211
212    /// Deterministic evaluation configuration (clock/timezone injection).
213    pub deterministic_mode: DeterministicMode,
214
215    // Range handling configuration (Phase 5)
216    /// Ranges with size <= this limit are expanded into individual Cell dependencies
217    pub range_expansion_limit: usize,
218
219    /// Fallback maximum row bound for open-ended references (e.g. `A:A`, `A1:A`).
220    ///
221    /// This is only used when used-bounds cannot be determined.
222    pub max_open_ended_rows: u32,
223
224    /// Fallback maximum column bound for open-ended references (e.g. `1:1`, `A1:1`).
225    ///
226    /// This is only used when used-bounds cannot be determined.
227    pub max_open_ended_cols: u32,
228
229    /// Height of stripe blocks for dense range indexing
230    pub stripe_height: u32,
231    /// Width of stripe blocks for dense range indexing  
232    pub stripe_width: u32,
233    /// Enable block stripes for dense ranges (vs row/column stripes only)
234    pub enable_block_stripes: bool,
235
236    /// Spill behavior configuration (conflicts, bounds, buffering)
237    pub spill: SpillConfig,
238
239    /// Use dynamic topological ordering (Pearce-Kelly algorithm)
240    pub use_dynamic_topo: bool,
241    /// Maximum nodes to visit before falling back to full rebuild
242    pub pk_visit_budget: usize,
243    /// Operations between periodic rank compaction
244    pub pk_compaction_interval_ops: u64,
245    /// Maximum width for parallel evaluation layers
246    pub max_layer_width: Option<usize>,
247    /// If true, reject edge insertions that would create a cycle (skip adding that dependency).
248    /// If false, allow insertion and let scheduler handle cycles at evaluation time.
249    pub pk_reject_cycle_edges: bool,
250    /// Sheet index build strategy for bulk loads
251    pub sheet_index_mode: SheetIndexMode,
252
253    /// Warmup configuration for global pass planning (Phase 1)
254    pub warmup: tuning::WarmupConfig,
255
256    /// Enable Arrow-backed storage reads (Phase A)
257    pub arrow_storage_enabled: bool,
258    /// Enable delta overlay for Arrow sheets (Phase C)
259    pub delta_overlay_enabled: bool,
260
261    /// Mirror formula scalar results into Arrow overlay for Arrow-backed reads
262    /// This enables Arrow-only RangeView correctness without Hybrid fallback.
263    pub write_formula_overlay_enabled: bool,
264
265    /// Optional memory budget (in bytes) for formula/spill computed Arrow overlays.
266    ///
267    /// When set, the engine will compact computed overlays into base lanes when the
268    /// estimated usage exceeds this cap.
269    pub max_overlay_memory_bytes: Option<usize>,
270
271    /// Workbook date system: Excel 1900 (default) or 1904.
272    pub date_system: DateSystem,
273
274    /// Policy for malformed formulas encountered during ingest/graph-build.
275    pub formula_parse_policy: FormulaParsePolicy,
276
277    /// Defer dependency graph building: ingest values immediately but stage formulas
278    /// for on-demand graph construction during evaluation.
279    pub defer_graph_building: bool,
280
281    /// Enable virtual dependency convergence telemetry collection.
282    ///
283    /// When disabled, the engine avoids per-pass timing/edge-count bookkeeping.
284    pub enable_virtual_dep_telemetry: bool,
285}
286
287impl Default for EvalConfig {
288    fn default() -> Self {
289        Self {
290            enable_parallel: true,
291            max_threads: None,
292            max_vertices: None,
293            max_eval_time: None,
294            max_memory_mb: None,
295
296            default_sheet_name: format!("Sheet{}", 1),
297
298            // Excel compatibility: identifiers are case-insensitive by default.
299            case_sensitive_names: false,
300            case_sensitive_tables: false,
301
302            // Deterministic RNG seed (matches traits default)
303            workbook_seed: 0xF0F0_D0D0_AAAA_5555,
304
305            // Volatile model default
306            volatile_level: VolatileLevel::Always,
307
308            deterministic_mode: DeterministicMode::default(),
309
310            // Range handling defaults (Phase 5)
311            range_expansion_limit: 64,
312            // Open-ended reference defaults (Excel max dimensions).
313            // Lower these to cap `A:A` / `1:1` when used-bounds are unknown.
314            max_open_ended_rows: 1_048_576,
315            max_open_ended_cols: 16_384,
316            stripe_height: 256,
317            stripe_width: 256,
318            enable_block_stripes: false,
319            spill: SpillConfig::default(),
320
321            // Dynamic topology configuration
322            use_dynamic_topo: false, // Disabled by default for compatibility
323            pk_visit_budget: 50_000,
324            pk_compaction_interval_ops: 100_000,
325            max_layer_width: None,
326            pk_reject_cycle_edges: false,
327            sheet_index_mode: SheetIndexMode::Eager,
328            warmup: tuning::WarmupConfig::default(),
329            arrow_storage_enabled: true,
330            delta_overlay_enabled: true,
331            write_formula_overlay_enabled: true,
332            max_overlay_memory_bytes: None,
333            date_system: DateSystem::Excel1900,
334            formula_parse_policy: FormulaParsePolicy::Strict,
335            defer_graph_building: false,
336            enable_virtual_dep_telemetry: false,
337        }
338    }
339}
340
341impl EvalConfig {
342    #[inline]
343    pub fn with_range_expansion_limit(mut self, limit: usize) -> Self {
344        self.range_expansion_limit = limit;
345        self
346    }
347
348    #[inline]
349    pub fn with_parallel(mut self, enable: bool) -> Self {
350        self.enable_parallel = enable;
351        self
352    }
353
354    #[inline]
355    pub fn with_block_stripes(mut self, enable: bool) -> Self {
356        self.enable_block_stripes = enable;
357        self
358    }
359
360    #[inline]
361    pub fn with_case_sensitive_names(mut self, enable: bool) -> Self {
362        self.case_sensitive_names = enable;
363        self
364    }
365
366    #[inline]
367    pub fn with_case_sensitive_tables(mut self, enable: bool) -> Self {
368        self.case_sensitive_tables = enable;
369        self
370    }
371
372    #[inline]
373    pub fn with_arrow_storage(mut self, enable: bool) -> Self {
374        self.arrow_storage_enabled = enable;
375        self
376    }
377
378    #[inline]
379    pub fn with_delta_overlay(mut self, enable: bool) -> Self {
380        self.delta_overlay_enabled = enable;
381        self
382    }
383
384    #[inline]
385    pub fn with_formula_overlay(mut self, enable: bool) -> Self {
386        self.write_formula_overlay_enabled = enable;
387        self
388    }
389
390    #[inline]
391    pub fn with_date_system(mut self, system: DateSystem) -> Self {
392        self.date_system = system;
393        self
394    }
395
396    #[inline]
397    pub fn with_formula_parse_policy(mut self, policy: FormulaParsePolicy) -> Self {
398        self.formula_parse_policy = policy;
399        self
400    }
401
402    #[inline]
403    pub fn with_virtual_dep_telemetry(mut self, enable: bool) -> Self {
404        self.enable_virtual_dep_telemetry = enable;
405        self
406    }
407}
408
409#[derive(Debug, Clone, Copy, PartialEq, Eq)]
410pub enum SheetIndexMode {
411    /// Build full interval-tree based index during inserts (current behavior)
412    Eager,
413    /// Defer building any sheet index until first range query or explicit finalize
414    Lazy,
415    /// Use fast batch building (sorted arrays -> tree) when bulk loading, otherwise incremental
416    FastBatch,
417}
418
419pub use formualizer_common::DateSystem;
420
421/// Construct a new engine with the given resolver and configuration
422pub fn new_engine<R>(resolver: R, config: EvalConfig) -> Engine<R>
423where
424    R: EvaluationContext + 'static,
425{
426    Engine::new(resolver, config)
427}
428
429/// Configuration for spill behavior. Nested under EvalConfig to avoid bloating the top-level.
430#[derive(Debug, Clone, Copy, PartialEq, Eq)]
431pub struct SpillConfig {
432    /// What to do when target region overlaps non-empty cells or other spills.
433    pub conflict_policy: SpillConflictPolicy,
434    /// Tiebreaker used when policy allows preemption or multiple anchors race.
435    pub tiebreaker: SpillTiebreaker,
436    /// Bounds handling when result exceeds sheet capacity.
437    pub bounds_policy: SpillBoundsPolicy,
438    /// Buffering approach for spill writes.
439    pub buffer_mode: SpillBufferMode,
440    /// Optional memory budget for shadow buffering in bytes.
441    pub memory_budget_bytes: Option<u64>,
442    /// Cancellation behavior while streaming rows.
443    pub cancellation: SpillCancellationPolicy,
444    /// Visibility policy for staged writes.
445    pub visibility: SpillVisibility,
446
447    /// Hard cap on the number of cells a single spill may project.
448    ///
449    /// This prevents pathological vertex explosions from very large dynamic arrays.
450    pub max_spill_cells: u32,
451}
452
453impl Default for SpillConfig {
454    fn default() -> Self {
455        Self {
456            conflict_policy: SpillConflictPolicy::Error,
457            tiebreaker: SpillTiebreaker::FirstWins,
458            bounds_policy: SpillBoundsPolicy::Strict,
459            buffer_mode: SpillBufferMode::ShadowBuffer,
460            memory_budget_bytes: None,
461            cancellation: SpillCancellationPolicy::Cooperative,
462            visibility: SpillVisibility::OnCommit,
463            // Conservative: enough for common UI patterns, small enough to avoid graph blowups.
464            max_spill_cells: 10_000,
465        }
466    }
467}
468
469#[derive(Debug, Clone, Copy, PartialEq, Eq)]
470pub enum SpillConflictPolicy {
471    Error,
472    Preempt,
473}
474
475#[derive(Debug, Clone, Copy, PartialEq, Eq)]
476pub enum SpillTiebreaker {
477    FirstWins,
478    EvaluationEpochAsc,
479    AnchorAddressAsc,
480    FunctionPriorityThenAddress,
481}
482
483#[derive(Debug, Clone, Copy, PartialEq, Eq)]
484pub enum SpillBoundsPolicy {
485    Strict,
486    Truncate,
487}
488
489#[derive(Debug, Clone, Copy, PartialEq, Eq)]
490pub enum SpillBufferMode {
491    ShadowBuffer,
492    PersistenceJournal,
493}
494
495#[derive(Debug, Clone, Copy, PartialEq, Eq)]
496pub enum SpillCancellationPolicy {
497    Cooperative,
498    Strict,
499}
500
501#[derive(Debug, Clone, Copy, PartialEq, Eq)]
502pub enum SpillVisibility {
503    OnCommit,
504    StagedLayer,
505}
506
507/*
508 * Scenario: Tombstone Registry for Missing Sheets
509 * When a sheet is deleted, formulas pointing to it become "orphans."
510 * Instead of losing the connection, we store the formula's VertexId
511 * under the name of the missing sheet.
512 *
513 * Why it matters:
514 * This allows Sheet Addition to remain O(1) for the general case,
515 * while providing O(N_orphans) recovery for broken formulas.
516 */
517#[derive(Debug, Default)]
518pub struct TombstoneRegistry {
519    // Maps "SheetName" -> Vec<VertexId of formulas waiting for it>
520    pub pending_references: HashMap<String, Vec<VertexId>>,
521}
522
523impl TombstoneRegistry {
524    /// Record that a vertex is waiting for a specific sheet name to appear.
525    pub fn add_orphan(&mut self, sheet_name: String, vertex_id: VertexId) {
526        self.pending_references
527            .entry(sheet_name)
528            .or_default()
529            .push(vertex_id);
530    }
531
532    /// Retrieve and remove all vertices waiting for a specific sheet name.
533    pub fn take_orphans(&mut self, sheet_name: &str) -> Vec<VertexId> {
534        self.pending_references
535            .remove(sheet_name)
536            .unwrap_or_default()
537    }
538}