Skip to main content

formualizer_eval/engine/
mod.rs

1//! Formualizer Dependency Graph Engine
2//!
3//! Provides incremental formula evaluation with dependency tracking.
4
5pub mod arrow_ingest;
6pub mod effects;
7pub mod eval;
8pub mod eval_delta;
9pub mod graph;
10pub mod ingest;
11pub mod ingest_builder;
12pub mod journal;
13pub mod plan;
14pub mod range_view;
15pub mod scheduler;
16pub mod spill;
17pub mod vertex;
18pub mod virtual_deps;
19
20// New SoA modules
21pub mod csr_edges;
22pub mod debug_views;
23pub mod delta_edges;
24pub mod interval_tree;
25pub mod named_range;
26pub mod sheet_index;
27pub mod sheet_registry;
28pub mod topo;
29pub mod vertex_store;
30
31// Phase 1: Arena modules
32pub mod arena;
33
34// Phase 1: Warmup configuration (kept for compatibility)
35pub mod tuning;
36
37#[cfg(test)]
38mod tests;
39
40pub use eval::{Engine, EngineAction, EvalResult, RecalcPlan, VirtualDepTelemetry};
41pub use eval_delta::{DeltaMode, EvalDelta};
42pub use journal::{ActionJournal, ArrowOp, ArrowUndoBatch, GraphUndoBatch};
43// Use SoA implementation
44pub use graph::snapshot::VertexSnapshot;
45pub use graph::{
46    ChangeEvent, DependencyGraph, DependencyRef, OperationSummary, StripeKey, StripeType,
47    block_index,
48};
49pub use scheduler::{Layer, Schedule, Scheduler};
50pub use vertex::{VertexId, VertexKind};
51
52pub use graph::editor::{
53    DataUpdateSummary, EditorError, MetaUpdateSummary, RangeSummary, ShiftSummary, TransactionId,
54    VertexDataPatch, VertexEditor, VertexMeta, VertexMetaPatch,
55};
56
57pub use graph::editor::change_log::{ChangeLog, ChangeLogger, NullChangeLogger};
58
59// CalcObserver is defined below
60
61use crate::timezone::TimeZoneSpec;
62use crate::traits::EvaluationContext;
63use crate::traits::VolatileLevel;
64use chrono::{DateTime, Utc};
65use formualizer_common::error::{ExcelError, ExcelErrorKind};
66
67impl<R: EvaluationContext> Engine<R> {
68    pub fn begin_bulk_ingest(&mut self) -> ingest_builder::BulkIngestBuilder<'_> {
69        ingest_builder::BulkIngestBuilder::new(&mut self.graph)
70    }
71}
72
73/// 🔮 Scalability Hook: Performance monitoring trait for calculation observability
74pub trait CalcObserver: Send + Sync {
75    fn on_eval_start(&self, vertex_id: VertexId);
76    fn on_eval_complete(&self, vertex_id: VertexId, duration: std::time::Duration);
77    fn on_cycle_detected(&self, cycle: &[VertexId]);
78    fn on_dirty_propagation(&self, vertex_id: VertexId, affected_count: usize);
79}
80
81/// Default no-op observer
82impl CalcObserver for () {
83    fn on_eval_start(&self, _vertex_id: VertexId) {}
84    fn on_eval_complete(&self, _vertex_id: VertexId, _duration: std::time::Duration) {}
85    fn on_cycle_detected(&self, _cycle: &[VertexId]) {}
86    fn on_dirty_propagation(&self, _vertex_id: VertexId, _affected_count: usize) {}
87}
88
89/// Deterministic evaluation configuration.
90///
91/// When enabled, volatile sources (clock/timezone) are derived solely from this config.
92#[derive(Debug, Clone, PartialEq, Eq)]
93pub enum DeterministicMode {
94    /// Non-deterministic: uses the system clock.
95    Disabled {
96        /// Timezone used by volatile date/time builtins.
97        timezone: TimeZoneSpec,
98    },
99    /// Deterministic: uses a fixed timestamp in the provided timezone.
100    Enabled {
101        /// Fixed timestamp expressed in UTC.
102        timestamp_utc: DateTime<Utc>,
103        /// Timezone used to interpret `timestamp_utc` for NOW()/TODAY().
104        timezone: TimeZoneSpec,
105    },
106}
107
108impl Default for DeterministicMode {
109    fn default() -> Self {
110        Self::Disabled {
111            timezone: TimeZoneSpec::default(),
112        }
113    }
114}
115
116impl DeterministicMode {
117    pub fn is_enabled(&self) -> bool {
118        matches!(self, DeterministicMode::Enabled { .. })
119    }
120
121    pub fn timezone(&self) -> &TimeZoneSpec {
122        match self {
123            DeterministicMode::Disabled { timezone } => timezone,
124            DeterministicMode::Enabled { timezone, .. } => timezone,
125        }
126    }
127
128    pub fn validate(&self) -> Result<(), ExcelError> {
129        if let DeterministicMode::Enabled { timezone, .. } = self {
130            timezone
131                .validate_for_determinism()
132                .map_err(|msg| ExcelError::new(ExcelErrorKind::Value).with_message(msg))?;
133        }
134        Ok(())
135    }
136
137    pub fn build_clock(
138        &self,
139    ) -> Result<std::sync::Arc<dyn crate::timezone::ClockProvider>, ExcelError> {
140        self.validate()?;
141        Ok(match self {
142            DeterministicMode::Disabled { timezone } => {
143                std::sync::Arc::new(crate::timezone::SystemClock::new(timezone.clone()))
144            }
145            DeterministicMode::Enabled {
146                timestamp_utc,
147                timezone,
148            } => std::sync::Arc::new(crate::timezone::FixedClock::new(
149                *timestamp_utc,
150                timezone.clone(),
151            )),
152        })
153    }
154}
155
156/// Policy for handling malformed formulas encountered during workbook ingest.
157#[derive(Debug, Clone, Copy, PartialEq, Eq)]
158pub enum FormulaParsePolicy {
159    /// Reject malformed formulas and fail the load/evaluation path.
160    Strict,
161    /// Convert malformed formulas into literal error formulas (`#ERROR!`).
162    CoerceToError,
163    /// Keep the backend-provided cached value and drop the formula.
164    KeepCachedValue,
165    /// Treat the original formula text as a plain text literal.
166    AsText,
167}
168
169/// Captured diagnostic for a malformed formula encountered during ingest/graph-build.
170#[derive(Debug, Clone, PartialEq, Eq)]
171pub struct FormulaParseDiagnostic {
172    pub sheet: String,
173    pub row: u32,
174    pub col: u32,
175    pub formula: String,
176    pub message: String,
177    pub policy: FormulaParsePolicy,
178}
179
180/// Configuration for the evaluation engine
181#[derive(Debug, Clone)]
182pub struct EvalConfig {
183    pub enable_parallel: bool,
184    pub max_threads: Option<usize>,
185    // 🔮 Scalability Hook: Resource limits (future-proofing)
186    pub max_vertices: Option<usize>,
187    pub max_eval_time: Option<std::time::Duration>,
188    pub max_memory_mb: Option<usize>,
189
190    /// Default sheet name used when no sheet is provided.
191    pub default_sheet_name: String,
192
193    /// When false, resolve defined names case-insensitively (ASCII only).
194    ///
195    /// This matches Excel behavior for defined names.
196    pub case_sensitive_names: bool,
197
198    /// When false, resolve table names case-insensitively (ASCII only).
199    ///
200    /// This matches Excel behavior for native table (ListObject) names.
201    pub case_sensitive_tables: bool,
202
203    /// Stable workbook seed used for deterministic RNG composition
204    pub workbook_seed: u64,
205
206    /// Volatile granularity for RNG seeding and re-evaluation policy
207    pub volatile_level: VolatileLevel,
208
209    /// Deterministic evaluation configuration (clock/timezone injection).
210    pub deterministic_mode: DeterministicMode,
211
212    // Range handling configuration (Phase 5)
213    /// Ranges with size <= this limit are expanded into individual Cell dependencies
214    pub range_expansion_limit: usize,
215
216    /// Fallback maximum row bound for open-ended references (e.g. `A:A`, `A1:A`).
217    ///
218    /// This is only used when used-bounds cannot be determined.
219    pub max_open_ended_rows: u32,
220
221    /// Fallback maximum column bound for open-ended references (e.g. `1:1`, `A1:1`).
222    ///
223    /// This is only used when used-bounds cannot be determined.
224    pub max_open_ended_cols: u32,
225
226    /// Height of stripe blocks for dense range indexing
227    pub stripe_height: u32,
228    /// Width of stripe blocks for dense range indexing  
229    pub stripe_width: u32,
230    /// Enable block stripes for dense ranges (vs row/column stripes only)
231    pub enable_block_stripes: bool,
232
233    /// Spill behavior configuration (conflicts, bounds, buffering)
234    pub spill: SpillConfig,
235
236    /// Use dynamic topological ordering (Pearce-Kelly algorithm)
237    pub use_dynamic_topo: bool,
238    /// Maximum nodes to visit before falling back to full rebuild
239    pub pk_visit_budget: usize,
240    /// Operations between periodic rank compaction
241    pub pk_compaction_interval_ops: u64,
242    /// Maximum width for parallel evaluation layers
243    pub max_layer_width: Option<usize>,
244    /// If true, reject edge insertions that would create a cycle (skip adding that dependency).
245    /// If false, allow insertion and let scheduler handle cycles at evaluation time.
246    pub pk_reject_cycle_edges: bool,
247    /// Sheet index build strategy for bulk loads
248    pub sheet_index_mode: SheetIndexMode,
249
250    /// Warmup configuration for global pass planning (Phase 1)
251    pub warmup: tuning::WarmupConfig,
252
253    /// Enable Arrow-backed storage reads (Phase A)
254    pub arrow_storage_enabled: bool,
255    /// Enable delta overlay for Arrow sheets (Phase C)
256    pub delta_overlay_enabled: bool,
257
258    /// Mirror formula scalar results into Arrow overlay for Arrow-backed reads
259    /// This enables Arrow-only RangeView correctness without Hybrid fallback.
260    pub write_formula_overlay_enabled: bool,
261
262    /// Optional memory budget (in bytes) for formula/spill computed Arrow overlays.
263    ///
264    /// When set, the engine will compact computed overlays into base lanes when the
265    /// estimated usage exceeds this cap.
266    pub max_overlay_memory_bytes: Option<usize>,
267
268    /// Workbook date system: Excel 1900 (default) or 1904.
269    pub date_system: DateSystem,
270
271    /// Policy for malformed formulas encountered during ingest/graph-build.
272    pub formula_parse_policy: FormulaParsePolicy,
273
274    /// Defer dependency graph building: ingest values immediately but stage formulas
275    /// for on-demand graph construction during evaluation.
276    pub defer_graph_building: bool,
277
278    /// Enable virtual dependency convergence telemetry collection.
279    ///
280    /// When disabled, the engine avoids per-pass timing/edge-count bookkeeping.
281    pub enable_virtual_dep_telemetry: bool,
282}
283
284impl Default for EvalConfig {
285    fn default() -> Self {
286        Self {
287            enable_parallel: true,
288            max_threads: None,
289            max_vertices: None,
290            max_eval_time: None,
291            max_memory_mb: None,
292
293            default_sheet_name: format!("Sheet{}", 1),
294
295            // Excel compatibility: identifiers are case-insensitive by default.
296            case_sensitive_names: false,
297            case_sensitive_tables: false,
298
299            // Deterministic RNG seed (matches traits default)
300            workbook_seed: 0xF0F0_D0D0_AAAA_5555,
301
302            // Volatile model default
303            volatile_level: VolatileLevel::Always,
304
305            deterministic_mode: DeterministicMode::default(),
306
307            // Range handling defaults (Phase 5)
308            range_expansion_limit: 64,
309            // Open-ended reference defaults (Excel max dimensions).
310            // Lower these to cap `A:A` / `1:1` when used-bounds are unknown.
311            max_open_ended_rows: 1_048_576,
312            max_open_ended_cols: 16_384,
313            stripe_height: 256,
314            stripe_width: 256,
315            enable_block_stripes: false,
316            spill: SpillConfig::default(),
317
318            // Dynamic topology configuration
319            use_dynamic_topo: false, // Disabled by default for compatibility
320            pk_visit_budget: 50_000,
321            pk_compaction_interval_ops: 100_000,
322            max_layer_width: None,
323            pk_reject_cycle_edges: false,
324            sheet_index_mode: SheetIndexMode::Eager,
325            warmup: tuning::WarmupConfig::default(),
326            arrow_storage_enabled: true,
327            delta_overlay_enabled: true,
328            write_formula_overlay_enabled: true,
329            max_overlay_memory_bytes: None,
330            date_system: DateSystem::Excel1900,
331            formula_parse_policy: FormulaParsePolicy::Strict,
332            defer_graph_building: false,
333            enable_virtual_dep_telemetry: false,
334        }
335    }
336}
337
338impl EvalConfig {
339    #[inline]
340    pub fn with_range_expansion_limit(mut self, limit: usize) -> Self {
341        self.range_expansion_limit = limit;
342        self
343    }
344
345    #[inline]
346    pub fn with_parallel(mut self, enable: bool) -> Self {
347        self.enable_parallel = enable;
348        self
349    }
350
351    #[inline]
352    pub fn with_block_stripes(mut self, enable: bool) -> Self {
353        self.enable_block_stripes = enable;
354        self
355    }
356
357    #[inline]
358    pub fn with_case_sensitive_names(mut self, enable: bool) -> Self {
359        self.case_sensitive_names = enable;
360        self
361    }
362
363    #[inline]
364    pub fn with_case_sensitive_tables(mut self, enable: bool) -> Self {
365        self.case_sensitive_tables = enable;
366        self
367    }
368
369    #[inline]
370    pub fn with_arrow_storage(mut self, enable: bool) -> Self {
371        self.arrow_storage_enabled = enable;
372        self
373    }
374
375    #[inline]
376    pub fn with_delta_overlay(mut self, enable: bool) -> Self {
377        self.delta_overlay_enabled = enable;
378        self
379    }
380
381    #[inline]
382    pub fn with_formula_overlay(mut self, enable: bool) -> Self {
383        self.write_formula_overlay_enabled = enable;
384        self
385    }
386
387    #[inline]
388    pub fn with_date_system(mut self, system: DateSystem) -> Self {
389        self.date_system = system;
390        self
391    }
392
393    #[inline]
394    pub fn with_formula_parse_policy(mut self, policy: FormulaParsePolicy) -> Self {
395        self.formula_parse_policy = policy;
396        self
397    }
398
399    #[inline]
400    pub fn with_virtual_dep_telemetry(mut self, enable: bool) -> Self {
401        self.enable_virtual_dep_telemetry = enable;
402        self
403    }
404}
405
406#[derive(Debug, Clone, Copy, PartialEq, Eq)]
407pub enum SheetIndexMode {
408    /// Build full interval-tree based index during inserts (current behavior)
409    Eager,
410    /// Defer building any sheet index until first range query or explicit finalize
411    Lazy,
412    /// Use fast batch building (sorted arrays -> tree) when bulk loading, otherwise incremental
413    FastBatch,
414}
415
416pub use formualizer_common::DateSystem;
417
418/// Construct a new engine with the given resolver and configuration
419pub fn new_engine<R>(resolver: R, config: EvalConfig) -> Engine<R>
420where
421    R: EvaluationContext + 'static,
422{
423    Engine::new(resolver, config)
424}
425
426/// Configuration for spill behavior. Nested under EvalConfig to avoid bloating the top-level.
427#[derive(Debug, Clone, Copy, PartialEq, Eq)]
428pub struct SpillConfig {
429    /// What to do when target region overlaps non-empty cells or other spills.
430    pub conflict_policy: SpillConflictPolicy,
431    /// Tiebreaker used when policy allows preemption or multiple anchors race.
432    pub tiebreaker: SpillTiebreaker,
433    /// Bounds handling when result exceeds sheet capacity.
434    pub bounds_policy: SpillBoundsPolicy,
435    /// Buffering approach for spill writes.
436    pub buffer_mode: SpillBufferMode,
437    /// Optional memory budget for shadow buffering in bytes.
438    pub memory_budget_bytes: Option<u64>,
439    /// Cancellation behavior while streaming rows.
440    pub cancellation: SpillCancellationPolicy,
441    /// Visibility policy for staged writes.
442    pub visibility: SpillVisibility,
443
444    /// Hard cap on the number of cells a single spill may project.
445    ///
446    /// This prevents pathological vertex explosions from very large dynamic arrays.
447    pub max_spill_cells: u32,
448}
449
450impl Default for SpillConfig {
451    fn default() -> Self {
452        Self {
453            conflict_policy: SpillConflictPolicy::Error,
454            tiebreaker: SpillTiebreaker::FirstWins,
455            bounds_policy: SpillBoundsPolicy::Strict,
456            buffer_mode: SpillBufferMode::ShadowBuffer,
457            memory_budget_bytes: None,
458            cancellation: SpillCancellationPolicy::Cooperative,
459            visibility: SpillVisibility::OnCommit,
460            // Conservative: enough for common UI patterns, small enough to avoid graph blowups.
461            max_spill_cells: 10_000,
462        }
463    }
464}
465
466#[derive(Debug, Clone, Copy, PartialEq, Eq)]
467pub enum SpillConflictPolicy {
468    Error,
469    Preempt,
470}
471
472#[derive(Debug, Clone, Copy, PartialEq, Eq)]
473pub enum SpillTiebreaker {
474    FirstWins,
475    EvaluationEpochAsc,
476    AnchorAddressAsc,
477    FunctionPriorityThenAddress,
478}
479
480#[derive(Debug, Clone, Copy, PartialEq, Eq)]
481pub enum SpillBoundsPolicy {
482    Strict,
483    Truncate,
484}
485
486#[derive(Debug, Clone, Copy, PartialEq, Eq)]
487pub enum SpillBufferMode {
488    ShadowBuffer,
489    PersistenceJournal,
490}
491
492#[derive(Debug, Clone, Copy, PartialEq, Eq)]
493pub enum SpillCancellationPolicy {
494    Cooperative,
495    Strict,
496}
497
498#[derive(Debug, Clone, Copy, PartialEq, Eq)]
499pub enum SpillVisibility {
500    OnCommit,
501    StagedLayer,
502}