Skip to main content

formualizer_eval/engine/
mod.rs

1//! Formualizer Dependency Graph Engine
2//!
3//! Provides incremental formula evaluation with dependency tracking.
4
5pub mod arrow_ingest;
6pub mod effects;
7pub mod eval;
8pub mod eval_delta;
9pub mod graph;
10pub mod ingest;
11pub mod ingest_builder;
12pub mod journal;
13pub mod plan;
14pub mod range_view;
15pub mod scheduler;
16pub mod spill;
17pub mod vertex;
18
19// New SoA modules
20pub mod csr_edges;
21pub mod debug_views;
22pub mod delta_edges;
23pub mod interval_tree;
24pub mod named_range;
25pub mod sheet_index;
26pub mod sheet_registry;
27pub mod topo;
28pub mod vertex_store;
29
30// Phase 1: Arena modules
31pub mod arena;
32
33// Phase 1: Warmup configuration (kept for compatibility)
34pub mod tuning;
35
36#[cfg(test)]
37mod tests;
38
39pub use eval::{Engine, EngineAction, EvalResult, RecalcPlan};
40pub use eval_delta::{DeltaMode, EvalDelta};
41pub use journal::{ActionJournal, ArrowOp, ArrowUndoBatch, GraphUndoBatch};
42// Use SoA implementation
43pub use graph::snapshot::VertexSnapshot;
44pub use graph::{
45    ChangeEvent, DependencyGraph, DependencyRef, OperationSummary, StripeKey, StripeType,
46    block_index,
47};
48pub use scheduler::{Layer, Schedule, Scheduler};
49pub use vertex::{VertexId, VertexKind};
50
51pub use graph::editor::{
52    DataUpdateSummary, EditorError, MetaUpdateSummary, RangeSummary, ShiftSummary, TransactionId,
53    VertexDataPatch, VertexEditor, VertexMeta, VertexMetaPatch,
54};
55
56pub use graph::editor::change_log::{ChangeLog, ChangeLogger, NullChangeLogger};
57
58// CalcObserver is defined below
59
60use crate::timezone::TimeZoneSpec;
61use crate::traits::EvaluationContext;
62use crate::traits::VolatileLevel;
63use chrono::{DateTime, Utc};
64use formualizer_common::error::{ExcelError, ExcelErrorKind};
65
66impl<R: EvaluationContext> Engine<R> {
67    pub fn begin_bulk_ingest(&mut self) -> ingest_builder::BulkIngestBuilder<'_> {
68        ingest_builder::BulkIngestBuilder::new(&mut self.graph)
69    }
70}
71
72/// 🔮 Scalability Hook: Performance monitoring trait for calculation observability
73pub trait CalcObserver: Send + Sync {
74    fn on_eval_start(&self, vertex_id: VertexId);
75    fn on_eval_complete(&self, vertex_id: VertexId, duration: std::time::Duration);
76    fn on_cycle_detected(&self, cycle: &[VertexId]);
77    fn on_dirty_propagation(&self, vertex_id: VertexId, affected_count: usize);
78}
79
80/// Default no-op observer
81impl CalcObserver for () {
82    fn on_eval_start(&self, _vertex_id: VertexId) {}
83    fn on_eval_complete(&self, _vertex_id: VertexId, _duration: std::time::Duration) {}
84    fn on_cycle_detected(&self, _cycle: &[VertexId]) {}
85    fn on_dirty_propagation(&self, _vertex_id: VertexId, _affected_count: usize) {}
86}
87
88/// Deterministic evaluation configuration.
89///
90/// When enabled, volatile sources (clock/timezone) are derived solely from this config.
91#[derive(Debug, Clone, PartialEq, Eq)]
92pub enum DeterministicMode {
93    /// Non-deterministic: uses the system clock.
94    Disabled {
95        /// Timezone used by volatile date/time builtins.
96        timezone: TimeZoneSpec,
97    },
98    /// Deterministic: uses a fixed timestamp in the provided timezone.
99    Enabled {
100        /// Fixed timestamp expressed in UTC.
101        timestamp_utc: DateTime<Utc>,
102        /// Timezone used to interpret `timestamp_utc` for NOW()/TODAY().
103        timezone: TimeZoneSpec,
104    },
105}
106
107impl Default for DeterministicMode {
108    fn default() -> Self {
109        Self::Disabled {
110            timezone: TimeZoneSpec::default(),
111        }
112    }
113}
114
115impl DeterministicMode {
116    pub fn is_enabled(&self) -> bool {
117        matches!(self, DeterministicMode::Enabled { .. })
118    }
119
120    pub fn timezone(&self) -> &TimeZoneSpec {
121        match self {
122            DeterministicMode::Disabled { timezone } => timezone,
123            DeterministicMode::Enabled { timezone, .. } => timezone,
124        }
125    }
126
127    pub fn validate(&self) -> Result<(), ExcelError> {
128        if let DeterministicMode::Enabled { timezone, .. } = self {
129            timezone
130                .validate_for_determinism()
131                .map_err(|msg| ExcelError::new(ExcelErrorKind::Value).with_message(msg))?;
132        }
133        Ok(())
134    }
135
136    pub fn build_clock(
137        &self,
138    ) -> Result<std::sync::Arc<dyn crate::timezone::ClockProvider>, ExcelError> {
139        self.validate()?;
140        Ok(match self {
141            DeterministicMode::Disabled { timezone } => {
142                std::sync::Arc::new(crate::timezone::SystemClock::new(timezone.clone()))
143            }
144            DeterministicMode::Enabled {
145                timestamp_utc,
146                timezone,
147            } => std::sync::Arc::new(crate::timezone::FixedClock::new(
148                *timestamp_utc,
149                timezone.clone(),
150            )),
151        })
152    }
153}
154
155/// Configuration for the evaluation engine
156#[derive(Debug, Clone)]
157pub struct EvalConfig {
158    pub enable_parallel: bool,
159    pub max_threads: Option<usize>,
160    // 🔮 Scalability Hook: Resource limits (future-proofing)
161    pub max_vertices: Option<usize>,
162    pub max_eval_time: Option<std::time::Duration>,
163    pub max_memory_mb: Option<usize>,
164
165    /// Default sheet name used when no sheet is provided.
166    pub default_sheet_name: String,
167
168    /// When false, resolve defined names case-insensitively (ASCII only).
169    ///
170    /// This matches Excel behavior for defined names.
171    pub case_sensitive_names: bool,
172
173    /// When false, resolve table names case-insensitively (ASCII only).
174    ///
175    /// This matches Excel behavior for native table (ListObject) names.
176    pub case_sensitive_tables: bool,
177
178    /// Stable workbook seed used for deterministic RNG composition
179    pub workbook_seed: u64,
180
181    /// Volatile granularity for RNG seeding and re-evaluation policy
182    pub volatile_level: VolatileLevel,
183
184    /// Deterministic evaluation configuration (clock/timezone injection).
185    pub deterministic_mode: DeterministicMode,
186
187    // Range handling configuration (Phase 5)
188    /// Ranges with size <= this limit are expanded into individual Cell dependencies
189    pub range_expansion_limit: usize,
190
191    /// Fallback maximum row bound for open-ended references (e.g. `A:A`, `A1:A`).
192    ///
193    /// This is only used when used-bounds cannot be determined.
194    pub max_open_ended_rows: u32,
195
196    /// Fallback maximum column bound for open-ended references (e.g. `1:1`, `A1:1`).
197    ///
198    /// This is only used when used-bounds cannot be determined.
199    pub max_open_ended_cols: u32,
200
201    /// Height of stripe blocks for dense range indexing
202    pub stripe_height: u32,
203    /// Width of stripe blocks for dense range indexing  
204    pub stripe_width: u32,
205    /// Enable block stripes for dense ranges (vs row/column stripes only)
206    pub enable_block_stripes: bool,
207
208    /// Spill behavior configuration (conflicts, bounds, buffering)
209    pub spill: SpillConfig,
210
211    /// Use dynamic topological ordering (Pearce-Kelly algorithm)
212    pub use_dynamic_topo: bool,
213    /// Maximum nodes to visit before falling back to full rebuild
214    pub pk_visit_budget: usize,
215    /// Operations between periodic rank compaction
216    pub pk_compaction_interval_ops: u64,
217    /// Maximum width for parallel evaluation layers
218    pub max_layer_width: Option<usize>,
219    /// If true, reject edge insertions that would create a cycle (skip adding that dependency).
220    /// If false, allow insertion and let scheduler handle cycles at evaluation time.
221    pub pk_reject_cycle_edges: bool,
222    /// Sheet index build strategy for bulk loads
223    pub sheet_index_mode: SheetIndexMode,
224
225    /// Warmup configuration for global pass planning (Phase 1)
226    pub warmup: tuning::WarmupConfig,
227
228    /// Enable Arrow-backed storage reads (Phase A)
229    pub arrow_storage_enabled: bool,
230    /// Enable delta overlay for Arrow sheets (Phase C)
231    pub delta_overlay_enabled: bool,
232
233    /// Mirror formula scalar results into Arrow overlay for Arrow-backed reads
234    /// This enables Arrow-only RangeView correctness without Hybrid fallback.
235    pub write_formula_overlay_enabled: bool,
236
237    /// Optional memory budget (in bytes) for formula/spill computed Arrow overlays.
238    ///
239    /// When set, the engine will compact computed overlays into base lanes when the
240    /// estimated usage exceeds this cap.
241    pub max_overlay_memory_bytes: Option<usize>,
242
243    /// Workbook date system: Excel 1900 (default) or 1904.
244    pub date_system: DateSystem,
245
246    /// Defer dependency graph building: ingest values immediately but stage formulas
247    /// for on-demand graph construction during evaluation.
248    pub defer_graph_building: bool,
249}
250
251impl Default for EvalConfig {
252    fn default() -> Self {
253        Self {
254            enable_parallel: true,
255            max_threads: None,
256            max_vertices: None,
257            max_eval_time: None,
258            max_memory_mb: None,
259
260            default_sheet_name: format!("Sheet{}", 1),
261
262            // Excel compatibility: identifiers are case-insensitive by default.
263            case_sensitive_names: false,
264            case_sensitive_tables: false,
265
266            // Deterministic RNG seed (matches traits default)
267            workbook_seed: 0xF0F0_D0D0_AAAA_5555,
268
269            // Volatile model default
270            volatile_level: VolatileLevel::Always,
271
272            deterministic_mode: DeterministicMode::default(),
273
274            // Range handling defaults (Phase 5)
275            range_expansion_limit: 64,
276            // Open-ended reference defaults (Excel max dimensions).
277            // Lower these to cap `A:A` / `1:1` when used-bounds are unknown.
278            max_open_ended_rows: 1_048_576,
279            max_open_ended_cols: 16_384,
280            stripe_height: 256,
281            stripe_width: 256,
282            enable_block_stripes: false,
283            spill: SpillConfig::default(),
284
285            // Dynamic topology configuration
286            use_dynamic_topo: false, // Disabled by default for compatibility
287            pk_visit_budget: 50_000,
288            pk_compaction_interval_ops: 100_000,
289            max_layer_width: None,
290            pk_reject_cycle_edges: false,
291            sheet_index_mode: SheetIndexMode::Eager,
292            warmup: tuning::WarmupConfig::default(),
293            arrow_storage_enabled: true,
294            delta_overlay_enabled: true,
295            write_formula_overlay_enabled: true,
296            max_overlay_memory_bytes: None,
297            date_system: DateSystem::Excel1900,
298            defer_graph_building: false,
299        }
300    }
301}
302
303impl EvalConfig {
304    #[inline]
305    pub fn with_range_expansion_limit(mut self, limit: usize) -> Self {
306        self.range_expansion_limit = limit;
307        self
308    }
309
310    #[inline]
311    pub fn with_parallel(mut self, enable: bool) -> Self {
312        self.enable_parallel = enable;
313        self
314    }
315
316    #[inline]
317    pub fn with_block_stripes(mut self, enable: bool) -> Self {
318        self.enable_block_stripes = enable;
319        self
320    }
321
322    #[inline]
323    pub fn with_case_sensitive_names(mut self, enable: bool) -> Self {
324        self.case_sensitive_names = enable;
325        self
326    }
327
328    #[inline]
329    pub fn with_case_sensitive_tables(mut self, enable: bool) -> Self {
330        self.case_sensitive_tables = enable;
331        self
332    }
333
334    #[inline]
335    pub fn with_arrow_storage(mut self, enable: bool) -> Self {
336        self.arrow_storage_enabled = enable;
337        self
338    }
339
340    #[inline]
341    pub fn with_delta_overlay(mut self, enable: bool) -> Self {
342        self.delta_overlay_enabled = enable;
343        self
344    }
345
346    #[inline]
347    pub fn with_formula_overlay(mut self, enable: bool) -> Self {
348        self.write_formula_overlay_enabled = enable;
349        self
350    }
351
352    #[inline]
353    pub fn with_date_system(mut self, system: DateSystem) -> Self {
354        self.date_system = system;
355        self
356    }
357}
358
359#[derive(Debug, Clone, Copy, PartialEq, Eq)]
360pub enum SheetIndexMode {
361    /// Build full interval-tree based index during inserts (current behavior)
362    Eager,
363    /// Defer building any sheet index until first range query or explicit finalize
364    Lazy,
365    /// Use fast batch building (sorted arrays -> tree) when bulk loading, otherwise incremental
366    FastBatch,
367}
368
369pub use formualizer_common::DateSystem;
370
371/// Construct a new engine with the given resolver and configuration
372pub fn new_engine<R>(resolver: R, config: EvalConfig) -> Engine<R>
373where
374    R: EvaluationContext + 'static,
375{
376    Engine::new(resolver, config)
377}
378
379/// Configuration for spill behavior. Nested under EvalConfig to avoid bloating the top-level.
380#[derive(Debug, Clone, Copy, PartialEq, Eq)]
381pub struct SpillConfig {
382    /// What to do when target region overlaps non-empty cells or other spills.
383    pub conflict_policy: SpillConflictPolicy,
384    /// Tiebreaker used when policy allows preemption or multiple anchors race.
385    pub tiebreaker: SpillTiebreaker,
386    /// Bounds handling when result exceeds sheet capacity.
387    pub bounds_policy: SpillBoundsPolicy,
388    /// Buffering approach for spill writes.
389    pub buffer_mode: SpillBufferMode,
390    /// Optional memory budget for shadow buffering in bytes.
391    pub memory_budget_bytes: Option<u64>,
392    /// Cancellation behavior while streaming rows.
393    pub cancellation: SpillCancellationPolicy,
394    /// Visibility policy for staged writes.
395    pub visibility: SpillVisibility,
396
397    /// Hard cap on the number of cells a single spill may project.
398    ///
399    /// This prevents pathological vertex explosions from very large dynamic arrays.
400    pub max_spill_cells: u32,
401}
402
403impl Default for SpillConfig {
404    fn default() -> Self {
405        Self {
406            conflict_policy: SpillConflictPolicy::Error,
407            tiebreaker: SpillTiebreaker::FirstWins,
408            bounds_policy: SpillBoundsPolicy::Strict,
409            buffer_mode: SpillBufferMode::ShadowBuffer,
410            memory_budget_bytes: None,
411            cancellation: SpillCancellationPolicy::Cooperative,
412            visibility: SpillVisibility::OnCommit,
413            // Conservative: enough for common UI patterns, small enough to avoid graph blowups.
414            max_spill_cells: 10_000,
415        }
416    }
417}
418
419#[derive(Debug, Clone, Copy, PartialEq, Eq)]
420pub enum SpillConflictPolicy {
421    Error,
422    Preempt,
423}
424
425#[derive(Debug, Clone, Copy, PartialEq, Eq)]
426pub enum SpillTiebreaker {
427    FirstWins,
428    EvaluationEpochAsc,
429    AnchorAddressAsc,
430    FunctionPriorityThenAddress,
431}
432
433#[derive(Debug, Clone, Copy, PartialEq, Eq)]
434pub enum SpillBoundsPolicy {
435    Strict,
436    Truncate,
437}
438
439#[derive(Debug, Clone, Copy, PartialEq, Eq)]
440pub enum SpillBufferMode {
441    ShadowBuffer,
442    PersistenceJournal,
443}
444
445#[derive(Debug, Clone, Copy, PartialEq, Eq)]
446pub enum SpillCancellationPolicy {
447    Cooperative,
448    Strict,
449}
450
451#[derive(Debug, Clone, Copy, PartialEq, Eq)]
452pub enum SpillVisibility {
453    OnCommit,
454    StagedLayer,
455}