Skip to main content

formualizer_eval/engine/
mod.rs

1//! Formualizer Dependency Graph Engine
2//!
3//! Provides incremental formula evaluation with dependency tracking.
4
5pub mod arrow_ingest;
6pub mod eval;
7pub mod eval_delta;
8pub mod graph;
9pub mod ingest;
10pub mod ingest_builder;
11pub mod plan;
12pub mod range_view;
13pub mod scheduler;
14pub mod spill;
15pub mod vertex;
16
17// New SoA modules
18pub mod csr_edges;
19pub mod debug_views;
20pub mod delta_edges;
21pub mod interval_tree;
22pub mod named_range;
23pub mod sheet_index;
24pub mod sheet_registry;
25pub mod topo;
26pub mod vertex_store;
27
28// Phase 1: Arena modules
29pub mod arena;
30
31// Phase 1: Warmup configuration (kept for compatibility)
32pub mod tuning;
33
34#[cfg(test)]
35mod tests;
36
37pub use eval::{Engine, EvalResult, RecalcPlan};
38pub use eval_delta::{DeltaMode, EvalDelta};
39// Use SoA implementation
40pub use graph::snapshot::VertexSnapshot;
41pub use graph::{
42    ChangeEvent, DependencyGraph, DependencyRef, OperationSummary, StripeKey, StripeType,
43    block_index,
44};
45pub use scheduler::{Layer, Schedule, Scheduler};
46pub use vertex::{VertexId, VertexKind};
47
48pub use graph::editor::{
49    DataUpdateSummary, EditorError, MetaUpdateSummary, RangeSummary, ShiftSummary, TransactionId,
50    VertexDataPatch, VertexEditor, VertexMeta, VertexMetaPatch,
51};
52
53pub use graph::editor::change_log::{ChangeLog, ChangeLogger, NullChangeLogger};
54
55// CalcObserver is defined below
56
57use crate::traits::EvaluationContext;
58use crate::traits::VolatileLevel;
59
60impl<R: EvaluationContext> Engine<R> {
61    pub fn begin_bulk_ingest(&mut self) -> ingest_builder::BulkIngestBuilder<'_> {
62        ingest_builder::BulkIngestBuilder::new(&mut self.graph)
63    }
64}
65
66/// 🔮 Scalability Hook: Performance monitoring trait for calculation observability
67pub trait CalcObserver: Send + Sync {
68    fn on_eval_start(&self, vertex_id: VertexId);
69    fn on_eval_complete(&self, vertex_id: VertexId, duration: std::time::Duration);
70    fn on_cycle_detected(&self, cycle: &[VertexId]);
71    fn on_dirty_propagation(&self, vertex_id: VertexId, affected_count: usize);
72}
73
74/// Default no-op observer
75impl CalcObserver for () {
76    fn on_eval_start(&self, _vertex_id: VertexId) {}
77    fn on_eval_complete(&self, _vertex_id: VertexId, _duration: std::time::Duration) {}
78    fn on_cycle_detected(&self, _cycle: &[VertexId]) {}
79    fn on_dirty_propagation(&self, _vertex_id: VertexId, _affected_count: usize) {}
80}
81
82/// Configuration for the evaluation engine
83#[derive(Debug, Clone)]
84pub struct EvalConfig {
85    pub enable_parallel: bool,
86    pub max_threads: Option<usize>,
87    // 🔮 Scalability Hook: Resource limits (future-proofing)
88    pub max_vertices: Option<usize>,
89    pub max_eval_time: Option<std::time::Duration>,
90    pub max_memory_mb: Option<usize>,
91
92    /// Default sheet name used when no sheet is provided.
93    pub default_sheet_name: String,
94
95    /// Stable workbook seed used for deterministic RNG composition
96    pub workbook_seed: u64,
97
98    /// Volatile granularity for RNG seeding and re-evaluation policy
99    pub volatile_level: VolatileLevel,
100
101    // Range handling configuration (Phase 5)
102    /// Ranges with size <= this limit are expanded into individual Cell dependencies
103    pub range_expansion_limit: usize,
104
105    /// Fallback maximum row bound for open-ended references (e.g. `A:A`, `A1:A`).
106    ///
107    /// This is only used when used-bounds cannot be determined.
108    pub max_open_ended_rows: u32,
109
110    /// Fallback maximum column bound for open-ended references (e.g. `1:1`, `A1:1`).
111    ///
112    /// This is only used when used-bounds cannot be determined.
113    pub max_open_ended_cols: u32,
114
115    /// Height of stripe blocks for dense range indexing
116    pub stripe_height: u32,
117    /// Width of stripe blocks for dense range indexing  
118    pub stripe_width: u32,
119    /// Enable block stripes for dense ranges (vs row/column stripes only)
120    pub enable_block_stripes: bool,
121
122    /// Spill behavior configuration (conflicts, bounds, buffering)
123    pub spill: SpillConfig,
124
125    /// Use dynamic topological ordering (Pearce-Kelly algorithm)
126    pub use_dynamic_topo: bool,
127    /// Maximum nodes to visit before falling back to full rebuild
128    pub pk_visit_budget: usize,
129    /// Operations between periodic rank compaction
130    pub pk_compaction_interval_ops: u64,
131    /// Maximum width for parallel evaluation layers
132    pub max_layer_width: Option<usize>,
133    /// If true, reject edge insertions that would create a cycle (skip adding that dependency).
134    /// If false, allow insertion and let scheduler handle cycles at evaluation time.
135    pub pk_reject_cycle_edges: bool,
136    /// Sheet index build strategy for bulk loads
137    pub sheet_index_mode: SheetIndexMode,
138
139    /// Warmup configuration for global pass planning (Phase 1)
140    pub warmup: tuning::WarmupConfig,
141
142    /// Enable Arrow-backed storage reads (Phase A)
143    pub arrow_storage_enabled: bool,
144    /// Enable delta overlay for Arrow sheets (Phase C)
145    pub delta_overlay_enabled: bool,
146
147    /// Mirror formula scalar results into Arrow overlay for Arrow-backed reads
148    /// This enables Arrow-only RangeView correctness without Hybrid fallback.
149    pub write_formula_overlay_enabled: bool,
150
151    /// Workbook date system: Excel 1900 (default) or 1904.
152    pub date_system: DateSystem,
153
154    /// Defer dependency graph building: ingest values immediately but stage formulas
155    /// for on-demand graph construction during evaluation.
156    pub defer_graph_building: bool,
157}
158
159impl Default for EvalConfig {
160    fn default() -> Self {
161        Self {
162            enable_parallel: true,
163            max_threads: None,
164            max_vertices: None,
165            max_eval_time: None,
166            max_memory_mb: None,
167
168            default_sheet_name: format!("Sheet{}", 1),
169
170            // Deterministic RNG seed (matches traits default)
171            workbook_seed: 0xF0F0_D0D0_AAAA_5555,
172
173            // Volatile model default
174            volatile_level: VolatileLevel::Always,
175
176            // Range handling defaults (Phase 5)
177            range_expansion_limit: 64,
178            // Open-ended reference defaults (Excel max dimensions).
179            // Lower these to cap `A:A` / `1:1` when used-bounds are unknown.
180            max_open_ended_rows: 1_048_576,
181            max_open_ended_cols: 16_384,
182            stripe_height: 256,
183            stripe_width: 256,
184            enable_block_stripes: false,
185            spill: SpillConfig::default(),
186
187            // Dynamic topology configuration
188            use_dynamic_topo: false, // Disabled by default for compatibility
189            pk_visit_budget: 50_000,
190            pk_compaction_interval_ops: 100_000,
191            max_layer_width: None,
192            pk_reject_cycle_edges: false,
193            sheet_index_mode: SheetIndexMode::Eager,
194            warmup: tuning::WarmupConfig::default(),
195            arrow_storage_enabled: true,
196            delta_overlay_enabled: true,
197            write_formula_overlay_enabled: true,
198            date_system: DateSystem::Excel1900,
199            defer_graph_building: false,
200        }
201    }
202}
203
204impl EvalConfig {
205    #[inline]
206    pub fn with_range_expansion_limit(mut self, limit: usize) -> Self {
207        self.range_expansion_limit = limit;
208        self
209    }
210
211    #[inline]
212    pub fn with_parallel(mut self, enable: bool) -> Self {
213        self.enable_parallel = enable;
214        self
215    }
216
217    #[inline]
218    pub fn with_block_stripes(mut self, enable: bool) -> Self {
219        self.enable_block_stripes = enable;
220        self
221    }
222
223    #[inline]
224    pub fn with_arrow_storage(mut self, enable: bool) -> Self {
225        self.arrow_storage_enabled = enable;
226        self
227    }
228
229    #[inline]
230    pub fn with_delta_overlay(mut self, enable: bool) -> Self {
231        self.delta_overlay_enabled = enable;
232        self
233    }
234
235    #[inline]
236    pub fn with_formula_overlay(mut self, enable: bool) -> Self {
237        self.write_formula_overlay_enabled = enable;
238        self
239    }
240
241    #[inline]
242    pub fn with_date_system(mut self, system: DateSystem) -> Self {
243        self.date_system = system;
244        self
245    }
246}
247
248#[derive(Debug, Clone, Copy, PartialEq, Eq)]
249pub enum SheetIndexMode {
250    /// Build full interval-tree based index during inserts (current behavior)
251    Eager,
252    /// Defer building any sheet index until first range query or explicit finalize
253    Lazy,
254    /// Use fast batch building (sorted arrays -> tree) when bulk loading, otherwise incremental
255    FastBatch,
256}
257
258pub use formualizer_common::DateSystem;
259
260/// Construct a new engine with the given resolver and configuration
261pub fn new_engine<R>(resolver: R, config: EvalConfig) -> Engine<R>
262where
263    R: EvaluationContext + 'static,
264{
265    Engine::new(resolver, config)
266}
267
268/// Configuration for spill behavior. Nested under EvalConfig to avoid bloating the top-level.
269#[derive(Debug, Clone, Copy, PartialEq, Eq)]
270pub struct SpillConfig {
271    /// What to do when target region overlaps non-empty cells or other spills.
272    pub conflict_policy: SpillConflictPolicy,
273    /// Tiebreaker used when policy allows preemption or multiple anchors race.
274    pub tiebreaker: SpillTiebreaker,
275    /// Bounds handling when result exceeds sheet capacity.
276    pub bounds_policy: SpillBoundsPolicy,
277    /// Buffering approach for spill writes.
278    pub buffer_mode: SpillBufferMode,
279    /// Optional memory budget for shadow buffering in bytes.
280    pub memory_budget_bytes: Option<u64>,
281    /// Cancellation behavior while streaming rows.
282    pub cancellation: SpillCancellationPolicy,
283    /// Visibility policy for staged writes.
284    pub visibility: SpillVisibility,
285}
286
287impl Default for SpillConfig {
288    fn default() -> Self {
289        Self {
290            conflict_policy: SpillConflictPolicy::Error,
291            tiebreaker: SpillTiebreaker::FirstWins,
292            bounds_policy: SpillBoundsPolicy::Strict,
293            buffer_mode: SpillBufferMode::ShadowBuffer,
294            memory_budget_bytes: None,
295            cancellation: SpillCancellationPolicy::Cooperative,
296            visibility: SpillVisibility::OnCommit,
297        }
298    }
299}
300
301#[derive(Debug, Clone, Copy, PartialEq, Eq)]
302pub enum SpillConflictPolicy {
303    Error,
304    Preempt,
305}
306
307#[derive(Debug, Clone, Copy, PartialEq, Eq)]
308pub enum SpillTiebreaker {
309    FirstWins,
310    EvaluationEpochAsc,
311    AnchorAddressAsc,
312    FunctionPriorityThenAddress,
313}
314
315#[derive(Debug, Clone, Copy, PartialEq, Eq)]
316pub enum SpillBoundsPolicy {
317    Strict,
318    Truncate,
319}
320
321#[derive(Debug, Clone, Copy, PartialEq, Eq)]
322pub enum SpillBufferMode {
323    ShadowBuffer,
324    PersistenceJournal,
325}
326
327#[derive(Debug, Clone, Copy, PartialEq, Eq)]
328pub enum SpillCancellationPolicy {
329    Cooperative,
330    Strict,
331}
332
333#[derive(Debug, Clone, Copy, PartialEq, Eq)]
334pub enum SpillVisibility {
335    OnCommit,
336    StagedLayer,
337}