formualizer_eval/engine/
mod.rs

1//! Formualizer Dependency Graph Engine
2//!
3//! Provides incremental formula evaluation with dependency tracking.
4
5pub mod arrow_ingest;
6pub mod eval;
7pub mod graph;
8pub mod ingest;
9pub mod ingest_builder;
10pub mod plan;
11pub mod range_view;
12pub mod scheduler;
13pub mod spill;
14pub mod vertex;
15
16// New SoA modules
17pub mod csr_edges;
18pub mod debug_views;
19pub mod delta_edges;
20pub mod interval_tree;
21pub mod named_range;
22pub mod sheet_index;
23pub mod sheet_registry;
24pub mod topo;
25pub mod vertex_store;
26
27// Phase 1: Arena modules
28pub mod arena;
29
30// Phase 1: Warmup configuration (kept for compatibility)
31pub mod tuning;
32
33#[cfg(test)]
34mod tests;
35
36use std::fmt::{Display, Formatter};
37
38pub use eval::{Engine, EvalResult, RecalcPlan};
39// Use SoA implementation
40pub use graph::snapshot::VertexSnapshot;
41pub use graph::{
42    ChangeEvent, DependencyGraph, DependencyRef, OperationSummary, StripeKey, StripeType,
43    block_index,
44};
45pub use scheduler::{Layer, Schedule, Scheduler};
46pub use vertex::{VertexId, VertexKind};
47
48pub use graph::editor::{
49    DataUpdateSummary, EditorError, MetaUpdateSummary, RangeSummary, ShiftSummary, TransactionId,
50    VertexDataPatch, VertexEditor, VertexMeta, VertexMetaPatch,
51};
52
53pub use graph::editor::change_log::{ChangeLog, ChangeLogger, NullChangeLogger};
54
55// CalcObserver is defined below
56
57use crate::traits::EvaluationContext;
58use crate::traits::VolatileLevel;
59
60impl<R: EvaluationContext> Engine<R> {
61    pub fn begin_bulk_ingest(&mut self) -> ingest_builder::BulkIngestBuilder<'_> {
62        ingest_builder::BulkIngestBuilder::new(&mut self.graph)
63    }
64}
65
66/// 🔮 Scalability Hook: Performance monitoring trait for calculation observability
67pub trait CalcObserver: Send + Sync {
68    fn on_eval_start(&self, vertex_id: VertexId);
69    fn on_eval_complete(&self, vertex_id: VertexId, duration: std::time::Duration);
70    fn on_cycle_detected(&self, cycle: &[VertexId]);
71    fn on_dirty_propagation(&self, vertex_id: VertexId, affected_count: usize);
72}
73
74/// Default no-op observer
75impl CalcObserver for () {
76    fn on_eval_start(&self, _vertex_id: VertexId) {}
77    fn on_eval_complete(&self, _vertex_id: VertexId, _duration: std::time::Duration) {}
78    fn on_cycle_detected(&self, _cycle: &[VertexId]) {}
79    fn on_dirty_propagation(&self, _vertex_id: VertexId, _affected_count: usize) {}
80}
81
82/// Configuration for the evaluation engine
83#[derive(Debug, Clone)]
84pub struct EvalConfig {
85    pub enable_parallel: bool,
86    pub max_threads: Option<usize>,
87    // 🔮 Scalability Hook: Resource limits (future-proofing)
88    pub max_vertices: Option<usize>,
89    pub max_eval_time: Option<std::time::Duration>,
90    pub max_memory_mb: Option<usize>,
91
92    /// Default sheet name used when no sheet is provided.
93    pub default_sheet_name: String,
94
95    /// Stable workbook seed used for deterministic RNG composition
96    pub workbook_seed: u64,
97
98    /// Volatile granularity for RNG seeding and re-evaluation policy
99    pub volatile_level: VolatileLevel,
100
101    // Range handling configuration (Phase 5)
102    /// Ranges with size <= this limit are expanded into individual Cell dependencies
103    pub range_expansion_limit: usize,
104    /// Height of stripe blocks for dense range indexing
105    pub stripe_height: u32,
106    /// Width of stripe blocks for dense range indexing  
107    pub stripe_width: u32,
108    /// Enable block stripes for dense ranges (vs row/column stripes only)
109    pub enable_block_stripes: bool,
110
111    /// Spill behavior configuration (conflicts, bounds, buffering)
112    pub spill: SpillConfig,
113
114    /// Use dynamic topological ordering (Pearce-Kelly algorithm)
115    pub use_dynamic_topo: bool,
116    /// Maximum nodes to visit before falling back to full rebuild
117    pub pk_visit_budget: usize,
118    /// Operations between periodic rank compaction
119    pub pk_compaction_interval_ops: u64,
120    /// Maximum width for parallel evaluation layers
121    pub max_layer_width: Option<usize>,
122    /// If true, reject edge insertions that would create a cycle (skip adding that dependency).
123    /// If false, allow insertion and let scheduler handle cycles at evaluation time.
124    pub pk_reject_cycle_edges: bool,
125    /// Sheet index build strategy for bulk loads
126    pub sheet_index_mode: SheetIndexMode,
127
128    /// Warmup configuration for global pass planning (Phase 1)
129    pub warmup: tuning::WarmupConfig,
130
131    /// Enable Arrow-backed storage reads (Phase A)
132    pub arrow_storage_enabled: bool,
133    /// Enable Arrow fast paths in builtins (Phase B)
134    pub arrow_fastpath_enabled: bool,
135    /// Enable delta overlay for Arrow sheets (Phase C)
136    pub delta_overlay_enabled: bool,
137
138    /// Mirror formula scalar results into Arrow overlay for Arrow-backed reads
139    /// This enables Arrow-only RangeView correctness without Hybrid fallback.
140    pub write_formula_overlay_enabled: bool,
141
142    /// Workbook date system: Excel 1900 (default) or 1904.
143    pub date_system: DateSystem,
144
145    /// Defer dependency graph building: ingest values immediately but stage formulas
146    /// for on-demand graph construction during evaluation.
147    pub defer_graph_building: bool,
148}
149
150impl Default for EvalConfig {
151    fn default() -> Self {
152        Self {
153            enable_parallel: true,
154            max_threads: None,
155            max_vertices: None,
156            max_eval_time: None,
157            max_memory_mb: None,
158
159            default_sheet_name: format!("Sheet{}", 1),
160
161            // Deterministic RNG seed (matches traits default)
162            workbook_seed: 0xF0F0_D0D0_AAAA_5555,
163
164            // Volatile model default
165            volatile_level: VolatileLevel::Always,
166
167            // Range handling defaults (Phase 5)
168            range_expansion_limit: 64,
169            stripe_height: 256,
170            stripe_width: 256,
171            enable_block_stripes: false,
172            spill: SpillConfig::default(),
173
174            // Dynamic topology configuration
175            use_dynamic_topo: false, // Disabled by default for compatibility
176            pk_visit_budget: 50_000,
177            pk_compaction_interval_ops: 100_000,
178            max_layer_width: None,
179            pk_reject_cycle_edges: false,
180            sheet_index_mode: SheetIndexMode::Eager,
181            warmup: tuning::WarmupConfig::default(),
182            arrow_storage_enabled: true,
183            arrow_fastpath_enabled: true,
184            delta_overlay_enabled: true,
185            write_formula_overlay_enabled: true,
186            date_system: DateSystem::Excel1900,
187            defer_graph_building: false,
188        }
189    }
190}
191
192impl EvalConfig {
193    #[inline]
194    pub fn with_range_expansion_limit(mut self, limit: usize) -> Self {
195        self.range_expansion_limit = limit;
196        self
197    }
198
199    #[inline]
200    pub fn with_parallel(mut self, enable: bool) -> Self {
201        self.enable_parallel = enable;
202        self
203    }
204
205    #[inline]
206    pub fn with_block_stripes(mut self, enable: bool) -> Self {
207        self.enable_block_stripes = enable;
208        self
209    }
210
211    #[inline]
212    pub fn with_arrow_storage(mut self, enable: bool) -> Self {
213        self.arrow_storage_enabled = enable;
214        self
215    }
216
217    #[inline]
218    pub fn with_arrow_fastpath(mut self, enable: bool) -> Self {
219        self.arrow_fastpath_enabled = enable;
220        self
221    }
222
223    #[inline]
224    pub fn with_delta_overlay(mut self, enable: bool) -> Self {
225        self.delta_overlay_enabled = enable;
226        self
227    }
228
229    #[inline]
230    pub fn with_formula_overlay(mut self, enable: bool) -> Self {
231        self.write_formula_overlay_enabled = enable;
232        self
233    }
234
235    #[inline]
236    pub fn with_date_system(mut self, system: DateSystem) -> Self {
237        self.date_system = system;
238        self
239    }
240}
241
242#[derive(Debug, Clone, Copy, PartialEq, Eq)]
243pub enum SheetIndexMode {
244    /// Build full interval-tree based index during inserts (current behavior)
245    Eager,
246    /// Defer building any sheet index until first range query or explicit finalize
247    Lazy,
248    /// Use fast batch building (sorted arrays -> tree) when bulk loading, otherwise incremental
249    FastBatch,
250}
251
252/// Excel workbook date system selection.
253#[derive(Debug, Clone, Copy, PartialEq, Eq)]
254pub enum DateSystem {
255    /// Excel 1900 date system with the historical 1900-02-29 compatibility gap.
256    Excel1900,
257    /// Excel 1904 date system (epoch 1904-01-01), no 1900 leap-year bug offset.
258    Excel1904,
259}
260
261impl Display for DateSystem {
262    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
263        match self {
264            DateSystem::Excel1900 => write!(f, "1900"),
265            DateSystem::Excel1904 => write!(f, "1904"),
266        }
267    }
268}
269
270/// Construct a new engine with the given resolver and configuration
271pub fn new_engine<R>(resolver: R, config: EvalConfig) -> Engine<R>
272where
273    R: EvaluationContext + 'static,
274{
275    Engine::new(resolver, config)
276}
277
278/// Configuration for spill behavior. Nested under EvalConfig to avoid bloating the top-level.
279#[derive(Debug, Clone, Copy, PartialEq, Eq)]
280pub struct SpillConfig {
281    /// What to do when target region overlaps non-empty cells or other spills.
282    pub conflict_policy: SpillConflictPolicy,
283    /// Tiebreaker used when policy allows preemption or multiple anchors race.
284    pub tiebreaker: SpillTiebreaker,
285    /// Bounds handling when result exceeds sheet capacity.
286    pub bounds_policy: SpillBoundsPolicy,
287    /// Buffering approach for spill writes.
288    pub buffer_mode: SpillBufferMode,
289    /// Optional memory budget for shadow buffering in bytes.
290    pub memory_budget_bytes: Option<u64>,
291    /// Cancellation behavior while streaming rows.
292    pub cancellation: SpillCancellationPolicy,
293    /// Visibility policy for staged writes.
294    pub visibility: SpillVisibility,
295}
296
297impl Default for SpillConfig {
298    fn default() -> Self {
299        Self {
300            conflict_policy: SpillConflictPolicy::Error,
301            tiebreaker: SpillTiebreaker::FirstWins,
302            bounds_policy: SpillBoundsPolicy::Strict,
303            buffer_mode: SpillBufferMode::ShadowBuffer,
304            memory_budget_bytes: None,
305            cancellation: SpillCancellationPolicy::Cooperative,
306            visibility: SpillVisibility::OnCommit,
307        }
308    }
309}
310
311#[derive(Debug, Clone, Copy, PartialEq, Eq)]
312pub enum SpillConflictPolicy {
313    Error,
314    Preempt,
315}
316
317#[derive(Debug, Clone, Copy, PartialEq, Eq)]
318pub enum SpillTiebreaker {
319    FirstWins,
320    EvaluationEpochAsc,
321    AnchorAddressAsc,
322    FunctionPriorityThenAddress,
323}
324
325#[derive(Debug, Clone, Copy, PartialEq, Eq)]
326pub enum SpillBoundsPolicy {
327    Strict,
328    Truncate,
329}
330
331#[derive(Debug, Clone, Copy, PartialEq, Eq)]
332pub enum SpillBufferMode {
333    ShadowBuffer,
334    PersistenceJournal,
335}
336
337#[derive(Debug, Clone, Copy, PartialEq, Eq)]
338pub enum SpillCancellationPolicy {
339    Cooperative,
340    Strict,
341}
342
343#[derive(Debug, Clone, Copy, PartialEq, Eq)]
344pub enum SpillVisibility {
345    OnCommit,
346    StagedLayer,
347}