formualizer_eval/engine/
mod.rs

1//! Formualizer Dependency Graph Engine
2//!
3//! Provides incremental formula evaluation with dependency tracking.
4
5pub mod arrow_ingest;
6pub mod eval;
7pub mod graph;
8pub mod ingest;
9pub mod ingest_builder;
10pub mod plan;
11pub mod range_view;
12pub mod scheduler;
13pub mod spill;
14pub mod vertex;
15
16// New SoA modules
17pub mod csr_edges;
18pub mod debug_views;
19pub mod delta_edges;
20pub mod epoch_tracker;
21pub mod interval_tree;
22pub mod named_range;
23pub mod packed_coord;
24pub mod sheet_index;
25pub mod sheet_registry;
26pub mod topo;
27pub mod vertex_store;
28
29// Phase 1: Arena modules
30pub mod arena;
31
32// Phase 1: Warmup modules
33pub mod cache;
34pub mod masks;
35pub mod metrics;
36pub mod pass_planner;
37pub mod reference_fingerprint;
38pub mod tuning;
39pub mod warmup;
40
41#[cfg(test)]
42mod tests;
43
44use std::fmt::{Display, Formatter};
45
46pub use eval::{Engine, EvalResult};
47// Use SoA implementation
48pub use graph::snapshot::VertexSnapshot;
49pub use graph::{
50    ChangeEvent, DependencyGraph, DependencyRef, OperationSummary, StripeKey, StripeType,
51    block_index,
52};
53pub use scheduler::{Layer, Schedule, Scheduler};
54pub use vertex::{VertexId, VertexKind};
55
56pub use graph::editor::{
57    DataUpdateSummary, EditorError, MetaUpdateSummary, RangeSummary, ShiftSummary, TransactionId,
58    VertexDataPatch, VertexEditor, VertexMeta, VertexMetaPatch,
59};
60
61pub use graph::editor::change_log::{ChangeLog, ChangeLogger, NullChangeLogger};
62
63// CalcObserver is defined below
64
65use crate::traits::EvaluationContext;
66use crate::traits::VolatileLevel;
67
68impl<R: EvaluationContext> Engine<R> {
69    pub fn begin_bulk_ingest(&mut self) -> ingest_builder::BulkIngestBuilder<'_> {
70        ingest_builder::BulkIngestBuilder::new(&mut self.graph)
71    }
72}
73
74/// 🔮 Scalability Hook: Performance monitoring trait for calculation observability
75pub trait CalcObserver: Send + Sync {
76    fn on_eval_start(&self, vertex_id: VertexId);
77    fn on_eval_complete(&self, vertex_id: VertexId, duration: std::time::Duration);
78    fn on_cycle_detected(&self, cycle: &[VertexId]);
79    fn on_dirty_propagation(&self, vertex_id: VertexId, affected_count: usize);
80}
81
82/// Default no-op observer
83impl CalcObserver for () {
84    fn on_eval_start(&self, _vertex_id: VertexId) {}
85    fn on_eval_complete(&self, _vertex_id: VertexId, _duration: std::time::Duration) {}
86    fn on_cycle_detected(&self, _cycle: &[VertexId]) {}
87    fn on_dirty_propagation(&self, _vertex_id: VertexId, _affected_count: usize) {}
88}
89
90/// Configuration for the evaluation engine
91#[derive(Debug, Clone)]
92pub struct EvalConfig {
93    pub enable_parallel: bool,
94    pub max_threads: Option<usize>,
95    // 🔮 Scalability Hook: Resource limits (future-proofing)
96    pub max_vertices: Option<usize>,
97    pub max_eval_time: Option<std::time::Duration>,
98    pub max_memory_mb: Option<usize>,
99
100    /// Stable workbook seed used for deterministic RNG composition
101    pub workbook_seed: u64,
102
103    /// Volatile granularity for RNG seeding and re-evaluation policy
104    pub volatile_level: VolatileLevel,
105
106    // Range handling configuration (Phase 5)
107    /// Ranges with size <= this limit are expanded into individual Cell dependencies
108    pub range_expansion_limit: usize,
109    /// Height of stripe blocks for dense range indexing
110    pub stripe_height: u32,
111    /// Width of stripe blocks for dense range indexing  
112    pub stripe_width: u32,
113    /// Enable block stripes for dense ranges (vs row/column stripes only)
114    pub enable_block_stripes: bool,
115
116    /// Spill behavior configuration (conflicts, bounds, buffering)
117    pub spill: SpillConfig,
118
119    /// Use dynamic topological ordering (Pearce-Kelly algorithm)
120    pub use_dynamic_topo: bool,
121    /// Maximum nodes to visit before falling back to full rebuild
122    pub pk_visit_budget: usize,
123    /// Operations between periodic rank compaction
124    pub pk_compaction_interval_ops: u64,
125    /// Maximum width for parallel evaluation layers
126    pub max_layer_width: Option<usize>,
127    /// If true, reject edge insertions that would create a cycle (skip adding that dependency).
128    /// If false, allow insertion and let scheduler handle cycles at evaluation time.
129    pub pk_reject_cycle_edges: bool,
130    /// Sheet index build strategy for bulk loads
131    pub sheet_index_mode: SheetIndexMode,
132
133    /// Warmup configuration for global pass planning (Phase 1)
134    pub warmup: tuning::WarmupConfig,
135
136    /// Enable Arrow-backed storage reads (Phase A)
137    pub arrow_storage_enabled: bool,
138    /// Enable Arrow fast paths in builtins (Phase B)
139    pub arrow_fastpath_enabled: bool,
140    /// Enable delta overlay for Arrow sheets (Phase C)
141    pub delta_overlay_enabled: bool,
142
143    /// Mirror formula scalar results into Arrow overlay for Arrow-backed reads
144    /// This enables Arrow-only RangeView correctness without Hybrid fallback.
145    pub write_formula_overlay_enabled: bool,
146
147    /// Workbook date system: Excel 1900 (default) or 1904.
148    pub date_system: DateSystem,
149
150    /// Defer dependency graph building: ingest values immediately but stage formulas
151    /// for on-demand graph construction during evaluation.
152    pub defer_graph_building: bool,
153}
154
155impl Default for EvalConfig {
156    fn default() -> Self {
157        Self {
158            enable_parallel: true,
159            max_threads: None,
160            max_vertices: None,
161            max_eval_time: None,
162            max_memory_mb: None,
163
164            // Deterministic RNG seed (matches traits default)
165            workbook_seed: 0xF0F0_D0D0_AAAA_5555,
166
167            // Volatile model default
168            volatile_level: VolatileLevel::Always,
169
170            // Range handling defaults (Phase 5)
171            range_expansion_limit: 64,
172            stripe_height: 256,
173            stripe_width: 256,
174            enable_block_stripes: false,
175            spill: SpillConfig::default(),
176
177            // Dynamic topology configuration
178            use_dynamic_topo: false, // Disabled by default for compatibility
179            pk_visit_budget: 50_000,
180            pk_compaction_interval_ops: 100_000,
181            max_layer_width: None,
182            pk_reject_cycle_edges: false,
183            sheet_index_mode: SheetIndexMode::Eager,
184            warmup: tuning::WarmupConfig::default(),
185            arrow_storage_enabled: true,
186            arrow_fastpath_enabled: true,
187            delta_overlay_enabled: true,
188            write_formula_overlay_enabled: true,
189            date_system: DateSystem::Excel1900,
190            defer_graph_building: false,
191        }
192    }
193}
194
195#[derive(Debug, Clone, Copy, PartialEq, Eq)]
196pub enum SheetIndexMode {
197    /// Build full interval-tree based index during inserts (current behavior)
198    Eager,
199    /// Defer building any sheet index until first range query or explicit finalize
200    Lazy,
201    /// Use fast batch building (sorted arrays -> tree) when bulk loading, otherwise incremental
202    FastBatch,
203}
204
205/// Excel workbook date system selection.
206#[derive(Debug, Clone, Copy, PartialEq, Eq)]
207pub enum DateSystem {
208    /// Excel 1900 date system with the historical 1900-02-29 compatibility gap.
209    Excel1900,
210    /// Excel 1904 date system (epoch 1904-01-01), no 1900 leap-year bug offset.
211    Excel1904,
212}
213
214impl Display for DateSystem {
215    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
216        match self {
217            DateSystem::Excel1900 => write!(f, "1900"),
218            DateSystem::Excel1904 => write!(f, "1904"),
219        }
220    }
221}
222
223/// Construct a new engine with the given resolver and configuration
224pub fn new_engine<R>(resolver: R, config: EvalConfig) -> Engine<R>
225where
226    R: EvaluationContext + 'static,
227{
228    Engine::new(resolver, config)
229}
230
231/// Configuration for spill behavior. Nested under EvalConfig to avoid bloating the top-level.
232#[derive(Debug, Clone, Copy, PartialEq, Eq)]
233pub struct SpillConfig {
234    /// What to do when target region overlaps non-empty cells or other spills.
235    pub conflict_policy: SpillConflictPolicy,
236    /// Tiebreaker used when policy allows preemption or multiple anchors race.
237    pub tiebreaker: SpillTiebreaker,
238    /// Bounds handling when result exceeds sheet capacity.
239    pub bounds_policy: SpillBoundsPolicy,
240    /// Buffering approach for spill writes.
241    pub buffer_mode: SpillBufferMode,
242    /// Optional memory budget for shadow buffering in bytes.
243    pub memory_budget_bytes: Option<u64>,
244    /// Cancellation behavior while streaming rows.
245    pub cancellation: SpillCancellationPolicy,
246    /// Visibility policy for staged writes.
247    pub visibility: SpillVisibility,
248}
249
250impl Default for SpillConfig {
251    fn default() -> Self {
252        Self {
253            conflict_policy: SpillConflictPolicy::Error,
254            tiebreaker: SpillTiebreaker::FirstWins,
255            bounds_policy: SpillBoundsPolicy::Strict,
256            buffer_mode: SpillBufferMode::ShadowBuffer,
257            memory_budget_bytes: None,
258            cancellation: SpillCancellationPolicy::Cooperative,
259            visibility: SpillVisibility::OnCommit,
260        }
261    }
262}
263
264#[derive(Debug, Clone, Copy, PartialEq, Eq)]
265pub enum SpillConflictPolicy {
266    Error,
267    Preempt,
268}
269
270#[derive(Debug, Clone, Copy, PartialEq, Eq)]
271pub enum SpillTiebreaker {
272    FirstWins,
273    EvaluationEpochAsc,
274    AnchorAddressAsc,
275    FunctionPriorityThenAddress,
276}
277
278#[derive(Debug, Clone, Copy, PartialEq, Eq)]
279pub enum SpillBoundsPolicy {
280    Strict,
281    Truncate,
282}
283
284#[derive(Debug, Clone, Copy, PartialEq, Eq)]
285pub enum SpillBufferMode {
286    ShadowBuffer,
287    PersistenceJournal,
288}
289
290#[derive(Debug, Clone, Copy, PartialEq, Eq)]
291pub enum SpillCancellationPolicy {
292    Cooperative,
293    Strict,
294}
295
296#[derive(Debug, Clone, Copy, PartialEq, Eq)]
297pub enum SpillVisibility {
298    OnCommit,
299    StagedLayer,
300}