formualizer_workbook/
loader.rs

1use crate::error::{IoError, col_to_a1};
2use crate::traits::{LoadStrategy, NamedRange, NamedRangeScope, SpreadsheetReader};
3use formualizer_eval::engine::Engine;
4use formualizer_eval::traits::EvaluationContext;
5use formualizer_eval::{reference::CellRef, reference::Coord};
6use formualizer_parse::parser;
7use rustc_hash::FxHashSet;
8use std::time::Instant;
9
/// Counters and timings accumulated by a [`WorkbookLoader`] while it loads
/// workbook data into an engine.
///
/// All `*_ms` fields are in milliseconds. The struct is cheap to clone so
/// callers can snapshot it from the reference returned by
/// `WorkbookLoader::stats` and compare snapshots with `==`.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct LoaderStats {
    /// Total number of cell entries returned by the backend for the sheets loaded.
    pub cells_loaded: usize,
    /// Number of non-empty formulas staged or set on the engine.
    pub formulas_loaded: usize,
    /// Number of sheets fully pushed into the engine.
    pub sheets_loaded: usize,
    /// Wall-clock duration of the most recent `load_into_engine` call
    /// (the loader reports at least 1 ms).
    pub load_time_ms: u64,
    // Granular metrics (aggregated across all sheets loaded in this session).
    /// Time spent waiting on the backend to read sheet data.
    pub backend_read_time_ms: u64,
    /// Time spent inserting values and formulas into the engine.
    pub engine_insert_time_ms: u64,
    /// NOTE(review): not written by the loader code visible in this file;
    /// presumably filled in from engine-side instrumentation — confirm.
    pub vertex_alloc_time_ms: u64,
    /// NOTE(review): not written by the loader code visible in this file;
    /// presumably filled in from engine-side instrumentation — confirm.
    pub sheet_index_time_ms: u64,
    /// NOTE(review): not written by the loader code visible in this file;
    /// presumably filled in from engine-side instrumentation — confirm.
    pub edges_rebuild_time_ms: u64,
}
23
24pub struct WorkbookLoader<B: SpreadsheetReader> {
25    backend: B,
26    strategy: LoadStrategy,
27    stats: LoaderStats,
28    pending_named_ranges: Vec<NamedRange>,
29    seen_named_ranges: FxHashSet<(NamedRangeScope, String, String)>,
30}
31
32impl<B: SpreadsheetReader> WorkbookLoader<B> {
33    pub fn new(backend: B, strategy: LoadStrategy) -> Self {
34        Self {
35            backend,
36            strategy,
37            stats: LoaderStats::default(),
38            pending_named_ranges: Vec::new(),
39            seen_named_ranges: FxHashSet::default(),
40        }
41    }
42
43    pub fn stats(&self) -> &LoaderStats {
44        &self.stats
45    }
46
47    pub fn load_into_engine<R>(&mut self, engine: &mut Engine<R>) -> Result<(), IoError>
48    where
49        R: EvaluationContext,
50    {
51        let start = Instant::now();
52
53        // Use batch API for performance
54        engine.begin_batch();
55
56        let result = match self.strategy {
57            LoadStrategy::EagerAll => {
58                // Load all sheets
59                for sheet in self
60                    .backend
61                    .sheet_names()
62                    .map_err(|e| IoError::from_backend("backend", e))?
63                {
64                    self.load_sheet_into_graph(&sheet, engine)?;
65                }
66                Ok(())
67            }
68            LoadStrategy::EagerSheet => {
69                // Load default or first sheet
70                let sheets = self
71                    .backend
72                    .sheet_names()
73                    .map_err(|e| IoError::from_backend("backend", e))?;
74                if let Some(sheet) = sheets.first() {
75                    self.load_sheet_into_graph(sheet, engine)
76                } else {
77                    Ok(())
78                }
79            }
80            LoadStrategy::LazyCell | LoadStrategy::LazyRange { .. } => {
81                // For v1, treat as EagerSheet since engine doesn't support on-demand
82                eprintln!("Warning: LazyCell/LazyRange not yet supported, using EagerSheet");
83                let sheets = self
84                    .backend
85                    .sheet_names()
86                    .map_err(|e| IoError::from_backend("backend", e))?;
87                if let Some(sheet) = sheets.first() {
88                    self.load_sheet_into_graph(sheet, engine)
89                } else {
90                    Ok(())
91                }
92            }
93            LoadStrategy::WriteOnly => Ok(()),
94        };
95
96        engine.end_batch();
97
98        if result.is_ok() {
99            self.register_named_ranges(engine)?;
100        }
101
102        let elapsed_ms = start.elapsed().as_millis() as u64;
103        self.stats.load_time_ms = if elapsed_ms == 0 { 1 } else { elapsed_ms };
104        result
105    }
106
107    fn load_sheet_into_graph<R>(
108        &mut self,
109        sheet: &str,
110        engine: &mut Engine<R>,
111    ) -> Result<(), IoError>
112    where
113        R: EvaluationContext,
114    {
115        use std::time::Instant as _Instant;
116        let t_read_start = _Instant::now();
117        let sheet_data = self
118            .backend
119            .read_sheet(sheet)
120            .map_err(|e| IoError::from_backend("backend", e))?;
121        let read_elapsed = t_read_start.elapsed();
122        self.stats.backend_read_time_ms += read_elapsed.as_millis() as u64;
123
124        self.stats.cells_loaded += sheet_data.cells.len();
125
126        let t_insert_start = _Instant::now();
127        // Process cells in batches for better performance
128        let has_any_formula = sheet_data
129            .cells
130            .iter()
131            .any(|(_, c)| c.formula.as_ref().map(|s| !s.is_empty()).unwrap_or(false));
132
133        if !has_any_formula {
134            // Fast path: reserve and bulk insert values
135            engine.begin_batch();
136            let to_insert = sheet_data
137                .cells
138                .into_iter()
139                .filter_map(|((r, c), cell)| cell.value.map(|v| (r, c, v)));
140            engine.graph.bulk_insert_values(sheet, to_insert);
141            engine.end_batch();
142        } else {
143            for ((row, col), cell) in sheet_data.cells {
144                // Value first (if present) using normal API (ensures snapshot / consistency)
145                if let Some(value) = cell.value {
146                    engine
147                        .set_cell_value(sheet, row, col, value)
148                        .map_err(IoError::Engine)?;
149                }
150                if let Some(formula_str) = cell.formula {
151                    if formula_str.is_empty() {
152                        continue;
153                    }
154                    if engine.config.defer_graph_building {
155                        engine.stage_formula_text(sheet, row, col, formula_str);
156                        self.stats.formulas_loaded += 1;
157                    } else {
158                        let ast =
159                            parser::parse(&formula_str).map_err(|e| IoError::FormulaParser {
160                                sheet: sheet.to_string(),
161                                row,
162                                col: col_to_a1(col),
163                                message: e.to_string(),
164                            })?;
165                        engine
166                            .set_cell_formula(sheet, row, col, ast)
167                            .map_err(IoError::Engine)?;
168                        self.stats.formulas_loaded += 1;
169                    }
170                }
171            }
172        }
173
174        self.stats.sheets_loaded += 1;
175        let insert_elapsed = t_insert_start.elapsed();
176        self.stats.engine_insert_time_ms += insert_elapsed.as_millis() as u64;
177
178        self.collect_named_ranges(sheet, &sheet_data.named_ranges);
179        Ok(())
180    }
181
182    fn collect_named_ranges(&mut self, sheet: &str, ranges: &[NamedRange]) {
183        for named in ranges {
184            if named.address.sheet != sheet {
185                // Defer to the sheet the range references to avoid duplicating entries.
186                continue;
187            }
188            let key = (
189                named.scope.clone(),
190                named.address.sheet.clone(),
191                named.name.clone(),
192            );
193            if !self.seen_named_ranges.insert(key) {
194                continue;
195            }
196            self.pending_named_ranges.push(named.clone());
197        }
198    }
199
200    fn register_named_ranges<R>(&mut self, engine: &mut Engine<R>) -> Result<(), IoError>
201    where
202        R: EvaluationContext,
203    {
204        for named in self.pending_named_ranges.drain(..) {
205            let addr = &named.address;
206            let sheet_id = match engine.graph.sheet_id(&addr.sheet) {
207                Some(id) => id,
208                None => {
209                    #[cfg(feature = "tracing")]
210                    tracing::warn!(
211                        name = %named.name,
212                        sheet = %addr.sheet,
213                        "named range references sheet that was not loaded; skipping"
214                    );
215                    continue;
216                }
217            };
218
219            let sr0 = addr.start_row.saturating_sub(1);
220            let sc0 = addr.start_col.saturating_sub(1);
221            let er0 = addr.end_row.saturating_sub(1);
222            let ec0 = addr.end_col.saturating_sub(1);
223
224            let start_coord = Coord::new(sr0, sc0, true, true);
225            let end_coord = Coord::new(er0, ec0, true, true);
226            let start_ref = CellRef::new(sheet_id, start_coord);
227            let end_ref = CellRef::new(sheet_id, end_coord);
228
229            let definition = if sr0 == er0 && sc0 == ec0 {
230                formualizer_eval::engine::named_range::NamedDefinition::Cell(start_ref)
231            } else {
232                let range_ref = formualizer_eval::reference::RangeRef::new(start_ref, end_ref);
233                formualizer_eval::engine::named_range::NamedDefinition::Range(range_ref)
234            };
235
236            let scope = match named.scope {
237                NamedRangeScope::Workbook => {
238                    formualizer_eval::engine::named_range::NameScope::Workbook
239                }
240                NamedRangeScope::Sheet => {
241                    formualizer_eval::engine::named_range::NameScope::Sheet(sheet_id)
242                }
243            };
244
245            engine
246                .graph
247                .define_name(&named.name, definition, scope)
248                .map_err(IoError::Engine)?;
249        }
250
251        self.seen_named_ranges.clear();
252        Ok(())
253    }
254}