Skip to main content

formualizer_eval/engine/
plan.rs

1use crate::SheetId;
2use crate::engine::arena::{AstNodeData, AstNodeId, DataStore};
3use crate::engine::sheet_registry::SheetRegistry;
4use formualizer_common::Coord as AbsCoord;
5use formualizer_common::CoordBuildHasher;
6use formualizer_common::ExcelError;
7use formualizer_common::PackedSheetCell;
8use formualizer_parse::parser::{CollectPolicy, ReferenceType};
9use std::collections::HashMap;
10
11/// Compact range descriptor used during planning (engine-only)
12#[derive(Debug, Clone, PartialEq, Eq, Hash)]
13pub enum RangeKey {
14    Rect {
15        sheet: SheetId,
16        start: AbsCoord,
17        end: AbsCoord, // inclusive
18    },
19    WholeRow {
20        sheet: SheetId,
21        row: u32,
22    },
23    WholeCol {
24        sheet: SheetId,
25        col: u32,
26    },
27    /// Partially bounded rectangle; None means unbounded in that direction
28    OpenRect {
29        sheet: SheetId,
30        start: Option<AbsCoord>,
31        end: Option<AbsCoord>,
32    },
33}
34
35/// Bitflags conveying per-formula traits
36pub type FormulaFlags = u8;
37pub const F_VOLATILE: FormulaFlags = 0b0000_0001;
38pub const F_HAS_RANGES: FormulaFlags = 0b0000_0010;
39pub const F_HAS_NAMES: FormulaFlags = 0b0000_0100;
40pub const F_HAS_TABLES: FormulaFlags = 0b0001_0000;
41pub const F_LIKELY_ARRAY: FormulaFlags = 0b0000_1000;
42
43#[derive(Debug, Default, Clone)]
44pub struct DependencyPlan {
45    pub formula_targets: Vec<(SheetId, AbsCoord)>,
46    pub global_cells: Vec<(SheetId, AbsCoord)>,
47    pub vertex_pool: Vec<(SheetId, AbsCoord)>,
48    pub vertex_pool_packed: Vec<PackedSheetCell>,
49    pub formula_target_pool_indices: Vec<u32>,
50    pub global_cell_pool_indices: Vec<u32>,
51    pub per_formula_cells: Vec<Vec<u32>>, // indices into global_cells
52    pub per_formula_ranges: Vec<Vec<RangeKey>>,
53    pub per_formula_names: Vec<Vec<String>>,
54    pub per_formula_tables: Vec<Vec<String>>,
55    pub per_formula_flags: Vec<FormulaFlags>,
56    pub edges_flat: Option<Vec<u32>>, // optional flat adjacency (indices into global_cells)
57    pub offsets: Option<Vec<u32>>,    // len = num_formulas + 1 when edges_flat is Some
58}
59
60#[derive(Debug, Clone, Copy)]
61pub enum DependencyPlanAst<'a> {
62    Tree(&'a formualizer_parse::parser::ASTNode),
63    Arena(AstNodeId),
64}
65
66fn collect_references_arena(
67    data_store: &DataStore,
68    ast_id: AstNodeId,
69    sheet_reg: &SheetRegistry,
70    policy: &CollectPolicy,
71) -> Result<Vec<ReferenceType>, ExcelError> {
72    let mut out = Vec::new();
73    let mut stack = Vec::with_capacity(8);
74    stack.push(ast_id);
75
76    while let Some(node_id) = stack.pop() {
77        let Some(node) = data_store.get_node(node_id) else {
78            return Err(ExcelError::new(formualizer_common::ExcelErrorKind::Value)
79                .with_message("Missing interned formula AST"));
80        };
81
82        match node {
83            AstNodeData::Reference { ref_type, .. } => {
84                let reference = data_store.reconstruct_reference_type_for_eval(ref_type, sheet_reg);
85                match reference {
86                    ReferenceType::Range {
87                        sheet,
88                        start_row,
89                        start_col,
90                        end_row,
91                        end_col,
92                        start_row_abs,
93                        start_col_abs,
94                        end_row_abs,
95                        end_col_abs,
96                    } => {
97                        if policy.expand_small_ranges
98                            && let (Some(sr), Some(sc), Some(er), Some(ec)) =
99                                (start_row, start_col, end_row, end_col)
100                        {
101                            let rows = er.saturating_sub(sr) + 1;
102                            let cols = ec.saturating_sub(sc) + 1;
103                            let area = rows.saturating_mul(cols);
104                            if area as usize <= policy.range_expansion_limit {
105                                let row_abs = start_row_abs && end_row_abs;
106                                let col_abs = start_col_abs && end_col_abs;
107                                for r in sr..=er {
108                                    for c in sc..=ec {
109                                        out.push(ReferenceType::Cell {
110                                            sheet: sheet.clone(),
111                                            row: r,
112                                            col: c,
113                                            row_abs,
114                                            col_abs,
115                                        });
116                                    }
117                                }
118                                continue;
119                            }
120                        }
121                        out.push(ReferenceType::Range {
122                            sheet,
123                            start_row,
124                            start_col,
125                            end_row,
126                            end_col,
127                            start_row_abs,
128                            start_col_abs,
129                            end_row_abs,
130                            end_col_abs,
131                        });
132                    }
133                    ReferenceType::NamedRange(_) if !policy.include_names => {}
134                    other => out.push(other),
135                }
136            }
137            AstNodeData::UnaryOp { expr_id, .. } => stack.push(*expr_id),
138            AstNodeData::BinaryOp {
139                left_id, right_id, ..
140            } => {
141                stack.push(*right_id);
142                stack.push(*left_id);
143            }
144            AstNodeData::Function { .. } => {
145                if let Some(args) = data_store.get_args(node_id) {
146                    for arg in args.iter().rev() {
147                        stack.push(*arg);
148                    }
149                }
150            }
151            AstNodeData::Array { .. } => {
152                if let Some((_, _, elems)) = data_store.get_array_elems(node_id) {
153                    for elem in elems.iter().rev() {
154                        stack.push(*elem);
155                    }
156                }
157            }
158            AstNodeData::Literal(_) => {}
159        }
160    }
161
162    Ok(out)
163}
164
165/// Build a compact dependency plan from ASTs without mutating the graph.
166/// Sheets referenced by name are resolved/created through SheetRegistry at plan time.
167pub fn build_dependency_plan<'a, I>(
168    sheet_reg: &mut SheetRegistry,
169    formulas: I,
170    policy: &CollectPolicy,
171    volatile_flags: Option<&[bool]>,
172) -> Result<DependencyPlan, ExcelError>
173where
174    I: Iterator<Item = (&'a str, u32, u32, &'a formualizer_parse::parser::ASTNode)>,
175{
176    let mut plan = DependencyPlan::default();
177
178    // Global cell pool: packed absolute cell -> index.
179    //
180    // Uses CoordBuildHasher because FxHasher's weak avalanche collides badly
181    // on structured packed keys (PackedSheetCell reserves bits 50..64 and has
182    // narrow dynamic range on row-major workloads), turning this O(N) loop
183    // into O(N^2). See formualizer_common::coord_hash.
184    let mut cell_index: HashMap<PackedSheetCell, u32, CoordBuildHasher> =
185        HashMap::with_hasher(CoordBuildHasher);
186    // Unified vertex pool for loader-specialized ensure.
187    let mut vertex_pool_index: HashMap<PackedSheetCell, u32, CoordBuildHasher> =
188        HashMap::with_hasher(CoordBuildHasher);
189
190    let mut ensure_vertex_pool_index =
191        |plan: &mut DependencyPlan, key: (SheetId, AbsCoord)| -> u32 {
192            let packed = PackedSheetCell::try_new(key.0, key.1.row(), key.1.col())
193                .expect("plan vertex pool coordinate must fit PackedSheetCell");
194            match vertex_pool_index.get(&packed) {
195                Some(&idx) => idx,
196                None => {
197                    let new_idx = plan.vertex_pool.len() as u32;
198                    plan.vertex_pool.push(key);
199                    plan.vertex_pool_packed.push(packed);
200                    vertex_pool_index.insert(packed, new_idx);
201                    new_idx
202                }
203            }
204        };
205
206    for (i, (sheet_name, row, col, ast)) in formulas.enumerate() {
207        let sheet_id = sheet_reg.id_for(sheet_name);
208        let target = (sheet_id, AbsCoord::from_excel(row, col));
209        plan.formula_targets.push(target);
210        let target_pool_idx = ensure_vertex_pool_index(&mut plan, target);
211        plan.formula_target_pool_indices.push(target_pool_idx);
212
213        let mut flags: FormulaFlags = 0;
214        if let Some(v) = volatile_flags.and_then(|v| v.get(i)).copied()
215            && v
216        {
217            flags |= F_VOLATILE;
218        }
219
220        let mut per_cells: Vec<u32> = Vec::new();
221        let mut per_ranges: Vec<RangeKey> = Vec::new();
222        let mut per_names: Vec<String> = Vec::new();
223        let mut per_tables: Vec<String> = Vec::new();
224
225        // Collect references using core collector (may expand small ranges per policy)
226        let refs = ast.collect_references(policy);
227        for r in refs {
228            match r {
229                ReferenceType::Cell {
230                    sheet, row, col, ..
231                } => {
232                    let dep_sheet = sheet
233                        .as_deref()
234                        .map(|name| sheet_reg.id_for(name))
235                        .unwrap_or(sheet_id);
236                    let key = (dep_sheet, AbsCoord::from_excel(row, col));
237                    let packed = PackedSheetCell::try_new(dep_sheet, key.1.row(), key.1.col())
238                        .expect("plan dependency coordinate must fit PackedSheetCell");
239                    let idx = match cell_index.get(&packed) {
240                        Some(&idx) => idx,
241                        None => {
242                            let new_idx = plan.global_cells.len() as u32;
243                            plan.global_cells.push(key);
244                            cell_index.insert(packed, new_idx);
245                            let pool_idx = ensure_vertex_pool_index(&mut plan, key);
246                            plan.global_cell_pool_indices.push(pool_idx);
247                            new_idx
248                        }
249                    };
250                    per_cells.push(idx);
251                }
252                ReferenceType::Range {
253                    sheet,
254                    start_row,
255                    start_col,
256                    end_row,
257                    end_col,
258                    ..
259                } => {
260                    let dep_sheet = sheet
261                        .as_deref()
262                        .map(|name| sheet_reg.id_for(name))
263                        .unwrap_or(sheet_id);
264                    match (start_row, start_col, end_row, end_col) {
265                        (Some(sr), Some(sc), Some(er), Some(ec)) => {
266                            per_ranges.push(RangeKey::Rect {
267                                sheet: dep_sheet,
268                                start: AbsCoord::from_excel(sr, sc),
269                                end: AbsCoord::from_excel(er, ec),
270                            })
271                        }
272                        (None, Some(c), None, Some(ec)) if c == ec => {
273                            per_ranges.push(RangeKey::WholeCol {
274                                sheet: dep_sheet,
275                                col: c,
276                            })
277                        }
278                        (Some(r), None, Some(er), None) if r == er => {
279                            per_ranges.push(RangeKey::WholeRow {
280                                sheet: dep_sheet,
281                                row: r,
282                            })
283                        }
284                        _ => per_ranges.push(RangeKey::OpenRect {
285                            sheet: dep_sheet,
286                            start: start_row
287                                .zip(start_col)
288                                .map(|(r, c)| AbsCoord::from_excel(r, c)),
289                            end: end_row
290                                .zip(end_col)
291                                .map(|(r, c)| AbsCoord::from_excel(r, c)),
292                        }),
293                    }
294                }
295                ReferenceType::External(ext) => match ext.kind {
296                    formualizer_parse::parser::ExternalRefKind::Cell { .. } => {
297                        flags |= F_HAS_NAMES;
298                        per_names.push(ext.raw.clone());
299                    }
300                    formualizer_parse::parser::ExternalRefKind::Range { .. } => {
301                        flags |= F_HAS_TABLES;
302                        per_tables.push(ext.raw.clone());
303                    }
304                },
305                ReferenceType::NamedRange(name) => {
306                    // Resolution handled later; mark via flags if caller cares
307                    flags |= F_HAS_NAMES;
308                    per_names.push(name);
309                }
310                ReferenceType::Table(tref) => {
311                    flags |= F_HAS_TABLES;
312                    per_tables.push(tref.name);
313                }
314                // 3D refs are parsed but not yet planned. They neither create
315                // dependencies nor participate in the cell/range plan; the
316                // evaluator will surface #N/IMPL! when one is encountered.
317                ReferenceType::Cell3D { .. } | ReferenceType::Range3D { .. } => {}
318            }
319        }
320
321        plan.per_formula_cells.push(per_cells);
322        plan.per_formula_ranges.push(per_ranges);
323        plan.per_formula_names.push(per_names);
324        plan.per_formula_tables.push(per_tables);
325        plan.per_formula_flags.push(flags);
326    }
327
328    Ok(plan)
329}
330
331/// Build a compact dependency plan from a mix of tree and arena ASTs.
332pub fn build_dependency_plan_mixed<'a, I>(
333    sheet_reg: &mut SheetRegistry,
334    data_store: &DataStore,
335    formulas: I,
336    policy: &CollectPolicy,
337    volatile_flags: Option<&[bool]>,
338) -> Result<DependencyPlan, ExcelError>
339where
340    I: Iterator<Item = (&'a str, u32, u32, DependencyPlanAst<'a>)>,
341{
342    let mut plan = DependencyPlan::default();
343
344    let mut cell_index: HashMap<PackedSheetCell, u32, CoordBuildHasher> =
345        HashMap::with_hasher(CoordBuildHasher);
346    let mut vertex_pool_index: HashMap<PackedSheetCell, u32, CoordBuildHasher> =
347        HashMap::with_hasher(CoordBuildHasher);
348
349    let mut ensure_vertex_pool_index =
350        |plan: &mut DependencyPlan, key: (SheetId, AbsCoord)| -> u32 {
351            let packed = PackedSheetCell::try_new(key.0, key.1.row(), key.1.col())
352                .expect("plan vertex pool coordinate must fit PackedSheetCell");
353            match vertex_pool_index.get(&packed) {
354                Some(&idx) => idx,
355                None => {
356                    let new_idx = plan.vertex_pool.len() as u32;
357                    plan.vertex_pool.push(key);
358                    plan.vertex_pool_packed.push(packed);
359                    vertex_pool_index.insert(packed, new_idx);
360                    new_idx
361                }
362            }
363        };
364
365    for (i, (sheet_name, row, col, ast)) in formulas.enumerate() {
366        let sheet_id = sheet_reg.id_for(sheet_name);
367        let target = (sheet_id, AbsCoord::from_excel(row, col));
368        plan.formula_targets.push(target);
369        let target_pool_idx = ensure_vertex_pool_index(&mut plan, target);
370        plan.formula_target_pool_indices.push(target_pool_idx);
371
372        let mut flags: FormulaFlags = 0;
373        if let Some(v) = volatile_flags.and_then(|v| v.get(i)).copied()
374            && v
375        {
376            flags |= F_VOLATILE;
377        }
378
379        let mut per_cells: Vec<u32> = Vec::new();
380        let mut per_ranges: Vec<RangeKey> = Vec::new();
381        let mut per_names: Vec<String> = Vec::new();
382        let mut per_tables: Vec<String> = Vec::new();
383
384        let refs = match ast {
385            DependencyPlanAst::Tree(ast) => ast.collect_references(policy).into_iter().collect(),
386            DependencyPlanAst::Arena(ast_id) => {
387                collect_references_arena(data_store, ast_id, sheet_reg, policy)?
388            }
389        };
390        for r in refs {
391            match r {
392                ReferenceType::Cell {
393                    sheet, row, col, ..
394                } => {
395                    let dep_sheet = sheet
396                        .as_deref()
397                        .map(|name| sheet_reg.id_for(name))
398                        .unwrap_or(sheet_id);
399                    let key = (dep_sheet, AbsCoord::from_excel(row, col));
400                    let packed = PackedSheetCell::try_new(dep_sheet, key.1.row(), key.1.col())
401                        .expect("plan dependency coordinate must fit PackedSheetCell");
402                    let idx = match cell_index.get(&packed) {
403                        Some(&idx) => idx,
404                        None => {
405                            let new_idx = plan.global_cells.len() as u32;
406                            plan.global_cells.push(key);
407                            cell_index.insert(packed, new_idx);
408                            let pool_idx = ensure_vertex_pool_index(&mut plan, key);
409                            plan.global_cell_pool_indices.push(pool_idx);
410                            new_idx
411                        }
412                    };
413                    per_cells.push(idx);
414                }
415                ReferenceType::Range {
416                    sheet,
417                    start_row,
418                    start_col,
419                    end_row,
420                    end_col,
421                    ..
422                } => {
423                    let dep_sheet = sheet
424                        .as_deref()
425                        .map(|name| sheet_reg.id_for(name))
426                        .unwrap_or(sheet_id);
427                    match (start_row, start_col, end_row, end_col) {
428                        (Some(sr), Some(sc), Some(er), Some(ec)) => {
429                            per_ranges.push(RangeKey::Rect {
430                                sheet: dep_sheet,
431                                start: AbsCoord::from_excel(sr, sc),
432                                end: AbsCoord::from_excel(er, ec),
433                            })
434                        }
435                        (None, Some(c), None, Some(ec)) if c == ec => {
436                            per_ranges.push(RangeKey::WholeCol {
437                                sheet: dep_sheet,
438                                col: c,
439                            })
440                        }
441                        (Some(r), None, Some(er), None) if r == er => {
442                            per_ranges.push(RangeKey::WholeRow {
443                                sheet: dep_sheet,
444                                row: r,
445                            })
446                        }
447                        _ => per_ranges.push(RangeKey::OpenRect {
448                            sheet: dep_sheet,
449                            start: start_row
450                                .zip(start_col)
451                                .map(|(r, c)| AbsCoord::from_excel(r, c)),
452                            end: end_row
453                                .zip(end_col)
454                                .map(|(r, c)| AbsCoord::from_excel(r, c)),
455                        }),
456                    }
457                }
458                ReferenceType::External(ext) => match ext.kind {
459                    formualizer_parse::parser::ExternalRefKind::Cell { .. } => {
460                        flags |= F_HAS_NAMES;
461                        per_names.push(ext.raw.clone());
462                    }
463                    formualizer_parse::parser::ExternalRefKind::Range { .. } => {
464                        flags |= F_HAS_TABLES;
465                        per_tables.push(ext.raw.clone());
466                    }
467                },
468                ReferenceType::NamedRange(name) => {
469                    flags |= F_HAS_NAMES;
470                    per_names.push(name);
471                }
472                ReferenceType::Table(tref) => {
473                    flags |= F_HAS_TABLES;
474                    per_tables.push(tref.name);
475                }
476                ReferenceType::Cell3D { .. } | ReferenceType::Range3D { .. } => {}
477            }
478        }
479
480        plan.per_formula_cells.push(per_cells);
481        plan.per_formula_ranges.push(per_ranges);
482        plan.per_formula_names.push(per_names);
483        plan.per_formula_tables.push(per_tables);
484        plan.per_formula_flags.push(flags);
485    }
486
487    Ok(plan)
488}
489
490#[cfg(test)]
491mod tests {
492    use super::*;
493    use crate::engine::arena::DataStore;
494    use crate::engine::sheet_registry::SheetRegistry;
495    use formualizer_parse::parse;
496
497    #[test]
498    fn mixed_arena_plan_matches_tree_plan_for_basic_refs() {
499        let asts = [
500            parse("=A1+SUM(B2:C3)+NamedThing").unwrap(),
501            parse("=Sheet2!D4+Table1[#Data]").unwrap(),
502        ];
503        let policy = CollectPolicy {
504            expand_small_ranges: true,
505            range_expansion_limit: 16,
506            include_names: true,
507        };
508
509        let mut tree_reg = SheetRegistry::new();
510        let tree_plan = build_dependency_plan(
511            &mut tree_reg,
512            asts.iter()
513                .enumerate()
514                .map(|(i, ast)| ("Sheet1", (i + 1) as u32, 5, ast)),
515            &policy,
516            Some(&[false, true]),
517        )
518        .unwrap();
519
520        let mut arena_reg = SheetRegistry::new();
521        arena_reg.id_for("Sheet1");
522        let mut store = DataStore::new();
523        let ids: Vec<_> = asts
524            .iter()
525            .map(|ast| store.store_ast(ast, &arena_reg))
526            .collect();
527        let arena_plan = build_dependency_plan_mixed(
528            &mut arena_reg,
529            &store,
530            ids.iter()
531                .enumerate()
532                .map(|(i, id)| ("Sheet1", (i + 1) as u32, 5, DependencyPlanAst::Arena(*id))),
533            &policy,
534            Some(&[false, true]),
535        )
536        .unwrap();
537
538        assert_eq!(arena_plan.formula_targets, tree_plan.formula_targets);
539        assert_eq!(arena_plan.global_cells, tree_plan.global_cells);
540        assert_eq!(arena_plan.vertex_pool, tree_plan.vertex_pool);
541        assert_eq!(
542            arena_plan.formula_target_pool_indices,
543            tree_plan.formula_target_pool_indices
544        );
545        assert_eq!(
546            arena_plan.global_cell_pool_indices,
547            tree_plan.global_cell_pool_indices
548        );
549        assert_eq!(arena_plan.per_formula_cells, tree_plan.per_formula_cells);
550        assert_eq!(arena_plan.per_formula_ranges, tree_plan.per_formula_ranges);
551        assert_eq!(arena_plan.per_formula_names, tree_plan.per_formula_names);
552        assert_eq!(arena_plan.per_formula_tables, tree_plan.per_formula_tables);
553        assert_eq!(arena_plan.per_formula_flags, tree_plan.per_formula_flags);
554    }
555}