Skip to main content

formualizer_eval/engine/graph/
formula_analysis.rs

1use super::*;
2use formualizer_parse::parser::{ASTNode, ASTNodeType, ReferenceType};
3
4// Type alias for complex return types (local to analysis).
5type ExtractDependenciesResult = Result<
6    (
7        Vec<VertexId>,
8        Vec<SharedRangeRef<'static>>,
9        Vec<CellRef>,
10        Vec<VertexId>,
11    ),
12    ExcelError,
13>;
14
15type ExtractDependenciesWithPendingNamesResult = Result<
16    (
17        Vec<VertexId>,
18        Vec<SharedRangeRef<'static>>,
19        Vec<CellRef>,
20        Vec<VertexId>,
21        Vec<String>,
22    ),
23    ExcelError,
24>;
25
26#[derive(Debug, Clone, Copy, PartialEq, Eq)]
27enum UnresolvedNamePolicy {
28    Error,
29    Collect,
30}
31
32impl DependencyGraph {
33    // Helper methods for formula analysis / dependency extraction.
34
35    pub(super) fn extract_dependencies(
36        &mut self,
37        ast: &ASTNode,
38        current_sheet_id: SheetId,
39    ) -> ExtractDependenciesResult {
40        let (dependencies, ranges, placeholders, named_dependencies, _pending_names) =
41            self.extract_dependencies_inner(ast, current_sheet_id, UnresolvedNamePolicy::Error)?;
42        Ok((dependencies, ranges, placeholders, named_dependencies))
43    }
44
45    pub(super) fn extract_dependencies_with_pending_names(
46        &mut self,
47        ast: &ASTNode,
48        current_sheet_id: SheetId,
49    ) -> ExtractDependenciesWithPendingNamesResult {
50        self.extract_dependencies_inner(ast, current_sheet_id, UnresolvedNamePolicy::Collect)
51    }
52
53    fn extract_dependencies_inner(
54        &mut self,
55        ast: &ASTNode,
56        current_sheet_id: SheetId,
57        unresolved_name_policy: UnresolvedNamePolicy,
58    ) -> ExtractDependenciesWithPendingNamesResult {
59        let mut dependencies = FxHashSet::default();
60        let mut range_dependencies: Vec<SharedRangeRef<'static>> = Vec::new();
61        let mut created_placeholders = Vec::new();
62        let mut named_dependencies = Vec::new();
63        let mut unresolved_names = FxHashSet::default();
64        let mut local_scopes: Vec<FxHashSet<String>> = Vec::new();
65        self.extract_dependencies_recursive(
66            ast,
67            current_sheet_id,
68            &mut dependencies,
69            &mut range_dependencies,
70            &mut created_placeholders,
71            &mut named_dependencies,
72            &mut unresolved_names,
73            &mut local_scopes,
74            unresolved_name_policy,
75        )?;
76
77        // Deduplicate range references.
78        let mut deduped_ranges = Vec::new();
79        for range_ref in range_dependencies {
80            if !deduped_ranges.contains(&range_ref) {
81                deduped_ranges.push(range_ref);
82            }
83        }
84
85        named_dependencies.sort_unstable_by_key(|v| v.0);
86        named_dependencies.dedup_by_key(|v| v.0);
87
88        let mut unresolved_names: Vec<String> = unresolved_names.into_iter().collect();
89        unresolved_names.sort();
90
91        Ok((
92            dependencies.into_iter().collect(),
93            deduped_ranges,
94            created_placeholders,
95            named_dependencies,
96            unresolved_names,
97        ))
98    }
99
100    fn extract_dependencies_recursive(
101        &mut self,
102        ast: &ASTNode,
103        current_sheet_id: SheetId,
104        dependencies: &mut FxHashSet<VertexId>,
105        range_dependencies: &mut Vec<SharedRangeRef<'static>>,
106        created_placeholders: &mut Vec<CellRef>,
107        named_dependencies: &mut Vec<VertexId>,
108        unresolved_names: &mut FxHashSet<String>,
109        local_scopes: &mut Vec<FxHashSet<String>>,
110        unresolved_name_policy: UnresolvedNamePolicy,
111    ) -> Result<(), ExcelError> {
112        match &ast.node_type {
113            ASTNodeType::Reference { reference, .. } => match reference {
114                ReferenceType::External(ext) => match ext.kind {
115                    formualizer_parse::parser::ExternalRefKind::Cell { .. } => {
116                        let name = ext.raw.as_str();
117                        if let Some(source) = self.resolve_source_scalar_entry(name) {
118                            dependencies.insert(source.vertex);
119                        } else {
120                            return Err(ExcelError::new(ExcelErrorKind::Name)
121                                .with_message(format!("Undefined name: {name}")));
122                        }
123                    }
124                    formualizer_parse::parser::ExternalRefKind::Range { .. } => {
125                        let name = ext.raw.as_str();
126                        if let Some(source) = self.resolve_source_table_entry(name) {
127                            dependencies.insert(source.vertex);
128                        } else {
129                            return Err(ExcelError::new(ExcelErrorKind::Name)
130                                .with_message(format!("Undefined table: {name}")));
131                        }
132                    }
133                },
134                ReferenceType::Cell { .. } => {
135                    let vertex_id = self.get_or_create_vertex_for_reference(
136                        reference,
137                        current_sheet_id,
138                        created_placeholders,
139                    )?;
140                    dependencies.insert(vertex_id);
141                }
142                ReferenceType::Range {
143                    sheet,
144                    start_row,
145                    start_col,
146                    end_row,
147                    end_col,
148                    ..
149                } => {
150                    // If any bound is missing (infinite/partial range), always keep compressed.
151                    let has_unbounded = start_row.is_none()
152                        || end_row.is_none()
153                        || start_col.is_none()
154                        || end_col.is_none();
155                    if has_unbounded {
156                        if let Some(SharedRef::Range(range)) = reference.to_sheet_ref_lossy() {
157                            let owned = range.into_owned();
158                            let sheet_id = match owned.sheet {
159                                SharedSheetLocator::Id(id) => id,
160                                SharedSheetLocator::Current => current_sheet_id,
161                                SharedSheetLocator::Name(name) => self.sheet_id_mut(name.as_ref()),
162                            };
163                            range_dependencies.push(SharedRangeRef {
164                                sheet: SharedSheetLocator::Id(sheet_id),
165                                start_row: owned.start_row,
166                                start_col: owned.start_col,
167                                end_row: owned.end_row,
168                                end_col: owned.end_col,
169                            });
170                        }
171                    } else {
172                        let sr = start_row.unwrap();
173                        let sc = start_col.unwrap();
174                        let er = end_row.unwrap();
175                        let ec = end_col.unwrap();
176
177                        if sr > er || sc > ec {
178                            return Err(ExcelError::new(ExcelErrorKind::Ref));
179                        }
180
181                        let height = er.saturating_sub(sr) + 1;
182                        let width = ec.saturating_sub(sc) + 1;
183                        let size = (width * height) as usize;
184
185                        if size <= self.config.range_expansion_limit {
186                            // Expand to individual cells.
187                            let sheet_id = match sheet {
188                                Some(name) => self.resolve_existing_sheet_id(name)?,
189                                None => current_sheet_id,
190                            };
191                            for row in sr..=er {
192                                for col in sc..=ec {
193                                    let coord = Coord::from_excel(row, col, true, true);
194                                    let addr = CellRef::new(sheet_id, coord);
195                                    let vertex_id =
196                                        self.get_or_create_vertex(&addr, created_placeholders);
197                                    dependencies.insert(vertex_id);
198                                }
199                            }
200                        } else {
201                            // Keep as a compressed range dependency.
202                            if let Some(SharedRef::Range(range)) = reference.to_sheet_ref_lossy() {
203                                let owned = range.into_owned();
204                                let sheet_id = match owned.sheet {
205                                    SharedSheetLocator::Id(id) => id,
206                                    SharedSheetLocator::Current => current_sheet_id,
207                                    SharedSheetLocator::Name(name) => {
208                                        self.sheet_id_mut(name.as_ref())
209                                    }
210                                };
211                                range_dependencies.push(SharedRangeRef {
212                                    sheet: SharedSheetLocator::Id(sheet_id),
213                                    start_row: owned.start_row,
214                                    start_col: owned.start_col,
215                                    end_row: owned.end_row,
216                                    end_col: owned.end_col,
217                                });
218                            }
219                        }
220                    }
221                }
222                ReferenceType::NamedRange(name) => {
223                    let key = name.to_ascii_uppercase();
224                    if local_scopes.iter().rev().any(|scope| scope.contains(&key)) {
225                        return Ok(());
226                    }
227
228                    if let Some(named_range) = self.resolve_name_entry(name, current_sheet_id) {
229                        dependencies.insert(named_range.vertex);
230                        named_dependencies.push(named_range.vertex);
231                    } else if let Some(source) = self.resolve_source_scalar_entry(name) {
232                        dependencies.insert(source.vertex);
233                    } else {
234                        match unresolved_name_policy {
235                            UnresolvedNamePolicy::Error => {
236                                return Err(ExcelError::new(ExcelErrorKind::Name)
237                                    .with_message(format!("Undefined name: {name}")));
238                            }
239                            UnresolvedNamePolicy::Collect => {
240                                unresolved_names.insert(name.to_string());
241                            }
242                        }
243                    }
244                }
245                ReferenceType::Table(tref) => {
246                    if let Some(table) = self.resolve_table_entry(&tref.name) {
247                        dependencies.insert(table.vertex);
248                    } else if let Some(source) = self.resolve_source_table_entry(&tref.name) {
249                        dependencies.insert(source.vertex);
250                    } else {
251                        return Err(ExcelError::new(ExcelErrorKind::Name)
252                            .with_message(format!("Undefined table: {}", tref.name)));
253                    }
254                }
255                // 3D references parse correctly but aren't yet wired through
256                // the dependency graph; treat them as no-op dependencies for
257                // now so formulas containing them still load. Evaluation will
258                // surface #N/IMPL! via the Resolver path.
259                ReferenceType::Cell3D { .. } | ReferenceType::Range3D { .. } => {}
260            },
261            ASTNodeType::BinaryOp { left, right, .. } => {
262                self.extract_dependencies_recursive(
263                    left,
264                    current_sheet_id,
265                    dependencies,
266                    range_dependencies,
267                    created_placeholders,
268                    named_dependencies,
269                    unresolved_names,
270                    local_scopes,
271                    unresolved_name_policy,
272                )?;
273                self.extract_dependencies_recursive(
274                    right,
275                    current_sheet_id,
276                    dependencies,
277                    range_dependencies,
278                    created_placeholders,
279                    named_dependencies,
280                    unresolved_names,
281                    local_scopes,
282                    unresolved_name_policy,
283                )?;
284            }
285            ASTNodeType::UnaryOp { expr, .. } => {
286                self.extract_dependencies_recursive(
287                    expr,
288                    current_sheet_id,
289                    dependencies,
290                    range_dependencies,
291                    created_placeholders,
292                    named_dependencies,
293                    unresolved_names,
294                    local_scopes,
295                    unresolved_name_policy,
296                )?;
297            }
298            ASTNodeType::Function { name, args } => {
299                if name.eq_ignore_ascii_case("LET") {
300                    if args.len() >= 3 && args.len() % 2 == 1 {
301                        local_scopes.push(FxHashSet::default());
302
303                        for pair_idx in (0..args.len() - 1).step_by(2) {
304                            self.extract_dependencies_recursive(
305                                &args[pair_idx + 1],
306                                current_sheet_id,
307                                dependencies,
308                                range_dependencies,
309                                created_placeholders,
310                                named_dependencies,
311                                unresolved_names,
312                                local_scopes,
313                                unresolved_name_policy,
314                            )?;
315
316                            if let ASTNodeType::Reference {
317                                reference: ReferenceType::NamedRange(local_name),
318                                ..
319                            } = &args[pair_idx].node_type
320                                && let Some(scope) = local_scopes.last_mut()
321                            {
322                                scope.insert(local_name.to_ascii_uppercase());
323                            }
324                        }
325
326                        self.extract_dependencies_recursive(
327                            &args[args.len() - 1],
328                            current_sheet_id,
329                            dependencies,
330                            range_dependencies,
331                            created_placeholders,
332                            named_dependencies,
333                            unresolved_names,
334                            local_scopes,
335                            unresolved_name_policy,
336                        )?;
337
338                        local_scopes.pop();
339                    } else {
340                        for arg in args {
341                            self.extract_dependencies_recursive(
342                                arg,
343                                current_sheet_id,
344                                dependencies,
345                                range_dependencies,
346                                created_placeholders,
347                                named_dependencies,
348                                unresolved_names,
349                                local_scopes,
350                                unresolved_name_policy,
351                            )?;
352                        }
353                    }
354                } else if name.eq_ignore_ascii_case("LAMBDA") {
355                    if let Some(body) = args.last() {
356                        let mut lambda_scope = FxHashSet::default();
357                        for param in &args[..args.len().saturating_sub(1)] {
358                            if let ASTNodeType::Reference {
359                                reference: ReferenceType::NamedRange(param_name),
360                                ..
361                            } = &param.node_type
362                            {
363                                lambda_scope.insert(param_name.to_ascii_uppercase());
364                            }
365                        }
366                        local_scopes.push(lambda_scope);
367                        self.extract_dependencies_recursive(
368                            body,
369                            current_sheet_id,
370                            dependencies,
371                            range_dependencies,
372                            created_placeholders,
373                            named_dependencies,
374                            unresolved_names,
375                            local_scopes,
376                            unresolved_name_policy,
377                        )?;
378                        local_scopes.pop();
379                    }
380                } else {
381                    for arg in args {
382                        self.extract_dependencies_recursive(
383                            arg,
384                            current_sheet_id,
385                            dependencies,
386                            range_dependencies,
387                            created_placeholders,
388                            named_dependencies,
389                            unresolved_names,
390                            local_scopes,
391                            unresolved_name_policy,
392                        )?;
393                    }
394                }
395            }
396            ASTNodeType::Array(rows) => {
397                for row in rows {
398                    for cell in row {
399                        self.extract_dependencies_recursive(
400                            cell,
401                            current_sheet_id,
402                            dependencies,
403                            range_dependencies,
404                            created_placeholders,
405                            named_dependencies,
406                            unresolved_names,
407                            local_scopes,
408                            unresolved_name_policy,
409                        )?;
410                    }
411                }
412            }
413            ASTNodeType::Call { callee, args } => {
414                // Walk both the callee and the call arguments so any references
415                // they contain are tracked. Full evaluator semantics for
416                // immediate-invocation calls are not yet implemented, but
417                // dependency collection must still cover them.
418                self.extract_dependencies_recursive(
419                    callee,
420                    current_sheet_id,
421                    dependencies,
422                    range_dependencies,
423                    created_placeholders,
424                    named_dependencies,
425                    unresolved_names,
426                    local_scopes,
427                    unresolved_name_policy,
428                )?;
429                for arg in args {
430                    self.extract_dependencies_recursive(
431                        arg,
432                        current_sheet_id,
433                        dependencies,
434                        range_dependencies,
435                        created_placeholders,
436                        named_dependencies,
437                        unresolved_names,
438                        local_scopes,
439                        unresolved_name_policy,
440                    )?;
441                }
442            }
443            ASTNodeType::Literal(_) => {}
444        }
445        Ok(())
446    }
447
448    /// Gets the VertexId for a reference, creating a placeholder vertex if it doesn't exist.
449    fn get_or_create_vertex_for_reference(
450        &mut self,
451        reference: &ReferenceType,
452        current_sheet_id: SheetId,
453        created_placeholders: &mut Vec<CellRef>,
454    ) -> Result<VertexId, ExcelError> {
455        match reference {
456            ReferenceType::Cell {
457                sheet, row, col, ..
458            } => {
459                let sheet_id = match sheet {
460                    Some(name) => self.resolve_existing_sheet_id(name)?,
461                    None => current_sheet_id,
462                };
463                let coord = Coord::from_excel(*row, *col, true, true);
464                let addr = CellRef::new(sheet_id, coord);
465                Ok(self.get_or_create_vertex(&addr, created_placeholders))
466            }
467            _ => Err(ExcelError::new(ExcelErrorKind::Value)
468                .with_message("Expected a cell reference, but got a range or other type.")),
469        }
470    }
471
472    #[inline]
473    pub(super) fn is_ast_volatile(&self, ast: &ASTNode) -> bool {
474        if ast.contains_volatile() {
475            return true;
476        }
477
478        use formualizer_parse::parser::ASTNodeType;
479
480        match &ast.node_type {
481            ASTNodeType::Function { name, args } => {
482                if let Some(func) = crate::function_registry::get("", name)
483                    && func.caps().contains(crate::function::FnCaps::VOLATILE)
484                {
485                    return true;
486                }
487                args.iter().any(|arg| self.is_ast_volatile(arg))
488            }
489            ASTNodeType::BinaryOp { left, right, .. } => {
490                self.is_ast_volatile(left) || self.is_ast_volatile(right)
491            }
492            ASTNodeType::UnaryOp { expr, .. } => self.is_ast_volatile(expr),
493            ASTNodeType::Array(rows) => rows
494                .iter()
495                .any(|row| row.iter().any(|cell| self.is_ast_volatile(cell))),
496            ASTNodeType::Call { callee, args } => {
497                self.is_ast_volatile(callee) || args.iter().any(|a| self.is_ast_volatile(a))
498            }
499            _ => false,
500        }
501    }
502
503    pub fn is_ast_dynamic(&self, ast: &ASTNode) -> bool {
504        use formualizer_parse::parser::ASTNodeType;
505
506        match &ast.node_type {
507            ASTNodeType::Function { name, args } => {
508                if let Some(func) = crate::function_registry::get("", name)
509                    && func
510                        .caps()
511                        .contains(crate::function::FnCaps::DYNAMIC_DEPENDENCY)
512                {
513                    return true;
514                }
515                args.iter().any(|arg| self.is_ast_dynamic(arg))
516            }
517            ASTNodeType::BinaryOp { left, right, .. } => {
518                self.is_ast_dynamic(left) || self.is_ast_dynamic(right)
519            }
520            ASTNodeType::UnaryOp { expr, .. } => self.is_ast_dynamic(expr),
521            ASTNodeType::Array(rows) => rows
522                .iter()
523                .any(|row| row.iter().any(|cell| self.is_ast_dynamic(cell))),
524            ASTNodeType::Call { callee, args } => {
525                self.is_ast_dynamic(callee) || args.iter().any(|a| self.is_ast_dynamic(a))
526            }
527            _ => false,
528        }
529    }
530}