Skip to main content

formualizer_eval/engine/graph/
formula_analysis.rs

1use super::*;
2use formualizer_parse::parser::{ASTNode, ASTNodeType, ReferenceType};
3
4// Type alias for complex return types (local to analysis).
5type ExtractDependenciesResult = Result<
6    (
7        Vec<VertexId>,
8        Vec<SharedRangeRef<'static>>,
9        Vec<CellRef>,
10        Vec<VertexId>,
11    ),
12    ExcelError,
13>;
14
15type ExtractDependenciesWithPendingNamesResult = Result<
16    (
17        Vec<VertexId>,
18        Vec<SharedRangeRef<'static>>,
19        Vec<CellRef>,
20        Vec<VertexId>,
21        Vec<String>,
22    ),
23    ExcelError,
24>;
25
26#[derive(Debug, Clone, Copy, PartialEq, Eq)]
27enum UnresolvedNamePolicy {
28    Error,
29    Collect,
30}
31
32impl DependencyGraph {
33    // Helper methods for formula analysis / dependency extraction.
34
35    pub(super) fn extract_dependencies(
36        &mut self,
37        ast: &ASTNode,
38        current_sheet_id: SheetId,
39    ) -> ExtractDependenciesResult {
40        let (dependencies, ranges, placeholders, named_dependencies, _pending_names) =
41            self.extract_dependencies_inner(ast, current_sheet_id, UnresolvedNamePolicy::Error)?;
42        Ok((dependencies, ranges, placeholders, named_dependencies))
43    }
44
45    pub(super) fn extract_dependencies_with_pending_names(
46        &mut self,
47        ast: &ASTNode,
48        current_sheet_id: SheetId,
49    ) -> ExtractDependenciesWithPendingNamesResult {
50        self.extract_dependencies_inner(ast, current_sheet_id, UnresolvedNamePolicy::Collect)
51    }
52
53    fn extract_dependencies_inner(
54        &mut self,
55        ast: &ASTNode,
56        current_sheet_id: SheetId,
57        unresolved_name_policy: UnresolvedNamePolicy,
58    ) -> ExtractDependenciesWithPendingNamesResult {
59        let mut dependencies = FxHashSet::default();
60        let mut range_dependencies: Vec<SharedRangeRef<'static>> = Vec::new();
61        let mut created_placeholders = Vec::new();
62        let mut named_dependencies = Vec::new();
63        let mut unresolved_names = FxHashSet::default();
64        let mut local_scopes: Vec<FxHashSet<String>> = Vec::new();
65        self.extract_dependencies_recursive(
66            ast,
67            current_sheet_id,
68            &mut dependencies,
69            &mut range_dependencies,
70            &mut created_placeholders,
71            &mut named_dependencies,
72            &mut unresolved_names,
73            &mut local_scopes,
74            unresolved_name_policy,
75        )?;
76
77        // Deduplicate range references.
78        let mut deduped_ranges = Vec::new();
79        for range_ref in range_dependencies {
80            if !deduped_ranges.contains(&range_ref) {
81                deduped_ranges.push(range_ref);
82            }
83        }
84
85        named_dependencies.sort_unstable_by_key(|v| v.0);
86        named_dependencies.dedup_by_key(|v| v.0);
87
88        let mut unresolved_names: Vec<String> = unresolved_names.into_iter().collect();
89        unresolved_names.sort();
90
91        Ok((
92            dependencies.into_iter().collect(),
93            deduped_ranges,
94            created_placeholders,
95            named_dependencies,
96            unresolved_names,
97        ))
98    }
99
100    fn extract_dependencies_recursive(
101        &mut self,
102        ast: &ASTNode,
103        current_sheet_id: SheetId,
104        dependencies: &mut FxHashSet<VertexId>,
105        range_dependencies: &mut Vec<SharedRangeRef<'static>>,
106        created_placeholders: &mut Vec<CellRef>,
107        named_dependencies: &mut Vec<VertexId>,
108        unresolved_names: &mut FxHashSet<String>,
109        local_scopes: &mut Vec<FxHashSet<String>>,
110        unresolved_name_policy: UnresolvedNamePolicy,
111    ) -> Result<(), ExcelError> {
112        match &ast.node_type {
113            ASTNodeType::Reference { reference, .. } => match reference {
114                ReferenceType::External(ext) => match ext.kind {
115                    formualizer_parse::parser::ExternalRefKind::Cell { .. } => {
116                        let name = ext.raw.as_str();
117                        if let Some(source) = self.resolve_source_scalar_entry(name) {
118                            dependencies.insert(source.vertex);
119                        } else {
120                            return Err(ExcelError::new(ExcelErrorKind::Name)
121                                .with_message(format!("Undefined name: {name}")));
122                        }
123                    }
124                    formualizer_parse::parser::ExternalRefKind::Range { .. } => {
125                        let name = ext.raw.as_str();
126                        if let Some(source) = self.resolve_source_table_entry(name) {
127                            dependencies.insert(source.vertex);
128                        } else {
129                            return Err(ExcelError::new(ExcelErrorKind::Name)
130                                .with_message(format!("Undefined table: {name}")));
131                        }
132                    }
133                },
134                ReferenceType::Cell { .. } => {
135                    let vertex_id = self.get_or_create_vertex_for_reference(
136                        reference,
137                        current_sheet_id,
138                        created_placeholders,
139                    )?;
140                    dependencies.insert(vertex_id);
141                }
142                ReferenceType::Range {
143                    sheet,
144                    start_row,
145                    start_col,
146                    end_row,
147                    end_col,
148                    ..
149                } => {
150                    // If any bound is missing (infinite/partial range), always keep compressed.
151                    let has_unbounded = start_row.is_none()
152                        || end_row.is_none()
153                        || start_col.is_none()
154                        || end_col.is_none();
155                    if has_unbounded {
156                        if let Some(SharedRef::Range(range)) = reference.to_sheet_ref_lossy() {
157                            let owned = range.into_owned();
158                            let sheet_id = match owned.sheet {
159                                SharedSheetLocator::Id(id) => id,
160                                SharedSheetLocator::Current => current_sheet_id,
161                                SharedSheetLocator::Name(name) => self.sheet_id_mut(name.as_ref()),
162                            };
163                            range_dependencies.push(SharedRangeRef {
164                                sheet: SharedSheetLocator::Id(sheet_id),
165                                start_row: owned.start_row,
166                                start_col: owned.start_col,
167                                end_row: owned.end_row,
168                                end_col: owned.end_col,
169                            });
170                        }
171                    } else {
172                        let sr = start_row.unwrap();
173                        let sc = start_col.unwrap();
174                        let er = end_row.unwrap();
175                        let ec = end_col.unwrap();
176
177                        if sr > er || sc > ec {
178                            return Err(ExcelError::new(ExcelErrorKind::Ref));
179                        }
180
181                        let height = er.saturating_sub(sr) + 1;
182                        let width = ec.saturating_sub(sc) + 1;
183                        let size = (width * height) as usize;
184
185                        if size <= self.config.range_expansion_limit {
186                            // Expand to individual cells.
187                            let sheet_id = match sheet {
188                                Some(name) => self.resolve_existing_sheet_id(name)?,
189                                None => current_sheet_id,
190                            };
191                            for row in sr..=er {
192                                for col in sc..=ec {
193                                    let coord = Coord::from_excel(row, col, true, true);
194                                    let addr = CellRef::new(sheet_id, coord);
195                                    let vertex_id =
196                                        self.get_or_create_vertex(&addr, created_placeholders);
197                                    dependencies.insert(vertex_id);
198                                }
199                            }
200                        } else {
201                            // Keep as a compressed range dependency.
202                            if let Some(SharedRef::Range(range)) = reference.to_sheet_ref_lossy() {
203                                let owned = range.into_owned();
204                                let sheet_id = match owned.sheet {
205                                    SharedSheetLocator::Id(id) => id,
206                                    SharedSheetLocator::Current => current_sheet_id,
207                                    SharedSheetLocator::Name(name) => {
208                                        self.sheet_id_mut(name.as_ref())
209                                    }
210                                };
211                                range_dependencies.push(SharedRangeRef {
212                                    sheet: SharedSheetLocator::Id(sheet_id),
213                                    start_row: owned.start_row,
214                                    start_col: owned.start_col,
215                                    end_row: owned.end_row,
216                                    end_col: owned.end_col,
217                                });
218                            }
219                        }
220                    }
221                }
222                ReferenceType::NamedRange(name) => {
223                    let key = name.to_ascii_uppercase();
224                    if local_scopes.iter().rev().any(|scope| scope.contains(&key)) {
225                        return Ok(());
226                    }
227
228                    if let Some(named_range) = self.resolve_name_entry(name, current_sheet_id) {
229                        dependencies.insert(named_range.vertex);
230                        named_dependencies.push(named_range.vertex);
231                    } else if let Some(source) = self.resolve_source_scalar_entry(name) {
232                        dependencies.insert(source.vertex);
233                    } else {
234                        match unresolved_name_policy {
235                            UnresolvedNamePolicy::Error => {
236                                return Err(ExcelError::new(ExcelErrorKind::Name)
237                                    .with_message(format!("Undefined name: {name}")));
238                            }
239                            UnresolvedNamePolicy::Collect => {
240                                unresolved_names.insert(name.to_string());
241                            }
242                        }
243                    }
244                }
245                ReferenceType::Table(tref) => {
246                    if let Some(table) = self.resolve_table_entry(&tref.name) {
247                        dependencies.insert(table.vertex);
248                    } else if let Some(source) = self.resolve_source_table_entry(&tref.name) {
249                        dependencies.insert(source.vertex);
250                    } else {
251                        return Err(ExcelError::new(ExcelErrorKind::Name)
252                            .with_message(format!("Undefined table: {}", tref.name)));
253                    }
254                }
255            },
256            ASTNodeType::BinaryOp { left, right, .. } => {
257                self.extract_dependencies_recursive(
258                    left,
259                    current_sheet_id,
260                    dependencies,
261                    range_dependencies,
262                    created_placeholders,
263                    named_dependencies,
264                    unresolved_names,
265                    local_scopes,
266                    unresolved_name_policy,
267                )?;
268                self.extract_dependencies_recursive(
269                    right,
270                    current_sheet_id,
271                    dependencies,
272                    range_dependencies,
273                    created_placeholders,
274                    named_dependencies,
275                    unresolved_names,
276                    local_scopes,
277                    unresolved_name_policy,
278                )?;
279            }
280            ASTNodeType::UnaryOp { expr, .. } => {
281                self.extract_dependencies_recursive(
282                    expr,
283                    current_sheet_id,
284                    dependencies,
285                    range_dependencies,
286                    created_placeholders,
287                    named_dependencies,
288                    unresolved_names,
289                    local_scopes,
290                    unresolved_name_policy,
291                )?;
292            }
293            ASTNodeType::Function { name, args } => {
294                if name.eq_ignore_ascii_case("LET") {
295                    if args.len() >= 3 && args.len() % 2 == 1 {
296                        local_scopes.push(FxHashSet::default());
297
298                        for pair_idx in (0..args.len() - 1).step_by(2) {
299                            self.extract_dependencies_recursive(
300                                &args[pair_idx + 1],
301                                current_sheet_id,
302                                dependencies,
303                                range_dependencies,
304                                created_placeholders,
305                                named_dependencies,
306                                unresolved_names,
307                                local_scopes,
308                                unresolved_name_policy,
309                            )?;
310
311                            if let ASTNodeType::Reference {
312                                reference: ReferenceType::NamedRange(local_name),
313                                ..
314                            } = &args[pair_idx].node_type
315                                && let Some(scope) = local_scopes.last_mut()
316                            {
317                                scope.insert(local_name.to_ascii_uppercase());
318                            }
319                        }
320
321                        self.extract_dependencies_recursive(
322                            &args[args.len() - 1],
323                            current_sheet_id,
324                            dependencies,
325                            range_dependencies,
326                            created_placeholders,
327                            named_dependencies,
328                            unresolved_names,
329                            local_scopes,
330                            unresolved_name_policy,
331                        )?;
332
333                        local_scopes.pop();
334                    } else {
335                        for arg in args {
336                            self.extract_dependencies_recursive(
337                                arg,
338                                current_sheet_id,
339                                dependencies,
340                                range_dependencies,
341                                created_placeholders,
342                                named_dependencies,
343                                unresolved_names,
344                                local_scopes,
345                                unresolved_name_policy,
346                            )?;
347                        }
348                    }
349                } else if name.eq_ignore_ascii_case("LAMBDA") {
350                    if let Some(body) = args.last() {
351                        let mut lambda_scope = FxHashSet::default();
352                        for param in &args[..args.len().saturating_sub(1)] {
353                            if let ASTNodeType::Reference {
354                                reference: ReferenceType::NamedRange(param_name),
355                                ..
356                            } = &param.node_type
357                            {
358                                lambda_scope.insert(param_name.to_ascii_uppercase());
359                            }
360                        }
361                        local_scopes.push(lambda_scope);
362                        self.extract_dependencies_recursive(
363                            body,
364                            current_sheet_id,
365                            dependencies,
366                            range_dependencies,
367                            created_placeholders,
368                            named_dependencies,
369                            unresolved_names,
370                            local_scopes,
371                            unresolved_name_policy,
372                        )?;
373                        local_scopes.pop();
374                    }
375                } else {
376                    for arg in args {
377                        self.extract_dependencies_recursive(
378                            arg,
379                            current_sheet_id,
380                            dependencies,
381                            range_dependencies,
382                            created_placeholders,
383                            named_dependencies,
384                            unresolved_names,
385                            local_scopes,
386                            unresolved_name_policy,
387                        )?;
388                    }
389                }
390            }
391            ASTNodeType::Array(rows) => {
392                for row in rows {
393                    for cell in row {
394                        self.extract_dependencies_recursive(
395                            cell,
396                            current_sheet_id,
397                            dependencies,
398                            range_dependencies,
399                            created_placeholders,
400                            named_dependencies,
401                            unresolved_names,
402                            local_scopes,
403                            unresolved_name_policy,
404                        )?;
405                    }
406                }
407            }
408            ASTNodeType::Literal(_) => {}
409        }
410        Ok(())
411    }
412
413    /// Gets the VertexId for a reference, creating a placeholder vertex if it doesn't exist.
414    fn get_or_create_vertex_for_reference(
415        &mut self,
416        reference: &ReferenceType,
417        current_sheet_id: SheetId,
418        created_placeholders: &mut Vec<CellRef>,
419    ) -> Result<VertexId, ExcelError> {
420        match reference {
421            ReferenceType::Cell {
422                sheet, row, col, ..
423            } => {
424                let sheet_id = match sheet {
425                    Some(name) => self.resolve_existing_sheet_id(name)?,
426                    None => current_sheet_id,
427                };
428                let coord = Coord::from_excel(*row, *col, true, true);
429                let addr = CellRef::new(sheet_id, coord);
430                Ok(self.get_or_create_vertex(&addr, created_placeholders))
431            }
432            _ => Err(ExcelError::new(ExcelErrorKind::Value)
433                .with_message("Expected a cell reference, but got a range or other type.")),
434        }
435    }
436
437    #[inline]
438    pub(super) fn is_ast_volatile(&self, ast: &ASTNode) -> bool {
439        if ast.contains_volatile() {
440            return true;
441        }
442
443        use formualizer_parse::parser::ASTNodeType;
444
445        match &ast.node_type {
446            ASTNodeType::Function { name, args } => {
447                if let Some(func) = crate::function_registry::get("", name)
448                    && func.caps().contains(crate::function::FnCaps::VOLATILE)
449                {
450                    return true;
451                }
452                args.iter().any(|arg| self.is_ast_volatile(arg))
453            }
454            ASTNodeType::BinaryOp { left, right, .. } => {
455                self.is_ast_volatile(left) || self.is_ast_volatile(right)
456            }
457            ASTNodeType::UnaryOp { expr, .. } => self.is_ast_volatile(expr),
458            ASTNodeType::Array(rows) => rows
459                .iter()
460                .any(|row| row.iter().any(|cell| self.is_ast_volatile(cell))),
461            _ => false,
462        }
463    }
464
465    pub fn is_ast_dynamic(&self, ast: &ASTNode) -> bool {
466        use formualizer_parse::parser::ASTNodeType;
467
468        match &ast.node_type {
469            ASTNodeType::Function { name, args } => {
470                if let Some(func) = crate::function_registry::get("", name)
471                    && func
472                        .caps()
473                        .contains(crate::function::FnCaps::DYNAMIC_DEPENDENCY)
474                {
475                    return true;
476                }
477                args.iter().any(|arg| self.is_ast_dynamic(arg))
478            }
479            ASTNodeType::BinaryOp { left, right, .. } => {
480                self.is_ast_dynamic(left) || self.is_ast_dynamic(right)
481            }
482            ASTNodeType::UnaryOp { expr, .. } => self.is_ast_dynamic(expr),
483            ASTNodeType::Array(rows) => rows
484                .iter()
485                .any(|row| row.iter().any(|cell| self.is_ast_dynamic(cell))),
486            _ => false,
487        }
488    }
489}