Skip to main content

formualizer_eval/engine/graph/
formula_analysis.rs

1use super::*;
2use formualizer_parse::parser::{ASTNode, ASTNodeType, ReferenceType};
3
4// Type alias for complex return type (local to analysis).
5type ExtractDependenciesResult = Result<
6    (
7        Vec<VertexId>,
8        Vec<SharedRangeRef<'static>>,
9        Vec<CellRef>,
10        Vec<VertexId>,
11    ),
12    ExcelError,
13>;
14
15impl DependencyGraph {
16    // Helper methods for formula analysis / dependency extraction.
17
18    pub(super) fn extract_dependencies(
19        &mut self,
20        ast: &ASTNode,
21        current_sheet_id: SheetId,
22    ) -> ExtractDependenciesResult {
23        let mut dependencies = FxHashSet::default();
24        let mut range_dependencies: Vec<SharedRangeRef<'static>> = Vec::new();
25        let mut created_placeholders = Vec::new();
26        let mut named_dependencies = Vec::new();
27        let mut local_scopes: Vec<FxHashSet<String>> = Vec::new();
28        self.extract_dependencies_recursive(
29            ast,
30            current_sheet_id,
31            &mut dependencies,
32            &mut range_dependencies,
33            &mut created_placeholders,
34            &mut named_dependencies,
35            &mut local_scopes,
36        )?;
37
38        // Deduplicate range references.
39        let mut deduped_ranges = Vec::new();
40        for range_ref in range_dependencies {
41            if !deduped_ranges.contains(&range_ref) {
42                deduped_ranges.push(range_ref);
43            }
44        }
45
46        named_dependencies.sort_unstable_by_key(|v| v.0);
47        named_dependencies.dedup_by_key(|v| v.0);
48
49        Ok((
50            dependencies.into_iter().collect(),
51            deduped_ranges,
52            created_placeholders,
53            named_dependencies,
54        ))
55    }
56
57    fn extract_dependencies_recursive(
58        &mut self,
59        ast: &ASTNode,
60        current_sheet_id: SheetId,
61        dependencies: &mut FxHashSet<VertexId>,
62        range_dependencies: &mut Vec<SharedRangeRef<'static>>,
63        created_placeholders: &mut Vec<CellRef>,
64        named_dependencies: &mut Vec<VertexId>,
65        local_scopes: &mut Vec<FxHashSet<String>>,
66    ) -> Result<(), ExcelError> {
67        match &ast.node_type {
68            ASTNodeType::Reference { reference, .. } => match reference {
69                ReferenceType::External(ext) => match ext.kind {
70                    formualizer_parse::parser::ExternalRefKind::Cell { .. } => {
71                        let name = ext.raw.as_str();
72                        if let Some(source) = self.resolve_source_scalar_entry(name) {
73                            dependencies.insert(source.vertex);
74                        } else {
75                            return Err(ExcelError::new(ExcelErrorKind::Name)
76                                .with_message(format!("Undefined name: {name}")));
77                        }
78                    }
79                    formualizer_parse::parser::ExternalRefKind::Range { .. } => {
80                        let name = ext.raw.as_str();
81                        if let Some(source) = self.resolve_source_table_entry(name) {
82                            dependencies.insert(source.vertex);
83                        } else {
84                            return Err(ExcelError::new(ExcelErrorKind::Name)
85                                .with_message(format!("Undefined table: {name}")));
86                        }
87                    }
88                },
89                ReferenceType::Cell { .. } => {
90                    let vertex_id = self.get_or_create_vertex_for_reference(
91                        reference,
92                        current_sheet_id,
93                        created_placeholders,
94                    )?;
95                    dependencies.insert(vertex_id);
96                }
97                ReferenceType::Range {
98                    sheet,
99                    start_row,
100                    start_col,
101                    end_row,
102                    end_col,
103                    ..
104                } => {
105                    // If any bound is missing (infinite/partial range), always keep compressed.
106                    let has_unbounded = start_row.is_none()
107                        || end_row.is_none()
108                        || start_col.is_none()
109                        || end_col.is_none();
110                    if has_unbounded {
111                        if let Some(SharedRef::Range(range)) = reference.to_sheet_ref_lossy() {
112                            let owned = range.into_owned();
113                            let sheet_id = match owned.sheet {
114                                SharedSheetLocator::Id(id) => id,
115                                SharedSheetLocator::Current => current_sheet_id,
116                                SharedSheetLocator::Name(name) => self.sheet_id_mut(name.as_ref()),
117                            };
118                            range_dependencies.push(SharedRangeRef {
119                                sheet: SharedSheetLocator::Id(sheet_id),
120                                start_row: owned.start_row,
121                                start_col: owned.start_col,
122                                end_row: owned.end_row,
123                                end_col: owned.end_col,
124                            });
125                        }
126                    } else {
127                        let sr = start_row.unwrap();
128                        let sc = start_col.unwrap();
129                        let er = end_row.unwrap();
130                        let ec = end_col.unwrap();
131
132                        if sr > er || sc > ec {
133                            return Err(ExcelError::new(ExcelErrorKind::Ref));
134                        }
135
136                        let height = er.saturating_sub(sr) + 1;
137                        let width = ec.saturating_sub(sc) + 1;
138                        let size = (width * height) as usize;
139
140                        if size <= self.config.range_expansion_limit {
141                            // Expand to individual cells.
142                            let sheet_id = match sheet {
143                                Some(name) => self.resolve_existing_sheet_id(name)?,
144                                None => current_sheet_id,
145                            };
146                            for row in sr..=er {
147                                for col in sc..=ec {
148                                    let coord = Coord::from_excel(row, col, true, true);
149                                    let addr = CellRef::new(sheet_id, coord);
150                                    let vertex_id =
151                                        self.get_or_create_vertex(&addr, created_placeholders);
152                                    dependencies.insert(vertex_id);
153                                }
154                            }
155                        } else {
156                            // Keep as a compressed range dependency.
157                            if let Some(SharedRef::Range(range)) = reference.to_sheet_ref_lossy() {
158                                let owned = range.into_owned();
159                                let sheet_id = match owned.sheet {
160                                    SharedSheetLocator::Id(id) => id,
161                                    SharedSheetLocator::Current => current_sheet_id,
162                                    SharedSheetLocator::Name(name) => {
163                                        self.sheet_id_mut(name.as_ref())
164                                    }
165                                };
166                                range_dependencies.push(SharedRangeRef {
167                                    sheet: SharedSheetLocator::Id(sheet_id),
168                                    start_row: owned.start_row,
169                                    start_col: owned.start_col,
170                                    end_row: owned.end_row,
171                                    end_col: owned.end_col,
172                                });
173                            }
174                        }
175                    }
176                }
177                ReferenceType::NamedRange(name) => {
178                    let key = name.to_ascii_uppercase();
179                    if local_scopes.iter().rev().any(|scope| scope.contains(&key)) {
180                        return Ok(());
181                    }
182
183                    if let Some(named_range) = self.resolve_name_entry(name, current_sheet_id) {
184                        dependencies.insert(named_range.vertex);
185                        named_dependencies.push(named_range.vertex);
186                    } else if let Some(source) = self.resolve_source_scalar_entry(name) {
187                        dependencies.insert(source.vertex);
188                    } else {
189                        return Err(ExcelError::new(ExcelErrorKind::Name)
190                            .with_message(format!("Undefined name: {name}")));
191                    }
192                }
193                ReferenceType::Table(tref) => {
194                    if let Some(table) = self.resolve_table_entry(&tref.name) {
195                        dependencies.insert(table.vertex);
196                    } else if let Some(source) = self.resolve_source_table_entry(&tref.name) {
197                        dependencies.insert(source.vertex);
198                    } else {
199                        return Err(ExcelError::new(ExcelErrorKind::Name)
200                            .with_message(format!("Undefined table: {}", tref.name)));
201                    }
202                }
203            },
204            ASTNodeType::BinaryOp { left, right, .. } => {
205                self.extract_dependencies_recursive(
206                    left,
207                    current_sheet_id,
208                    dependencies,
209                    range_dependencies,
210                    created_placeholders,
211                    named_dependencies,
212                    local_scopes,
213                )?;
214                self.extract_dependencies_recursive(
215                    right,
216                    current_sheet_id,
217                    dependencies,
218                    range_dependencies,
219                    created_placeholders,
220                    named_dependencies,
221                    local_scopes,
222                )?;
223            }
224            ASTNodeType::UnaryOp { expr, .. } => {
225                self.extract_dependencies_recursive(
226                    expr,
227                    current_sheet_id,
228                    dependencies,
229                    range_dependencies,
230                    created_placeholders,
231                    named_dependencies,
232                    local_scopes,
233                )?;
234            }
235            ASTNodeType::Function { name, args } => {
236                if name.eq_ignore_ascii_case("LET") {
237                    if args.len() >= 3 && args.len() % 2 == 1 {
238                        local_scopes.push(FxHashSet::default());
239
240                        for pair_idx in (0..args.len() - 1).step_by(2) {
241                            self.extract_dependencies_recursive(
242                                &args[pair_idx + 1],
243                                current_sheet_id,
244                                dependencies,
245                                range_dependencies,
246                                created_placeholders,
247                                named_dependencies,
248                                local_scopes,
249                            )?;
250
251                            if let ASTNodeType::Reference {
252                                reference: ReferenceType::NamedRange(local_name),
253                                ..
254                            } = &args[pair_idx].node_type
255                                && let Some(scope) = local_scopes.last_mut()
256                            {
257                                scope.insert(local_name.to_ascii_uppercase());
258                            }
259                        }
260
261                        self.extract_dependencies_recursive(
262                            &args[args.len() - 1],
263                            current_sheet_id,
264                            dependencies,
265                            range_dependencies,
266                            created_placeholders,
267                            named_dependencies,
268                            local_scopes,
269                        )?;
270
271                        local_scopes.pop();
272                    } else {
273                        for arg in args {
274                            self.extract_dependencies_recursive(
275                                arg,
276                                current_sheet_id,
277                                dependencies,
278                                range_dependencies,
279                                created_placeholders,
280                                named_dependencies,
281                                local_scopes,
282                            )?;
283                        }
284                    }
285                } else if name.eq_ignore_ascii_case("LAMBDA") {
286                    if let Some(body) = args.last() {
287                        let mut lambda_scope = FxHashSet::default();
288                        for param in &args[..args.len().saturating_sub(1)] {
289                            if let ASTNodeType::Reference {
290                                reference: ReferenceType::NamedRange(param_name),
291                                ..
292                            } = &param.node_type
293                            {
294                                lambda_scope.insert(param_name.to_ascii_uppercase());
295                            }
296                        }
297                        local_scopes.push(lambda_scope);
298                        self.extract_dependencies_recursive(
299                            body,
300                            current_sheet_id,
301                            dependencies,
302                            range_dependencies,
303                            created_placeholders,
304                            named_dependencies,
305                            local_scopes,
306                        )?;
307                        local_scopes.pop();
308                    }
309                } else {
310                    for arg in args {
311                        self.extract_dependencies_recursive(
312                            arg,
313                            current_sheet_id,
314                            dependencies,
315                            range_dependencies,
316                            created_placeholders,
317                            named_dependencies,
318                            local_scopes,
319                        )?;
320                    }
321                }
322            }
323            ASTNodeType::Array(rows) => {
324                for row in rows {
325                    for cell in row {
326                        self.extract_dependencies_recursive(
327                            cell,
328                            current_sheet_id,
329                            dependencies,
330                            range_dependencies,
331                            created_placeholders,
332                            named_dependencies,
333                            local_scopes,
334                        )?;
335                    }
336                }
337            }
338            ASTNodeType::Literal(_) => {}
339        }
340        Ok(())
341    }
342
343    /// Gets the VertexId for a reference, creating a placeholder vertex if it doesn't exist.
344    fn get_or_create_vertex_for_reference(
345        &mut self,
346        reference: &ReferenceType,
347        current_sheet_id: SheetId,
348        created_placeholders: &mut Vec<CellRef>,
349    ) -> Result<VertexId, ExcelError> {
350        match reference {
351            ReferenceType::Cell {
352                sheet, row, col, ..
353            } => {
354                let sheet_id = match sheet {
355                    Some(name) => self.resolve_existing_sheet_id(name)?,
356                    None => current_sheet_id,
357                };
358                let coord = Coord::from_excel(*row, *col, true, true);
359                let addr = CellRef::new(sheet_id, coord);
360                Ok(self.get_or_create_vertex(&addr, created_placeholders))
361            }
362            _ => Err(ExcelError::new(ExcelErrorKind::Value)
363                .with_message("Expected a cell reference, but got a range or other type.")),
364        }
365    }
366
367    #[inline]
368    pub(super) fn is_ast_volatile(&self, ast: &ASTNode) -> bool {
369        if ast.contains_volatile() {
370            return true;
371        }
372
373        use formualizer_parse::parser::ASTNodeType;
374
375        match &ast.node_type {
376            ASTNodeType::Function { name, args } => {
377                if let Some(func) = crate::function_registry::get("", name)
378                    && func.caps().contains(crate::function::FnCaps::VOLATILE)
379                {
380                    return true;
381                }
382                args.iter().any(|arg| self.is_ast_volatile(arg))
383            }
384            ASTNodeType::BinaryOp { left, right, .. } => {
385                self.is_ast_volatile(left) || self.is_ast_volatile(right)
386            }
387            ASTNodeType::UnaryOp { expr, .. } => self.is_ast_volatile(expr),
388            ASTNodeType::Array(rows) => rows
389                .iter()
390                .any(|row| row.iter().any(|cell| self.is_ast_volatile(cell))),
391            _ => false,
392        }
393    }
394
395    pub fn is_ast_dynamic(&self, ast: &ASTNode) -> bool {
396        use formualizer_parse::parser::ASTNodeType;
397
398        match &ast.node_type {
399            ASTNodeType::Function { name, args } => {
400                if let Some(func) = crate::function_registry::get("", name)
401                    && func
402                        .caps()
403                        .contains(crate::function::FnCaps::DYNAMIC_DEPENDENCY)
404                {
405                    return true;
406                }
407                args.iter().any(|arg| self.is_ast_dynamic(arg))
408            }
409            ASTNodeType::BinaryOp { left, right, .. } => {
410                self.is_ast_dynamic(left) || self.is_ast_dynamic(right)
411            }
412            ASTNodeType::UnaryOp { expr, .. } => self.is_ast_dynamic(expr),
413            ASTNodeType::Array(rows) => rows
414                .iter()
415                .any(|row| row.iter().any(|cell| self.is_ast_dynamic(cell))),
416            _ => false,
417        }
418    }
419}