Skip to main content

formualizer_eval/engine/graph/
formula_analysis.rs

1use super::*;
2use formualizer_parse::parser::{ASTNode, ASTNodeType, ReferenceType};
3
4// Type alias for complex return type (local to analysis).
5type ExtractDependenciesResult = Result<
6    (
7        Vec<VertexId>,
8        Vec<SharedRangeRef<'static>>,
9        Vec<CellRef>,
10        Vec<VertexId>,
11    ),
12    ExcelError,
13>;
14
15impl DependencyGraph {
16    // Helper methods for formula analysis / dependency extraction.
17
18    pub(super) fn extract_dependencies(
19        &mut self,
20        ast: &ASTNode,
21        current_sheet_id: SheetId,
22    ) -> ExtractDependenciesResult {
23        let mut dependencies = FxHashSet::default();
24        let mut range_dependencies: Vec<SharedRangeRef<'static>> = Vec::new();
25        let mut created_placeholders = Vec::new();
26        let mut named_dependencies = Vec::new();
27        self.extract_dependencies_recursive(
28            ast,
29            current_sheet_id,
30            &mut dependencies,
31            &mut range_dependencies,
32            &mut created_placeholders,
33            &mut named_dependencies,
34        )?;
35
36        // Deduplicate range references.
37        let mut deduped_ranges = Vec::new();
38        for range_ref in range_dependencies {
39            if !deduped_ranges.contains(&range_ref) {
40                deduped_ranges.push(range_ref);
41            }
42        }
43
44        named_dependencies.sort_unstable_by_key(|v| v.0);
45        named_dependencies.dedup_by_key(|v| v.0);
46
47        Ok((
48            dependencies.into_iter().collect(),
49            deduped_ranges,
50            created_placeholders,
51            named_dependencies,
52        ))
53    }
54
55    fn extract_dependencies_recursive(
56        &mut self,
57        ast: &ASTNode,
58        current_sheet_id: SheetId,
59        dependencies: &mut FxHashSet<VertexId>,
60        range_dependencies: &mut Vec<SharedRangeRef<'static>>,
61        created_placeholders: &mut Vec<CellRef>,
62        named_dependencies: &mut Vec<VertexId>,
63    ) -> Result<(), ExcelError> {
64        match &ast.node_type {
65            ASTNodeType::Reference { reference, .. } => match reference {
66                ReferenceType::External(ext) => match ext.kind {
67                    formualizer_parse::parser::ExternalRefKind::Cell { .. } => {
68                        let name = ext.raw.as_str();
69                        if let Some(source) = self.resolve_source_scalar_entry(name) {
70                            dependencies.insert(source.vertex);
71                        } else {
72                            return Err(ExcelError::new(ExcelErrorKind::Name)
73                                .with_message(format!("Undefined name: {name}")));
74                        }
75                    }
76                    formualizer_parse::parser::ExternalRefKind::Range { .. } => {
77                        let name = ext.raw.as_str();
78                        if let Some(source) = self.resolve_source_table_entry(name) {
79                            dependencies.insert(source.vertex);
80                        } else {
81                            return Err(ExcelError::new(ExcelErrorKind::Name)
82                                .with_message(format!("Undefined table: {name}")));
83                        }
84                    }
85                },
86                ReferenceType::Cell { .. } => {
87                    let vertex_id = self.get_or_create_vertex_for_reference(
88                        reference,
89                        current_sheet_id,
90                        created_placeholders,
91                    )?;
92                    dependencies.insert(vertex_id);
93                }
94                ReferenceType::Range {
95                    sheet,
96                    start_row,
97                    start_col,
98                    end_row,
99                    end_col,
100                    ..
101                } => {
102                    // If any bound is missing (infinite/partial range), always keep compressed.
103                    let has_unbounded = start_row.is_none()
104                        || end_row.is_none()
105                        || start_col.is_none()
106                        || end_col.is_none();
107                    if has_unbounded {
108                        if let Some(SharedRef::Range(range)) = reference.to_sheet_ref_lossy() {
109                            let owned = range.into_owned();
110                            let sheet_id = match owned.sheet {
111                                SharedSheetLocator::Id(id) => id,
112                                SharedSheetLocator::Current => current_sheet_id,
113                                SharedSheetLocator::Name(name) => self.sheet_id_mut(name.as_ref()),
114                            };
115                            range_dependencies.push(SharedRangeRef {
116                                sheet: SharedSheetLocator::Id(sheet_id),
117                                start_row: owned.start_row,
118                                start_col: owned.start_col,
119                                end_row: owned.end_row,
120                                end_col: owned.end_col,
121                            });
122                        }
123                    } else {
124                        let sr = start_row.unwrap();
125                        let sc = start_col.unwrap();
126                        let er = end_row.unwrap();
127                        let ec = end_col.unwrap();
128
129                        if sr > er || sc > ec {
130                            return Err(ExcelError::new(ExcelErrorKind::Ref));
131                        }
132
133                        let height = er.saturating_sub(sr) + 1;
134                        let width = ec.saturating_sub(sc) + 1;
135                        let size = (width * height) as usize;
136
137                        if size <= self.config.range_expansion_limit {
138                            // Expand to individual cells.
139                            let sheet_id = match sheet {
140                                Some(name) => self.resolve_existing_sheet_id(name)?,
141                                None => current_sheet_id,
142                            };
143                            for row in sr..=er {
144                                for col in sc..=ec {
145                                    let coord = Coord::from_excel(row, col, true, true);
146                                    let addr = CellRef::new(sheet_id, coord);
147                                    let vertex_id =
148                                        self.get_or_create_vertex(&addr, created_placeholders);
149                                    dependencies.insert(vertex_id);
150                                }
151                            }
152                        } else {
153                            // Keep as a compressed range dependency.
154                            if let Some(SharedRef::Range(range)) = reference.to_sheet_ref_lossy() {
155                                let owned = range.into_owned();
156                                let sheet_id = match owned.sheet {
157                                    SharedSheetLocator::Id(id) => id,
158                                    SharedSheetLocator::Current => current_sheet_id,
159                                    SharedSheetLocator::Name(name) => {
160                                        self.sheet_id_mut(name.as_ref())
161                                    }
162                                };
163                                range_dependencies.push(SharedRangeRef {
164                                    sheet: SharedSheetLocator::Id(sheet_id),
165                                    start_row: owned.start_row,
166                                    start_col: owned.start_col,
167                                    end_row: owned.end_row,
168                                    end_col: owned.end_col,
169                                });
170                            }
171                        }
172                    }
173                }
174                ReferenceType::NamedRange(name) => {
175                    if let Some(named_range) = self.resolve_name_entry(name, current_sheet_id) {
176                        dependencies.insert(named_range.vertex);
177                        named_dependencies.push(named_range.vertex);
178                    } else if let Some(source) = self.resolve_source_scalar_entry(name) {
179                        dependencies.insert(source.vertex);
180                    } else {
181                        return Err(ExcelError::new(ExcelErrorKind::Name)
182                            .with_message(format!("Undefined name: {name}")));
183                    }
184                }
185                ReferenceType::Table(tref) => {
186                    if let Some(table) = self.resolve_table_entry(&tref.name) {
187                        dependencies.insert(table.vertex);
188                    } else if let Some(source) = self.resolve_source_table_entry(&tref.name) {
189                        dependencies.insert(source.vertex);
190                    } else {
191                        return Err(ExcelError::new(ExcelErrorKind::Name)
192                            .with_message(format!("Undefined table: {}", tref.name)));
193                    }
194                }
195            },
196            ASTNodeType::BinaryOp { left, right, .. } => {
197                self.extract_dependencies_recursive(
198                    left,
199                    current_sheet_id,
200                    dependencies,
201                    range_dependencies,
202                    created_placeholders,
203                    named_dependencies,
204                )?;
205                self.extract_dependencies_recursive(
206                    right,
207                    current_sheet_id,
208                    dependencies,
209                    range_dependencies,
210                    created_placeholders,
211                    named_dependencies,
212                )?;
213            }
214            ASTNodeType::UnaryOp { expr, .. } => {
215                self.extract_dependencies_recursive(
216                    expr,
217                    current_sheet_id,
218                    dependencies,
219                    range_dependencies,
220                    created_placeholders,
221                    named_dependencies,
222                )?;
223            }
224            ASTNodeType::Function { args, .. } => {
225                for arg in args {
226                    self.extract_dependencies_recursive(
227                        arg,
228                        current_sheet_id,
229                        dependencies,
230                        range_dependencies,
231                        created_placeholders,
232                        named_dependencies,
233                    )?;
234                }
235            }
236            ASTNodeType::Array(rows) => {
237                for row in rows {
238                    for cell in row {
239                        self.extract_dependencies_recursive(
240                            cell,
241                            current_sheet_id,
242                            dependencies,
243                            range_dependencies,
244                            created_placeholders,
245                            named_dependencies,
246                        )?;
247                    }
248                }
249            }
250            ASTNodeType::Literal(_) => {}
251        }
252        Ok(())
253    }
254
255    /// Gets the VertexId for a reference, creating a placeholder vertex if it doesn't exist.
256    fn get_or_create_vertex_for_reference(
257        &mut self,
258        reference: &ReferenceType,
259        current_sheet_id: SheetId,
260        created_placeholders: &mut Vec<CellRef>,
261    ) -> Result<VertexId, ExcelError> {
262        match reference {
263            ReferenceType::Cell {
264                sheet, row, col, ..
265            } => {
266                let sheet_id = match sheet {
267                    Some(name) => self.resolve_existing_sheet_id(name)?,
268                    None => current_sheet_id,
269                };
270                let coord = Coord::from_excel(*row, *col, true, true);
271                let addr = CellRef::new(sheet_id, coord);
272                Ok(self.get_or_create_vertex(&addr, created_placeholders))
273            }
274            _ => Err(ExcelError::new(ExcelErrorKind::Value)
275                .with_message("Expected a cell reference, but got a range or other type.")),
276        }
277    }
278
279    #[inline]
280    pub(super) fn is_ast_volatile(&self, ast: &ASTNode) -> bool {
281        if ast.contains_volatile() {
282            return true;
283        }
284
285        use formualizer_parse::parser::ASTNodeType;
286
287        match &ast.node_type {
288            ASTNodeType::Function { name, args } => {
289                if let Some(func) = crate::function_registry::get("", name)
290                    && func.caps().contains(crate::function::FnCaps::VOLATILE)
291                {
292                    return true;
293                }
294                args.iter().any(|arg| self.is_ast_volatile(arg))
295            }
296            ASTNodeType::BinaryOp { left, right, .. } => {
297                self.is_ast_volatile(left) || self.is_ast_volatile(right)
298            }
299            ASTNodeType::UnaryOp { expr, .. } => self.is_ast_volatile(expr),
300            ASTNodeType::Array(rows) => rows
301                .iter()
302                .any(|row| row.iter().any(|cell| self.is_ast_volatile(cell))),
303            _ => false,
304        }
305    }
306
307    pub fn is_ast_dynamic(&self, ast: &ASTNode) -> bool {
308        use formualizer_parse::parser::ASTNodeType;
309
310        match &ast.node_type {
311            ASTNodeType::Function { name, args } => {
312                if let Some(func) = crate::function_registry::get("", name)
313                    && func
314                        .caps()
315                        .contains(crate::function::FnCaps::DYNAMIC_DEPENDENCY)
316                {
317                    return true;
318                }
319                args.iter().any(|arg| self.is_ast_dynamic(arg))
320            }
321            ASTNodeType::BinaryOp { left, right, .. } => {
322                self.is_ast_dynamic(left) || self.is_ast_dynamic(right)
323            }
324            ASTNodeType::UnaryOp { expr, .. } => self.is_ast_dynamic(expr),
325            ASTNodeType::Array(rows) => rows
326                .iter()
327                .any(|row| row.iter().any(|cell| self.is_ast_dynamic(cell))),
328            _ => false,
329        }
330    }
331}