1use crate::model::FormulaGroup;
2use crate::utils::column_number_to_name;
3use anyhow::{Context, Result};
4use formualizer_parse::{
5 ASTNode,
6 parser::{BatchParser, CollectPolicy, ReferenceType},
7 pretty::canonical_formula,
8};
9use parking_lot::{Mutex, RwLock};
10use std::collections::{HashMap, HashSet};
11use std::sync::Arc;
12use umya_spreadsheet::{CellFormulaValues, Worksheet};
13
14const RANGE_EXPANSION_LIMIT: usize = 500;
15
16#[derive(Clone)]
17pub struct FormulaAtlas {
18 parser: Arc<Mutex<BatchParser>>,
19 cache: Arc<RwLock<HashMap<String, Arc<ParsedFormula>>>>,
20 _volatility: Arc<Vec<String>>,
21}
22
23#[derive(Debug, Clone)]
24pub struct ParsedFormula {
25 pub fingerprint: String,
26 pub canonical: String,
27 pub is_volatile: bool,
28 pub dependencies: Vec<String>,
29}
30
31impl FormulaAtlas {
32 pub fn new(volatility_functions: Vec<String>) -> Self {
33 let normalized: Vec<String> = volatility_functions
34 .into_iter()
35 .map(|s| s.to_ascii_uppercase())
36 .collect();
37 let lookup = Arc::new(normalized);
38 let closure_lookup = lookup.clone();
39 let parser = BatchParser::builder()
40 .with_volatility_classifier(move |name| {
41 let upper = name.to_ascii_uppercase();
42 closure_lookup.iter().any(|entry| entry == &upper)
43 })
44 .build();
45 Self {
46 parser: Arc::new(Mutex::new(parser)),
47 cache: Arc::new(RwLock::new(HashMap::new())),
48 _volatility: lookup,
49 }
50 }
51
52 pub fn parse(&self, formula: &str) -> Result<Arc<ParsedFormula>> {
53 if let Some(existing) = self.cache.read().get(formula) {
54 return Ok(existing.clone());
55 }
56
57 let ast = {
58 let mut parser = self.parser.lock();
59 parser
60 .parse(formula)
61 .with_context(|| format!("failed to parse formula: {formula}"))?
62 };
63 let parsed = Arc::new(parsed_from_ast(&ast));
64
65 self.cache
66 .write()
67 .insert(formula.to_string(), parsed.clone());
68 Ok(parsed)
69 }
70}
71
72impl Default for FormulaAtlas {
73 fn default() -> Self {
74 Self::new(default_volatility_functions())
75 }
76}
77
78fn unescape_formula_string(s: &str) -> String {
79 s.replace("\"\"", "\"")
80}
81
82fn parsed_from_ast(ast: &ASTNode) -> ParsedFormula {
83 let fingerprint = format!("{:016x}", ast.fingerprint());
84 let canonical = unescape_formula_string(&canonical_formula(ast));
85 let dependencies = ast
86 .get_dependencies()
87 .iter()
88 .map(|reference| reference_to_string(reference))
89 .collect();
90 ParsedFormula {
91 fingerprint,
92 canonical,
93 is_volatile: ast.contains_volatile(),
94 dependencies,
95 }
96}
97
98pub struct FormulaGraph {
99 precedents: HashMap<String, Vec<String>>,
100 dependents: HashMap<String, Vec<String>>,
101 groups: HashMap<String, FormulaGroupAccumulator>,
102 range_dependents: Vec<RangeDependentEntry>,
103 sheet_name: String,
104}
105
106#[derive(Debug, Clone)]
107struct RangeDependentEntry {
108 #[allow(dead_code)]
109 range_key: String,
110 reference: ReferenceType,
111 dependents: Vec<String>,
112}
113
114impl FormulaGraph {
115 pub fn build(sheet: &Worksheet, atlas: &FormulaAtlas) -> Result<Self> {
116 let sheet_name = sheet.get_name().to_string();
117 let mut precedents_build: HashMap<String, HashSet<String>> = HashMap::new();
118 let mut dependents_build: HashMap<String, HashSet<String>> = HashMap::new();
119 let mut groups: HashMap<String, FormulaGroupAccumulator> = HashMap::new();
120 let mut range_dependents_build: HashMap<String, (ReferenceType, HashSet<String>)> =
121 HashMap::new();
122
123 let collect_policy = CollectPolicy {
124 expand_small_ranges: true,
125 range_expansion_limit: RANGE_EXPANSION_LIMIT,
126 include_names: true,
127 };
128
129 for cell in sheet.get_cell_collection() {
130 if !cell.is_formula() {
131 continue;
132 }
133 let formula_text = cell.get_formula();
134 if formula_text.is_empty() {
135 continue;
136 }
137 let formula_with_prefix = if formula_text.starts_with('=') {
138 formula_text.to_string()
139 } else {
140 format!("={}", formula_text)
141 };
142
143 let ast = {
144 let mut parser = atlas.parser.lock();
145 parser
146 .parse(&formula_with_prefix)
147 .with_context(|| format!("failed to parse formula: {formula_with_prefix}"))?
148 };
149
150 let fingerprint = format!("{:016x}", ast.fingerprint());
151 let canonical = unescape_formula_string(&canonical_formula(&ast));
152 let is_volatile = ast.contains_volatile();
153
154 let coordinate = cell.get_coordinate();
155 let address = coordinate.get_coordinate();
156
157 let (is_array, is_shared_type) = cell
158 .get_formula_obj()
159 .map(|obj| match obj.get_formula_type() {
160 CellFormulaValues::Array => (true, false),
161 CellFormulaValues::Shared => (false, true),
162 _ => (false, false),
163 })
164 .unwrap_or((false, false));
165
166 let group =
167 groups
168 .entry(fingerprint.clone())
169 .or_insert_with(|| FormulaGroupAccumulator {
170 canonical: canonical.clone(),
171 addresses: Vec::new(),
172 is_volatile,
173 is_array,
174 is_shared: is_shared_type,
175 });
176 if cell.get_formula_shared_index().is_some() {
177 group.is_shared = true;
178 }
179 group.addresses.push(address.clone());
180 group.is_volatile |= is_volatile;
181
182 let refs = ast.collect_references(&collect_policy);
183 for reference in refs {
184 match &reference {
185 ReferenceType::Cell { sheet, row, col } => {
186 let dep_addr = format_cell_address(sheet.as_deref(), *row, *col);
187 precedents_build
188 .entry(address.clone())
189 .or_default()
190 .insert(dep_addr.clone());
191 dependents_build
192 .entry(dep_addr)
193 .or_default()
194 .insert(address.clone());
195 }
196 ReferenceType::Range {
197 start_row,
198 start_col,
199 end_row,
200 end_col,
201 ..
202 } => {
203 let prec_str = reference.to_string();
204 precedents_build
205 .entry(address.clone())
206 .or_default()
207 .insert(prec_str.clone());
208
209 if is_large_or_infinite_range(*start_row, *start_col, *end_row, *end_col) {
210 range_dependents_build
211 .entry(prec_str)
212 .or_insert_with(|| (reference.clone(), HashSet::new()))
213 .1
214 .insert(address.clone());
215 }
216 }
217 ReferenceType::NamedRange(name) => {
218 precedents_build
219 .entry(address.clone())
220 .or_default()
221 .insert(name.clone());
222 }
223 ReferenceType::Table(_) => {
224 let table_str = reference.to_string();
225 precedents_build
226 .entry(address.clone())
227 .or_default()
228 .insert(table_str);
229 }
230 }
231 }
232 }
233
234 let precedents = precedents_build
235 .into_iter()
236 .map(|(k, v)| (k, v.into_iter().collect()))
237 .collect();
238 let dependents = dependents_build
239 .into_iter()
240 .map(|(k, v)| (k, v.into_iter().collect()))
241 .collect();
242 let range_dependents = range_dependents_build
243 .into_iter()
244 .map(|(key, (ref_type, addrs))| RangeDependentEntry {
245 range_key: key,
246 reference: ref_type,
247 dependents: addrs.into_iter().collect(),
248 })
249 .collect();
250
251 Ok(Self {
252 precedents,
253 dependents,
254 groups,
255 range_dependents,
256 sheet_name,
257 })
258 }
259
260 pub fn groups(&self) -> Vec<FormulaGroup> {
261 self.groups
262 .iter()
263 .map(|(fingerprint, group)| FormulaGroup {
264 fingerprint: fingerprint.clone(),
265 addresses: group.addresses.clone(),
266 formula: group.canonical.clone(),
267 is_array: group.is_array,
268 is_shared: group.is_shared,
269 is_volatile: group.is_volatile,
270 })
271 .collect()
272 }
273
274 pub fn precedents(&self, address: &str) -> Vec<String> {
275 self.precedents.get(address).cloned().unwrap_or_default()
276 }
277
278 pub fn dependents(&self, address: &str) -> Vec<String> {
279 self.dependents_limited(address, None).0
280 }
281
282 pub fn dependents_limited(&self, address: &str, limit: Option<usize>) -> (Vec<String>, bool) {
290 let mut result = self.dependents.get(address).cloned().unwrap_or_default();
291 let limit = limit.unwrap_or(usize::MAX);
292
293 if result.len() >= limit {
294 result.truncate(limit);
295 return (result, true);
296 }
297
298 if let Some((row, col)) = parse_cell_address(address) {
299 let (query_sheet, _) = split_sheet_prefix(address);
300 'outer: for entry in &self.range_dependents {
301 if range_contains_cell(&entry.reference, query_sheet, &self.sheet_name, row, col) {
302 for addr in &entry.dependents {
303 if !result.contains(addr) {
304 result.push(addr.clone());
305 if result.len() >= limit {
306 break 'outer;
307 }
308 }
309 }
310 }
311 }
312 }
313
314 let truncated = result.len() >= limit;
315 (result, truncated)
316 }
317}
318
319fn format_cell_address(sheet: Option<&str>, row: u32, col: u32) -> String {
320 let col_str = column_number_to_name(col);
321 match sheet {
322 Some(s) => format!("{}!{}{}", s, col_str, row),
323 None => format!("{}{}", col_str, row),
324 }
325}
326
327fn is_large_or_infinite_range(
328 start_row: Option<u32>,
329 start_col: Option<u32>,
330 end_row: Option<u32>,
331 end_col: Option<u32>,
332) -> bool {
333 match (start_row, start_col, end_row, end_col) {
334 (Some(sr), Some(sc), Some(er), Some(ec)) => {
335 let rows = er.saturating_sub(sr) + 1;
336 let cols = ec.saturating_sub(sc) + 1;
337 (rows as usize) * (cols as usize) > RANGE_EXPANSION_LIMIT
338 }
339 _ => true,
340 }
341}
342
343fn range_contains_cell(
344 range: &ReferenceType,
345 query_sheet: Option<&str>,
346 current_sheet: &str,
347 row: u32,
348 col: u32,
349) -> bool {
350 match range {
351 ReferenceType::Range {
352 sheet: range_sheet,
353 start_row,
354 start_col,
355 end_row,
356 end_col,
357 } => {
358 let range_sheet_name = range_sheet.as_deref().unwrap_or(current_sheet);
359 let query_sheet_name = query_sheet.unwrap_or(current_sheet);
360 if !range_sheet_name.eq_ignore_ascii_case(query_sheet_name) {
361 return false;
362 }
363 let row_ok = match (start_row, end_row) {
364 (Some(sr), Some(er)) => row >= *sr && row <= *er,
365 (Some(sr), None) => row >= *sr,
366 (None, Some(er)) => row <= *er,
367 (None, None) => true,
368 };
369 let col_ok = match (start_col, end_col) {
370 (Some(sc), Some(ec)) => col >= *sc && col <= *ec,
371 (Some(sc), None) => col >= *sc,
372 (None, Some(ec)) => col <= *ec,
373 (None, None) => true,
374 };
375 row_ok && col_ok
376 }
377 _ => false,
378 }
379}
380
381fn parse_cell_address(address: &str) -> Option<(u32, u32)> {
382 let (_, cell_part) = split_sheet_prefix(address);
383 let cell_part = cell_part.trim_start_matches('$');
384
385 let mut col_str = String::new();
386 let mut row_str = String::new();
387
388 for ch in cell_part.chars() {
389 if ch == '$' {
390 continue;
391 }
392 if ch.is_ascii_alphabetic() && row_str.is_empty() {
393 col_str.push(ch.to_ascii_uppercase());
394 } else if ch.is_ascii_digit() {
395 row_str.push(ch);
396 }
397 }
398
399 if col_str.is_empty() || row_str.is_empty() {
400 return None;
401 }
402
403 let col = column_name_to_number(&col_str)?;
404 let row: u32 = row_str.parse().ok()?;
405 Some((row, col))
406}
407
408fn column_name_to_number(name: &str) -> Option<u32> {
409 let mut result: u32 = 0;
410 for ch in name.chars() {
411 if !ch.is_ascii_alphabetic() {
412 return None;
413 }
414 result = result * 26 + (ch.to_ascii_uppercase() as u32 - 'A' as u32 + 1);
415 }
416 Some(result)
417}
418
419fn split_sheet_prefix(address: &str) -> (Option<&str>, &str) {
420 if let Some(idx) = address.find('!') {
421 let sheet = &address[..idx];
422 let sheet = sheet.trim_start_matches('\'').trim_end_matches('\'');
423 let cell = &address[idx + 1..];
424 (Some(sheet), cell)
425 } else {
426 (None, address)
427 }
428}
429
430struct FormulaGroupAccumulator {
431 canonical: String,
432 addresses: Vec<String>,
433 is_volatile: bool,
434 is_array: bool,
435 is_shared: bool,
436}
437
438fn reference_to_string(reference: &ReferenceType) -> String {
439 reference.to_string()
440}
441
442pub fn normalize_cell_reference(sheet_name: &str, row: u32, col: u32) -> String {
443 format!("{}!{}{}", sheet_name, column_number_to_name(col), row)
444}
445
446fn default_volatility_functions() -> Vec<String> {
447 vec![
448 "NOW",
449 "TODAY",
450 "RAND",
451 "RANDBETWEEN",
452 "OFFSET",
453 "INDIRECT",
454 "INFO",
455 "CELL",
456 "AREAS",
457 "INDEX",
458 "MOD",
459 "ROW",
460 "COLUMN",
461 "ROWS",
462 "COLUMNS",
463 "HYPERLINK",
464 ]
465 .into_iter()
466 .map(|s| s.to_string())
467 .collect()
468}