spreadsheet_mcp/analysis/
formula.rs

1use crate::model::FormulaGroup;
2use crate::utils::column_number_to_name;
3use anyhow::{Context, Result};
4use formualizer_parse::{
5    ASTNode,
6    parser::{BatchParser, CollectPolicy, ReferenceType},
7    pretty::canonical_formula,
8};
9use parking_lot::{Mutex, RwLock};
10use std::collections::{HashMap, HashSet};
11use std::sync::Arc;
12use umya_spreadsheet::{CellFormulaValues, Worksheet};
13
14const RANGE_EXPANSION_LIMIT: usize = 500;
15
16#[derive(Clone)]
17pub struct FormulaAtlas {
18    parser: Arc<Mutex<BatchParser>>,
19    cache: Arc<RwLock<HashMap<String, Arc<ParsedFormula>>>>,
20    _volatility: Arc<Vec<String>>,
21}
22
23#[derive(Debug, Clone)]
24pub struct ParsedFormula {
25    pub fingerprint: String,
26    pub canonical: String,
27    pub is_volatile: bool,
28    pub dependencies: Vec<String>,
29}
30
31impl FormulaAtlas {
32    pub fn new(volatility_functions: Vec<String>) -> Self {
33        let normalized: Vec<String> = volatility_functions
34            .into_iter()
35            .map(|s| s.to_ascii_uppercase())
36            .collect();
37        let lookup = Arc::new(normalized);
38        let closure_lookup = lookup.clone();
39        let parser = BatchParser::builder()
40            .with_volatility_classifier(move |name| {
41                let upper = name.to_ascii_uppercase();
42                closure_lookup.iter().any(|entry| entry == &upper)
43            })
44            .build();
45        Self {
46            parser: Arc::new(Mutex::new(parser)),
47            cache: Arc::new(RwLock::new(HashMap::new())),
48            _volatility: lookup,
49        }
50    }
51
52    pub fn parse(&self, formula: &str) -> Result<Arc<ParsedFormula>> {
53        if let Some(existing) = self.cache.read().get(formula) {
54            return Ok(existing.clone());
55        }
56
57        let ast = {
58            let mut parser = self.parser.lock();
59            parser
60                .parse(formula)
61                .with_context(|| format!("failed to parse formula: {formula}"))?
62        };
63        let parsed = Arc::new(parsed_from_ast(&ast));
64
65        self.cache
66            .write()
67            .insert(formula.to_string(), parsed.clone());
68        Ok(parsed)
69    }
70}
71
72impl Default for FormulaAtlas {
73    fn default() -> Self {
74        Self::new(default_volatility_functions())
75    }
76}
77
78fn unescape_formula_string(s: &str) -> String {
79    s.replace("\"\"", "\"")
80}
81
82fn parsed_from_ast(ast: &ASTNode) -> ParsedFormula {
83    let fingerprint = format!("{:016x}", ast.fingerprint());
84    let canonical = unescape_formula_string(&canonical_formula(ast));
85    let dependencies = ast
86        .get_dependencies()
87        .iter()
88        .map(|reference| reference_to_string(reference))
89        .collect();
90    ParsedFormula {
91        fingerprint,
92        canonical,
93        is_volatile: ast.contains_volatile(),
94        dependencies,
95    }
96}
97
98pub struct FormulaGraph {
99    precedents: HashMap<String, Vec<String>>,
100    dependents: HashMap<String, Vec<String>>,
101    groups: HashMap<String, FormulaGroupAccumulator>,
102    range_dependents: Vec<RangeDependentEntry>,
103    sheet_name: String,
104}
105
106#[derive(Debug, Clone)]
107struct RangeDependentEntry {
108    #[allow(dead_code)]
109    range_key: String,
110    reference: ReferenceType,
111    dependents: Vec<String>,
112}
113
114impl FormulaGraph {
115    pub fn build(sheet: &Worksheet, atlas: &FormulaAtlas) -> Result<Self> {
116        let sheet_name = sheet.get_name().to_string();
117        let mut precedents_build: HashMap<String, HashSet<String>> = HashMap::new();
118        let mut dependents_build: HashMap<String, HashSet<String>> = HashMap::new();
119        let mut groups: HashMap<String, FormulaGroupAccumulator> = HashMap::new();
120        let mut range_dependents_build: HashMap<String, (ReferenceType, HashSet<String>)> =
121            HashMap::new();
122
123        let collect_policy = CollectPolicy {
124            expand_small_ranges: true,
125            range_expansion_limit: RANGE_EXPANSION_LIMIT,
126            include_names: true,
127        };
128
129        for cell in sheet.get_cell_collection() {
130            if !cell.is_formula() {
131                continue;
132            }
133            let formula_text = cell.get_formula();
134            if formula_text.is_empty() {
135                continue;
136            }
137            let formula_with_prefix = if formula_text.starts_with('=') {
138                formula_text.to_string()
139            } else {
140                format!("={}", formula_text)
141            };
142
143            let ast = {
144                let mut parser = atlas.parser.lock();
145                parser
146                    .parse(&formula_with_prefix)
147                    .with_context(|| format!("failed to parse formula: {formula_with_prefix}"))?
148            };
149
150            let fingerprint = format!("{:016x}", ast.fingerprint());
151            let canonical = unescape_formula_string(&canonical_formula(&ast));
152            let is_volatile = ast.contains_volatile();
153
154            let coordinate = cell.get_coordinate();
155            let address = coordinate.get_coordinate();
156
157            let (is_array, is_shared_type) = cell
158                .get_formula_obj()
159                .map(|obj| match obj.get_formula_type() {
160                    CellFormulaValues::Array => (true, false),
161                    CellFormulaValues::Shared => (false, true),
162                    _ => (false, false),
163                })
164                .unwrap_or((false, false));
165
166            let group =
167                groups
168                    .entry(fingerprint.clone())
169                    .or_insert_with(|| FormulaGroupAccumulator {
170                        canonical: canonical.clone(),
171                        addresses: Vec::new(),
172                        is_volatile,
173                        is_array,
174                        is_shared: is_shared_type,
175                    });
176            if cell.get_formula_shared_index().is_some() {
177                group.is_shared = true;
178            }
179            group.addresses.push(address.clone());
180            group.is_volatile |= is_volatile;
181
182            let refs = ast.collect_references(&collect_policy);
183            for reference in refs {
184                match &reference {
185                    ReferenceType::Cell { sheet, row, col } => {
186                        let dep_addr = format_cell_address(sheet.as_deref(), *row, *col);
187                        precedents_build
188                            .entry(address.clone())
189                            .or_default()
190                            .insert(dep_addr.clone());
191                        dependents_build
192                            .entry(dep_addr)
193                            .or_default()
194                            .insert(address.clone());
195                    }
196                    ReferenceType::Range {
197                        start_row,
198                        start_col,
199                        end_row,
200                        end_col,
201                        ..
202                    } => {
203                        let prec_str = reference.to_string();
204                        precedents_build
205                            .entry(address.clone())
206                            .or_default()
207                            .insert(prec_str.clone());
208
209                        if is_large_or_infinite_range(*start_row, *start_col, *end_row, *end_col) {
210                            range_dependents_build
211                                .entry(prec_str)
212                                .or_insert_with(|| (reference.clone(), HashSet::new()))
213                                .1
214                                .insert(address.clone());
215                        }
216                    }
217                    ReferenceType::NamedRange(name) => {
218                        precedents_build
219                            .entry(address.clone())
220                            .or_default()
221                            .insert(name.clone());
222                    }
223                    ReferenceType::Table(_) => {
224                        let table_str = reference.to_string();
225                        precedents_build
226                            .entry(address.clone())
227                            .or_default()
228                            .insert(table_str);
229                    }
230                }
231            }
232        }
233
234        let precedents = precedents_build
235            .into_iter()
236            .map(|(k, v)| (k, v.into_iter().collect()))
237            .collect();
238        let dependents = dependents_build
239            .into_iter()
240            .map(|(k, v)| (k, v.into_iter().collect()))
241            .collect();
242        let range_dependents = range_dependents_build
243            .into_iter()
244            .map(|(key, (ref_type, addrs))| RangeDependentEntry {
245                range_key: key,
246                reference: ref_type,
247                dependents: addrs.into_iter().collect(),
248            })
249            .collect();
250
251        Ok(Self {
252            precedents,
253            dependents,
254            groups,
255            range_dependents,
256            sheet_name,
257        })
258    }
259
260    pub fn groups(&self) -> Vec<FormulaGroup> {
261        self.groups
262            .iter()
263            .map(|(fingerprint, group)| FormulaGroup {
264                fingerprint: fingerprint.clone(),
265                addresses: group.addresses.clone(),
266                formula: group.canonical.clone(),
267                is_array: group.is_array,
268                is_shared: group.is_shared,
269                is_volatile: group.is_volatile,
270            })
271            .collect()
272    }
273
274    pub fn precedents(&self, address: &str) -> Vec<String> {
275        self.precedents.get(address).cloned().unwrap_or_default()
276    }
277
278    pub fn dependents(&self, address: &str) -> Vec<String> {
279        self.dependents_limited(address, None).0
280    }
281
282    /// Returns cells that depend on the given address, with optional limit.
283    ///
284    /// Returns (dependents, was_truncated). If limit is Some and exceeded,
285    /// was_truncated is true and only limit dependents are returned.
286    ///
287    /// Performance: O(n) where n = number of large range references in the sheet.
288    /// Early exits when limit reached to keep response times bounded.
289    pub fn dependents_limited(&self, address: &str, limit: Option<usize>) -> (Vec<String>, bool) {
290        let mut result = self.dependents.get(address).cloned().unwrap_or_default();
291        let limit = limit.unwrap_or(usize::MAX);
292
293        if result.len() >= limit {
294            result.truncate(limit);
295            return (result, true);
296        }
297
298        if let Some((row, col)) = parse_cell_address(address) {
299            let (query_sheet, _) = split_sheet_prefix(address);
300            'outer: for entry in &self.range_dependents {
301                if range_contains_cell(&entry.reference, query_sheet, &self.sheet_name, row, col) {
302                    for addr in &entry.dependents {
303                        if !result.contains(addr) {
304                            result.push(addr.clone());
305                            if result.len() >= limit {
306                                break 'outer;
307                            }
308                        }
309                    }
310                }
311            }
312        }
313
314        let truncated = result.len() >= limit;
315        (result, truncated)
316    }
317}
318
319fn format_cell_address(sheet: Option<&str>, row: u32, col: u32) -> String {
320    let col_str = column_number_to_name(col);
321    match sheet {
322        Some(s) => format!("{}!{}{}", s, col_str, row),
323        None => format!("{}{}", col_str, row),
324    }
325}
326
327fn is_large_or_infinite_range(
328    start_row: Option<u32>,
329    start_col: Option<u32>,
330    end_row: Option<u32>,
331    end_col: Option<u32>,
332) -> bool {
333    match (start_row, start_col, end_row, end_col) {
334        (Some(sr), Some(sc), Some(er), Some(ec)) => {
335            let rows = er.saturating_sub(sr) + 1;
336            let cols = ec.saturating_sub(sc) + 1;
337            (rows as usize) * (cols as usize) > RANGE_EXPANSION_LIMIT
338        }
339        _ => true,
340    }
341}
342
343fn range_contains_cell(
344    range: &ReferenceType,
345    query_sheet: Option<&str>,
346    current_sheet: &str,
347    row: u32,
348    col: u32,
349) -> bool {
350    match range {
351        ReferenceType::Range {
352            sheet: range_sheet,
353            start_row,
354            start_col,
355            end_row,
356            end_col,
357        } => {
358            let range_sheet_name = range_sheet.as_deref().unwrap_or(current_sheet);
359            let query_sheet_name = query_sheet.unwrap_or(current_sheet);
360            if !range_sheet_name.eq_ignore_ascii_case(query_sheet_name) {
361                return false;
362            }
363            let row_ok = match (start_row, end_row) {
364                (Some(sr), Some(er)) => row >= *sr && row <= *er,
365                (Some(sr), None) => row >= *sr,
366                (None, Some(er)) => row <= *er,
367                (None, None) => true,
368            };
369            let col_ok = match (start_col, end_col) {
370                (Some(sc), Some(ec)) => col >= *sc && col <= *ec,
371                (Some(sc), None) => col >= *sc,
372                (None, Some(ec)) => col <= *ec,
373                (None, None) => true,
374            };
375            row_ok && col_ok
376        }
377        _ => false,
378    }
379}
380
381fn parse_cell_address(address: &str) -> Option<(u32, u32)> {
382    let (_, cell_part) = split_sheet_prefix(address);
383    let cell_part = cell_part.trim_start_matches('$');
384
385    let mut col_str = String::new();
386    let mut row_str = String::new();
387
388    for ch in cell_part.chars() {
389        if ch == '$' {
390            continue;
391        }
392        if ch.is_ascii_alphabetic() && row_str.is_empty() {
393            col_str.push(ch.to_ascii_uppercase());
394        } else if ch.is_ascii_digit() {
395            row_str.push(ch);
396        }
397    }
398
399    if col_str.is_empty() || row_str.is_empty() {
400        return None;
401    }
402
403    let col = column_name_to_number(&col_str)?;
404    let row: u32 = row_str.parse().ok()?;
405    Some((row, col))
406}
407
408fn column_name_to_number(name: &str) -> Option<u32> {
409    let mut result: u32 = 0;
410    for ch in name.chars() {
411        if !ch.is_ascii_alphabetic() {
412            return None;
413        }
414        result = result * 26 + (ch.to_ascii_uppercase() as u32 - 'A' as u32 + 1);
415    }
416    Some(result)
417}
418
419fn split_sheet_prefix(address: &str) -> (Option<&str>, &str) {
420    if let Some(idx) = address.find('!') {
421        let sheet = &address[..idx];
422        let sheet = sheet.trim_start_matches('\'').trim_end_matches('\'');
423        let cell = &address[idx + 1..];
424        (Some(sheet), cell)
425    } else {
426        (None, address)
427    }
428}
429
430struct FormulaGroupAccumulator {
431    canonical: String,
432    addresses: Vec<String>,
433    is_volatile: bool,
434    is_array: bool,
435    is_shared: bool,
436}
437
438fn reference_to_string(reference: &ReferenceType) -> String {
439    reference.to_string()
440}
441
442pub fn normalize_cell_reference(sheet_name: &str, row: u32, col: u32) -> String {
443    format!("{}!{}{}", sheet_name, column_number_to_name(col), row)
444}
445
446fn default_volatility_functions() -> Vec<String> {
447    vec![
448        "NOW",
449        "TODAY",
450        "RAND",
451        "RANDBETWEEN",
452        "OFFSET",
453        "INDIRECT",
454        "INFO",
455        "CELL",
456        "AREAS",
457        "INDEX",
458        "MOD",
459        "ROW",
460        "COLUMN",
461        "ROWS",
462        "COLUMNS",
463        "HYPERLINK",
464    ]
465    .into_iter()
466    .map(|s| s.to_string())
467    .collect()
468}