Skip to main content

formualizer_parse/
parser.rs

1use crate::structured_ref;
2use crate::tokenizer::{Associativity, Token, TokenSubType, TokenType, Tokenizer, TokenizerError};
3use crate::types::{FormulaDialect, ParsingError};
4use crate::{ExcelError, LiteralValue};
5
6#[cfg(feature = "serde")]
7use serde::{Deserialize, Serialize};
8
9use crate::hasher::FormulaHasher;
10use formualizer_common::coord::{
11    col_index_from_letters_1based, col_letters_from_1based, parse_a1_1based,
12};
13use formualizer_common::{
14    AxisBound, RelativeCoord, SheetCellRef, SheetLocator, SheetRangeRef, SheetRef,
15};
16use once_cell::sync::Lazy;
17use smallvec::SmallVec;
18use std::error::Error;
19use std::fmt::{self, Display};
20use std::hash::{Hash, Hasher};
21use std::str::FromStr;
22use std::sync::Arc;
23
/// Unsized callable type: takes a `&str`, returns `bool`, and must be
/// `Send + Sync + 'static`.
// NOTE(review): presumably the `&str` is a function name and the result says
// whether that function is volatile — confirm against the call sites.
type VolatilityFn = dyn Fn(&str) -> bool + Send + Sync + 'static;
/// Uniquely owned (`Box`) handle to a [`VolatilityFn`].
type VolatilityClassifierBox = Box<VolatilityFn>;
/// Shared, reference-counted (`Arc`) handle to a [`VolatilityFn`].
type VolatilityClassifierArc = Arc<VolatilityFn>;
27
/// Error reported by the parser, optionally tagged with the position it refers to.
#[derive(Debug)]
pub struct ParserError {
    /// Human-readable description of the failure.
    pub message: String,
    /// Position associated with the failure, when one is known.
    pub position: Option<usize>,
}

impl Display for ParserError {
    /// Renders `ParserError at position <pos>: <message>` when a position is
    /// set, and `ParserError: <message>` otherwise.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self { message, position } = self;
        match position {
            Some(pos) => write!(f, "ParserError at position {pos}: {message}"),
            None => write!(f, "ParserError: {message}"),
        }
    }
}

impl Error for ParserError {}
46
47// Column lookup table for common columns (A-ZZ = 702 columns)
48static COLUMN_LOOKUP: Lazy<Vec<String>> = Lazy::new(|| {
49    let mut cols = Vec::with_capacity(702);
50    // Single letters A-Z
51    for c in b'A'..=b'Z' {
52        cols.push(String::from(c as char));
53    }
54    // Double letters AA-ZZ
55    for c1 in b'A'..=b'Z' {
56        for c2 in b'A'..=b'Z' {
57            cols.push(format!("{}{}", c1 as char, c2 as char));
58        }
59    }
60    cols
61});
62
/// Selects which part of a table a structured reference addresses.
///
/// The `Display` impl prints each variant without enclosing brackets, except
/// `Combination`, which wraps each of its parts in `[...]` itself.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, PartialEq, Hash)]
pub enum TableSpecifier {
    /// The entire table; displayed as `#All`.
    All,
    /// The data area of the table (no headers or totals); displayed as `#Data`.
    Data,
    /// The headers row; displayed as `#Headers`.
    Headers,
    /// The totals row; displayed as `#Totals`.
    Totals,
    /// A specific row selection; displayed using the row specifier's own text.
    Row(TableRowSpecifier),
    /// A specific column; the string is displayed verbatim.
    Column(String),
    /// A range of columns `(start, end)`; displayed as `start:end`.
    ColumnRange(String, String),
    /// Special items like #Headers, #Data, #Totals, etc.
    SpecialItem(SpecialItem),
    /// A combination of specifiers, for complex references; displayed as a
    /// comma-separated list of bracketed parts, e.g. `[#Headers],[A]:[B]`.
    Combination(Vec<Box<TableSpecifier>>),
}
86
/// Specifies which row(s) to use in a table reference.
///
/// The `Display` impl prints `@` for `Current`, the matching `#...` keyword
/// for the named areas, and the bare number for `Index`.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, PartialEq, Hash)]
pub enum TableRowSpecifier {
    /// The current row (context dependent); displayed as `@`.
    Current,
    /// All rows; displayed as `#All`.
    All,
    /// Data rows only; displayed as `#Data`.
    Data,
    /// Headers row; displayed as `#Headers`.
    Headers,
    /// Totals row; displayed as `#Totals`.
    Totals,
    /// Specific row by index (1-based); displayed as the number itself.
    Index(u32),
}
104
/// Special items in structured references.
///
/// Each variant's `Display` text is the keyword named in its description.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, PartialEq, Hash)]
pub enum SpecialItem {
    /// The `#Headers` item
    Headers,
    /// The `#Data` item
    Data,
    /// The `#Totals` item
    Totals,
    /// The `#All` item (the whole table)
    All,
    /// The `@` item (current row)
    ThisRow,
}
120
/// A reference to a table, together with an optional specifier that narrows
/// it to part of the table.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, PartialEq, Hash)]
pub struct TableReference {
    /// The name of the table
    pub name: String,
    /// Which part of the table to use; `None` when the reference carries no
    /// specifier.
    pub specifier: Option<TableSpecifier>,
}
130
/// Identifies the workbook targeted by an external reference.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, PartialEq, Hash)]
pub enum ExternalBookRef {
    /// The raw workbook token string.
    Token(String),
}

impl ExternalBookRef {
    /// Borrows the stored workbook token text.
    pub fn token(&self) -> &str {
        // Single-variant enum, so the pattern is irrefutable.
        let Self::Token(text) = self;
        text.as_str()
    }
}
144
/// Grid target of an external reference: either one cell or a range.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ExternalRefKind {
    /// A single cell, with one absolute-anchor flag per axis.
    Cell {
        row: u32,
        col: u32,
        row_abs: bool,
        col_abs: bool,
    },
    /// A range whose four bounds are each optional, with one absolute-anchor
    /// flag per bound.
    Range {
        start_row: Option<u32>,
        start_col: Option<u32>,
        end_row: Option<u32>,
        end_col: Option<u32>,
        start_row_abs: bool,
        start_col_abs: bool,
        end_row_abs: bool,
        end_col_abs: bool,
    },
}

impl ExternalRefKind {
    /// Cell target with both anchor flags cleared.
    pub fn cell(row: u32, col: u32) -> Self {
        Self::cell_with_abs(row, col, false, false)
    }

    /// Cell target with explicit anchor flags.
    pub fn cell_with_abs(row: u32, col: u32, row_abs: bool, col_abs: bool) -> Self {
        ExternalRefKind::Cell {
            row,
            col,
            row_abs,
            col_abs,
        }
    }

    /// Range target with all four anchor flags cleared.
    pub fn range(
        start_row: Option<u32>,
        start_col: Option<u32>,
        end_row: Option<u32>,
        end_col: Option<u32>,
    ) -> Self {
        Self::range_with_abs(
            start_row, start_col, end_row, end_col, false, false, false, false,
        )
    }

    /// Range target with explicit anchor flags.
    // The parameter list mirrors the variant's fields one-to-one, which keeps
    // call sites explicit; hence the lint exemption.
    #[allow(clippy::too_many_arguments)]
    pub fn range_with_abs(
        start_row: Option<u32>,
        start_col: Option<u32>,
        end_row: Option<u32>,
        end_col: Option<u32>,
        start_row_abs: bool,
        start_col_abs: bool,
        end_row_abs: bool,
        end_col_abs: bool,
    ) -> Self {
        ExternalRefKind::Range {
            start_row,
            start_col,
            end_row,
            end_col,
            start_row_abs,
            start_col_abs,
            end_row_abs,
            end_col_abs,
        }
    }
}
228
/// A reference into another workbook: the workbook token, the sheet inside
/// it, and the cell or range being addressed.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, PartialEq, Hash)]
pub struct ExternalReference {
    /// The full reference text this value was built from.
    pub raw: String,
    /// The workbook part of the reference.
    pub book: ExternalBookRef,
    /// The sheet name that follows the workbook token.
    pub sheet: String,
    /// Whether the target is a cell or a range, with its coordinates.
    pub kind: ExternalRefKind,
}
237
/// A reference to something outside the cell.
///
/// Row and column numbers stored here are 1-based (the conversion to
/// `SheetRef` subtracts one from each). The `*_abs` flags record whether the
/// corresponding axis carried a `$` anchor.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, PartialEq, Hash)]
pub enum ReferenceType {
    /// A single cell, optionally qualified by a sheet name.
    Cell {
        sheet: Option<String>,
        row: u32,
        col: u32,
        row_abs: bool,
        col_abs: bool,
    },
    /// A range, optionally qualified by a sheet name. A bound is `None` when
    /// that axis was not given for the endpoint (e.g. a column-only endpoint
    /// such as `A` in `A:A` has no row).
    Range {
        sheet: Option<String>,
        start_row: Option<u32>,
        start_col: Option<u32>,
        end_row: Option<u32>,
        end_col: Option<u32>,
        start_row_abs: bool,
        start_col_abs: bool,
        end_row_abs: bool,
        end_col_abs: bool,
    },
    /// 3D cell reference (`Sheet1:Sheet3!A1`).
    ///
    /// Excel evaluates aggregating functions across each sheet between
    /// `sheet_first` and `sheet_last` (inclusive) at the same cell address.
    Cell3D {
        sheet_first: String,
        sheet_last: String,
        row: u32,
        col: u32,
        row_abs: bool,
        col_abs: bool,
    },
    /// 3D range reference (`Sheet1:Sheet3!A1:B2`).
    Range3D {
        sheet_first: String,
        sheet_last: String,
        start_row: Option<u32>,
        start_col: Option<u32>,
        end_row: Option<u32>,
        end_col: Option<u32>,
        start_row_abs: bool,
        start_col_abs: bool,
        end_row_abs: bool,
        end_col_abs: bool,
    },
    /// A cell or range located in another workbook.
    External(ExternalReference),
    /// A structured table reference (e.g. `Table1[Column]`).
    Table(TableReference),
    /// A name that is none of the above; the string is the reference text.
    NamedRange(String),
}
289
290impl Display for TableSpecifier {
291    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
292        match self {
293            TableSpecifier::All => write!(f, "#All"),
294            TableSpecifier::Data => write!(f, "#Data"),
295            TableSpecifier::Headers => write!(f, "#Headers"),
296            TableSpecifier::Totals => write!(f, "#Totals"),
297            TableSpecifier::Row(row) => write!(f, "{row}"),
298            TableSpecifier::Column(column) => write!(f, "{column}"),
299            TableSpecifier::ColumnRange(start, end) => write!(f, "{start}:{end}"),
300            TableSpecifier::SpecialItem(item) => write!(f, "{item}"),
301            TableSpecifier::Combination(specs) => {
302                // Emit nested bracketed parts so the surrounding Table formatter prints
303                // canonical structured refs like Table[[#Headers],[Column1]:[Column2]].
304                // ColumnRange children must split their bracket boundary across
305                // both endpoints (`[A]:[B]`) rather than wrapping the whole
306                // range in one bracket pair.
307                let mut first = true;
308                for spec in specs {
309                    if !first {
310                        write!(f, ",")?;
311                    }
312                    first = false;
313                    match spec.as_ref() {
314                        TableSpecifier::ColumnRange(start, end) => {
315                            write!(f, "[{start}]:[{end}]")?;
316                        }
317                        other => write!(f, "[{other}]")?,
318                    }
319                }
320                Ok(())
321            }
322        }
323    }
324}
325
326impl Display for TableRowSpecifier {
327    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
328        match self {
329            TableRowSpecifier::Current => write!(f, "@"),
330            TableRowSpecifier::All => write!(f, "#All"),
331            TableRowSpecifier::Data => write!(f, "#Data"),
332            TableRowSpecifier::Headers => write!(f, "#Headers"),
333            TableRowSpecifier::Totals => write!(f, "#Totals"),
334            TableRowSpecifier::Index(idx) => write!(f, "{idx}"),
335        }
336    }
337}
338
339impl Display for SpecialItem {
340    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
341        match self {
342            SpecialItem::Headers => write!(f, "#Headers"),
343            SpecialItem::Data => write!(f, "#Data"),
344            SpecialItem::Totals => write!(f, "#Totals"),
345            SpecialItem::All => write!(f, "#All"),
346            SpecialItem::ThisRow => write!(f, "@"),
347        }
348    }
349}
350
/// Decide whether a sheet name must be quoted when written into a formula.
///
/// Returns `true` when the name starts with an ASCII digit, contains any of
/// the ASCII punctuation/space characters listed in `SPECIAL`, or equals one
/// of the `RESERVED` words after upper-casing. An empty name returns `false`.
fn sheet_name_needs_quoting(name: &str) -> bool {
    // Space plus the ASCII punctuation that forces quoting (underscore is
    // deliberately absent).
    const SPECIAL: &[u8] = b" !\"#$%&'()*+,-./:;<=>?@[\\]^`{|}~";
    // Compared against the upper-cased name, so matching is case-insensitive.
    const RESERVED: [&str; 9] = [
        "TRUE", "FALSE", "NULL", "REF", "DIV", "NAME", "NUM", "VALUE", "N/A",
    ];

    match name.as_bytes().first() {
        None => return false,
        Some(first) if first.is_ascii_digit() => return true,
        Some(_) => {}
    }

    if name.bytes().any(|byte| SPECIAL.contains(&byte)) {
        return true;
    }

    let upper = name.to_uppercase();
    RESERVED.contains(&upper.as_str())
}
382
/// An optional sheet name paired with coordinate text.
// NOTE(review): presumably one endpoint of an OpenFormula-style reference;
// the code that builds and consumes this type is not visible here — confirm.
#[derive(Debug, Clone)]
struct OpenFormulaRefPart {
    /// Sheet name, when present.
    sheet: Option<String>,
    /// Coordinate text, kept as a string.
    coord: String,
}
388
/// One axis of a range endpoint: `(1-based index, is_absolute)`, or `None`
/// when the endpoint does not specify that axis.
type AxisPartWithAbs = Option<(u32, bool)>;
/// One range endpoint as `(column, row)`, in that order.
type RangePartWithAbs = (AxisPartWithAbs, AxisPartWithAbs);
391
/// Result of extracting the sheet portion of a reference string.
///
/// The reference parsers obtain this (together with the remaining reference
/// text) from `extract_sheet_spec`.
#[derive(Debug, Clone)]
enum SheetSpec {
    /// No sheet segment was present (e.g. plain `A1`).
    None,
    /// Standard single-sheet reference (`Sheet1!A1`, `'Sheet 1'!A1`).
    Single(String),
    /// Excel 3D sheet range (`Sheet1:Sheet3!A1`, `'Sheet 1':'Sheet 3'!A1`).
    Range { first: String, last: String },
}
402
403impl ReferenceType {
404    /// Build a cell reference with relative anchors.
405    pub fn cell(sheet: Option<String>, row: u32, col: u32) -> Self {
406        Self::Cell {
407            sheet,
408            row,
409            col,
410            row_abs: false,
411            col_abs: false,
412        }
413    }
414
415    /// Build a cell reference with explicit anchors.
416    pub fn cell_with_abs(
417        sheet: Option<String>,
418        row: u32,
419        col: u32,
420        row_abs: bool,
421        col_abs: bool,
422    ) -> Self {
423        Self::Cell {
424            sheet,
425            row,
426            col,
427            row_abs,
428            col_abs,
429        }
430    }
431
432    /// Build a range reference with relative anchors.
433    pub fn range(
434        sheet: Option<String>,
435        start_row: Option<u32>,
436        start_col: Option<u32>,
437        end_row: Option<u32>,
438        end_col: Option<u32>,
439    ) -> Self {
440        Self::Range {
441            sheet,
442            start_row,
443            start_col,
444            end_row,
445            end_col,
446            start_row_abs: false,
447            start_col_abs: false,
448            end_row_abs: false,
449            end_col_abs: false,
450        }
451    }
452
453    /// Build a range reference with explicit anchors.
454    // Constructor-style helper mirroring the enum fields.
455    // Keeping the signature explicit makes callers easier to read.
456    #[allow(clippy::too_many_arguments)]
457    pub fn range_with_abs(
458        sheet: Option<String>,
459        start_row: Option<u32>,
460        start_col: Option<u32>,
461        end_row: Option<u32>,
462        end_col: Option<u32>,
463        start_row_abs: bool,
464        start_col_abs: bool,
465        end_row_abs: bool,
466        end_col_abs: bool,
467    ) -> Self {
468        Self::Range {
469            sheet,
470            start_row,
471            start_col,
472            end_row,
473            end_col,
474            start_row_abs,
475            start_col_abs,
476            end_row_abs,
477            end_col_abs,
478        }
479    }
480
    /// Create a reference from a string. Can be A1, A:A, A1:B2, Table1[Column], etc.
    ///
    /// Always uses the Excel reference parser; call
    /// `from_string_with_dialect` to select another dialect.
    ///
    /// # Errors
    ///
    /// Returns whatever `ParsingError` the Excel reference parser reports.
    pub fn from_string(reference: &str) -> Result<Self, ParsingError> {
        Self::parse_excel_reference(reference)
    }
485
486    /// Create a reference from a string using the specified formula dialect.
487    pub fn from_string_with_dialect(
488        reference: &str,
489        dialect: FormulaDialect,
490    ) -> Result<Self, ParsingError> {
491        match dialect {
492            FormulaDialect::Excel => Self::parse_excel_reference(reference),
493            FormulaDialect::OpenFormula => Self::parse_openformula_reference(reference)
494                .or_else(|_| Self::parse_excel_reference(reference)),
495        }
496    }
497
    /// Parse a grid reference into a shared SheetRef, preserving $ anchors.
    ///
    /// Only cell and range references are supported. Table and named ranges return an error.
    ///
    /// This is the Excel-dialect shorthand for `parse_sheet_ref_with_dialect`.
    pub fn parse_sheet_ref(reference: &str) -> Result<SheetRef<'static>, ParsingError> {
        Self::parse_sheet_ref_with_dialect(reference, FormulaDialect::Excel)
    }
504
505    /// Parse a grid reference into a shared SheetRef using the specified dialect.
506    pub fn parse_sheet_ref_with_dialect(
507        reference: &str,
508        dialect: FormulaDialect,
509    ) -> Result<SheetRef<'static>, ParsingError> {
510        match dialect {
511            FormulaDialect::Excel => Self::parse_excel_sheet_ref(reference),
512            FormulaDialect::OpenFormula => Self::parse_openformula_sheet_ref(reference)
513                .or_else(|_| Self::parse_excel_sheet_ref(reference)),
514        }
515    }
516
517    /// Lossy conversion from parsed ReferenceType into SheetRef.
518    /// External, table, and named ranges are discarded; anchors are preserved.
519    pub fn to_sheet_ref_lossy(&self) -> Option<SheetRef<'_>> {
520        match self {
521            ReferenceType::Cell {
522                sheet,
523                row,
524                col,
525                row_abs,
526                col_abs,
527            } => {
528                let row0 = row.checked_sub(1)?;
529                let col0 = col.checked_sub(1)?;
530                let sheet_loc = match sheet.as_deref() {
531                    Some(name) => SheetLocator::from_name(name),
532                    None => SheetLocator::Current,
533                };
534                let coord = RelativeCoord::new(row0, col0, *row_abs, *col_abs);
535                Some(SheetRef::Cell(SheetCellRef::new(sheet_loc, coord)))
536            }
537            ReferenceType::Range {
538                sheet,
539                start_row,
540                start_col,
541                end_row,
542                end_col,
543                start_row_abs,
544                start_col_abs,
545                end_row_abs,
546                end_col_abs,
547            } => {
548                let sheet_loc = match sheet.as_deref() {
549                    Some(name) => SheetLocator::from_name(name),
550                    None => SheetLocator::Current,
551                };
552                let sr = start_row
553                    .and_then(|v| v.checked_sub(1).map(|i| AxisBound::new(i, *start_row_abs)));
554                if start_row.is_some() && sr.is_none() {
555                    return None;
556                }
557                let sc = start_col
558                    .and_then(|v| v.checked_sub(1).map(|i| AxisBound::new(i, *start_col_abs)));
559                if start_col.is_some() && sc.is_none() {
560                    return None;
561                }
562                let er =
563                    end_row.and_then(|v| v.checked_sub(1).map(|i| AxisBound::new(i, *end_row_abs)));
564                if end_row.is_some() && er.is_none() {
565                    return None;
566                }
567                let ec =
568                    end_col.and_then(|v| v.checked_sub(1).map(|i| AxisBound::new(i, *end_col_abs)));
569                if end_col.is_some() && ec.is_none() {
570                    return None;
571                }
572                let range = SheetRangeRef::from_parts(sheet_loc, sr, sc, er, ec).ok()?;
573                Some(SheetRef::Range(range))
574            }
575            _ => None,
576        }
577    }
578
579    fn parse_excel_sheet_ref(reference: &str) -> Result<SheetRef<'static>, ParsingError> {
580        let (spec, ref_part) = Self::extract_sheet_spec(reference);
581        if matches!(spec, SheetSpec::Range { .. }) {
582            return Err(ParsingError::InvalidReference(
583                "3D references are not supported for SheetRef".to_string(),
584            ));
585        }
586        let sheet = match spec {
587            SheetSpec::None => None,
588            SheetSpec::Single(name) => Some(name),
589            SheetSpec::Range { .. } => unreachable!(),
590        };
591
592        if ref_part.contains('[') {
593            return Err(ParsingError::InvalidReference(
594                "Table references are not supported for SheetRef".to_string(),
595            ));
596        }
597
598        let sheet_loc: SheetLocator<'static> = match sheet {
599            Some(name) => SheetLocator::from_name(name),
600            None => SheetLocator::Current,
601        };
602
603        if ref_part.contains(':') {
604            let mut parts = ref_part.splitn(2, ':');
605            let start = parts.next().unwrap();
606            let end = parts.next().ok_or_else(|| {
607                ParsingError::InvalidReference(format!("Invalid range: {ref_part}"))
608            })?;
609
610            let (start_col, start_row) = Self::parse_range_part_with_abs(start)?;
611            let (end_col, end_row) = Self::parse_range_part_with_abs(end)?;
612
613            let start_col = Self::axis_bound_from_1based(start_col)?;
614            let start_row = Self::axis_bound_from_1based(start_row)?;
615            let end_col = Self::axis_bound_from_1based(end_col)?;
616            let end_row = Self::axis_bound_from_1based(end_row)?;
617
618            let range =
619                SheetRangeRef::from_parts(sheet_loc, start_row, start_col, end_row, end_col)
620                    .map_err(|err| ParsingError::InvalidReference(err.to_string()))?;
621            Ok(SheetRef::Range(range))
622        } else {
623            let (row, col, row_abs, col_abs) = parse_a1_1based(&ref_part)
624                .map_err(|err| ParsingError::InvalidReference(err.to_string()))?;
625            let coord = RelativeCoord::new(row - 1, col - 1, row_abs, col_abs);
626            Ok(SheetRef::Cell(SheetCellRef::new(sheet_loc, coord)))
627        }
628    }
629
    /// Parse a grid reference for the OpenFormula dialect.
    ///
    /// There is no OpenFormula-specific handling here: the call is forwarded
    /// unchanged to `parse_excel_sheet_ref`.
    // NOTE(review): `parse_sheet_ref_with_dialect` falls back to
    // `parse_excel_sheet_ref` when this fails, so a failing input is currently
    // parsed twice by the same routine — presumably a placeholder; confirm.
    fn parse_openformula_sheet_ref(reference: &str) -> Result<SheetRef<'static>, ParsingError> {
        Self::parse_excel_sheet_ref(reference)
    }
633
634    fn axis_bound_from_1based(
635        bound: Option<(u32, bool)>,
636    ) -> Result<Option<AxisBound>, ParsingError> {
637        match bound {
638            Some((index, abs)) => AxisBound::from_excel_1based(index, abs)
639                .map(Some)
640                .map_err(|err| ParsingError::InvalidReference(err.to_string())),
641            None => Ok(None),
642        }
643    }
644
645    fn parse_range_part_with_abs(part: &str) -> Result<RangePartWithAbs, ParsingError> {
646        if let Ok((row, col, row_abs, col_abs)) = parse_a1_1based(part) {
647            return Ok((Some((col, col_abs)), Some((row, row_abs))));
648        }
649
650        let bytes = part.as_bytes();
651        let len = bytes.len();
652        let mut i = 0usize;
653
654        let mut col_abs = false;
655        let mut row_abs = false;
656
657        if i < len && bytes[i] == b'$' {
658            col_abs = true;
659            i += 1;
660        }
661
662        let col_start = i;
663        while i < len && bytes[i].is_ascii_alphabetic() {
664            i += 1;
665        }
666
667        if i > col_start {
668            let col_str = &part[col_start..i];
669            let col1 = Self::column_to_number(col_str)?;
670
671            if i == len {
672                return Ok((Some((col1, col_abs)), None));
673            }
674
675            if i < len && bytes[i] == b'$' {
676                row_abs = true;
677                i += 1;
678            }
679
680            if i >= len {
681                return Err(ParsingError::InvalidReference(format!(
682                    "Invalid range part: {part}"
683                )));
684            }
685
686            let row_start = i;
687            while i < len && bytes[i].is_ascii_digit() {
688                i += 1;
689            }
690
691            if row_start == i || i != len {
692                return Err(ParsingError::InvalidReference(format!(
693                    "Invalid range part: {part}"
694                )));
695            }
696
697            let row_str = &part[row_start..i];
698            let row1 = row_str
699                .parse::<u32>()
700                .map_err(|_| ParsingError::InvalidReference(format!("Invalid row: {row_str}")))?;
701            if row1 == 0 {
702                return Err(ParsingError::InvalidReference(format!(
703                    "Invalid range part: {part}"
704                )));
705            }
706
707            return Ok((Some((col1, col_abs)), Some((row1, row_abs))));
708        }
709
710        i = 0;
711        if i < len && bytes[i] == b'$' {
712            row_abs = true;
713            i += 1;
714        }
715
716        let row_start = i;
717        while i < len && bytes[i].is_ascii_digit() {
718            i += 1;
719        }
720
721        if row_start == i || i != len {
722            return Err(ParsingError::InvalidReference(format!(
723                "Invalid range part: {part}"
724            )));
725        }
726
727        let row_str = &part[row_start..i];
728        let row1 = row_str
729            .parse::<u32>()
730            .map_err(|_| ParsingError::InvalidReference(format!("Invalid row: {row_str}")))?;
731        if row1 == 0 {
732            return Err(ParsingError::InvalidReference(format!(
733                "Invalid range part: {part}"
734            )));
735        }
736
737        Ok((None, Some((row1, row_abs))))
738    }
739
740    fn parse_3d_reference(first: &str, last: &str, ref_part: &str) -> Result<Self, ParsingError> {
741        if first.is_empty() || last.is_empty() {
742            return Err(ParsingError::InvalidReference(format!(
743                "3D reference requires two sheet names: {first}:{last}!{ref_part}"
744            )));
745        }
746        if ref_part.is_empty() {
747            return Err(ParsingError::InvalidReference(format!(
748                "3D reference {first}:{last}! is missing a cell or range"
749            )));
750        }
751        // 3D refs cannot point at structured table tokens.
752        if ref_part.contains('[') {
753            return Err(ParsingError::InvalidReference(format!(
754                "3D reference {first}:{last}!{ref_part} cannot target a table"
755            )));
756        }
757
758        if ref_part.contains(':') {
759            let mut parts = ref_part.splitn(2, ':');
760            let start = parts.next().unwrap();
761            let end = parts.next().ok_or_else(|| {
762                ParsingError::InvalidReference(format!("Invalid range: {ref_part}"))
763            })?;
764            let (start_col, start_row) = Self::parse_range_part_with_abs(start)?;
765            let (end_col, end_row) = Self::parse_range_part_with_abs(end)?;
766
767            let split = |bound: Option<(u32, bool)>| match bound {
768                Some((index, abs)) => (Some(index), abs),
769                None => (None, false),
770            };
771            let (start_col, start_col_abs) = split(start_col);
772            let (start_row, start_row_abs) = split(start_row);
773            let (end_col, end_col_abs) = split(end_col);
774            let (end_row, end_row_abs) = split(end_row);
775
776            Ok(ReferenceType::Range3D {
777                sheet_first: first.to_string(),
778                sheet_last: last.to_string(),
779                start_row,
780                start_col,
781                end_row,
782                end_col,
783                start_row_abs,
784                start_col_abs,
785                end_row_abs,
786                end_col_abs,
787            })
788        } else {
789            let (col, row, col_abs, row_abs) =
790                Self::parse_cell_reference(ref_part).map_err(|_| {
791                    ParsingError::InvalidReference(format!(
792                        "Invalid 3D reference target: {ref_part}"
793                    ))
794                })?;
795            Ok(ReferenceType::Cell3D {
796                sheet_first: first.to_string(),
797                sheet_last: last.to_string(),
798                row,
799                col,
800                row_abs,
801                col_abs,
802            })
803        }
804    }
805
806    fn parse_excel_reference(reference: &str) -> Result<Self, ParsingError> {
807        // Excel structured reference shorthands that appear as a single bracketed token.
808        //
809        // We use these forms to avoid ambiguity with cell refs / named ranges:
810        // - `[TableName]` resolves to the table's data body (equivalent to `TableName[#Data]`).
811        // - `[@Column]` / `[@[Column Name]]` is a "This Row" selector; it requires table-aware
812        //   context during resolution and will be rewritten by the evaluator/graph builder.
813        if reference.starts_with('[') && reference.ends_with(']') && !reference.contains('!') {
814            return Self::parse_bracketed_structured_reference(reference);
815        }
816
817        // Extract sheet specification (none / single / 3D range) if present.
818        let (sheet_spec, ref_part) = Self::extract_sheet_spec(reference);
819
820        // 3D references (`Sheet1:Sheet3!A1` / `Sheet1:Sheet3!A1:B2`) take a
821        // dedicated path because they cannot reuse the 2D Cell/Range carriers.
822        if let SheetSpec::Range { first, last, .. } = &sheet_spec {
823            return Self::parse_3d_reference(first, last, &ref_part);
824        }
825
826        let sheet = match sheet_spec {
827            SheetSpec::None => None,
828            SheetSpec::Single(name) => Some(name),
829            // Already handled above.
830            SheetSpec::Range { .. } => unreachable!(),
831        };
832
833        // Table references live in the ref_part (e.g., "Table1[Column]").
834        // Sheet names can contain '[' for external workbook refs (e.g., "[1]Sheet1!A1").
835        if ref_part.contains('[') {
836            // Issue #76: R1C1-shaped operands like `R[1]C[2]`, `R1C[2]`, `RC[1]`
837            // contain `[` but are not table references. Without this gate they
838            // either misclassify as `Table { name: "R1C", specifier: Column("2") }`
839            // or get rejected by the structured-references trailing-garbage check.
840            // We don't add an R1C1 dialect; we just refuse to fabricate a table
841            // and fall back to the same `NamedRange` outcome that bracket-free
842            // R1C1 strings (e.g. `R1C1`, `RC`) already produce.
843            if Self::is_r1c1_shape(&ref_part) {
844                return Ok(ReferenceType::NamedRange(reference.to_string()));
845            }
846            return Self::parse_table_reference(&ref_part);
847        }
848
849        let external_sheet = sheet.as_deref().and_then(|s| {
850            // Excel external workbook refs embed a "[...]" token inside the sheet segment.
851            // Use the last '[' to allow paths/URIs that may contain earlier brackets, then
852            // take the first ']' after it to avoid being confused by ']' in the sheet name.
853            let lb = s.rfind('[')?;
854            let rb_rel = s[lb..].find(']')?;
855            let rb = lb + rb_rel;
856            if lb >= rb {
857                return None;
858            }
859
860            let token = &s[..=rb];
861            let sheet_name = &s[rb + 1..];
862            if sheet_name.is_empty() {
863                None
864            } else {
865                Some((token, sheet_name))
866            }
867        });
868
869        if ref_part.contains(':') {
870            // Range reference
871            let mut parts = ref_part.splitn(2, ':');
872            let start = parts.next().unwrap();
873            let end = parts.next().ok_or_else(|| {
874                ParsingError::InvalidReference(format!("Invalid range: {ref_part}"))
875            })?;
876            let (start_col, start_row) = Self::parse_range_part_with_abs(start)?;
877            let (end_col, end_row) = Self::parse_range_part_with_abs(end)?;
878
879            let split = |bound: Option<(u32, bool)>| match bound {
880                Some((index, abs)) => (Some(index), abs),
881                None => (None, false),
882            };
883            let (start_col, start_col_abs) = split(start_col);
884            let (start_row, start_row_abs) = split(start_row);
885            let (end_col, end_col_abs) = split(end_col);
886            let (end_row, end_row_abs) = split(end_row);
887
888            if let Some((book_token, sheet_name)) = external_sheet {
889                Ok(ReferenceType::External(ExternalReference {
890                    raw: reference.to_string(),
891                    book: ExternalBookRef::Token(book_token.to_string()),
892                    sheet: sheet_name.to_string(),
893                    kind: ExternalRefKind::Range {
894                        start_row,
895                        start_col,
896                        end_row,
897                        end_col,
898                        start_row_abs,
899                        start_col_abs,
900                        end_row_abs,
901                        end_col_abs,
902                    },
903                }))
904            } else {
905                Ok(ReferenceType::Range {
906                    sheet,
907                    start_row,
908                    start_col,
909                    end_row,
910                    end_col,
911                    start_row_abs,
912                    start_col_abs,
913                    end_row_abs,
914                    end_col_abs,
915                })
916            }
917        } else {
918            // Try to parse as a single cell reference
919            match Self::parse_cell_reference(&ref_part) {
920                Ok((col, row, col_abs, row_abs)) => {
921                    if let Some((book_token, sheet_name)) = external_sheet {
922                        Ok(ReferenceType::External(ExternalReference {
923                            raw: reference.to_string(),
924                            book: ExternalBookRef::Token(book_token.to_string()),
925                            sheet: sheet_name.to_string(),
926                            kind: ExternalRefKind::Cell {
927                                row,
928                                col,
929                                row_abs,
930                                col_abs,
931                            },
932                        }))
933                    } else {
934                        Ok(ReferenceType::Cell {
935                            sheet,
936                            row,
937                            col,
938                            row_abs,
939                            col_abs,
940                        })
941                    }
942                }
943                Err(_) => {
944                    // Treat it as a named range
945                    Ok(ReferenceType::NamedRange(reference.to_string()))
946                }
947            }
948        }
949    }
950
951    /// Parse a cell reference like "A1" into (column, row) using byte-based parsing.
952    fn parse_cell_reference(reference: &str) -> Result<(u32, u32, bool, bool), ParsingError> {
953        parse_a1_1based(reference)
954            .map(|(row, col, row_abs, col_abs)| (col, row, col_abs, row_abs))
955            .map_err(|_| {
956                ParsingError::InvalidReference(format!("Invalid cell reference: {reference}"))
957            })
958    }
959
960    /// Convert a column letter (e.g., "A", "BC") to a column number (1-based) using byte operations.
961    pub(crate) fn column_to_number(column: &str) -> Result<u32, ParsingError> {
962        col_index_from_letters_1based(column)
963            .map_err(|_| ParsingError::InvalidReference(format!("Invalid column: {column}")))
964    }
965
966    /// Convert a column number to a column letter using lookup table for common values.
967    pub(crate) fn number_to_column(num: u32) -> String {
968        if num == 0 {
969            return String::new();
970        }
971        // Use lookup table for common columns (1-702 covers A-ZZ)
972        if num > 0 && num <= 702 {
973            return COLUMN_LOOKUP[(num - 1) as usize].clone();
974        }
975
976        col_letters_from_1based(num).unwrap_or_default()
977    }
978
979    fn format_col(col: u32, abs: bool) -> String {
980        if abs {
981            format!("${}", Self::number_to_column(col))
982        } else {
983            Self::number_to_column(col)
984        }
985    }
986
987    fn format_row(row: u32, abs: bool) -> String {
988        if abs {
989            format!("${row}")
990        } else {
991            row.to_string()
992        }
993    }
994}
995
996impl Display for ReferenceType {
997    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
998        write!(
999            f,
1000            "{}",
1001            match self {
1002                ReferenceType::Cell {
1003                    sheet,
1004                    row,
1005                    col,
1006                    row_abs,
1007                    col_abs,
1008                } => {
1009                    let col_str = Self::format_col(*col, *col_abs);
1010                    let row_str = Self::format_row(*row, *row_abs);
1011
1012                    if let Some(sheet_name) = sheet {
1013                        if sheet_name_needs_quoting(sheet_name) {
1014                            // Escape any single quotes in the sheet name by doubling them
1015                            let escaped_name = sheet_name.replace('\'', "''");
1016                            format!("'{escaped_name}'!{col_str}{row_str}")
1017                        } else {
1018                            format!("{sheet_name}!{col_str}{row_str}")
1019                        }
1020                    } else {
1021                        format!("{col_str}{row_str}")
1022                    }
1023                }
1024                ReferenceType::Range {
1025                    sheet,
1026                    start_row,
1027                    start_col,
1028                    end_row,
1029                    end_col,
1030                    start_row_abs,
1031                    start_col_abs,
1032                    end_row_abs,
1033                    end_col_abs,
1034                } => {
1035                    // Format start reference
1036                    let start_ref = match (start_col, start_row) {
1037                        (Some(col), Some(row)) => format!(
1038                            "{}{}",
1039                            Self::format_col(*col, *start_col_abs),
1040                            Self::format_row(*row, *start_row_abs)
1041                        ),
1042                        (Some(col), None) => Self::format_col(*col, *start_col_abs),
1043                        (None, Some(row)) => Self::format_row(*row, *start_row_abs),
1044                        (None, None) => "".to_string(), // Should not happen in normal usage
1045                    };
1046
1047                    // Format end reference
1048                    let end_ref = match (end_col, end_row) {
1049                        (Some(col), Some(row)) => format!(
1050                            "{}{}",
1051                            Self::format_col(*col, *end_col_abs),
1052                            Self::format_row(*row, *end_row_abs)
1053                        ),
1054                        (Some(col), None) => Self::format_col(*col, *end_col_abs),
1055                        (None, Some(row)) => Self::format_row(*row, *end_row_abs),
1056                        (None, None) => "".to_string(), // Should not happen in normal usage
1057                    };
1058
1059                    let range_part = format!("{start_ref}:{end_ref}");
1060
1061                    if let Some(sheet_name) = sheet {
1062                        if sheet_name_needs_quoting(sheet_name) {
1063                            // Escape any single quotes in the sheet name by doubling them
1064                            let escaped_name = sheet_name.replace('\'', "''");
1065                            format!("'{escaped_name}'!{range_part}")
1066                        } else {
1067                            format!("{sheet_name}!{range_part}")
1068                        }
1069                    } else {
1070                        range_part
1071                    }
1072                }
1073                ReferenceType::Cell3D {
1074                    sheet_first,
1075                    sheet_last,
1076                    row,
1077                    col,
1078                    row_abs,
1079                    col_abs,
1080                } => {
1081                    let col_str = Self::format_col(*col, *col_abs);
1082                    let row_str = Self::format_row(*row, *row_abs);
1083                    let prefix = format_3d_sheet_prefix(sheet_first, sheet_last);
1084                    format!("{prefix}!{col_str}{row_str}")
1085                }
1086                ReferenceType::Range3D {
1087                    sheet_first,
1088                    sheet_last,
1089                    start_row,
1090                    start_col,
1091                    end_row,
1092                    end_col,
1093                    start_row_abs,
1094                    start_col_abs,
1095                    end_row_abs,
1096                    end_col_abs,
1097                } => {
1098                    let start_ref = match (start_col, start_row) {
1099                        (Some(col), Some(row)) => format!(
1100                            "{}{}",
1101                            Self::format_col(*col, *start_col_abs),
1102                            Self::format_row(*row, *start_row_abs)
1103                        ),
1104                        (Some(col), None) => Self::format_col(*col, *start_col_abs),
1105                        (None, Some(row)) => Self::format_row(*row, *start_row_abs),
1106                        (None, None) => "".to_string(),
1107                    };
1108                    let end_ref = match (end_col, end_row) {
1109                        (Some(col), Some(row)) => format!(
1110                            "{}{}",
1111                            Self::format_col(*col, *end_col_abs),
1112                            Self::format_row(*row, *end_row_abs)
1113                        ),
1114                        (Some(col), None) => Self::format_col(*col, *end_col_abs),
1115                        (None, Some(row)) => Self::format_row(*row, *end_row_abs),
1116                        (None, None) => "".to_string(),
1117                    };
1118                    let range_part = format!("{start_ref}:{end_ref}");
1119                    let prefix = format_3d_sheet_prefix(sheet_first, sheet_last);
1120                    format!("{prefix}!{range_part}")
1121                }
1122                ReferenceType::External(ext) => ext.raw.clone(),
1123                ReferenceType::Table(table_ref) => {
1124                    if let Some(specifier) = &table_ref.specifier {
1125                        // For table references, we need to handle column specifiers specially
1126                        // to remove leading/trailing whitespace
1127                        match specifier {
1128                            TableSpecifier::Column(column) => {
1129                                format!("{}[{}]", table_ref.name, column.trim())
1130                            }
1131                            TableSpecifier::ColumnRange(start, end) => {
1132                                format!("{}[{}:{}]", table_ref.name, start.trim(), end.trim())
1133                            }
1134                            _ => {
1135                                // For other specifiers, use the standard formatting
1136                                format!("{}[{}]", table_ref.name, specifier)
1137                            }
1138                        }
1139                    } else {
1140                        table_ref.name.clone()
1141                    }
1142                }
1143                ReferenceType::NamedRange(name) => name.clone(),
1144            }
1145        )
1146    }
1147}
1148
1149/// Render the `Sheet1:SheetN` portion of a 3D reference. Either side may
1150/// require quoting independently; quoting one side does not force the other
1151/// to be quoted, matching Excel's behaviour.
1152fn format_3d_sheet_prefix(first: &str, last: &str) -> String {
1153    let format_one = |name: &str| -> String {
1154        if sheet_name_needs_quoting(name) {
1155            let escaped = name.replace('\'', "''");
1156            format!("'{escaped}'")
1157        } else {
1158            name.to_string()
1159        }
1160    };
1161    format!("{}:{}", format_one(first), format_one(last))
1162}
1163
1164impl TryFrom<&str> for ReferenceType {
1165    type Error = ParsingError;
1166
1167    fn try_from(value: &str) -> Result<Self, Self::Error> {
1168        ReferenceType::from_string(value)
1169    }
1170}
1171
1172impl FromStr for ReferenceType {
1173    type Err = ParsingError;
1174
1175    fn from_str(s: &str) -> Result<Self, Self::Err> {
1176        ReferenceType::from_string(s)
1177    }
1178}
1179
1180impl ReferenceType {
1181    /// Normalise the reference string (convert to canonical form)
1182    pub fn normalise(&self) -> String {
1183        format!("{self}")
1184    }
1185
1186    /// Read one sheet-name segment starting at `start`. Returns the parsed
1187    /// (unescaped) name, the byte offset directly after the closing quote
1188    /// (when quoted) or the last alphanumeric byte (when bare), and a flag
1189    /// indicating whether the segment was quoted.
1190    fn read_sheet_segment(reference: &str, start: usize) -> Option<(String, usize, bool)> {
1191        let bytes = reference.as_bytes();
1192        if start >= bytes.len() {
1193            return None;
1194        }
1195
1196        if bytes[start] == b'\'' {
1197            // Quoted segment. Excel doubles a literal `'` inside the name.
1198            let mut i = start + 1;
1199            let body_start = i;
1200            while i < bytes.len() {
1201                if bytes[i] == b'\'' {
1202                    if i + 1 < bytes.len() && bytes[i + 1] == b'\'' {
1203                        i += 2;
1204                        continue;
1205                    }
1206                    let raw = &reference[body_start..i];
1207                    let name = raw.replace("''", "'");
1208                    return Some((name, i + 1, true));
1209                }
1210                i += 1;
1211            }
1212            None
1213        } else {
1214            // Bare segment. Sheet names cannot contain ':', '!', '\'', or any
1215            // ASCII-whitespace/operator characters in unquoted form.
1216            let mut i = start;
1217            while i < bytes.len() {
1218                let b = bytes[i];
1219                match b {
1220                    b':' | b'!' | b'\'' | b' ' | b'\t' | b'\n' | b'\r' => break,
1221                    _ => i += 1,
1222                }
1223            }
1224            if i == start {
1225                None
1226            } else {
1227                Some((reference[start..i].to_string(), i, false))
1228            }
1229        }
1230    }
1231
1232    /// Extract sheet specification (none, single sheet, or 3D sheet range)
1233    /// from a reference string.
1234    fn extract_sheet_spec(reference: &str) -> (SheetSpec, String) {
1235        let Some((first_name, after_first, first_quoted)) = Self::read_sheet_segment(reference, 0)
1236        else {
1237            // No sheet segment recognised – fall back to looking for a bare
1238            // `!` separator (e.g. external book tokens such as `[1]Sheet!A1`).
1239            return Self::extract_sheet_spec_fallback(reference);
1240        };
1241        let _ = first_quoted;
1242
1243        let bytes = reference.as_bytes();
1244
1245        // 3D form: Name1:Name2!...
1246        if after_first < bytes.len() && bytes[after_first] == b':' {
1247            let second_start = after_first + 1;
1248            if let Some((second_name, after_second, _)) =
1249                Self::read_sheet_segment(reference, second_start)
1250                && after_second < bytes.len()
1251                && bytes[after_second] == b'!'
1252            {
1253                let ref_part = reference[after_second + 1..].to_string();
1254                return (
1255                    SheetSpec::Range {
1256                        first: first_name,
1257                        last: second_name,
1258                    },
1259                    ref_part,
1260                );
1261            }
1262
1263            // The reference looks like the start of a 3D ref but the second
1264            // segment is malformed (e.g. `Sheet1:!A1`). Surface the broken
1265            // form as a 3D range with an empty `last` so the parser layer
1266            // can report a precise error rather than silently treating it as
1267            // a sheet name containing `:`.
1268            if second_start < bytes.len() {
1269                if let Some(bang) = reference[second_start..].find('!') {
1270                    let ref_part = reference[second_start + bang + 1..].to_string();
1271                    return (
1272                        SheetSpec::Range {
1273                            first: first_name,
1274                            last: String::new(),
1275                        },
1276                        ref_part,
1277                    );
1278                }
1279            }
1280        }
1281
1282        // Single-sheet form: Name!...
1283        if after_first < bytes.len() && bytes[after_first] == b'!' {
1284            let ref_part = reference[after_first + 1..].to_string();
1285            return (SheetSpec::Single(first_name), ref_part);
1286        }
1287
1288        // The leading segment did not terminate in `!`; treat the whole input
1289        // as if no sheet were present and fall through to the legacy logic.
1290        Self::extract_sheet_spec_fallback(reference)
1291    }
1292
1293    fn extract_sheet_spec_fallback(reference: &str) -> (SheetSpec, String) {
1294        let bytes = reference.as_bytes();
1295        // Handle unquoted sheet names containing characters our segment
1296        // reader rejects (such as bracketed external workbook tokens, e.g.
1297        // `[1]Sheet1!A1`). The original implementation scanned for the first
1298        // `!` after byte 0; preserve that behaviour for compatibility.
1299        let mut i = 0;
1300        while i < bytes.len() {
1301            if bytes[i] == b'!' && i > 0 {
1302                let sheet = reference[..i].to_string();
1303                let ref_part = reference[i + 1..].to_string();
1304                return (SheetSpec::Single(sheet), ref_part);
1305            }
1306            i += 1;
1307        }
1308
1309        (SheetSpec::None, reference.to_string())
1310    }
1311
1312    /// Detect R1C1-shaped operands so they aren't routed through the table-
1313    /// reference parser (issue #76).
1314    ///
1315    /// Matches `^R\d*(\[-?\d+\])?C\d*(\[-?\d+\])?$` and additionally requires
1316    /// the operand to contain at least one digit or bracket so that bare `R`,
1317    /// `C`, and `RC` (which already classify cleanly as `NamedRange` via the
1318    /// non-bracket path) are not pulled in here. Plain A1 cells like `R1`,
1319    /// `C5`, and `RC1` never reach this function because they don't contain
1320    /// `[` and are handled by the cell-reference path.
1321    fn is_r1c1_shape(s: &str) -> bool {
1322        let bytes = s.as_bytes();
1323        let len = bytes.len();
1324        let mut i = 0usize;
1325        let mut anchored = false;
1326
1327        if i >= len || bytes[i] != b'R' {
1328            return false;
1329        }
1330        i += 1;
1331
1332        let row_digits_start = i;
1333        while i < len && bytes[i].is_ascii_digit() {
1334            i += 1;
1335        }
1336        if i > row_digits_start {
1337            anchored = true;
1338        }
1339
1340        if i < len && bytes[i] == b'[' {
1341            i += 1;
1342            if i < len && bytes[i] == b'-' {
1343                i += 1;
1344            }
1345            let n_start = i;
1346            while i < len && bytes[i].is_ascii_digit() {
1347                i += 1;
1348            }
1349            if i == n_start || i >= len || bytes[i] != b']' {
1350                return false;
1351            }
1352            i += 1;
1353            anchored = true;
1354        }
1355
1356        if i >= len || bytes[i] != b'C' {
1357            return false;
1358        }
1359        i += 1;
1360
1361        let col_digits_start = i;
1362        while i < len && bytes[i].is_ascii_digit() {
1363            i += 1;
1364        }
1365        if i > col_digits_start {
1366            anchored = true;
1367        }
1368
1369        if i < len && bytes[i] == b'[' {
1370            i += 1;
1371            if i < len && bytes[i] == b'-' {
1372                i += 1;
1373            }
1374            let n_start = i;
1375            while i < len && bytes[i].is_ascii_digit() {
1376                i += 1;
1377            }
1378            if i == n_start || i >= len || bytes[i] != b']' {
1379                return false;
1380            }
1381            i += 1;
1382            anchored = true;
1383        }
1384
1385        i == len && anchored
1386    }
1387
1388    /// Parse a table reference like "Table1[Column1]" or more complex ones
1389    /// like "Table1[[#All],[Column1]:[Column2]]".
1390    ///
1391    /// The specifier syntax is parsed by a real recursive-descent parser
1392    /// (`structured_ref::SpecifierParser`) following MS-XLSX §18.17.6.2.
1393    fn parse_table_reference(reference: &str) -> Result<Self, ParsingError> {
1394        let bracket_pos = reference.find('[').ok_or_else(|| {
1395            ParsingError::InvalidReference(format!("Missing '[' in table reference: {reference}"))
1396        })?;
1397        let table_name = reference[..bracket_pos].trim();
1398        if table_name.is_empty() {
1399            return Err(ParsingError::InvalidReference(reference.to_string()));
1400        }
1401
1402        let specifier_str = &reference[bracket_pos..];
1403        let specifier = structured_ref::parse_full_specifier(specifier_str)?;
1404
1405        Ok(ReferenceType::Table(TableReference {
1406            name: table_name.to_string(),
1407            specifier,
1408        }))
1409    }
1410
1411    /// Handle the `[...]` shorthand that appears without a table name. The
1412    /// resolver/evaluator binds the implicit table from cell context.
1413    ///
1414    /// `[TableName]` is the data-body shorthand and is materialised as
1415    /// `Table { name = "TableName", specifier = #Data }`; everything else
1416    /// produces an unnamed `Table` carrying the parsed specifier verbatim.
1417    fn parse_bracketed_structured_reference(reference: &str) -> Result<Self, ParsingError> {
1418        debug_assert!(reference.starts_with('[') && reference.ends_with(']'));
1419        let specifier = structured_ref::parse_full_specifier(reference)?;
1420
1421        match specifier {
1422            Some(TableSpecifier::Column(name)) => Ok(ReferenceType::Table(TableReference {
1423                name,
1424                specifier: Some(TableSpecifier::SpecialItem(SpecialItem::Data)),
1425            })),
1426            other => Ok(ReferenceType::Table(TableReference {
1427                name: String::new(),
1428                specifier: other,
1429            })),
1430        }
1431    }
1432
1433    fn parse_openformula_reference(reference: &str) -> Result<Self, ParsingError> {
1434        if reference.starts_with('[') && reference.ends_with(']') {
1435            let inner = &reference[1..reference.len() - 1];
1436            if inner.is_empty() {
1437                return Err(ParsingError::InvalidReference(
1438                    "Empty OpenFormula reference".to_string(),
1439                ));
1440            }
1441
1442            let mut parts = inner.splitn(2, ':');
1443            let start_part_str = parts.next().unwrap();
1444            let end_part_str = parts.next();
1445
1446            let start_part = Self::parse_openformula_part(start_part_str)?;
1447            let end_part = if let Some(part) = end_part_str {
1448                Some(Self::parse_openformula_part(part)?)
1449            } else {
1450                None
1451            };
1452
1453            let sheet = match (&start_part.sheet, &end_part) {
1454                (Some(sheet), Some(end)) => {
1455                    if let Some(end_sheet) = &end.sheet {
1456                        if end_sheet != sheet {
1457                            return Err(ParsingError::InvalidReference(format!(
1458                                "Mismatched sheets in reference: {sheet} vs {end_sheet}"
1459                            )));
1460                        }
1461                    }
1462                    Some(sheet.clone())
1463                }
1464                (Some(sheet), None) => Some(sheet.clone()),
1465                (None, Some(end)) => end.sheet.clone(),
1466                (None, None) => None,
1467            };
1468
1469            let mut excel_like = String::new();
1470            if let Some(sheet_name) = sheet {
1471                if sheet_name_needs_quoting(&sheet_name) {
1472                    let escaped = sheet_name.replace('\'', "''");
1473                    excel_like.push('\'');
1474                    excel_like.push_str(&escaped);
1475                    excel_like.push('\'');
1476                } else {
1477                    excel_like.push_str(&sheet_name);
1478                }
1479                excel_like.push('!');
1480            }
1481
1482            excel_like.push_str(&start_part.coord);
1483            if let Some(end) = end_part {
1484                excel_like.push(':');
1485                excel_like.push_str(&end.coord);
1486            }
1487
1488            return Self::parse_excel_reference(&excel_like);
1489        }
1490
1491        Err(ParsingError::InvalidReference(format!(
1492            "Unsupported OpenFormula reference: {reference}"
1493        )))
1494    }
1495
1496    fn parse_openformula_part(part: &str) -> Result<OpenFormulaRefPart, ParsingError> {
1497        let trimmed = part.trim();
1498        if trimmed.is_empty() {
1499            return Err(ParsingError::InvalidReference(
1500                "Empty component in OpenFormula reference".to_string(),
1501            ));
1502        }
1503
1504        if trimmed == "." {
1505            return Err(ParsingError::InvalidReference(
1506                "Incomplete OpenFormula reference component".to_string(),
1507            ));
1508        }
1509
1510        if trimmed.starts_with('[') {
1511            // Nested brackets are not expected here
1512            return Err(ParsingError::InvalidReference(format!(
1513                "Unexpected '[' in OpenFormula reference component: {trimmed}"
1514            )));
1515        }
1516
1517        let (sheet, coord_slice) = if let Some(stripped) = trimmed.strip_prefix('.') {
1518            (None, stripped.trim())
1519        } else if let Some(dot_idx) = Self::find_openformula_sheet_separator(trimmed) {
1520            let sheet_part = trimmed[..dot_idx].trim();
1521            let coord_part = trimmed[dot_idx + 1..].trim();
1522            if coord_part.is_empty() {
1523                return Err(ParsingError::InvalidReference(format!(
1524                    "Missing coordinate in OpenFormula reference component: {trimmed}"
1525                )));
1526            }
1527            let sheet_name = Self::normalise_openformula_sheet(sheet_part)?;
1528            (Some(sheet_name), coord_part)
1529        } else {
1530            (None, trimmed)
1531        };
1532
1533        let coord = coord_slice.trim_start_matches('.').trim().to_string();
1534
1535        if coord.is_empty() {
1536            return Err(ParsingError::InvalidReference(format!(
1537                "Missing coordinate in OpenFormula reference component: {trimmed}"
1538            )));
1539        }
1540
1541        Ok(OpenFormulaRefPart { sheet, coord })
1542    }
1543
1544    fn normalise_openformula_sheet(sheet: &str) -> Result<String, ParsingError> {
1545        let without_abs = sheet.trim().trim_start_matches('$');
1546
1547        if without_abs.starts_with('\'') {
1548            if without_abs.len() < 2 || !without_abs.ends_with('\'') {
1549                return Err(ParsingError::InvalidReference(format!(
1550                    "Unterminated sheet name in OpenFormula reference: {sheet}"
1551                )));
1552            }
1553            let inner = &without_abs[1..without_abs.len() - 1];
1554            Ok(inner.replace("''", "'"))
1555        } else {
1556            Ok(without_abs.to_string())
1557        }
1558    }
1559
1560    fn find_openformula_sheet_separator(part: &str) -> Option<usize> {
1561        let bytes = part.as_bytes();
1562        let mut i = 0;
1563        let mut in_quotes = false;
1564
1565        while i < bytes.len() {
1566            match bytes[i] {
1567                b'\'' => {
1568                    if i + 1 < bytes.len() && bytes[i + 1] == b'\'' {
1569                        i += 2;
1570                        continue;
1571                    }
1572                    in_quotes = !in_quotes;
1573                    i += 1;
1574                }
1575                b'.' if !in_quotes => return Some(i),
1576                _ => i += 1,
1577            }
1578        }
1579
1580        None
1581    }
1582
1583    // The structured-reference grammar lives in the `structured_ref`
1584    // submodule below; legacy `parse_special_item` /
1585    // `parse_complex_table_specifier` helpers were removed when the real
1586    // recursive-descent parser landed for issue #73.
1587
1588    /// Get the Excel-style string representation of this reference
1589    pub fn to_excel_string(&self) -> String {
1590        match self {
1591            ReferenceType::Cell {
1592                sheet,
1593                row,
1594                col,
1595                row_abs,
1596                col_abs,
1597            } => {
1598                let col_str = Self::format_col(*col, *col_abs);
1599                let row_str = Self::format_row(*row, *row_abs);
1600                if let Some(s) = sheet {
1601                    if sheet_name_needs_quoting(s) {
1602                        let escaped_name = s.replace('\'', "''");
1603                        format!("'{}'!{}{}", escaped_name, col_str, row_str)
1604                    } else {
1605                        format!("{}!{}{}", s, col_str, row_str)
1606                    }
1607                } else {
1608                    format!("{}{}", col_str, row_str)
1609                }
1610            }
1611            ReferenceType::Range {
1612                sheet,
1613                start_row,
1614                start_col,
1615                end_row,
1616                end_col,
1617                start_row_abs,
1618                start_col_abs,
1619                end_row_abs,
1620                end_col_abs,
1621            } => {
1622                // Format start reference
1623                let start_ref = match (start_col, start_row) {
1624                    (Some(col), Some(row)) => format!(
1625                        "{}{}",
1626                        Self::format_col(*col, *start_col_abs),
1627                        Self::format_row(*row, *start_row_abs)
1628                    ),
1629                    (Some(col), None) => Self::format_col(*col, *start_col_abs),
1630                    (None, Some(row)) => Self::format_row(*row, *start_row_abs),
1631                    (None, None) => "".to_string(), // Should not happen in normal usage
1632                };
1633
1634                // Format end reference
1635                let end_ref = match (end_col, end_row) {
1636                    (Some(col), Some(row)) => format!(
1637                        "{}{}",
1638                        Self::format_col(*col, *end_col_abs),
1639                        Self::format_row(*row, *end_row_abs)
1640                    ),
1641                    (Some(col), None) => Self::format_col(*col, *end_col_abs),
1642                    (None, Some(row)) => Self::format_row(*row, *end_row_abs),
1643                    (None, None) => "".to_string(), // Should not happen in normal usage
1644                };
1645
1646                let range_part = format!("{start_ref}:{end_ref}");
1647
1648                if let Some(s) = sheet {
1649                    if sheet_name_needs_quoting(s) {
1650                        let escaped_name = s.replace('\'', "''");
1651                        format!("'{escaped_name}'!{range_part}")
1652                    } else {
1653                        format!("{s}!{range_part}")
1654                    }
1655                } else {
1656                    range_part
1657                }
1658            }
1659            ReferenceType::Cell3D { .. } | ReferenceType::Range3D { .. } => format!("{self}"),
1660            ReferenceType::External(ext) => ext.raw.clone(),
1661            ReferenceType::Table(table_ref) => {
1662                if let Some(specifier) = &table_ref.specifier {
1663                    format!("{}[{}]", table_ref.name, specifier)
1664                } else {
1665                    table_ref.name.clone()
1666                }
1667            }
1668            ReferenceType::NamedRange(name) => name.clone(),
1669        }
1670    }
1671}
1672
/// The different types of AST nodes.
///
/// Each variant is one syntactic form of a parsed formula. Child expressions
/// are stored as nested [`ASTNode`]s, so a value of this type describes a whole
/// subtree.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, PartialEq, Hash)]
pub enum ASTNodeType {
    /// A literal value.
    Literal(LiteralValue),
    /// A reference (cell, range, table, name, ...).
    Reference {
        /// Original reference string (preserved for display/debugging).
        original: String,
        /// Parsed reference.
        reference: ReferenceType,
    },
    /// An operator applied to a single operand.
    UnaryOp {
        /// Operator text.
        op: String,
        /// The operand.
        expr: Box<ASTNode>,
    },
    /// An operator applied to a left and a right operand.
    BinaryOp {
        /// Operator text.
        op: String,
        /// Left-hand operand.
        left: Box<ASTNode>,
        /// Right-hand operand.
        right: Box<ASTNode>,
    },
    /// A call to a function identified by name.
    Function {
        /// Function name as written in the formula.
        name: String,
        /// Call arguments in source order (most functions have <= 4 args).
        args: Vec<ASTNode>,
    },
    /// Generic call where the callee is itself an expression that produces
    /// a callable value (e.g. LAMBDA immediate-invocation `LAMBDA(x, x+1)(5)`).
    Call {
        /// Expression that evaluates to the callable.
        callee: Box<ASTNode>,
        /// Call arguments in source order.
        args: Vec<ASTNode>,
    },
    /// An array literal stored as rows of items (most arrays are small).
    Array(Vec<Vec<ASTNode>>),
}
1703
1704impl Display for ASTNodeType {
1705    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1706        match self {
1707            ASTNodeType::Literal(value) => write!(f, "Literal({value})"),
1708            ASTNodeType::Reference { reference, .. } => write!(f, "Reference({reference:?})"),
1709            ASTNodeType::UnaryOp { op, expr } => write!(f, "UnaryOp({op}, {expr})"),
1710            ASTNodeType::BinaryOp { op, left, right } => {
1711                write!(f, "BinaryOp({op}, {left}, {right})")
1712            }
1713            ASTNodeType::Function { name, args } => write!(f, "Function({name}, {args:?})"),
1714            ASTNodeType::Call { callee, args } => write!(f, "Call({callee}, {args:?})"),
1715            ASTNodeType::Array(rows) => write!(f, "Array({rows:?})"),
1716        }
1717    }
1718}
1719
/// An AST node represents a parsed formula element
///
/// A node couples the syntactic form (`node_type`) with optional provenance
/// (`source_token`) and a volatility flag for the subtree.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, PartialEq)]
pub struct ASTNode {
    /// What kind of node this is, including any child nodes.
    pub node_type: ASTNodeType,
    /// The token this node was created from, when one was recorded.
    pub source_token: Option<Token>,
    /// True if this AST contains any volatile function calls.
    ///
    /// This is set by the parser when a volatility classifier is provided.
    /// For ASTs constructed manually (e.g., in tests), this defaults to false.
    pub contains_volatile: bool,
}
1732
1733impl ASTNode {
1734    pub fn new(node_type: ASTNodeType, source_token: Option<Token>) -> Self {
1735        ASTNode {
1736            node_type,
1737            source_token,
1738            contains_volatile: false,
1739        }
1740    }
1741
1742    /// Create an ASTNode while explicitly setting contains_volatile.
1743    pub fn new_with_volatile(
1744        node_type: ASTNodeType,
1745        source_token: Option<Token>,
1746        contains_volatile: bool,
1747    ) -> Self {
1748        ASTNode {
1749            node_type,
1750            source_token,
1751            contains_volatile,
1752        }
1753    }
1754
    /// Whether this AST contains any volatile functions.
    ///
    /// Returns the stored `contains_volatile` flag; nothing is recomputed here.
    pub fn contains_volatile(&self) -> bool {
        self.contains_volatile
    }
1759
    /// Structural fingerprint of this AST.
    ///
    /// Returns the same value as [`ASTNode::calculate_hash`].
    pub fn fingerprint(&self) -> u64 {
        self.calculate_hash()
    }
1763
1764    /// Calculate a hash for this ASTNode
1765    pub fn calculate_hash(&self) -> u64 {
1766        let mut hasher = FormulaHasher::new();
1767        self.hash_node(&mut hasher);
1768        hasher.finish()
1769    }
1770
1771    fn hash_node(&self, hasher: &mut FormulaHasher) {
1772        match &self.node_type {
1773            ASTNodeType::Literal(value) => {
1774                hasher.write(&[1]); // Discriminant for Literal
1775                value.hash(hasher);
1776            }
1777            ASTNodeType::Reference { reference, .. } => {
1778                hasher.write(&[2]); // Discriminant for Reference
1779                reference.hash(hasher);
1780            }
1781            ASTNodeType::UnaryOp { op, expr } => {
1782                hasher.write(&[3]); // Discriminant for UnaryOp
1783                hasher.write(op.as_bytes());
1784                expr.hash_node(hasher);
1785            }
1786            ASTNodeType::BinaryOp { op, left, right } => {
1787                hasher.write(&[4]); // Discriminant for BinaryOp
1788                hasher.write(op.as_bytes());
1789                left.hash_node(hasher);
1790                right.hash_node(hasher);
1791            }
1792            ASTNodeType::Function { name, args } => {
1793                hasher.write(&[5]); // Discriminant for Function
1794                // Use lowercase function name to be case-insensitive
1795                let name_lower = name.to_lowercase();
1796                hasher.write(name_lower.as_bytes());
1797                hasher.write_usize(args.len());
1798                for arg in args {
1799                    arg.hash_node(hasher);
1800                }
1801            }
1802            ASTNodeType::Call { callee, args } => {
1803                hasher.write(&[7]); // Discriminant for Call
1804                callee.hash_node(hasher);
1805                hasher.write_usize(args.len());
1806                for arg in args {
1807                    arg.hash_node(hasher);
1808                }
1809            }
1810            ASTNodeType::Array(rows) => {
1811                hasher.write(&[6]); // Discriminant for Array
1812                hasher.write_usize(rows.len());
1813                for row in rows {
1814                    hasher.write_usize(row.len());
1815                    for item in row {
1816                        item.hash_node(hasher);
1817                    }
1818                }
1819            }
1820        }
1821    }
1822
1823    pub fn get_dependencies(&self) -> Vec<&ReferenceType> {
1824        let mut dependencies = Vec::new();
1825        self.collect_dependencies(&mut dependencies);
1826        dependencies
1827    }
1828
1829    pub fn get_dependency_strings(&self) -> Vec<String> {
1830        self.get_dependencies()
1831            .into_iter()
1832            .map(|dep| format!("{dep}"))
1833            .collect()
1834    }
1835
1836    fn collect_dependencies<'a>(&'a self, dependencies: &mut Vec<&'a ReferenceType>) {
1837        match &self.node_type {
1838            ASTNodeType::Reference { reference, .. } => {
1839                dependencies.push(reference);
1840            }
1841            ASTNodeType::UnaryOp { expr, .. } => {
1842                expr.collect_dependencies(dependencies);
1843            }
1844            ASTNodeType::BinaryOp { left, right, .. } => {
1845                left.collect_dependencies(dependencies);
1846                right.collect_dependencies(dependencies);
1847            }
1848            ASTNodeType::Function { args, .. } => {
1849                for arg in args {
1850                    arg.collect_dependencies(dependencies);
1851                }
1852            }
1853            ASTNodeType::Call { callee, args } => {
1854                callee.collect_dependencies(dependencies);
1855                for arg in args {
1856                    arg.collect_dependencies(dependencies);
1857                }
1858            }
1859            ASTNodeType::Array(rows) => {
1860                for row in rows {
1861                    for item in row {
1862                        item.collect_dependencies(dependencies);
1863                    }
1864                }
1865            }
1866            _ => {}
1867        }
1868    }
1869
    /// Iterate over every reference in this AST as a borrowed [`RefView`].
    ///
    /// The returned [`RefIter`] walks the tree with an explicit stack seeded
    /// with this node, so the traversal itself does not recurse. Each yielded
    /// view borrows its sheet/name strings from the AST instead of cloning them.
    pub fn refs(&self) -> RefIter<'_> {
        RefIter {
            stack: smallvec::smallvec![self],
        }
    }
1877
1878    /// Visit all references in this AST without allocating intermediates.
1879    pub fn visit_refs<V: FnMut(RefView<'_>)>(&self, mut visitor: V) {
1880        let mut stack: Vec<&ASTNode> = Vec::with_capacity(8);
1881        stack.push(self);
1882        while let Some(node) = stack.pop() {
1883            match &node.node_type {
1884                ASTNodeType::Reference { reference, .. } => visitor(RefView::from(reference)),
1885                ASTNodeType::UnaryOp { expr, .. } => stack.push(expr),
1886                ASTNodeType::BinaryOp { left, right, .. } => {
1887                    // Push right first so left is visited first (stable-ish order)
1888                    stack.push(right);
1889                    stack.push(left);
1890                }
1891                ASTNodeType::Function { args, .. } => {
1892                    for a in args.iter().rev() {
1893                        stack.push(a);
1894                    }
1895                }
1896                ASTNodeType::Call { callee, args } => {
1897                    for a in args.iter().rev() {
1898                        stack.push(a);
1899                    }
1900                    stack.push(callee);
1901                }
1902                ASTNodeType::Array(rows) => {
1903                    for r in rows.iter().rev() {
1904                        for item in r.iter().rev() {
1905                            stack.push(item);
1906                        }
1907                    }
1908                }
1909                ASTNodeType::Literal(_) => {}
1910            }
1911        }
1912    }
1913
1914    /// Convenience: collect references into a small, inline vector based on a policy.
1915    pub fn collect_references(&self, policy: &CollectPolicy) -> SmallVec<[ReferenceType; 4]> {
1916        let mut out: SmallVec<[ReferenceType; 4]> = SmallVec::new();
1917        self.visit_refs(|rv| match rv {
1918            RefView::Cell {
1919                sheet,
1920                row,
1921                col,
1922                row_abs,
1923                col_abs,
1924            } => out.push(ReferenceType::Cell {
1925                sheet: sheet.map(|s| s.to_string()),
1926                row,
1927                col,
1928                row_abs,
1929                col_abs,
1930            }),
1931            RefView::Range {
1932                sheet,
1933                start_row,
1934                start_col,
1935                end_row,
1936                end_col,
1937                start_row_abs,
1938                start_col_abs,
1939                end_row_abs,
1940                end_col_abs,
1941            } => {
1942                // Optionally expand very small finite ranges into individual cells
1943                if policy.expand_small_ranges {
1944                    if let (Some(sr), Some(sc), Some(er), Some(ec)) =
1945                        (start_row, start_col, end_row, end_col)
1946                    {
1947                        let rows = er.saturating_sub(sr) + 1;
1948                        let cols = ec.saturating_sub(sc) + 1;
1949                        let area = rows.saturating_mul(cols);
1950                        if area as usize <= policy.range_expansion_limit {
1951                            let row_abs = start_row_abs && end_row_abs;
1952                            let col_abs = start_col_abs && end_col_abs;
1953                            for r in sr..=er {
1954                                for c in sc..=ec {
1955                                    out.push(ReferenceType::Cell {
1956                                        sheet: sheet.map(|s| s.to_string()),
1957                                        row: r,
1958                                        col: c,
1959                                        row_abs,
1960                                        col_abs,
1961                                    });
1962                                }
1963                            }
1964                            return; // handled
1965                        }
1966                    }
1967                }
1968                out.push(ReferenceType::Range {
1969                    sheet: sheet.map(|s| s.to_string()),
1970                    start_row,
1971                    start_col,
1972                    end_row,
1973                    end_col,
1974                    start_row_abs,
1975                    start_col_abs,
1976                    end_row_abs,
1977                    end_col_abs,
1978                });
1979            }
1980            RefView::Cell3D {
1981                sheet_first,
1982                sheet_last,
1983                row,
1984                col,
1985                row_abs,
1986                col_abs,
1987            } => out.push(ReferenceType::Cell3D {
1988                sheet_first: sheet_first.to_string(),
1989                sheet_last: sheet_last.to_string(),
1990                row,
1991                col,
1992                row_abs,
1993                col_abs,
1994            }),
1995            RefView::Range3D {
1996                sheet_first,
1997                sheet_last,
1998                start_row,
1999                start_col,
2000                end_row,
2001                end_col,
2002                start_row_abs,
2003                start_col_abs,
2004                end_row_abs,
2005                end_col_abs,
2006            } => out.push(ReferenceType::Range3D {
2007                sheet_first: sheet_first.to_string(),
2008                sheet_last: sheet_last.to_string(),
2009                start_row,
2010                start_col,
2011                end_row,
2012                end_col,
2013                start_row_abs,
2014                start_col_abs,
2015                end_row_abs,
2016                end_col_abs,
2017            }),
2018            RefView::External {
2019                raw,
2020                book,
2021                sheet,
2022                kind,
2023            } => out.push(ReferenceType::External(ExternalReference {
2024                raw: raw.to_string(),
2025                book: ExternalBookRef::Token(book.to_string()),
2026                sheet: sheet.to_string(),
2027                kind,
2028            })),
2029            RefView::Table { name, specifier } => out.push(ReferenceType::Table(TableReference {
2030                name: name.to_string(),
2031                specifier: specifier.cloned(),
2032            })),
2033            RefView::NamedRange { name } => {
2034                if policy.include_names {
2035                    out.push(ReferenceType::NamedRange(name.to_string()));
2036                }
2037            }
2038        });
2039        out
2040    }
2041    /// Recursively updates sheet references within the AST.
2042    ///
2043    /// If `target_name` is provided, only references matching that sheet name are updated.
2044    /// This is used for "healing" specific broken references (Tombstone rescue).
2045    /// If `target_name` is None, it acts as a global rename (standard sheet rename).
2046    pub fn update_sheet_references(&mut self, target_name: Option<&str>, new_name: &str) {
2047        match &mut self.node_type {
2048            ASTNodeType::Reference {
2049                reference: ReferenceType::Cell { sheet, .. } | ReferenceType::Range { sheet, .. },
2050                ..
2051            } => {
2052                if let Some(current_sheet) = sheet
2053                    && (target_name.is_none() || target_name == Some(current_sheet.as_str()))
2054                {
2055                    *sheet = Some(new_name.to_string());
2056                }
2057            }
2058            ASTNodeType::Reference {
2059                reference:
2060                    ReferenceType::Cell3D {
2061                        sheet_first,
2062                        sheet_last,
2063                        ..
2064                    }
2065                    | ReferenceType::Range3D {
2066                        sheet_first,
2067                        sheet_last,
2068                        ..
2069                    },
2070                ..
2071            } => {
2072                if target_name.is_none() || target_name == Some(sheet_first.as_str()) {
2073                    *sheet_first = new_name.to_string();
2074                }
2075                if target_name.is_none() || target_name == Some(sheet_last.as_str()) {
2076                    *sheet_last = new_name.to_string();
2077                }
2078            }
2079            ASTNodeType::UnaryOp { expr, .. } => {
2080                expr.update_sheet_references(target_name, new_name);
2081            }
2082            ASTNodeType::BinaryOp { left, right, .. } => {
2083                left.update_sheet_references(target_name, new_name);
2084                right.update_sheet_references(target_name, new_name);
2085            }
2086            ASTNodeType::Function { args, .. } => {
2087                for arg in args {
2088                    arg.update_sheet_references(target_name, new_name);
2089                }
2090            }
2091            ASTNodeType::Call { callee, args } => {
2092                callee.update_sheet_references(target_name, new_name);
2093                for arg in args {
2094                    arg.update_sheet_references(target_name, new_name);
2095                }
2096            }
2097            ASTNodeType::Array(rows) => {
2098                for row in rows {
2099                    for cell in row {
2100                        cell.update_sheet_references(target_name, new_name);
2101                    }
2102                }
2103            }
2104            _ => {}
2105        }
2106    }
2107}
2108
/// A borrowing view over a ReferenceType. Avoids cloning sheet/names while walking.
///
/// The variants mirror those of `ReferenceType`; string data is borrowed for
/// `'a` and coordinates/flags are copied, which keeps the view itself `Copy`.
#[derive(Clone, Copy, Debug)]
pub enum RefView<'a> {
    /// Single-cell view with an optional sheet qualifier.
    Cell {
        sheet: Option<&'a str>,
        row: u32,
        col: u32,
        /// Absolute flag for the row component.
        row_abs: bool,
        /// Absolute flag for the column component.
        col_abs: bool,
    },
    /// Range view with an optional sheet qualifier. Each bound is optional;
    /// an endpoint lacking a row or column is rendered without that part.
    Range {
        sheet: Option<&'a str>,
        start_row: Option<u32>,
        start_col: Option<u32>,
        end_row: Option<u32>,
        end_col: Option<u32>,
        start_row_abs: bool,
        start_col_abs: bool,
        end_row_abs: bool,
        end_col_abs: bool,
    },
    /// 3D cell view (`Sheet1:Sheet3!A1`).
    Cell3D {
        sheet_first: &'a str,
        sheet_last: &'a str,
        row: u32,
        col: u32,
        row_abs: bool,
        col_abs: bool,
    },
    /// 3D range view (`Sheet1:Sheet3!A1:B2`).
    Range3D {
        sheet_first: &'a str,
        sheet_last: &'a str,
        start_row: Option<u32>,
        start_col: Option<u32>,
        end_row: Option<u32>,
        end_col: Option<u32>,
        start_row_abs: bool,
        start_col_abs: bool,
        end_row_abs: bool,
        end_col_abs: bool,
    },
    /// View of an external reference.
    External {
        /// The reference's stored raw text.
        raw: &'a str,
        /// Token text identifying the external book.
        book: &'a str,
        /// Sheet name within the external book.
        sheet: &'a str,
        kind: ExternalRefKind,
    },
    /// View of a table reference: the table name and its optional specifier.
    Table {
        name: &'a str,
        specifier: Option<&'a TableSpecifier>,
    },
    /// View of a named range.
    NamedRange {
        name: &'a str,
    },
}
2166
2167impl<'a> From<&'a ReferenceType> for RefView<'a> {
2168    fn from(r: &'a ReferenceType) -> Self {
2169        match r {
2170            ReferenceType::Cell {
2171                sheet,
2172                row,
2173                col,
2174                row_abs,
2175                col_abs,
2176            } => RefView::Cell {
2177                sheet: sheet.as_deref(),
2178                row: *row,
2179                col: *col,
2180                row_abs: *row_abs,
2181                col_abs: *col_abs,
2182            },
2183            ReferenceType::Range {
2184                sheet,
2185                start_row,
2186                start_col,
2187                end_row,
2188                end_col,
2189                start_row_abs,
2190                start_col_abs,
2191                end_row_abs,
2192                end_col_abs,
2193            } => RefView::Range {
2194                sheet: sheet.as_deref(),
2195                start_row: *start_row,
2196                start_col: *start_col,
2197                end_row: *end_row,
2198                end_col: *end_col,
2199                start_row_abs: *start_row_abs,
2200                start_col_abs: *start_col_abs,
2201                end_row_abs: *end_row_abs,
2202                end_col_abs: *end_col_abs,
2203            },
2204            ReferenceType::Cell3D {
2205                sheet_first,
2206                sheet_last,
2207                row,
2208                col,
2209                row_abs,
2210                col_abs,
2211            } => RefView::Cell3D {
2212                sheet_first: sheet_first.as_str(),
2213                sheet_last: sheet_last.as_str(),
2214                row: *row,
2215                col: *col,
2216                row_abs: *row_abs,
2217                col_abs: *col_abs,
2218            },
2219            ReferenceType::Range3D {
2220                sheet_first,
2221                sheet_last,
2222                start_row,
2223                start_col,
2224                end_row,
2225                end_col,
2226                start_row_abs,
2227                start_col_abs,
2228                end_row_abs,
2229                end_col_abs,
2230            } => RefView::Range3D {
2231                sheet_first: sheet_first.as_str(),
2232                sheet_last: sheet_last.as_str(),
2233                start_row: *start_row,
2234                start_col: *start_col,
2235                end_row: *end_row,
2236                end_col: *end_col,
2237                start_row_abs: *start_row_abs,
2238                start_col_abs: *start_col_abs,
2239                end_row_abs: *end_row_abs,
2240                end_col_abs: *end_col_abs,
2241            },
2242            ReferenceType::External(ext) => RefView::External {
2243                raw: ext.raw.as_str(),
2244                book: ext.book.token(),
2245                sheet: ext.sheet.as_str(),
2246                kind: ext.kind,
2247            },
2248            ReferenceType::Table(tr) => RefView::Table {
2249                name: tr.name.as_str(),
2250                specifier: tr.specifier.as_ref(),
2251            },
2252            ReferenceType::NamedRange(name) => RefView::NamedRange { name },
2253        }
2254    }
2255}
2256
/// Iterator over RefView for an AST, implemented via an explicit stack to avoid recursion allocation.
///
/// Created by `ASTNode::refs`. Yields one `RefView` per `Reference` node.
pub struct RefIter<'a> {
    /// Nodes still waiting to be examined; the last element is examined next.
    stack: smallvec::SmallVec<[&'a ASTNode; 8]>,
}
2261
2262impl<'a> Iterator for RefIter<'a> {
2263    type Item = RefView<'a>;
2264    fn next(&mut self) -> Option<Self::Item> {
2265        while let Some(node) = self.stack.pop() {
2266            match &node.node_type {
2267                ASTNodeType::Reference { reference, .. } => return Some(RefView::from(reference)),
2268                ASTNodeType::UnaryOp { expr, .. } => self.stack.push(expr),
2269                ASTNodeType::BinaryOp { left, right, .. } => {
2270                    self.stack.push(right);
2271                    self.stack.push(left);
2272                }
2273                ASTNodeType::Function { args, .. } => {
2274                    for a in args.iter().rev() {
2275                        self.stack.push(a);
2276                    }
2277                }
2278                ASTNodeType::Call { callee, args } => {
2279                    for a in args.iter().rev() {
2280                        self.stack.push(a);
2281                    }
2282                    self.stack.push(callee);
2283                }
2284                ASTNodeType::Array(rows) => {
2285                    for r in rows.iter().rev() {
2286                        for item in r.iter().rev() {
2287                            self.stack.push(item);
2288                        }
2289                    }
2290                }
2291                ASTNodeType::Literal(_) => {}
2292            }
2293        }
2294        None
2295    }
2296}
2297
/// Options that steer reference collection.
#[derive(Debug, Clone)]
pub struct CollectPolicy {
    /// Replace small, fully bounded ranges by their individual cells.
    pub expand_small_ranges: bool,
    /// Largest cell count a range may have and still be expanded.
    pub range_expansion_limit: usize,
    /// Whether named ranges are included in the result.
    pub include_names: bool,
}

impl Default for CollectPolicy {
    /// The default policy never expands ranges and includes named ranges.
    fn default() -> Self {
        CollectPolicy {
            include_names: true,
            range_expansion_limit: 0,
            expand_small_ranges: false,
        }
    }
}
2315
2316impl Display for ASTNode {
2317    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2318        write!(f, "{}", self.node_type)
2319    }
2320}
2321
2322impl std::hash::Hash for ASTNode {
2323    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
2324        let hash = self.calculate_hash();
2325        state.write_u64(hash);
2326    }
2327}
2328
/// A parser for converting tokens into an AST.
///
/// A parser owns its token list and a cursor into it. Build one with
/// `Parser::new` (or one of the other constructors), then call `parse`.
pub struct Parser {
    /// The tokens being parsed. Whitespace tokens are removed at construction
    /// unless the caller asked to keep them.
    tokens: Arc<[Token]>,
    /// Index into `tokens` of the next token to examine.
    position: usize,
    /// Optional classifier to determine whether a function name is volatile.
    volatility_classifier: Option<VolatilityClassifierBox>,
    /// Formula dialect chosen at construction.
    dialect: FormulaDialect,
    /// When > 0, treat a top-level `OpInfix(",")` as a terminator (call-arg
    /// separator) instead of the union/list operator. Used by `parse_call_arguments`.
    in_call_args_depth: usize,
}
2340
2341impl TryFrom<&str> for Parser {
2342    type Error = TokenizerError;
2343
2344    fn try_from(formula: &str) -> Result<Self, Self::Error> {
2345        let tokens = Tokenizer::new(formula)?.items;
2346        Ok(Self::new(tokens, false))
2347    }
2348}
2349
2350impl TryFrom<String> for Parser {
2351    type Error = TokenizerError;
2352
2353    fn try_from(formula: String) -> Result<Self, Self::Error> {
2354        Self::try_from(formula.as_str())
2355    }
2356}
2357
2358impl Parser {
    /// Create a parser over `tokens` using the Excel dialect.
    ///
    /// When `include_whitespace` is false, whitespace tokens are dropped
    /// before parsing (see `new_with_dialect`).
    pub fn new(tokens: Vec<Token>, include_whitespace: bool) -> Self {
        Self::new_with_dialect(tokens, include_whitespace, FormulaDialect::Excel)
    }
2362
2363    pub fn new_with_dialect(
2364        mut tokens: Vec<Token>,
2365        include_whitespace: bool,
2366        dialect: FormulaDialect,
2367    ) -> Self {
2368        if !include_whitespace {
2369            tokens.retain(|t| t.token_type != TokenType::Whitespace);
2370        }
2371
2372        Parser {
2373            tokens: Arc::from(tokens.into_boxed_slice()),
2374            position: 0,
2375            volatility_classifier: None,
2376            dialect,
2377            in_call_args_depth: 0,
2378        }
2379    }
2380
2381    pub fn try_from_formula(formula: &str) -> Result<Self, TokenizerError> {
2382        let tokens = Tokenizer::new(formula)?.items;
2383        Ok(Self::new(tokens, false))
2384    }
2385
2386    /// Provide a function-volatility classifier for this parser.
2387    /// If set, the parser will annotate ASTs with a contains_volatile bit.
2388    pub fn with_volatility_classifier<F>(mut self, f: F) -> Self
2389    where
2390        F: Fn(&str) -> bool + Send + Sync + 'static,
2391    {
2392        self.volatility_classifier = Some(Box::new(f));
2393        self
2394    }
2395
2396    /// Convenience constructor to set a classifier alongside other options.
2397    pub fn new_with_classifier<F>(tokens: Vec<Token>, include_whitespace: bool, f: F) -> Self
2398    where
2399        F: Fn(&str) -> bool + Send + Sync + 'static,
2400    {
2401        Self::new(tokens, include_whitespace).with_volatility_classifier(f)
2402    }
2403
2404    pub fn new_with_classifier_and_dialect<F>(
2405        tokens: Vec<Token>,
2406        include_whitespace: bool,
2407        dialect: FormulaDialect,
2408        f: F,
2409    ) -> Self
2410    where
2411        F: Fn(&str) -> bool + Send + Sync + 'static,
2412    {
2413        Self::new_with_dialect(tokens, include_whitespace, dialect).with_volatility_classifier(f)
2414    }
2415
2416    fn skip_whitespace(&mut self) {
2417        while self.position < self.tokens.len()
2418            && self.tokens[self.position].token_type == TokenType::Whitespace
2419        {
2420            self.position += 1;
2421        }
2422    }
2423
2424    /// Parse the tokens into an AST.
2425    pub fn parse(&mut self) -> Result<ASTNode, ParserError> {
2426        if self.tokens.is_empty() {
2427            return Err(ParserError {
2428                message: "No tokens to parse".to_string(),
2429                position: None,
2430            });
2431        }
2432
2433        self.skip_whitespace();
2434        if self.position >= self.tokens.len() {
2435            return Err(ParserError {
2436                message: "No tokens to parse".to_string(),
2437                position: None,
2438            });
2439        }
2440
2441        // Check for literal formula (doesn't start with '=')
2442        if self.tokens[self.position].token_type == TokenType::Literal {
2443            let token = self.tokens[self.position].clone();
2444            self.position += 1;
2445            self.skip_whitespace();
2446            if self.position < self.tokens.len() {
2447                return Err(ParserError {
2448                    message: format!(
2449                        "Unexpected token at position {}: {:?}",
2450                        self.position, self.tokens[self.position]
2451                    ),
2452                    position: Some(self.position),
2453                });
2454            }
2455            return Ok(ASTNode::new(
2456                ASTNodeType::Literal(LiteralValue::Text(token.value.clone())),
2457                Some(token),
2458            ));
2459        }
2460
2461        let ast = self.parse_expression()?;
2462        self.skip_whitespace();
2463        if self.position < self.tokens.len() {
2464            return Err(ParserError {
2465                message: format!(
2466                    "Unexpected token at position {}: {:?}",
2467                    self.position, self.tokens[self.position]
2468                ),
2469                position: Some(self.position),
2470            });
2471        }
2472        Ok(ast)
2473    }
2474
2475    fn parse_expression(&mut self) -> Result<ASTNode, ParserError> {
2476        self.parse_bp(0)
2477    }
2478
2479    // Pratt-style precedence parser. `min_precedence` is the minimum binding power
2480    // an operator must have to be consumed at this level.
2481    fn parse_bp(&mut self, min_precedence: u8) -> Result<ASTNode, ParserError> {
2482        let mut left = self.parse_prefix()?;
2483
2484        loop {
2485            self.skip_whitespace();
2486            if self.position >= self.tokens.len() {
2487                break;
2488            }
2489
2490            // Postfix call: a `(` directly following a closed expression denotes
2491            // immediate invocation of a callable result (e.g. LAMBDA IIFE).
2492            if self.tokens[self.position].token_type == TokenType::Paren
2493                && self.tokens[self.position].subtype == TokenSubType::Open
2494            {
2495                self.position += 1;
2496                let args = self.parse_call_arguments()?;
2497                let call_volatile =
2498                    left.contains_volatile || args.iter().any(|a| a.contains_volatile);
2499                left = ASTNode::new_with_volatile(
2500                    ASTNodeType::Call {
2501                        callee: Box::new(left),
2502                        args,
2503                    },
2504                    None,
2505                    call_volatile,
2506                );
2507                continue;
2508            }
2509
2510            // Postfix operators (e.g. percent).
2511            if self.tokens[self.position].token_type == TokenType::OpPostfix {
2512                let (precedence, _) = self.tokens[self.position]
2513                    .get_precedence()
2514                    .unwrap_or((0, Associativity::Left));
2515                if precedence < min_precedence {
2516                    break;
2517                }
2518
2519                let op_token = self.tokens[self.position].clone();
2520                self.position += 1;
2521                let contains_volatile = left.contains_volatile;
2522                left = ASTNode::new_with_volatile(
2523                    ASTNodeType::UnaryOp {
2524                        op: op_token.value.clone(),
2525                        expr: Box::new(left),
2526                    },
2527                    Some(op_token),
2528                    contains_volatile,
2529                );
2530                continue;
2531            }
2532
2533            let token = &self.tokens[self.position];
2534            if token.token_type != TokenType::OpInfix {
2535                break;
2536            }
2537
2538            // Inside a postfix call's argument list, treat top-level `,` as
2539            // an argument separator, not as the union operator.
2540            if self.in_call_args_depth > 0 && token.value == "," {
2541                break;
2542            }
2543
2544            let (precedence, associativity) =
2545                token.get_precedence().unwrap_or((0, Associativity::Left));
2546            if precedence < min_precedence {
2547                break;
2548            }
2549
2550            let op_token = self.tokens[self.position].clone();
2551            self.position += 1;
2552
2553            let next_min_precedence = if associativity == Associativity::Left {
2554                precedence + 1
2555            } else {
2556                precedence
2557            };
2558
2559            let right = self.parse_bp(next_min_precedence)?;
2560            let contains_volatile = left.contains_volatile || right.contains_volatile;
2561            left = ASTNode::new_with_volatile(
2562                ASTNodeType::BinaryOp {
2563                    op: op_token.value.clone(),
2564                    left: Box::new(left),
2565                    right: Box::new(right),
2566                },
2567                Some(op_token),
2568                contains_volatile,
2569            );
2570        }
2571
2572        Ok(left)
2573    }
2574
2575    fn parse_prefix(&mut self) -> Result<ASTNode, ParserError> {
2576        self.skip_whitespace();
2577        if self.position < self.tokens.len()
2578            && self.tokens[self.position].token_type == TokenType::OpPrefix
2579        {
2580            let op_token = self.tokens[self.position].clone();
2581            self.position += 1;
2582
2583            // Prefix unary binds tighter than exponent (Excel semantics),
2584            // so parse the RHS with min_precedence equal to unary's precedence.
2585            let (precedence, _) = op_token
2586                .get_precedence()
2587                .unwrap_or((0, Associativity::Right));
2588
2589            let expr = self.parse_bp(precedence)?;
2590            let contains_volatile = expr.contains_volatile;
2591            return Ok(ASTNode::new_with_volatile(
2592                ASTNodeType::UnaryOp {
2593                    op: op_token.value.clone(),
2594                    expr: Box::new(expr),
2595                },
2596                Some(op_token),
2597                contains_volatile,
2598            ));
2599        }
2600
2601        self.parse_primary()
2602    }
2603
2604    fn parse_primary(&mut self) -> Result<ASTNode, ParserError> {
2605        self.skip_whitespace();
2606        if self.position >= self.tokens.len() {
2607            return Err(ParserError {
2608                message: "Unexpected end of tokens".to_string(),
2609                position: Some(self.position),
2610            });
2611        }
2612
2613        let token = &self.tokens[self.position];
2614        match token.token_type {
2615            TokenType::Operand => {
2616                let operand_token = self.tokens[self.position].clone();
2617                self.position += 1;
2618                self.parse_operand(operand_token)
2619            }
2620            TokenType::Func => {
2621                let func_token = self.tokens[self.position].clone();
2622                self.position += 1;
2623                self.parse_function(func_token)
2624            }
2625            TokenType::Paren if token.subtype == TokenSubType::Open => {
2626                self.position += 1;
2627                let expr = self.parse_expression()?;
2628                if self.position >= self.tokens.len()
2629                    || self.tokens[self.position].token_type != TokenType::Paren
2630                    || self.tokens[self.position].subtype != TokenSubType::Close
2631                {
2632                    return Err(ParserError {
2633                        message: "Expected closing parenthesis".to_string(),
2634                        position: Some(self.position),
2635                    });
2636                }
2637                self.position += 1;
2638                Ok(expr)
2639            }
2640            TokenType::Array if token.subtype == TokenSubType::Open => {
2641                self.position += 1;
2642                self.parse_array()
2643            }
2644            _ => Err(ParserError {
2645                message: format!("Unexpected token: {token:?}"),
2646                position: Some(self.position),
2647            }),
2648        }
2649    }
2650
2651    fn parse_operand(&mut self, token: Token) -> Result<ASTNode, ParserError> {
2652        match token.subtype {
2653            TokenSubType::Number => {
2654                let value = token.value.parse::<f64>().map_err(|_| ParserError {
2655                    message: format!("Invalid number: {}", token.value),
2656                    position: Some(self.position),
2657                })?;
2658                Ok(ASTNode::new(
2659                    ASTNodeType::Literal(LiteralValue::Number(value)),
2660                    Some(token),
2661                ))
2662            }
2663            TokenSubType::Text => {
2664                // Strip surrounding quotes from text literals
2665                let mut text = token.value.clone();
2666                if text.starts_with('"') && text.ends_with('"') && text.len() >= 2 {
2667                    text = text[1..text.len() - 1].to_string();
2668                    // Handle escaped quotes
2669                    text = text.replace("\"\"", "\"");
2670                }
2671                Ok(ASTNode::new(
2672                    ASTNodeType::Literal(LiteralValue::Text(text)),
2673                    Some(token),
2674                ))
2675            }
2676            TokenSubType::Logical => {
2677                let value = token.value.eq_ignore_ascii_case("TRUE");
2678                Ok(ASTNode::new(
2679                    ASTNodeType::Literal(LiteralValue::Boolean(value)),
2680                    Some(token),
2681                ))
2682            }
2683            TokenSubType::Error => {
2684                let error = ExcelError::from_error_string(&token.value);
2685                Ok(ASTNode::new(
2686                    ASTNodeType::Literal(LiteralValue::Error(error)),
2687                    Some(token),
2688                ))
2689            }
2690            TokenSubType::Range => {
2691                let reference = ReferenceType::from_string_with_dialect(&token.value, self.dialect)
2692                    .map_err(|e| ParserError {
2693                        message: format!("Invalid reference '{}': {}", token.value, e),
2694                        position: Some(self.position),
2695                    })?;
2696                Ok(ASTNode::new(
2697                    ASTNodeType::Reference {
2698                        original: token.value.clone(),
2699                        reference,
2700                    },
2701                    Some(token),
2702                ))
2703            }
2704            _ => Err(ParserError {
2705                message: format!("Unexpected operand subtype: {:?}", token.subtype),
2706                position: Some(self.position),
2707            }),
2708        }
2709    }
2710
2711    fn parse_function(&mut self, func_token: Token) -> Result<ASTNode, ParserError> {
2712        let name = func_token.value[..func_token.value.len() - 1].to_string();
2713        let args = self.parse_function_arguments()?;
2714        // Determine volatility for this function
2715        let this_is_volatile = self
2716            .volatility_classifier
2717            .as_ref()
2718            .map(|f| f(name.as_str()))
2719            .unwrap_or(false);
2720        let args_volatile = args.iter().any(|a| a.contains_volatile);
2721
2722        Ok(ASTNode::new_with_volatile(
2723            ASTNodeType::Function { name, args },
2724            Some(func_token),
2725            this_is_volatile || args_volatile,
2726        ))
2727    }
2728
2729    /// Parse arguments for a postfix call (immediate invocation), where the
2730    /// opening `(` is a `Paren:Open` and the matching `)` is a `Paren:Close`.
2731    /// Caller has already consumed the opening paren.
2732    ///
2733    /// Inside this region the tokenizer emits a top-level `,` as `OpInfix`
2734    /// (Excel's union operator). For call arguments we want it to behave as a
2735    /// separator, so we bump `in_call_args_depth` while parsing each argument.
2736    fn parse_call_arguments(&mut self) -> Result<Vec<ASTNode>, ParserError> {
2737        let mut args: Vec<ASTNode> = Vec::new();
2738
2739        self.skip_whitespace();
2740        // Empty argument list: `f()`
2741        if self.position < self.tokens.len()
2742            && self.tokens[self.position].token_type == TokenType::Paren
2743            && self.tokens[self.position].subtype == TokenSubType::Close
2744        {
2745            self.position += 1;
2746            return Ok(args);
2747        }
2748
2749        self.in_call_args_depth += 1;
2750        let result = (|| -> Result<Vec<ASTNode>, ParserError> {
2751            args.push(self.parse_expression()?);
2752            loop {
2753                self.skip_whitespace();
2754                if self.position >= self.tokens.len() {
2755                    return Err(ParserError {
2756                        message: "Unterminated call argument list".to_string(),
2757                        position: Some(self.position),
2758                    });
2759                }
2760                let token = &self.tokens[self.position];
2761                let is_separator = (token.token_type == TokenType::Sep
2762                    && token.subtype == TokenSubType::Arg)
2763                    || (token.token_type == TokenType::OpInfix && token.value == ",");
2764                if is_separator {
2765                    self.position += 1;
2766                    args.push(self.parse_expression()?);
2767                } else if token.token_type == TokenType::Paren
2768                    && token.subtype == TokenSubType::Close
2769                {
2770                    self.position += 1;
2771                    return Ok(std::mem::take(&mut args));
2772                } else {
2773                    return Err(ParserError {
2774                        message: format!("Expected ',' or ')' in call arguments, got {token:?}"),
2775                        position: Some(self.position),
2776                    });
2777                }
2778            }
2779        })();
2780        self.in_call_args_depth -= 1;
2781        result
2782    }
2783
2784    /// Parse function arguments.
2785    fn parse_function_arguments(&mut self) -> Result<Vec<ASTNode>, ParserError> {
2786        let mut args = Vec::new();
2787
2788        // Check for closing parenthesis (empty arguments)
2789        if self.position < self.tokens.len()
2790            && self.tokens[self.position].token_type == TokenType::Func
2791            && self.tokens[self.position].subtype == TokenSubType::Close
2792        {
2793            self.position += 1;
2794            return Ok(args);
2795        }
2796
2797        // Handle optional arguments (consecutive separators)
2798        // Check if we start with a separator (empty first argument)
2799        if self.position < self.tokens.len()
2800            && self.tokens[self.position].token_type == TokenType::Sep
2801            && self.tokens[self.position].subtype == TokenSubType::Arg
2802        {
2803            // Empty first argument - represented as empty text literal for compatibility
2804            args.push(ASTNode::new(
2805                ASTNodeType::Literal(LiteralValue::Text("".to_string())),
2806                None,
2807            ));
2808            self.position += 1;
2809        } else {
2810            // Parse first argument
2811            args.push(self.parse_expression()?);
2812        }
2813
2814        // Parse remaining arguments
2815        while self.position < self.tokens.len() {
2816            let token = &self.tokens[self.position];
2817
2818            if token.token_type == TokenType::Sep && token.subtype == TokenSubType::Arg {
2819                self.position += 1;
2820                // Check for consecutive separators (empty argument)
2821                if self.position < self.tokens.len() {
2822                    let next_token = &self.tokens[self.position];
2823                    if next_token.token_type == TokenType::Sep
2824                        && next_token.subtype == TokenSubType::Arg
2825                    {
2826                        // Empty argument - represented as empty text literal for compatibility
2827                        args.push(ASTNode::new(
2828                            ASTNodeType::Literal(LiteralValue::Text("".to_string())),
2829                            None,
2830                        ));
2831                    } else if next_token.token_type == TokenType::Func
2832                        && next_token.subtype == TokenSubType::Close
2833                    {
2834                        // Empty last argument
2835                        args.push(ASTNode::new(
2836                            ASTNodeType::Literal(LiteralValue::Text("".to_string())),
2837                            None,
2838                        ));
2839                        self.position += 1;
2840                        break;
2841                    } else {
2842                        args.push(self.parse_expression()?);
2843                    }
2844                } else {
2845                    // Trailing separator at end of formula
2846                    args.push(ASTNode::new(
2847                        ASTNodeType::Literal(LiteralValue::Text("".to_string())),
2848                        None,
2849                    ));
2850                }
2851            } else if token.token_type == TokenType::Func && token.subtype == TokenSubType::Close {
2852                self.position += 1;
2853                break;
2854            } else {
2855                return Err(ParserError {
2856                    message: format!("Expected ',' or ')' in function arguments, got {token:?}"),
2857                    position: Some(self.position),
2858                });
2859            }
2860        }
2861
2862        Ok(args)
2863    }
2864
2865    fn parse_array(&mut self) -> Result<ASTNode, ParserError> {
2866        let mut rows = Vec::new();
2867        let mut current_row = Vec::new();
2868
2869        // Check for empty array
2870        if self.position < self.tokens.len()
2871            && self.tokens[self.position].token_type == TokenType::Array
2872            && self.tokens[self.position].subtype == TokenSubType::Close
2873        {
2874            self.position += 1;
2875            return Ok(ASTNode::new(ASTNodeType::Array(rows), None));
2876        }
2877
2878        // Parse first element
2879        current_row.push(self.parse_expression()?);
2880
2881        while self.position < self.tokens.len() {
2882            let token = &self.tokens[self.position];
2883
2884            if token.token_type == TokenType::Sep {
2885                if token.subtype == TokenSubType::Arg {
2886                    // Column separator
2887                    self.position += 1;
2888                    current_row.push(self.parse_expression()?);
2889                } else if token.subtype == TokenSubType::Row {
2890                    // Row separator
2891                    self.position += 1;
2892                    rows.push(current_row);
2893                    current_row = vec![self.parse_expression()?];
2894                }
2895            } else if token.token_type == TokenType::Array && token.subtype == TokenSubType::Close {
2896                self.position += 1;
2897                rows.push(current_row);
2898                break;
2899            } else {
2900                return Err(ParserError {
2901                    message: format!("Unexpected token in array: {token:?}"),
2902                    position: Some(self.position),
2903                });
2904            }
2905        }
2906
2907        // Array volatility is the OR of element volatility
2908        let contains_volatile = rows
2909            .iter()
2910            .flat_map(|r| r.iter())
2911            .any(|n| n.contains_volatile);
2912        Ok(ASTNode::new_with_volatile(
2913            ASTNodeType::Array(rows),
2914            None,
2915            contains_volatile,
2916        ))
2917    }
2918}
2919
2920impl From<TokenizerError> for ParserError {
2921    fn from(err: TokenizerError) -> Self {
2922        ParserError {
2923            message: err.message,
2924            position: Some(err.pos),
2925        }
2926    }
2927}
2928
2929struct SpanParser<'a> {
2930    source: &'a str,
2931    tokens: &'a [crate::tokenizer::TokenSpan],
2932    position: usize,
2933    volatility_classifier: Option<VolatilityClassifierBox>,
2934    dialect: FormulaDialect,
2935    /// See `Parser::in_call_args_depth`.
2936    in_call_args_depth: usize,
2937}
2938
2939impl<'a> SpanParser<'a> {
2940    fn new(
2941        source: &'a str,
2942        tokens: &'a [crate::tokenizer::TokenSpan],
2943        dialect: FormulaDialect,
2944    ) -> Self {
2945        SpanParser {
2946            source,
2947            tokens,
2948            position: 0,
2949            volatility_classifier: None,
2950            dialect,
2951            in_call_args_depth: 0,
2952        }
2953    }
2954
2955    fn with_volatility_classifier<F>(mut self, f: F) -> Self
2956    where
2957        F: Fn(&str) -> bool + Send + Sync + 'static,
2958    {
2959        self.volatility_classifier = Some(Box::new(f));
2960        self
2961    }
2962
2963    fn skip_whitespace(&mut self) {
2964        while self.position < self.tokens.len()
2965            && self.tokens[self.position].token_type == TokenType::Whitespace
2966        {
2967            self.position += 1;
2968        }
2969    }
2970
2971    fn span_value(&self, span: &crate::tokenizer::TokenSpan) -> &str {
2972        &self.source[span.start..span.end]
2973    }
2974
2975    fn span_to_token(&self, span: &crate::tokenizer::TokenSpan) -> Token {
2976        Token::new_with_span(
2977            self.span_value(span).to_string(),
2978            span.token_type,
2979            span.subtype,
2980            span.start,
2981            span.end,
2982        )
2983    }
2984
2985    fn span_precedence(&self, span: &crate::tokenizer::TokenSpan) -> Option<(u8, Associativity)> {
2986        if !matches!(
2987            span.token_type,
2988            TokenType::OpPrefix | TokenType::OpInfix | TokenType::OpPostfix
2989        ) {
2990            return None;
2991        }
2992
2993        let op = if span.token_type == TokenType::OpPrefix {
2994            "u"
2995        } else {
2996            self.span_value(span)
2997        };
2998
2999        match op {
3000            "#" => Some((11, Associativity::Left)),
3001            ":" => Some((10, Associativity::Left)),
3002            " " => Some((9, Associativity::Left)),
3003            "," => Some((8, Associativity::Left)),
3004            "%" => Some((7, Associativity::Left)),
3005            "u" => Some((6, Associativity::Right)),
3006            "^" => Some((5, Associativity::Right)),
3007            "*" | "/" => Some((4, Associativity::Left)),
3008            "+" | "-" => Some((3, Associativity::Left)),
3009            "&" => Some((2, Associativity::Left)),
3010            "=" | "<" | ">" | "<=" | ">=" | "<>" => Some((1, Associativity::Left)),
3011            _ => None,
3012        }
3013    }
3014
3015    fn parse(&mut self) -> Result<ASTNode, ParserError> {
3016        if self.tokens.is_empty() {
3017            return Err(ParserError {
3018                message: "No tokens to parse".to_string(),
3019                position: None,
3020            });
3021        }
3022
3023        self.skip_whitespace();
3024        if self.position >= self.tokens.len() {
3025            return Err(ParserError {
3026                message: "No tokens to parse".to_string(),
3027                position: None,
3028            });
3029        }
3030
3031        if self.tokens[self.position].token_type == TokenType::Literal {
3032            let span = self.tokens[self.position];
3033            self.position += 1;
3034            self.skip_whitespace();
3035            if self.position < self.tokens.len() {
3036                return Err(ParserError {
3037                    message: format!(
3038                        "Unexpected token at position {}: {:?}",
3039                        self.position, self.tokens[self.position]
3040                    ),
3041                    position: Some(self.position),
3042                });
3043            }
3044
3045            let token = self.span_to_token(&span);
3046            return Ok(ASTNode::new(
3047                ASTNodeType::Literal(LiteralValue::Text(token.value.clone())),
3048                Some(token),
3049            ));
3050        }
3051
3052        let ast = self.parse_expression()?;
3053        self.skip_whitespace();
3054        if self.position < self.tokens.len() {
3055            return Err(ParserError {
3056                message: format!(
3057                    "Unexpected token at position {}: {:?}",
3058                    self.position, self.tokens[self.position]
3059                ),
3060                position: Some(self.position),
3061            });
3062        }
3063        Ok(ast)
3064    }
3065
3066    fn parse_expression(&mut self) -> Result<ASTNode, ParserError> {
3067        self.parse_bp(0)
3068    }
3069
3070    fn parse_bp(&mut self, min_precedence: u8) -> Result<ASTNode, ParserError> {
3071        let mut left = self.parse_prefix()?;
3072
3073        loop {
3074            self.skip_whitespace();
3075            if self.position >= self.tokens.len() {
3076                break;
3077            }
3078
3079            // Postfix call: a `(` directly following a closed expression denotes
3080            // immediate invocation of a callable result (e.g. LAMBDA IIFE).
3081            if self.tokens[self.position].token_type == TokenType::Paren
3082                && self.tokens[self.position].subtype == TokenSubType::Open
3083            {
3084                self.position += 1;
3085                let args = self.parse_call_arguments()?;
3086                let call_volatile =
3087                    left.contains_volatile || args.iter().any(|a| a.contains_volatile);
3088                left = ASTNode::new_with_volatile(
3089                    ASTNodeType::Call {
3090                        callee: Box::new(left),
3091                        args,
3092                    },
3093                    None,
3094                    call_volatile,
3095                );
3096                continue;
3097            }
3098
3099            if self.tokens[self.position].token_type == TokenType::OpPostfix {
3100                let (precedence, _) = self
3101                    .span_precedence(&self.tokens[self.position])
3102                    .unwrap_or((0, Associativity::Left));
3103                if precedence < min_precedence {
3104                    break;
3105                }
3106
3107                let op_span = self.tokens[self.position];
3108                self.position += 1;
3109                let op_token = self.span_to_token(&op_span);
3110                let contains_volatile = left.contains_volatile;
3111                left = ASTNode::new_with_volatile(
3112                    ASTNodeType::UnaryOp {
3113                        op: op_token.value.clone(),
3114                        expr: Box::new(left),
3115                    },
3116                    Some(op_token),
3117                    contains_volatile,
3118                );
3119                continue;
3120            }
3121
3122            let token = &self.tokens[self.position];
3123            if token.token_type != TokenType::OpInfix {
3124                break;
3125            }
3126
3127            // Inside a postfix call's argument list, treat top-level `,` as
3128            // an argument separator, not as the union operator.
3129            if self.in_call_args_depth > 0 && self.span_value(token) == "," {
3130                break;
3131            }
3132
3133            let (precedence, associativity) = self
3134                .span_precedence(token)
3135                .unwrap_or((0, Associativity::Left));
3136            if precedence < min_precedence {
3137                break;
3138            }
3139
3140            let op_span = self.tokens[self.position];
3141            self.position += 1;
3142
3143            let next_min_precedence = if associativity == Associativity::Left {
3144                precedence + 1
3145            } else {
3146                precedence
3147            };
3148
3149            let right = self.parse_bp(next_min_precedence)?;
3150            let op_token = self.span_to_token(&op_span);
3151            let contains_volatile = left.contains_volatile || right.contains_volatile;
3152            left = ASTNode::new_with_volatile(
3153                ASTNodeType::BinaryOp {
3154                    op: op_token.value.clone(),
3155                    left: Box::new(left),
3156                    right: Box::new(right),
3157                },
3158                Some(op_token),
3159                contains_volatile,
3160            );
3161        }
3162
3163        Ok(left)
3164    }
3165
3166    fn parse_prefix(&mut self) -> Result<ASTNode, ParserError> {
3167        self.skip_whitespace();
3168        if self.position < self.tokens.len()
3169            && self.tokens[self.position].token_type == TokenType::OpPrefix
3170        {
3171            let op_span = self.tokens[self.position];
3172            self.position += 1;
3173
3174            let (precedence, _) = self
3175                .span_precedence(&op_span)
3176                .unwrap_or((0, Associativity::Right));
3177
3178            let expr = self.parse_bp(precedence)?;
3179            let op_token = self.span_to_token(&op_span);
3180            let contains_volatile = expr.contains_volatile;
3181            return Ok(ASTNode::new_with_volatile(
3182                ASTNodeType::UnaryOp {
3183                    op: op_token.value.clone(),
3184                    expr: Box::new(expr),
3185                },
3186                Some(op_token),
3187                contains_volatile,
3188            ));
3189        }
3190
3191        self.parse_primary()
3192    }
3193
3194    fn parse_primary(&mut self) -> Result<ASTNode, ParserError> {
3195        self.skip_whitespace();
3196        if self.position >= self.tokens.len() {
3197            return Err(ParserError {
3198                message: "Unexpected end of tokens".to_string(),
3199                position: Some(self.position),
3200            });
3201        }
3202
3203        let token = &self.tokens[self.position];
3204        match token.token_type {
3205            TokenType::Operand => {
3206                let span = self.tokens[self.position];
3207                self.position += 1;
3208                self.parse_operand(span)
3209            }
3210            TokenType::Func => {
3211                let span = self.tokens[self.position];
3212                self.position += 1;
3213                self.parse_function(span)
3214            }
3215            TokenType::Paren if token.subtype == TokenSubType::Open => {
3216                self.position += 1;
3217                let expr = self.parse_expression()?;
3218                self.skip_whitespace();
3219                if self.position >= self.tokens.len()
3220                    || self.tokens[self.position].token_type != TokenType::Paren
3221                    || self.tokens[self.position].subtype != TokenSubType::Close
3222                {
3223                    return Err(ParserError {
3224                        message: "Expected closing parenthesis".to_string(),
3225                        position: Some(self.position),
3226                    });
3227                }
3228                self.position += 1;
3229                Ok(expr)
3230            }
3231            TokenType::Array if token.subtype == TokenSubType::Open => {
3232                self.position += 1;
3233                self.parse_array()
3234            }
3235            _ => Err(ParserError {
3236                message: format!("Unexpected token: {token:?}"),
3237                position: Some(self.position),
3238            }),
3239        }
3240    }
3241
3242    fn parse_operand(&mut self, span: crate::tokenizer::TokenSpan) -> Result<ASTNode, ParserError> {
3243        let value = self.span_value(&span);
3244        let token = self.span_to_token(&span);
3245
3246        match span.subtype {
3247            TokenSubType::Number => {
3248                let value = value.parse::<f64>().map_err(|_| ParserError {
3249                    message: format!("Invalid number: {value}"),
3250                    position: Some(self.position),
3251                })?;
3252                Ok(ASTNode::new(
3253                    ASTNodeType::Literal(LiteralValue::Number(value)),
3254                    Some(token),
3255                ))
3256            }
3257            TokenSubType::Text => {
3258                let mut text = value.to_string();
3259                if text.starts_with('"') && text.ends_with('"') && text.len() >= 2 {
3260                    text = text[1..text.len() - 1].to_string();
3261                    text = text.replace("\"\"", "\"");
3262                }
3263                Ok(ASTNode::new(
3264                    ASTNodeType::Literal(LiteralValue::Text(text)),
3265                    Some(token),
3266                ))
3267            }
3268            TokenSubType::Logical => {
3269                let v = value.eq_ignore_ascii_case("TRUE");
3270                Ok(ASTNode::new(
3271                    ASTNodeType::Literal(LiteralValue::Boolean(v)),
3272                    Some(token),
3273                ))
3274            }
3275            TokenSubType::Error => {
3276                let error = ExcelError::from_error_string(value);
3277                Ok(ASTNode::new(
3278                    ASTNodeType::Literal(LiteralValue::Error(error)),
3279                    Some(token),
3280                ))
3281            }
3282            TokenSubType::Range => {
3283                let reference = ReferenceType::from_string_with_dialect(value, self.dialect)
3284                    .map_err(|e| ParserError {
3285                        message: format!("Invalid reference '{value}': {e}"),
3286                        position: Some(self.position),
3287                    })?;
3288                Ok(ASTNode::new(
3289                    ASTNodeType::Reference {
3290                        original: value.to_string(),
3291                        reference,
3292                    },
3293                    Some(token),
3294                ))
3295            }
3296            _ => Err(ParserError {
3297                message: format!("Unexpected operand subtype: {:?}", span.subtype),
3298                position: Some(self.position),
3299            }),
3300        }
3301    }
3302
3303    fn parse_function(
3304        &mut self,
3305        func_span: crate::tokenizer::TokenSpan,
3306    ) -> Result<ASTNode, ParserError> {
3307        let func_value = self.span_value(&func_span);
3308        if func_value.is_empty() {
3309            return Err(ParserError {
3310                message: "Invalid function token".to_string(),
3311                position: Some(self.position),
3312            });
3313        }
3314        let name = func_value[..func_value.len() - 1].to_string();
3315        let args = self.parse_function_arguments()?;
3316
3317        let this_is_volatile = self
3318            .volatility_classifier
3319            .as_ref()
3320            .map(|f| f(name.as_str()))
3321            .unwrap_or(false);
3322        let args_volatile = args.iter().any(|a| a.contains_volatile);
3323
3324        let func_token = self.span_to_token(&func_span);
3325        Ok(ASTNode::new_with_volatile(
3326            ASTNodeType::Function { name, args },
3327            Some(func_token),
3328            this_is_volatile || args_volatile,
3329        ))
3330    }
3331
3332    /// Parse arguments for a postfix call (immediate invocation), where the
3333    /// opening `(` is a `Paren:Open` and the matching `)` is a `Paren:Close`.
3334    /// Caller has already consumed the opening paren. See the classic parser
3335    /// version for details on how top-level `,` is handled.
3336    fn parse_call_arguments(&mut self) -> Result<Vec<ASTNode>, ParserError> {
3337        let mut args: Vec<ASTNode> = Vec::new();
3338
3339        self.skip_whitespace();
3340        if self.position < self.tokens.len()
3341            && self.tokens[self.position].token_type == TokenType::Paren
3342            && self.tokens[self.position].subtype == TokenSubType::Close
3343        {
3344            self.position += 1;
3345            return Ok(args);
3346        }
3347
3348        self.in_call_args_depth += 1;
3349        let result = (|| -> Result<Vec<ASTNode>, ParserError> {
3350            args.push(self.parse_expression()?);
3351            loop {
3352                self.skip_whitespace();
3353                if self.position >= self.tokens.len() {
3354                    return Err(ParserError {
3355                        message: "Unterminated call argument list".to_string(),
3356                        position: Some(self.position),
3357                    });
3358                }
3359                let token = &self.tokens[self.position];
3360                let is_separator = (token.token_type == TokenType::Sep
3361                    && token.subtype == TokenSubType::Arg)
3362                    || (token.token_type == TokenType::OpInfix && self.span_value(token) == ",");
3363                if is_separator {
3364                    self.position += 1;
3365                    args.push(self.parse_expression()?);
3366                } else if token.token_type == TokenType::Paren
3367                    && token.subtype == TokenSubType::Close
3368                {
3369                    self.position += 1;
3370                    return Ok(std::mem::take(&mut args));
3371                } else {
3372                    return Err(ParserError {
3373                        message: format!("Expected ',' or ')' in call arguments, got {token:?}"),
3374                        position: Some(self.position),
3375                    });
3376                }
3377            }
3378        })();
3379        self.in_call_args_depth -= 1;
3380        result
3381    }
3382
3383    fn parse_function_arguments(&mut self) -> Result<Vec<ASTNode>, ParserError> {
3384        let mut args = Vec::new();
3385
3386        self.skip_whitespace();
3387        if self.position < self.tokens.len()
3388            && self.tokens[self.position].token_type == TokenType::Func
3389            && self.tokens[self.position].subtype == TokenSubType::Close
3390        {
3391            self.position += 1;
3392            return Ok(args);
3393        }
3394
3395        self.skip_whitespace();
3396        if self.position < self.tokens.len()
3397            && self.tokens[self.position].token_type == TokenType::Sep
3398            && self.tokens[self.position].subtype == TokenSubType::Arg
3399        {
3400            args.push(ASTNode::new(
3401                ASTNodeType::Literal(LiteralValue::Text("".to_string())),
3402                None,
3403            ));
3404            self.position += 1;
3405        } else {
3406            args.push(self.parse_expression()?);
3407        }
3408
3409        while self.position < self.tokens.len() {
3410            self.skip_whitespace();
3411            if self.position >= self.tokens.len() {
3412                break;
3413            }
3414
3415            let token = &self.tokens[self.position];
3416            if token.token_type == TokenType::Sep && token.subtype == TokenSubType::Arg {
3417                self.position += 1;
3418                self.skip_whitespace();
3419                if self.position < self.tokens.len() {
3420                    let next_token = &self.tokens[self.position];
3421                    if next_token.token_type == TokenType::Sep
3422                        && next_token.subtype == TokenSubType::Arg
3423                    {
3424                        args.push(ASTNode::new(
3425                            ASTNodeType::Literal(LiteralValue::Text("".to_string())),
3426                            None,
3427                        ));
3428                    } else if next_token.token_type == TokenType::Func
3429                        && next_token.subtype == TokenSubType::Close
3430                    {
3431                        args.push(ASTNode::new(
3432                            ASTNodeType::Literal(LiteralValue::Text("".to_string())),
3433                            None,
3434                        ));
3435                        self.position += 1;
3436                        break;
3437                    } else {
3438                        args.push(self.parse_expression()?);
3439                    }
3440                } else {
3441                    args.push(ASTNode::new(
3442                        ASTNodeType::Literal(LiteralValue::Text("".to_string())),
3443                        None,
3444                    ));
3445                }
3446            } else if token.token_type == TokenType::Func && token.subtype == TokenSubType::Close {
3447                self.position += 1;
3448                break;
3449            } else {
3450                return Err(ParserError {
3451                    message: format!("Expected ',' or ')' in function arguments, got {token:?}"),
3452                    position: Some(self.position),
3453                });
3454            }
3455        }
3456
3457        Ok(args)
3458    }
3459
3460    fn parse_array(&mut self) -> Result<ASTNode, ParserError> {
3461        let mut rows = Vec::new();
3462        let mut current_row = Vec::new();
3463
3464        self.skip_whitespace();
3465        if self.position < self.tokens.len()
3466            && self.tokens[self.position].token_type == TokenType::Array
3467            && self.tokens[self.position].subtype == TokenSubType::Close
3468        {
3469            self.position += 1;
3470            return Ok(ASTNode::new(ASTNodeType::Array(rows), None));
3471        }
3472
3473        current_row.push(self.parse_expression()?);
3474
3475        while self.position < self.tokens.len() {
3476            self.skip_whitespace();
3477            if self.position >= self.tokens.len() {
3478                break;
3479            }
3480            let token = &self.tokens[self.position];
3481
3482            if token.token_type == TokenType::Sep {
3483                if token.subtype == TokenSubType::Arg {
3484                    self.position += 1;
3485                    current_row.push(self.parse_expression()?);
3486                } else if token.subtype == TokenSubType::Row {
3487                    self.position += 1;
3488                    rows.push(current_row);
3489                    current_row = vec![self.parse_expression()?];
3490                }
3491            } else if token.token_type == TokenType::Array && token.subtype == TokenSubType::Close {
3492                self.position += 1;
3493                rows.push(current_row);
3494                break;
3495            } else {
3496                return Err(ParserError {
3497                    message: format!("Unexpected token in array: {token:?}"),
3498                    position: Some(self.position),
3499                });
3500            }
3501        }
3502
3503        let contains_volatile = rows
3504            .iter()
3505            .flat_map(|r| r.iter())
3506            .any(|n| n.contains_volatile);
3507
3508        Ok(ASTNode::new_with_volatile(
3509            ASTNodeType::Array(rows),
3510            None,
3511            contains_volatile,
3512        ))
3513    }
3514}
3515
3516/// Normalise a reference string to its canonical form
3517pub fn normalise_reference(reference: &str) -> Result<String, ParsingError> {
3518    let ref_type = ReferenceType::from_string(reference)?;
3519    Ok(ref_type.to_string())
3520}
3521
3522pub fn parse<T: AsRef<str>>(formula: T) -> Result<ASTNode, ParserError> {
3523    parse_with_dialect(formula, FormulaDialect::Excel)
3524}
3525
3526pub fn parse_with_dialect<T: AsRef<str>>(
3527    formula: T,
3528    dialect: FormulaDialect,
3529) -> Result<ASTNode, ParserError> {
3530    let spans = crate::tokenizer::tokenize_spans_with_dialect(formula.as_ref(), dialect)?;
3531    let mut parser = SpanParser::new(formula.as_ref(), &spans, dialect);
3532    parser.parse()
3533}
3534
3535/// Parse a single formula and annotate volatility using the provided classifier.
3536/// This is a convenience wrapper around `Parser::new_with_classifier`.
3537pub fn parse_with_volatility_classifier<T, F>(
3538    formula: T,
3539    classifier: F,
3540) -> Result<ASTNode, ParserError>
3541where
3542    T: AsRef<str>,
3543    F: Fn(&str) -> bool + Send + Sync + 'static,
3544{
3545    parse_with_dialect_and_volatility_classifier(formula, FormulaDialect::Excel, classifier)
3546}
3547
3548pub fn parse_with_dialect_and_volatility_classifier<T, F>(
3549    formula: T,
3550    dialect: FormulaDialect,
3551    classifier: F,
3552) -> Result<ASTNode, ParserError>
3553where
3554    T: AsRef<str>,
3555    F: Fn(&str) -> bool + Send + Sync + 'static,
3556{
3557    let spans = crate::tokenizer::tokenize_spans_with_dialect(formula.as_ref(), dialect)?;
3558    let mut parser =
3559        SpanParser::new(formula.as_ref(), &spans, dialect).with_volatility_classifier(classifier);
3560    parser.parse()
3561}
3562
3563/// Efficient batch parser with an internal token cache and optional volatility classifier.
3564///
3565/// The cache is keyed by the original formula string; repeated formulas across a batch
3566/// (very common in spreadsheets) will avoid re-tokenization and whitespace filtering.
3567pub struct BatchParser {
3568    include_whitespace: bool,
3569    volatility_classifier: Option<VolatilityClassifierArc>,
3570    token_cache: std::collections::HashMap<String, Arc<[crate::tokenizer::TokenSpan]>>, // cached tokens
3571    dialect: FormulaDialect,
3572}
3573
3574impl BatchParser {
3575    pub fn builder() -> BatchParserBuilder {
3576        BatchParserBuilder::default()
3577    }
3578
3579    /// Parse a formula using the internal cache and configured classifier.
3580    pub fn parse(&mut self, formula: &str) -> Result<ASTNode, ParserError> {
3581        let spans = if let Some(tokens) = self.token_cache.get(formula) {
3582            Arc::clone(tokens)
3583        } else {
3584            let mut spans = crate::tokenizer::tokenize_spans_with_dialect(formula, self.dialect)?;
3585            if !self.include_whitespace {
3586                spans.retain(|t| t.token_type != TokenType::Whitespace);
3587            }
3588
3589            let spans: Arc<[crate::tokenizer::TokenSpan]> = Arc::from(spans.into_boxed_slice());
3590            self.token_cache
3591                .insert(formula.to_string(), Arc::clone(&spans));
3592            spans
3593        };
3594
3595        let mut parser = SpanParser::new(formula, spans.as_ref(), self.dialect);
3596        if let Some(classifier) = self.volatility_classifier.clone() {
3597            parser = parser.with_volatility_classifier(move |name| classifier(name));
3598        }
3599        parser.parse()
3600    }
3601}
3602
3603#[derive(Default)]
3604pub struct BatchParserBuilder {
3605    include_whitespace: bool,
3606    volatility_classifier: Option<VolatilityClassifierArc>,
3607    dialect: FormulaDialect,
3608}
3609
3610impl BatchParserBuilder {
3611    pub fn include_whitespace(mut self, include: bool) -> Self {
3612        self.include_whitespace = include;
3613        self
3614    }
3615
3616    pub fn with_volatility_classifier<F>(mut self, f: F) -> Self
3617    where
3618        F: Fn(&str) -> bool + Send + Sync + 'static,
3619    {
3620        self.volatility_classifier = Some(Arc::new(f));
3621        self
3622    }
3623
3624    pub fn dialect(mut self, dialect: FormulaDialect) -> Self {
3625        self.dialect = dialect;
3626        self
3627    }
3628
3629    pub fn build(self) -> BatchParser {
3630        BatchParser {
3631            include_whitespace: self.include_whitespace,
3632            volatility_classifier: self.volatility_classifier,
3633            token_cache: std::collections::HashMap::new(),
3634            dialect: self.dialect,
3635        }
3636    }
3637}