Skip to main content

formualizer_parse/
parser.rs

1use crate::tokenizer::{Associativity, Token, TokenSubType, TokenType, Tokenizer, TokenizerError};
2use crate::types::{FormulaDialect, ParsingError};
3use crate::{ExcelError, LiteralValue};
4
5#[cfg(feature = "serde")]
6use serde::{Deserialize, Serialize};
7
8use crate::hasher::FormulaHasher;
9use formualizer_common::coord::{
10    col_index_from_letters_1based, col_letters_from_1based, parse_a1_1based,
11};
12use formualizer_common::{
13    AxisBound, RelativeCoord, SheetCellRef, SheetLocator, SheetRangeRef, SheetRef,
14};
15use once_cell::sync::Lazy;
16use smallvec::SmallVec;
17use std::error::Error;
18use std::fmt::{self, Display};
19use std::hash::{Hash, Hasher};
20use std::str::FromStr;
21use std::sync::Arc;
22
/// Unsized signature shared by volatility classifiers: a thread-safe (`Send + Sync`),
/// `'static` predicate that takes a `&str` and returns `bool`.
// NOTE(review): the `&str` argument is presumably a function name — confirm against callers.
type VolatilityFn = dyn Fn(&str) -> bool + Send + Sync + 'static;
/// A uniquely owned (boxed) volatility classifier.
type VolatilityClassifierBox = Box<VolatilityFn>;
/// A shared, reference-counted volatility classifier.
type VolatilityClassifierArc = Arc<VolatilityFn>;
26
/// Error reported by the parser, optionally tagged with the position it refers to.
#[derive(Debug)]
pub struct ParserError {
    /// Human-readable description of what went wrong.
    pub message: String,
    /// Position associated with the failure, when one is known.
    pub position: Option<usize>,
}

impl Display for ParserError {
    /// Renders the message, including the position only when it is present.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self.position {
            Some(pos) => write!(f, "ParserError at position {}: {}", pos, self.message),
            None => write!(f, "ParserError: {}", self.message),
        }
    }
}

impl Error for ParserError {}
45
46// Column lookup table for common columns (A-ZZ = 702 columns)
47static COLUMN_LOOKUP: Lazy<Vec<String>> = Lazy::new(|| {
48    let mut cols = Vec::with_capacity(702);
49    // Single letters A-Z
50    for c in b'A'..=b'Z' {
51        cols.push(String::from(c as char));
52    }
53    // Double letters AA-ZZ
54    for c1 in b'A'..=b'Z' {
55        for c2 in b'A'..=b'Z' {
56            cols.push(format!("{}{}", c1 as char, c2 as char));
57        }
58    }
59    cols
60});
61
/// A structured table reference specifier for accessing specific parts of a table.
///
/// The `Display` impl renders each variant as the text that is placed inside the
/// table's brackets (for example `#Headers` or `start:end`).
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, PartialEq, Hash)]
pub enum TableSpecifier {
    /// The entire table (rendered as `#All`)
    All,
    /// The data area of the table (no headers or totals; rendered as `#Data`)
    Data,
    /// The headers row (rendered as `#Headers`)
    Headers,
    /// The totals row (rendered as `#Totals`)
    Totals,
    /// A specific row
    Row(TableRowSpecifier),
    /// A specific column; the string is rendered as-is
    Column(String),
    /// A range of columns, rendered as `start:end`
    ColumnRange(String, String),
    /// Special items like #Headers, #Data, #Totals, etc.
    SpecialItem(SpecialItem),
    /// A combination of specifiers, for complex references.
    /// Rendered as comma-separated parts, each wrapped in its own brackets.
    Combination(Vec<Box<TableSpecifier>>),
}
85
/// Specifies which row(s) to use in a table reference.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, PartialEq, Hash)]
pub enum TableRowSpecifier {
    /// The current row (context dependent); rendered as `@`
    Current,
    /// All rows; rendered as `#All`
    All,
    /// Data rows only; rendered as `#Data`
    Data,
    /// Headers row; rendered as `#Headers`
    Headers,
    /// Totals row; rendered as `#Totals`
    Totals,
    /// Specific row by index (1-based); rendered as the bare number
    Index(u32),
}
103
/// Special items in structured references.
///
/// Each variant is rendered by `Display` as the literal token named in its doc comment.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, PartialEq, Hash)]
pub enum SpecialItem {
    /// The #Headers item
    Headers,
    /// The #Data item
    Data,
    /// The #Totals item
    Totals,
    /// The #All item (the whole table)
    All,
    /// The @ item (current row)
    ThisRow,
}
119
/// A reference to a table including specifiers.
///
/// When a specifier is present, `ReferenceType`'s `Display` impl renders this as
/// the table name followed by the specifier in brackets.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, PartialEq, Hash)]
pub struct TableReference {
    /// The name of the table
    pub name: String,
    /// Optional specifier for which part of the table to use
    pub specifier: Option<TableSpecifier>,
}
129
/// Identifies the workbook part of an external reference.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, PartialEq, Hash)]
pub enum ExternalBookRef {
    /// The workbook token kept as text.
    Token(String),
}

impl ExternalBookRef {
    /// Borrow the workbook token text.
    pub fn token(&self) -> &str {
        // Single-variant enum, so the pattern is irrefutable.
        let ExternalBookRef::Token(text) = self;
        text.as_str()
    }
}
143
/// The grid target of an external reference: a single cell or a range.
///
/// The `*_abs` flags record whether the corresponding axis is anchored.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ExternalRefKind {
    /// A single cell.
    Cell {
        row: u32,
        col: u32,
        row_abs: bool,
        col_abs: bool,
    },
    /// A range; a `None` bound means that axis is not specified on that side.
    Range {
        start_row: Option<u32>,
        start_col: Option<u32>,
        end_row: Option<u32>,
        end_col: Option<u32>,
        start_row_abs: bool,
        start_col_abs: bool,
        end_row_abs: bool,
        end_col_abs: bool,
    },
}

impl ExternalRefKind {
    /// Cell target with both anchors cleared.
    pub fn cell(row: u32, col: u32) -> Self {
        Self::cell_with_abs(row, col, false, false)
    }

    /// Cell target with explicit anchor flags.
    pub fn cell_with_abs(row: u32, col: u32, row_abs: bool, col_abs: bool) -> Self {
        ExternalRefKind::Cell {
            row,
            col,
            row_abs,
            col_abs,
        }
    }

    /// Range target with all four anchors cleared.
    pub fn range(
        start_row: Option<u32>,
        start_col: Option<u32>,
        end_row: Option<u32>,
        end_col: Option<u32>,
    ) -> Self {
        Self::range_with_abs(
            start_row, start_col, end_row, end_col, false, false, false, false,
        )
    }

    /// Range target with explicit anchor flags for every bound.
    pub fn range_with_abs(
        start_row: Option<u32>,
        start_col: Option<u32>,
        end_row: Option<u32>,
        end_col: Option<u32>,
        start_row_abs: bool,
        start_col_abs: bool,
        end_row_abs: bool,
        end_col_abs: bool,
    ) -> Self {
        ExternalRefKind::Range {
            start_row,
            start_col,
            end_row,
            end_col,
            start_row_abs,
            start_col_abs,
            end_row_abs,
            end_col_abs,
        }
    }
}
224
/// A reference into another workbook, as produced by the Excel reference parser
/// when the sheet segment embeds a `[...]` workbook token (e.g. `[1]Sheet1!A1`).
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, PartialEq, Hash)]
pub struct ExternalReference {
    /// The original reference text; `Display` for `ReferenceType::External` emits it verbatim.
    pub raw: String,
    /// The workbook token: the sheet segment up to and including the closing `]`.
    pub book: ExternalBookRef,
    /// The sheet name that follows the workbook token.
    pub sheet: String,
    /// Whether the target is a cell or a range, with its coordinates and anchors.
    pub kind: ExternalRefKind,
}
233
/// A reference to something outside the cell.
///
/// Row and column indices are 1-based. The `*_abs` flags mark an axis as anchored;
/// `Display` prefixes anchored axes with `$`.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, PartialEq, Hash)]
pub enum ReferenceType {
    /// A single cell, optionally qualified with a sheet name.
    Cell {
        sheet: Option<String>,
        row: u32,
        col: u32,
        row_abs: bool,
        col_abs: bool,
    },
    /// A range, optionally qualified with a sheet name.
    /// A `None` bound means the axis is absent on that side, as in column-only
    /// (`A:A`) or row-only ranges.
    Range {
        sheet: Option<String>,
        start_row: Option<u32>,
        start_col: Option<u32>,
        end_row: Option<u32>,
        end_col: Option<u32>,
        start_row_abs: bool,
        start_col_abs: bool,
        end_row_abs: bool,
        end_col_abs: bool,
    },
    /// A cell or range in another workbook.
    External(ExternalReference),
    /// A structured table reference such as `Table1[Column]`.
    Table(TableReference),
    /// Any text that is neither a table, a range, nor a valid cell reference;
    /// the parser stores the reference text unchanged.
    NamedRange(String),
}
260
261impl Display for TableSpecifier {
262    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
263        match self {
264            TableSpecifier::All => write!(f, "#All"),
265            TableSpecifier::Data => write!(f, "#Data"),
266            TableSpecifier::Headers => write!(f, "#Headers"),
267            TableSpecifier::Totals => write!(f, "#Totals"),
268            TableSpecifier::Row(row) => write!(f, "{row}"),
269            TableSpecifier::Column(column) => write!(f, "{column}"),
270            TableSpecifier::ColumnRange(start, end) => write!(f, "{start}:{end}"),
271            TableSpecifier::SpecialItem(item) => write!(f, "{item}"),
272            TableSpecifier::Combination(specs) => {
273                // Emit nested bracketed parts so the surrounding Table formatter prints
274                // canonical structured refs like Table[[#Headers],[Column1]:[Column2]]
275                let parts: Vec<String> = specs.iter().map(|s| format!("[{s}]")).collect();
276                write!(f, "{}", parts.join(","))
277            }
278        }
279    }
280}
281
282impl Display for TableRowSpecifier {
283    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
284        match self {
285            TableRowSpecifier::Current => write!(f, "@"),
286            TableRowSpecifier::All => write!(f, "#All"),
287            TableRowSpecifier::Data => write!(f, "#Data"),
288            TableRowSpecifier::Headers => write!(f, "#Headers"),
289            TableRowSpecifier::Totals => write!(f, "#Totals"),
290            TableRowSpecifier::Index(idx) => write!(f, "{idx}"),
291        }
292    }
293}
294
295impl Display for SpecialItem {
296    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
297        match self {
298            SpecialItem::Headers => write!(f, "#Headers"),
299            SpecialItem::Data => write!(f, "#Data"),
300            SpecialItem::Totals => write!(f, "#Totals"),
301            SpecialItem::All => write!(f, "#All"),
302            SpecialItem::ThisRow => write!(f, "@"),
303        }
304    }
305}
306
/// Decide whether a sheet name must be wrapped in single quotes when written
/// into a formula.
///
/// Returns `true` when the name starts with an ASCII digit, contains a space or
/// any ASCII punctuation other than `_`, or equals (ignoring case) one of a few
/// reserved words. An empty name never needs quoting.
fn sheet_name_needs_quoting(name: &str) -> bool {
    const RESERVED: [&str; 9] = [
        "TRUE", "FALSE", "NULL", "REF", "DIV", "NAME", "NUM", "VALUE", "N/A",
    ];

    let first = match name.as_bytes().first() {
        Some(byte) => *byte,
        None => return false,
    };
    if first.is_ascii_digit() {
        return true;
    }

    // Space plus every ASCII punctuation byte except underscore:
    // ! " # $ % & ' ( ) * + , - . / : ; < = > ? @ [ \ ] ^ ` { | } ~
    let has_special = name
        .bytes()
        .any(|byte| byte == b' ' || (byte != b'_' && byte.is_ascii_punctuation()));
    if has_special {
        return true;
    }

    // Reserved words are compared case-insensitively.
    let upper = name.to_uppercase();
    RESERVED.contains(&upper.as_str())
}
338
/// A sheet/coordinate pair kept as text.
// NOTE(review): presumably one endpoint of an OpenFormula-style reference — the code
// that builds and consumes this type is outside the visible section; confirm there.
#[derive(Debug, Clone)]
struct OpenFormulaRefPart {
    /// Sheet name, when one is given.
    sheet: Option<String>,
    /// Coordinate text, stored unparsed.
    coord: String,
}
344
345impl ReferenceType {
346    /// Build a cell reference with relative anchors.
347    pub fn cell(sheet: Option<String>, row: u32, col: u32) -> Self {
348        Self::Cell {
349            sheet,
350            row,
351            col,
352            row_abs: false,
353            col_abs: false,
354        }
355    }
356
    /// Build a cell reference with explicit anchors.
    ///
    /// `row_abs` / `col_abs` are stored unchanged in the resulting `Cell`; the
    /// `Display` impl prefixes an axis with `$` when its flag is `true`.
    pub fn cell_with_abs(
        sheet: Option<String>,
        row: u32,
        col: u32,
        row_abs: bool,
        col_abs: bool,
    ) -> Self {
        Self::Cell {
            sheet,
            row,
            col,
            row_abs,
            col_abs,
        }
    }
373
374    /// Build a range reference with relative anchors.
375    pub fn range(
376        sheet: Option<String>,
377        start_row: Option<u32>,
378        start_col: Option<u32>,
379        end_row: Option<u32>,
380        end_col: Option<u32>,
381    ) -> Self {
382        Self::Range {
383            sheet,
384            start_row,
385            start_col,
386            end_row,
387            end_col,
388            start_row_abs: false,
389            start_col_abs: false,
390            end_row_abs: false,
391            end_col_abs: false,
392        }
393    }
394
    /// Build a range reference with explicit anchors.
    ///
    /// Every argument is stored unchanged in the resulting `Range`. A `None`
    /// bound leaves that axis unspecified on that side of the range.
    pub fn range_with_abs(
        sheet: Option<String>,
        start_row: Option<u32>,
        start_col: Option<u32>,
        end_row: Option<u32>,
        end_col: Option<u32>,
        start_row_abs: bool,
        start_col_abs: bool,
        end_row_abs: bool,
        end_col_abs: bool,
    ) -> Self {
        Self::Range {
            sheet,
            start_row,
            start_col,
            end_row,
            end_col,
            start_row_abs,
            start_col_abs,
            end_row_abs,
            end_col_abs,
        }
    }
419
    /// Create a reference from a string. Can be A1, A:A, A1:B2, Table1[Column], etc.
    ///
    /// Always uses the Excel parser (`parse_excel_reference`); use
    /// `from_string_with_dialect` to select another dialect.
    pub fn from_string(reference: &str) -> Result<Self, ParsingError> {
        Self::parse_excel_reference(reference)
    }
424
425    /// Create a reference from a string using the specified formula dialect.
426    pub fn from_string_with_dialect(
427        reference: &str,
428        dialect: FormulaDialect,
429    ) -> Result<Self, ParsingError> {
430        match dialect {
431            FormulaDialect::Excel => Self::parse_excel_reference(reference),
432            FormulaDialect::OpenFormula => Self::parse_openformula_reference(reference)
433                .or_else(|_| Self::parse_excel_reference(reference)),
434        }
435    }
436
    /// Parse a grid reference into a shared SheetRef, preserving $ anchors.
    ///
    /// Only cell and range references are supported. Table and named ranges return an error.
    ///
    /// Equivalent to `parse_sheet_ref_with_dialect` with `FormulaDialect::Excel`.
    pub fn parse_sheet_ref(reference: &str) -> Result<SheetRef<'static>, ParsingError> {
        Self::parse_sheet_ref_with_dialect(reference, FormulaDialect::Excel)
    }
443
444    /// Parse a grid reference into a shared SheetRef using the specified dialect.
445    pub fn parse_sheet_ref_with_dialect(
446        reference: &str,
447        dialect: FormulaDialect,
448    ) -> Result<SheetRef<'static>, ParsingError> {
449        match dialect {
450            FormulaDialect::Excel => Self::parse_excel_sheet_ref(reference),
451            FormulaDialect::OpenFormula => Self::parse_openformula_sheet_ref(reference)
452                .or_else(|_| Self::parse_excel_sheet_ref(reference)),
453        }
454    }
455
456    /// Lossy conversion from parsed ReferenceType into SheetRef.
457    /// External, table, and named ranges are discarded; anchors are preserved.
458    pub fn to_sheet_ref_lossy(&self) -> Option<SheetRef<'_>> {
459        match self {
460            ReferenceType::Cell {
461                sheet,
462                row,
463                col,
464                row_abs,
465                col_abs,
466            } => {
467                let row0 = row.checked_sub(1)?;
468                let col0 = col.checked_sub(1)?;
469                let sheet_loc = match sheet.as_deref() {
470                    Some(name) => SheetLocator::from_name(name),
471                    None => SheetLocator::Current,
472                };
473                let coord = RelativeCoord::new(row0, col0, *row_abs, *col_abs);
474                Some(SheetRef::Cell(SheetCellRef::new(sheet_loc, coord)))
475            }
476            ReferenceType::Range {
477                sheet,
478                start_row,
479                start_col,
480                end_row,
481                end_col,
482                start_row_abs,
483                start_col_abs,
484                end_row_abs,
485                end_col_abs,
486            } => {
487                let sheet_loc = match sheet.as_deref() {
488                    Some(name) => SheetLocator::from_name(name),
489                    None => SheetLocator::Current,
490                };
491                let sr = start_row
492                    .and_then(|v| v.checked_sub(1).map(|i| AxisBound::new(i, *start_row_abs)));
493                if start_row.is_some() && sr.is_none() {
494                    return None;
495                }
496                let sc = start_col
497                    .and_then(|v| v.checked_sub(1).map(|i| AxisBound::new(i, *start_col_abs)));
498                if start_col.is_some() && sc.is_none() {
499                    return None;
500                }
501                let er =
502                    end_row.and_then(|v| v.checked_sub(1).map(|i| AxisBound::new(i, *end_row_abs)));
503                if end_row.is_some() && er.is_none() {
504                    return None;
505                }
506                let ec =
507                    end_col.and_then(|v| v.checked_sub(1).map(|i| AxisBound::new(i, *end_col_abs)));
508                if end_col.is_some() && ec.is_none() {
509                    return None;
510                }
511                let range = SheetRangeRef::from_parts(sheet_loc, sr, sc, er, ec).ok()?;
512                Some(SheetRef::Range(range))
513            }
514            _ => None,
515        }
516    }
517
518    fn parse_excel_sheet_ref(reference: &str) -> Result<SheetRef<'static>, ParsingError> {
519        let (sheet, ref_part) = Self::extract_sheet_name(reference);
520
521        if ref_part.contains('[') {
522            return Err(ParsingError::InvalidReference(
523                "Table references are not supported for SheetRef".to_string(),
524            ));
525        }
526
527        let sheet_loc: SheetLocator<'static> = match sheet {
528            Some(name) => SheetLocator::from_name(name),
529            None => SheetLocator::Current,
530        };
531
532        if ref_part.contains(':') {
533            let mut parts = ref_part.splitn(2, ':');
534            let start = parts.next().unwrap();
535            let end = parts.next().ok_or_else(|| {
536                ParsingError::InvalidReference(format!("Invalid range: {ref_part}"))
537            })?;
538
539            let (start_col, start_row) = Self::parse_range_part_with_abs(start)?;
540            let (end_col, end_row) = Self::parse_range_part_with_abs(end)?;
541
542            let start_col = Self::axis_bound_from_1based(start_col)?;
543            let start_row = Self::axis_bound_from_1based(start_row)?;
544            let end_col = Self::axis_bound_from_1based(end_col)?;
545            let end_row = Self::axis_bound_from_1based(end_row)?;
546
547            let range =
548                SheetRangeRef::from_parts(sheet_loc, start_row, start_col, end_row, end_col)
549                    .map_err(|err| ParsingError::InvalidReference(err.to_string()))?;
550            Ok(SheetRef::Range(range))
551        } else {
552            let (row, col, row_abs, col_abs) = parse_a1_1based(&ref_part)
553                .map_err(|err| ParsingError::InvalidReference(err.to_string()))?;
554            let coord = RelativeCoord::new(row - 1, col - 1, row_abs, col_abs);
555            Ok(SheetRef::Cell(SheetCellRef::new(sheet_loc, coord)))
556        }
557    }
558
    /// OpenFormula entry point for `SheetRef` parsing.
    ///
    /// Currently forwards to `parse_excel_sheet_ref` unchanged, so both dialects
    /// accept exactly the same input on this path.
    fn parse_openformula_sheet_ref(reference: &str) -> Result<SheetRef<'static>, ParsingError> {
        Self::parse_excel_sheet_ref(reference)
    }
562
563    fn axis_bound_from_1based(
564        bound: Option<(u32, bool)>,
565    ) -> Result<Option<AxisBound>, ParsingError> {
566        match bound {
567            Some((index, abs)) => AxisBound::from_excel_1based(index, abs)
568                .map(Some)
569                .map_err(|err| ParsingError::InvalidReference(err.to_string())),
570            None => Ok(None),
571        }
572    }
573
574    fn parse_range_part_with_abs(
575        part: &str,
576    ) -> Result<(Option<(u32, bool)>, Option<(u32, bool)>), ParsingError> {
577        if let Ok((row, col, row_abs, col_abs)) = parse_a1_1based(part) {
578            return Ok((Some((col, col_abs)), Some((row, row_abs))));
579        }
580
581        let bytes = part.as_bytes();
582        let len = bytes.len();
583        let mut i = 0usize;
584
585        let mut col_abs = false;
586        let mut row_abs = false;
587
588        if i < len && bytes[i] == b'$' {
589            col_abs = true;
590            i += 1;
591        }
592
593        let col_start = i;
594        while i < len && bytes[i].is_ascii_alphabetic() {
595            i += 1;
596        }
597
598        if i > col_start {
599            let col_str = &part[col_start..i];
600            let col1 = Self::column_to_number(col_str)?;
601
602            if i == len {
603                return Ok((Some((col1, col_abs)), None));
604            }
605
606            if i < len && bytes[i] == b'$' {
607                row_abs = true;
608                i += 1;
609            }
610
611            if i >= len {
612                return Err(ParsingError::InvalidReference(format!(
613                    "Invalid range part: {part}"
614                )));
615            }
616
617            let row_start = i;
618            while i < len && bytes[i].is_ascii_digit() {
619                i += 1;
620            }
621
622            if row_start == i || i != len {
623                return Err(ParsingError::InvalidReference(format!(
624                    "Invalid range part: {part}"
625                )));
626            }
627
628            let row_str = &part[row_start..i];
629            let row1 = row_str
630                .parse::<u32>()
631                .map_err(|_| ParsingError::InvalidReference(format!("Invalid row: {row_str}")))?;
632            if row1 == 0 {
633                return Err(ParsingError::InvalidReference(format!(
634                    "Invalid range part: {part}"
635                )));
636            }
637
638            return Ok((Some((col1, col_abs)), Some((row1, row_abs))));
639        }
640
641        i = 0;
642        if i < len && bytes[i] == b'$' {
643            row_abs = true;
644            i += 1;
645        }
646
647        let row_start = i;
648        while i < len && bytes[i].is_ascii_digit() {
649            i += 1;
650        }
651
652        if row_start == i || i != len {
653            return Err(ParsingError::InvalidReference(format!(
654                "Invalid range part: {part}"
655            )));
656        }
657
658        let row_str = &part[row_start..i];
659        let row1 = row_str
660            .parse::<u32>()
661            .map_err(|_| ParsingError::InvalidReference(format!("Invalid row: {row_str}")))?;
662        if row1 == 0 {
663            return Err(ParsingError::InvalidReference(format!(
664                "Invalid range part: {part}"
665            )));
666        }
667
668        Ok((None, Some((row1, row_abs))))
669    }
670
671    fn parse_excel_reference(reference: &str) -> Result<Self, ParsingError> {
672        // Extract sheet name if present
673        let (sheet, ref_part) = Self::extract_sheet_name(reference);
674
675        // Table references live in the ref_part (e.g., "Table1[Column]").
676        // Sheet names can contain '[' for external workbook refs (e.g., "[1]Sheet1!A1").
677        if ref_part.contains('[') {
678            return Self::parse_table_reference(&ref_part);
679        }
680
681        let external_sheet = sheet.as_deref().and_then(|s| {
682            // Excel external workbook refs embed a "[...]" token inside the sheet segment.
683            // Use the last '[' to allow paths/URIs that may contain earlier brackets, then
684            // take the first ']' after it to avoid being confused by ']' in the sheet name.
685            let lb = s.rfind('[')?;
686            let rb_rel = s[lb..].find(']')?;
687            let rb = lb + rb_rel;
688            if lb >= rb {
689                return None;
690            }
691
692            let token = &s[..=rb];
693            let sheet_name = &s[rb + 1..];
694            if sheet_name.is_empty() {
695                None
696            } else {
697                Some((token, sheet_name))
698            }
699        });
700
701        if ref_part.contains(':') {
702            // Range reference
703            let mut parts = ref_part.splitn(2, ':');
704            let start = parts.next().unwrap();
705            let end = parts.next().ok_or_else(|| {
706                ParsingError::InvalidReference(format!("Invalid range: {ref_part}"))
707            })?;
708            let (start_col, start_row) = Self::parse_range_part_with_abs(start)?;
709            let (end_col, end_row) = Self::parse_range_part_with_abs(end)?;
710
711            let split = |bound: Option<(u32, bool)>| match bound {
712                Some((index, abs)) => (Some(index), abs),
713                None => (None, false),
714            };
715            let (start_col, start_col_abs) = split(start_col);
716            let (start_row, start_row_abs) = split(start_row);
717            let (end_col, end_col_abs) = split(end_col);
718            let (end_row, end_row_abs) = split(end_row);
719
720            if let Some((book_token, sheet_name)) = external_sheet {
721                Ok(ReferenceType::External(ExternalReference {
722                    raw: reference.to_string(),
723                    book: ExternalBookRef::Token(book_token.to_string()),
724                    sheet: sheet_name.to_string(),
725                    kind: ExternalRefKind::Range {
726                        start_row,
727                        start_col,
728                        end_row,
729                        end_col,
730                        start_row_abs,
731                        start_col_abs,
732                        end_row_abs,
733                        end_col_abs,
734                    },
735                }))
736            } else {
737                Ok(ReferenceType::Range {
738                    sheet,
739                    start_row,
740                    start_col,
741                    end_row,
742                    end_col,
743                    start_row_abs,
744                    start_col_abs,
745                    end_row_abs,
746                    end_col_abs,
747                })
748            }
749        } else {
750            // Try to parse as a single cell reference
751            match Self::parse_cell_reference(&ref_part) {
752                Ok((col, row, col_abs, row_abs)) => {
753                    if let Some((book_token, sheet_name)) = external_sheet {
754                        Ok(ReferenceType::External(ExternalReference {
755                            raw: reference.to_string(),
756                            book: ExternalBookRef::Token(book_token.to_string()),
757                            sheet: sheet_name.to_string(),
758                            kind: ExternalRefKind::Cell {
759                                row,
760                                col,
761                                row_abs,
762                                col_abs,
763                            },
764                        }))
765                    } else {
766                        Ok(ReferenceType::Cell {
767                            sheet,
768                            row,
769                            col,
770                            row_abs,
771                            col_abs,
772                        })
773                    }
774                }
775                Err(_) => {
776                    // Treat it as a named range
777                    Ok(ReferenceType::NamedRange(reference.to_string()))
778                }
779            }
780        }
781    }
782
783    /// Parse a cell reference like "A1" into (column, row) using byte-based parsing.
784    fn parse_cell_reference(reference: &str) -> Result<(u32, u32, bool, bool), ParsingError> {
785        parse_a1_1based(reference)
786            .map(|(row, col, row_abs, col_abs)| (col, row, col_abs, row_abs))
787            .map_err(|_| {
788                ParsingError::InvalidReference(format!("Invalid cell reference: {reference}"))
789            })
790    }
791
792    /// Convert a column letter (e.g., "A", "BC") to a column number (1-based) using byte operations.
793    pub(crate) fn column_to_number(column: &str) -> Result<u32, ParsingError> {
794        col_index_from_letters_1based(column)
795            .map_err(|_| ParsingError::InvalidReference(format!("Invalid column: {column}")))
796    }
797
798    /// Convert a column number to a column letter using lookup table for common values.
799    pub(crate) fn number_to_column(num: u32) -> String {
800        if num == 0 {
801            return String::new();
802        }
803        // Use lookup table for common columns (1-702 covers A-ZZ)
804        if num > 0 && num <= 702 {
805            return COLUMN_LOOKUP[(num - 1) as usize].clone();
806        }
807
808        col_letters_from_1based(num).unwrap_or_default()
809    }
810
811    fn format_col(col: u32, abs: bool) -> String {
812        if abs {
813            format!("${}", Self::number_to_column(col))
814        } else {
815            Self::number_to_column(col)
816        }
817    }
818
819    fn format_row(row: u32, abs: bool) -> String {
820        if abs {
821            format!("${row}")
822        } else {
823            row.to_string()
824        }
825    }
826}
827
828impl Display for ReferenceType {
829    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
830        write!(
831            f,
832            "{}",
833            match self {
834                ReferenceType::Cell {
835                    sheet,
836                    row,
837                    col,
838                    row_abs,
839                    col_abs,
840                } => {
841                    let col_str = Self::format_col(*col, *col_abs);
842                    let row_str = Self::format_row(*row, *row_abs);
843
844                    if let Some(sheet_name) = sheet {
845                        if sheet_name_needs_quoting(sheet_name) {
846                            // Escape any single quotes in the sheet name by doubling them
847                            let escaped_name = sheet_name.replace('\'', "''");
848                            format!("'{escaped_name}'!{col_str}{row_str}")
849                        } else {
850                            format!("{sheet_name}!{col_str}{row_str}")
851                        }
852                    } else {
853                        format!("{col_str}{row_str}")
854                    }
855                }
856                ReferenceType::Range {
857                    sheet,
858                    start_row,
859                    start_col,
860                    end_row,
861                    end_col,
862                    start_row_abs,
863                    start_col_abs,
864                    end_row_abs,
865                    end_col_abs,
866                } => {
867                    // Format start reference
868                    let start_ref = match (start_col, start_row) {
869                        (Some(col), Some(row)) => format!(
870                            "{}{}",
871                            Self::format_col(*col, *start_col_abs),
872                            Self::format_row(*row, *start_row_abs)
873                        ),
874                        (Some(col), None) => Self::format_col(*col, *start_col_abs),
875                        (None, Some(row)) => Self::format_row(*row, *start_row_abs),
876                        (None, None) => "".to_string(), // Should not happen in normal usage
877                    };
878
879                    // Format end reference
880                    let end_ref = match (end_col, end_row) {
881                        (Some(col), Some(row)) => format!(
882                            "{}{}",
883                            Self::format_col(*col, *end_col_abs),
884                            Self::format_row(*row, *end_row_abs)
885                        ),
886                        (Some(col), None) => Self::format_col(*col, *end_col_abs),
887                        (None, Some(row)) => Self::format_row(*row, *end_row_abs),
888                        (None, None) => "".to_string(), // Should not happen in normal usage
889                    };
890
891                    let range_part = format!("{start_ref}:{end_ref}");
892
893                    if let Some(sheet_name) = sheet {
894                        if sheet_name_needs_quoting(sheet_name) {
895                            // Escape any single quotes in the sheet name by doubling them
896                            let escaped_name = sheet_name.replace('\'', "''");
897                            format!("'{escaped_name}'!{range_part}")
898                        } else {
899                            format!("{sheet_name}!{range_part}")
900                        }
901                    } else {
902                        range_part
903                    }
904                }
905                ReferenceType::External(ext) => ext.raw.clone(),
906                ReferenceType::Table(table_ref) => {
907                    if let Some(specifier) = &table_ref.specifier {
908                        // For table references, we need to handle column specifiers specially
909                        // to remove leading/trailing whitespace
910                        match specifier {
911                            TableSpecifier::Column(column) => {
912                                format!("{}[{}]", table_ref.name, column.trim())
913                            }
914                            TableSpecifier::ColumnRange(start, end) => {
915                                format!("{}[{}:{}]", table_ref.name, start.trim(), end.trim())
916                            }
917                            _ => {
918                                // For other specifiers, use the standard formatting
919                                format!("{}[{}]", table_ref.name, specifier)
920                            }
921                        }
922                    } else {
923                        table_ref.name.clone()
924                    }
925                }
926                ReferenceType::NamedRange(name) => name.clone(),
927            }
928        )
929    }
930}
931
932impl TryFrom<&str> for ReferenceType {
933    type Error = ParsingError;
934
935    fn try_from(value: &str) -> Result<Self, Self::Error> {
936        ReferenceType::from_string(value)
937    }
938}
939
940impl FromStr for ReferenceType {
941    type Err = ParsingError;
942
943    fn from_str(s: &str) -> Result<Self, Self::Err> {
944        ReferenceType::from_string(s)
945    }
946}
947
948impl ReferenceType {
949    /// Normalise the reference string (convert to canonical form)
950    pub fn normalise(&self) -> String {
951        format!("{self}")
952    }
953
954    /// Extract a sheet name from a reference using byte operations.
955    fn extract_sheet_name(reference: &str) -> (Option<String>, String) {
956        let bytes = reference.as_bytes();
957        let mut i = 0;
958
959        // Handle quoted sheet names.
960        // Excel escapes a single quote inside a quoted sheet name by doubling it.
961        // Example: 'Bob''s Sheet'!A1
962        if i < bytes.len() && bytes[i] == b'\'' {
963            i += 1;
964            let start = i;
965
966            while i < bytes.len() {
967                if bytes[i] == b'\'' {
968                    // Escaped quote inside sheet name: ''
969                    if i + 1 < bytes.len() && bytes[i + 1] == b'\'' {
970                        i += 2;
971                        continue;
972                    }
973
974                    // Closing quote followed by '!'
975                    if i + 1 < bytes.len() && bytes[i + 1] == b'!' {
976                        let raw = &reference[start..i];
977                        let sheet = raw.replace("''", "'");
978                        let ref_part = String::from(&reference[i + 2..]);
979                        return (Some(sheet), ref_part);
980                    }
981                }
982
983                i += 1;
984            }
985        }
986
987        // Handle unquoted sheet names
988        i = 0;
989        while i < bytes.len() {
990            if bytes[i] == b'!' && i > 0 {
991                let sheet = String::from(&reference[0..i]);
992                let ref_part = String::from(&reference[i + 1..]);
993                return (Some(sheet), ref_part);
994            }
995            i += 1;
996        }
997
998        (None, reference.to_string())
999    }
1000
1001    /// Parse a table reference like "Table1[Column1]" or more complex ones like "Table1[[#All],[Column1]:[Column2]]".
1002    fn parse_table_reference(reference: &str) -> Result<Self, ParsingError> {
1003        // Find the first '[' to separate table name from specifier
1004        if let Some(bracket_pos) = reference.find('[') {
1005            let table_name = reference[..bracket_pos].trim();
1006            if table_name.is_empty() {
1007                return Err(ParsingError::InvalidReference(reference.to_string()));
1008            }
1009
1010            let specifier_str = &reference[bracket_pos..];
1011            let specifier = Self::parse_table_specifier(specifier_str)?;
1012
1013            Ok(ReferenceType::Table(TableReference {
1014                name: table_name.to_string(),
1015                specifier,
1016            }))
1017        } else {
1018            Err(ParsingError::InvalidReference(reference.to_string()))
1019        }
1020    }
1021
1022    /// Parse a table specifier like "[Column1]" or "[[#All],[Column1]:[Column2]]"
1023    fn parse_table_specifier(specifier_str: &str) -> Result<Option<TableSpecifier>, ParsingError> {
1024        if specifier_str.is_empty() || !specifier_str.starts_with('[') {
1025            return Ok(None);
1026        }
1027
1028        // Find balanced closing bracket
1029        let mut depth = 0;
1030        let mut end_pos = 0;
1031
1032        for (i, c) in specifier_str.chars().enumerate() {
1033            if c == '[' {
1034                depth += 1;
1035            } else if c == ']' {
1036                depth -= 1;
1037                if depth == 0 {
1038                    end_pos = i;
1039                    break;
1040                }
1041            }
1042        }
1043
1044        if depth != 0 || end_pos == 0 {
1045            return Err(ParsingError::InvalidReference(format!(
1046                "Unbalanced brackets in table specifier: {specifier_str}"
1047            )));
1048        }
1049
1050        // Extract content between outermost brackets
1051        let content = &specifier_str[1..end_pos];
1052
1053        // Handle different types of specifiers
1054        if content.is_empty() {
1055            // Empty brackets means the whole table
1056            return Ok(Some(TableSpecifier::All));
1057        }
1058
1059        // Handle special items
1060        if content.starts_with("#") {
1061            return Self::parse_special_item(content);
1062        }
1063
1064        // Handle column references
1065        if !content.contains('[') && !content.contains('#') {
1066            // Check for column range using iterator instead of split().collect()
1067            if let Some(colon_pos) = content.find(':') {
1068                let start = content[..colon_pos].trim();
1069                let end = content[colon_pos + 1..].trim();
1070                return Ok(Some(TableSpecifier::ColumnRange(
1071                    start.to_string(),
1072                    end.to_string(),
1073                )));
1074            } else {
1075                // Single column
1076                return Ok(Some(TableSpecifier::Column(content.trim().to_string())));
1077            }
1078        }
1079
1080        // Handle complex structured references with nested brackets
1081        if content.contains('[') {
1082            return Self::parse_complex_table_specifier(content);
1083        }
1084
1085        // If we can't determine the type, just use the raw specifier
1086        Ok(Some(TableSpecifier::Column(content.trim().to_string())))
1087    }
1088
1089    fn parse_openformula_reference(reference: &str) -> Result<Self, ParsingError> {
1090        if reference.starts_with('[') && reference.ends_with(']') {
1091            let inner = &reference[1..reference.len() - 1];
1092            if inner.is_empty() {
1093                return Err(ParsingError::InvalidReference(
1094                    "Empty OpenFormula reference".to_string(),
1095                ));
1096            }
1097
1098            let mut parts = inner.splitn(2, ':');
1099            let start_part_str = parts.next().unwrap();
1100            let end_part_str = parts.next();
1101
1102            let start_part = Self::parse_openformula_part(start_part_str)?;
1103            let end_part = if let Some(part) = end_part_str {
1104                Some(Self::parse_openformula_part(part)?)
1105            } else {
1106                None
1107            };
1108
1109            let sheet = match (&start_part.sheet, &end_part) {
1110                (Some(sheet), Some(end)) => {
1111                    if let Some(end_sheet) = &end.sheet
1112                        && end_sheet != sheet
1113                    {
1114                        return Err(ParsingError::InvalidReference(format!(
1115                            "Mismatched sheets in reference: {sheet} vs {end_sheet}"
1116                        )));
1117                    }
1118                    Some(sheet.clone())
1119                }
1120                (Some(sheet), None) => Some(sheet.clone()),
1121                (None, Some(end)) => end.sheet.clone(),
1122                (None, None) => None,
1123            };
1124
1125            let mut excel_like = String::new();
1126            if let Some(sheet_name) = sheet {
1127                if sheet_name_needs_quoting(&sheet_name) {
1128                    let escaped = sheet_name.replace('\'', "''");
1129                    excel_like.push('\'');
1130                    excel_like.push_str(&escaped);
1131                    excel_like.push('\'');
1132                } else {
1133                    excel_like.push_str(&sheet_name);
1134                }
1135                excel_like.push('!');
1136            }
1137
1138            excel_like.push_str(&start_part.coord);
1139            if let Some(end) = end_part {
1140                excel_like.push(':');
1141                excel_like.push_str(&end.coord);
1142            }
1143
1144            return Self::parse_excel_reference(&excel_like);
1145        }
1146
1147        Err(ParsingError::InvalidReference(format!(
1148            "Unsupported OpenFormula reference: {reference}"
1149        )))
1150    }
1151
1152    fn parse_openformula_part(part: &str) -> Result<OpenFormulaRefPart, ParsingError> {
1153        let trimmed = part.trim();
1154        if trimmed.is_empty() {
1155            return Err(ParsingError::InvalidReference(
1156                "Empty component in OpenFormula reference".to_string(),
1157            ));
1158        }
1159
1160        if trimmed == "." {
1161            return Err(ParsingError::InvalidReference(
1162                "Incomplete OpenFormula reference component".to_string(),
1163            ));
1164        }
1165
1166        if trimmed.starts_with('[') {
1167            // Nested brackets are not expected here
1168            return Err(ParsingError::InvalidReference(format!(
1169                "Unexpected '[' in OpenFormula reference component: {trimmed}"
1170            )));
1171        }
1172
1173        let (sheet, coord_slice) = if let Some(stripped) = trimmed.strip_prefix('.') {
1174            (None, stripped.trim())
1175        } else if let Some(dot_idx) = Self::find_openformula_sheet_separator(trimmed) {
1176            let sheet_part = trimmed[..dot_idx].trim();
1177            let coord_part = trimmed[dot_idx + 1..].trim();
1178            if coord_part.is_empty() {
1179                return Err(ParsingError::InvalidReference(format!(
1180                    "Missing coordinate in OpenFormula reference component: {trimmed}"
1181                )));
1182            }
1183            let sheet_name = Self::normalise_openformula_sheet(sheet_part)?;
1184            (Some(sheet_name), coord_part)
1185        } else {
1186            (None, trimmed)
1187        };
1188
1189        let coord = coord_slice.trim_start_matches('.').trim().to_string();
1190
1191        if coord.is_empty() {
1192            return Err(ParsingError::InvalidReference(format!(
1193                "Missing coordinate in OpenFormula reference component: {trimmed}"
1194            )));
1195        }
1196
1197        Ok(OpenFormulaRefPart { sheet, coord })
1198    }
1199
1200    fn normalise_openformula_sheet(sheet: &str) -> Result<String, ParsingError> {
1201        let without_abs = sheet.trim().trim_start_matches('$');
1202
1203        if without_abs.starts_with('\'') {
1204            if without_abs.len() < 2 || !without_abs.ends_with('\'') {
1205                return Err(ParsingError::InvalidReference(format!(
1206                    "Unterminated sheet name in OpenFormula reference: {sheet}"
1207                )));
1208            }
1209            let inner = &without_abs[1..without_abs.len() - 1];
1210            Ok(inner.replace("''", "'"))
1211        } else {
1212            Ok(without_abs.to_string())
1213        }
1214    }
1215
1216    fn find_openformula_sheet_separator(part: &str) -> Option<usize> {
1217        let bytes = part.as_bytes();
1218        let mut i = 0;
1219        let mut in_quotes = false;
1220
1221        while i < bytes.len() {
1222            match bytes[i] {
1223                b'\'' => {
1224                    if i + 1 < bytes.len() && bytes[i + 1] == b'\'' {
1225                        i += 2;
1226                        continue;
1227                    }
1228                    in_quotes = !in_quotes;
1229                    i += 1;
1230                }
1231                b'.' if !in_quotes => return Some(i),
1232                _ => i += 1,
1233            }
1234        }
1235
1236        None
1237    }
1238
1239    /// Parse a special item specifier like "#Headers", "#Data", etc.
1240    fn parse_special_item(content: &str) -> Result<Option<TableSpecifier>, ParsingError> {
1241        match content {
1242            "#All" => Ok(Some(TableSpecifier::SpecialItem(SpecialItem::All))),
1243            "#Headers" => Ok(Some(TableSpecifier::SpecialItem(SpecialItem::Headers))),
1244            "#Data" => Ok(Some(TableSpecifier::SpecialItem(SpecialItem::Data))),
1245            "#Totals" => Ok(Some(TableSpecifier::SpecialItem(SpecialItem::Totals))),
1246            "@" => Ok(Some(TableSpecifier::Row(TableRowSpecifier::Current))),
1247            _ => Err(ParsingError::InvalidReference(format!(
1248                "Unknown special item: {content}"
1249            ))),
1250        }
1251    }
1252
1253    /// Parse complex table specifiers with nested brackets
1254    fn parse_complex_table_specifier(
1255        content: &str,
1256    ) -> Result<Option<TableSpecifier>, ParsingError> {
1257        // This is a more complex case like [[#Headers],[Column1]:[Column2]]
1258        // For now, we'll just store the raw specifier and enhance this in the future
1259
1260        // Try to identify common patterns
1261        if content.contains("[#Headers]")
1262            || content.contains("[#All]")
1263            || content.contains("[#Data]")
1264            || content.contains("[#Totals]")
1265            || content.contains("[@]")
1266        {
1267            // This is a combination of specifiers
1268            // Parse them into a vector
1269            let mut specifiers = Vec::new();
1270
1271            // Simple parsing - this would need enhancement for full support
1272            if content.contains("[#Headers]") {
1273                specifiers.push(Box::new(TableSpecifier::SpecialItem(SpecialItem::Headers)));
1274            }
1275            if content.contains("[#Data]") {
1276                specifiers.push(Box::new(TableSpecifier::SpecialItem(SpecialItem::Data)));
1277            }
1278            if content.contains("[#Totals]") {
1279                specifiers.push(Box::new(TableSpecifier::SpecialItem(SpecialItem::Totals)));
1280            }
1281            if content.contains("[#All]") {
1282                specifiers.push(Box::new(TableSpecifier::SpecialItem(SpecialItem::All)));
1283            }
1284
1285            if !specifiers.is_empty() {
1286                return Ok(Some(TableSpecifier::Combination(specifiers)));
1287            }
1288        }
1289
1290        // Fallback to storing as a column specifier
1291        Ok(Some(TableSpecifier::Column(content.trim().to_string())))
1292    }
1293
1294    /// Get the Excel-style string representation of this reference
1295    pub fn to_excel_string(&self) -> String {
1296        match self {
1297            ReferenceType::Cell {
1298                sheet,
1299                row,
1300                col,
1301                row_abs,
1302                col_abs,
1303            } => {
1304                let col_str = Self::format_col(*col, *col_abs);
1305                let row_str = Self::format_row(*row, *row_abs);
1306                if let Some(s) = sheet {
1307                    if sheet_name_needs_quoting(s) {
1308                        let escaped_name = s.replace('\'', "''");
1309                        format!("'{}'!{}{}", escaped_name, col_str, row_str)
1310                    } else {
1311                        format!("{}!{}{}", s, col_str, row_str)
1312                    }
1313                } else {
1314                    format!("{}{}", col_str, row_str)
1315                }
1316            }
1317            ReferenceType::Range {
1318                sheet,
1319                start_row,
1320                start_col,
1321                end_row,
1322                end_col,
1323                start_row_abs,
1324                start_col_abs,
1325                end_row_abs,
1326                end_col_abs,
1327            } => {
1328                // Format start reference
1329                let start_ref = match (start_col, start_row) {
1330                    (Some(col), Some(row)) => format!(
1331                        "{}{}",
1332                        Self::format_col(*col, *start_col_abs),
1333                        Self::format_row(*row, *start_row_abs)
1334                    ),
1335                    (Some(col), None) => Self::format_col(*col, *start_col_abs),
1336                    (None, Some(row)) => Self::format_row(*row, *start_row_abs),
1337                    (None, None) => "".to_string(), // Should not happen in normal usage
1338                };
1339
1340                // Format end reference
1341                let end_ref = match (end_col, end_row) {
1342                    (Some(col), Some(row)) => format!(
1343                        "{}{}",
1344                        Self::format_col(*col, *end_col_abs),
1345                        Self::format_row(*row, *end_row_abs)
1346                    ),
1347                    (Some(col), None) => Self::format_col(*col, *end_col_abs),
1348                    (None, Some(row)) => Self::format_row(*row, *end_row_abs),
1349                    (None, None) => "".to_string(), // Should not happen in normal usage
1350                };
1351
1352                let range_part = format!("{start_ref}:{end_ref}");
1353
1354                if let Some(s) = sheet {
1355                    if sheet_name_needs_quoting(s) {
1356                        let escaped_name = s.replace('\'', "''");
1357                        format!("'{escaped_name}'!{range_part}")
1358                    } else {
1359                        format!("{s}!{range_part}")
1360                    }
1361                } else {
1362                    range_part
1363                }
1364            }
1365            ReferenceType::External(ext) => ext.raw.clone(),
1366            ReferenceType::Table(table_ref) => {
1367                if let Some(specifier) = &table_ref.specifier {
1368                    format!("{}[{}]", table_ref.name, specifier)
1369                } else {
1370                    table_ref.name.clone()
1371                }
1372            }
1373            ReferenceType::NamedRange(name) => name.clone(),
1374        }
1375    }
1376}
1377
1378/// The different types of AST nodes.
1379#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
1380#[derive(Debug, Clone, PartialEq, Hash)]
1381pub enum ASTNodeType {
1382    Literal(LiteralValue),
1383    Reference {
1384        original: String, // Original reference string (preserved for display/debugging)
1385        reference: ReferenceType, // Parsed reference
1386    },
1387    UnaryOp {
1388        op: String,
1389        expr: Box<ASTNode>,
1390    },
1391    BinaryOp {
1392        op: String,
1393        left: Box<ASTNode>,
1394        right: Box<ASTNode>,
1395    },
1396    Function {
1397        name: String,
1398        args: Vec<ASTNode>, // Most functions have <= 4 args
1399    },
1400    Array(Vec<Vec<ASTNode>>), // Most arrays are small
1401}
1402
1403impl Display for ASTNodeType {
1404    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1405        match self {
1406            ASTNodeType::Literal(value) => write!(f, "Literal({value})"),
1407            ASTNodeType::Reference { reference, .. } => write!(f, "Reference({reference:?})"),
1408            ASTNodeType::UnaryOp { op, expr } => write!(f, "UnaryOp({op}, {expr})"),
1409            ASTNodeType::BinaryOp { op, left, right } => {
1410                write!(f, "BinaryOp({op}, {left}, {right})")
1411            }
1412            ASTNodeType::Function { name, args } => write!(f, "Function({name}, {args:?})"),
1413            ASTNodeType::Array(rows) => write!(f, "Array({rows:?})"),
1414        }
1415    }
1416}
1417
1418/// An AST node represents a parsed formula element
1419#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
1420#[derive(Debug, Clone, PartialEq)]
1421pub struct ASTNode {
1422    pub node_type: ASTNodeType,
1423    pub source_token: Option<Token>,
1424    /// True if this AST contains any volatile function calls.
1425    ///
1426    /// This is set by the parser when a volatility classifier is provided.
1427    /// For ASTs constructed manually (e.g., in tests), this defaults to false.
1428    pub contains_volatile: bool,
1429}
1430
1431impl ASTNode {
1432    pub fn new(node_type: ASTNodeType, source_token: Option<Token>) -> Self {
1433        ASTNode {
1434            node_type,
1435            source_token,
1436            contains_volatile: false,
1437        }
1438    }
1439
1440    /// Create an ASTNode while explicitly setting contains_volatile.
1441    pub fn new_with_volatile(
1442        node_type: ASTNodeType,
1443        source_token: Option<Token>,
1444        contains_volatile: bool,
1445    ) -> Self {
1446        ASTNode {
1447            node_type,
1448            source_token,
1449            contains_volatile,
1450        }
1451    }
1452
1453    /// Whether this AST contains any volatile functions.
1454    pub fn contains_volatile(&self) -> bool {
1455        self.contains_volatile
1456    }
1457
1458    pub fn fingerprint(&self) -> u64 {
1459        self.calculate_hash()
1460    }
1461
1462    /// Calculate a hash for this ASTNode
1463    pub fn calculate_hash(&self) -> u64 {
1464        let mut hasher = FormulaHasher::new();
1465        self.hash_node(&mut hasher);
1466        hasher.finish()
1467    }
1468
1469    fn hash_node(&self, hasher: &mut FormulaHasher) {
1470        match &self.node_type {
1471            ASTNodeType::Literal(value) => {
1472                hasher.write(&[1]); // Discriminant for Literal
1473                value.hash(hasher);
1474            }
1475            ASTNodeType::Reference { reference, .. } => {
1476                hasher.write(&[2]); // Discriminant for Reference
1477                reference.hash(hasher);
1478            }
1479            ASTNodeType::UnaryOp { op, expr } => {
1480                hasher.write(&[3]); // Discriminant for UnaryOp
1481                hasher.write(op.as_bytes());
1482                expr.hash_node(hasher);
1483            }
1484            ASTNodeType::BinaryOp { op, left, right } => {
1485                hasher.write(&[4]); // Discriminant for BinaryOp
1486                hasher.write(op.as_bytes());
1487                left.hash_node(hasher);
1488                right.hash_node(hasher);
1489            }
1490            ASTNodeType::Function { name, args } => {
1491                hasher.write(&[5]); // Discriminant for Function
1492                // Use lowercase function name to be case-insensitive
1493                let name_lower = name.to_lowercase();
1494                hasher.write(name_lower.as_bytes());
1495                hasher.write_usize(args.len());
1496                for arg in args {
1497                    arg.hash_node(hasher);
1498                }
1499            }
1500            ASTNodeType::Array(rows) => {
1501                hasher.write(&[6]); // Discriminant for Array
1502                hasher.write_usize(rows.len());
1503                for row in rows {
1504                    hasher.write_usize(row.len());
1505                    for item in row {
1506                        item.hash_node(hasher);
1507                    }
1508                }
1509            }
1510        }
1511    }
1512
1513    pub fn get_dependencies(&self) -> Vec<&ReferenceType> {
1514        let mut dependencies = Vec::new();
1515        self.collect_dependencies(&mut dependencies);
1516        dependencies
1517    }
1518
1519    pub fn get_dependency_strings(&self) -> Vec<String> {
1520        self.get_dependencies()
1521            .into_iter()
1522            .map(|dep| format!("{dep}"))
1523            .collect()
1524    }
1525
1526    fn collect_dependencies<'a>(&'a self, dependencies: &mut Vec<&'a ReferenceType>) {
1527        match &self.node_type {
1528            ASTNodeType::Reference { reference, .. } => {
1529                dependencies.push(reference);
1530            }
1531            ASTNodeType::UnaryOp { expr, .. } => {
1532                expr.collect_dependencies(dependencies);
1533            }
1534            ASTNodeType::BinaryOp { left, right, .. } => {
1535                left.collect_dependencies(dependencies);
1536                right.collect_dependencies(dependencies);
1537            }
1538            ASTNodeType::Function { args, .. } => {
1539                for arg in args {
1540                    arg.collect_dependencies(dependencies);
1541                }
1542            }
1543            ASTNodeType::Array(rows) => {
1544                for row in rows {
1545                    for item in row {
1546                        item.collect_dependencies(dependencies);
1547                    }
1548                }
1549            }
1550            _ => {}
1551        }
1552    }
1553
1554    /// Lightweight borrowed view of a reference encountered during AST traversal.
1555    /// This mirrors ReferenceType variants but borrows sheet/name strings to avoid allocation.
1556    pub fn refs(&self) -> RefIter<'_> {
1557        RefIter {
1558            stack: smallvec::smallvec![self],
1559        }
1560    }
1561
1562    /// Visit all references in this AST without allocating intermediates.
1563    pub fn visit_refs<V: FnMut(RefView<'_>)>(&self, mut visitor: V) {
1564        let mut stack: Vec<&ASTNode> = Vec::with_capacity(8);
1565        stack.push(self);
1566        while let Some(node) = stack.pop() {
1567            match &node.node_type {
1568                ASTNodeType::Reference { reference, .. } => visitor(RefView::from(reference)),
1569                ASTNodeType::UnaryOp { expr, .. } => stack.push(expr),
1570                ASTNodeType::BinaryOp { left, right, .. } => {
1571                    // Push right first so left is visited first (stable-ish order)
1572                    stack.push(right);
1573                    stack.push(left);
1574                }
1575                ASTNodeType::Function { args, .. } => {
1576                    for a in args.iter().rev() {
1577                        stack.push(a);
1578                    }
1579                }
1580                ASTNodeType::Array(rows) => {
1581                    for r in rows.iter().rev() {
1582                        for item in r.iter().rev() {
1583                            stack.push(item);
1584                        }
1585                    }
1586                }
1587                ASTNodeType::Literal(_) => {}
1588            }
1589        }
1590    }
1591
1592    /// Convenience: collect references into a small, inline vector based on a policy.
1593    pub fn collect_references(&self, policy: &CollectPolicy) -> SmallVec<[ReferenceType; 4]> {
1594        let mut out: SmallVec<[ReferenceType; 4]> = SmallVec::new();
1595        self.visit_refs(|rv| match rv {
1596            RefView::Cell {
1597                sheet,
1598                row,
1599                col,
1600                row_abs,
1601                col_abs,
1602            } => out.push(ReferenceType::Cell {
1603                sheet: sheet.map(|s| s.to_string()),
1604                row,
1605                col,
1606                row_abs,
1607                col_abs,
1608            }),
1609            RefView::Range {
1610                sheet,
1611                start_row,
1612                start_col,
1613                end_row,
1614                end_col,
1615                start_row_abs,
1616                start_col_abs,
1617                end_row_abs,
1618                end_col_abs,
1619            } => {
1620                // Optionally expand very small finite ranges into individual cells
1621                if policy.expand_small_ranges
1622                    && let (Some(sr), Some(sc), Some(er), Some(ec)) =
1623                        (start_row, start_col, end_row, end_col)
1624                {
1625                    let rows = er.saturating_sub(sr) + 1;
1626                    let cols = ec.saturating_sub(sc) + 1;
1627                    let area = rows.saturating_mul(cols);
1628                    if area as usize <= policy.range_expansion_limit {
1629                        let row_abs = start_row_abs && end_row_abs;
1630                        let col_abs = start_col_abs && end_col_abs;
1631                        for r in sr..=er {
1632                            for c in sc..=ec {
1633                                out.push(ReferenceType::Cell {
1634                                    sheet: sheet.map(|s| s.to_string()),
1635                                    row: r,
1636                                    col: c,
1637                                    row_abs,
1638                                    col_abs,
1639                                });
1640                            }
1641                        }
1642                        return; // handled
1643                    }
1644                }
1645                out.push(ReferenceType::Range {
1646                    sheet: sheet.map(|s| s.to_string()),
1647                    start_row,
1648                    start_col,
1649                    end_row,
1650                    end_col,
1651                    start_row_abs,
1652                    start_col_abs,
1653                    end_row_abs,
1654                    end_col_abs,
1655                });
1656            }
1657            RefView::External {
1658                raw,
1659                book,
1660                sheet,
1661                kind,
1662            } => out.push(ReferenceType::External(ExternalReference {
1663                raw: raw.to_string(),
1664                book: ExternalBookRef::Token(book.to_string()),
1665                sheet: sheet.to_string(),
1666                kind,
1667            })),
1668            RefView::Table { name, specifier } => out.push(ReferenceType::Table(TableReference {
1669                name: name.to_string(),
1670                specifier: specifier.cloned(),
1671            })),
1672            RefView::NamedRange { name } => {
1673                if policy.include_names {
1674                    out.push(ReferenceType::NamedRange(name.to_string()));
1675                }
1676            }
1677        });
1678        out
1679    }
1680}
1681
1682/// A borrowing view over a ReferenceType. Avoids cloning sheet/names while walking.
1683#[derive(Clone, Copy, Debug)]
1684pub enum RefView<'a> {
1685    Cell {
1686        sheet: Option<&'a str>,
1687        row: u32,
1688        col: u32,
1689        row_abs: bool,
1690        col_abs: bool,
1691    },
1692    Range {
1693        sheet: Option<&'a str>,
1694        start_row: Option<u32>,
1695        start_col: Option<u32>,
1696        end_row: Option<u32>,
1697        end_col: Option<u32>,
1698        start_row_abs: bool,
1699        start_col_abs: bool,
1700        end_row_abs: bool,
1701        end_col_abs: bool,
1702    },
1703    External {
1704        raw: &'a str,
1705        book: &'a str,
1706        sheet: &'a str,
1707        kind: ExternalRefKind,
1708    },
1709    Table {
1710        name: &'a str,
1711        specifier: Option<&'a TableSpecifier>,
1712    },
1713    NamedRange {
1714        name: &'a str,
1715    },
1716}
1717
1718impl<'a> From<&'a ReferenceType> for RefView<'a> {
1719    fn from(r: &'a ReferenceType) -> Self {
1720        match r {
1721            ReferenceType::Cell {
1722                sheet,
1723                row,
1724                col,
1725                row_abs,
1726                col_abs,
1727            } => RefView::Cell {
1728                sheet: sheet.as_deref(),
1729                row: *row,
1730                col: *col,
1731                row_abs: *row_abs,
1732                col_abs: *col_abs,
1733            },
1734            ReferenceType::Range {
1735                sheet,
1736                start_row,
1737                start_col,
1738                end_row,
1739                end_col,
1740                start_row_abs,
1741                start_col_abs,
1742                end_row_abs,
1743                end_col_abs,
1744            } => RefView::Range {
1745                sheet: sheet.as_deref(),
1746                start_row: *start_row,
1747                start_col: *start_col,
1748                end_row: *end_row,
1749                end_col: *end_col,
1750                start_row_abs: *start_row_abs,
1751                start_col_abs: *start_col_abs,
1752                end_row_abs: *end_row_abs,
1753                end_col_abs: *end_col_abs,
1754            },
1755            ReferenceType::External(ext) => RefView::External {
1756                raw: ext.raw.as_str(),
1757                book: ext.book.token(),
1758                sheet: ext.sheet.as_str(),
1759                kind: ext.kind,
1760            },
1761            ReferenceType::Table(tr) => RefView::Table {
1762                name: tr.name.as_str(),
1763                specifier: tr.specifier.as_ref(),
1764            },
1765            ReferenceType::NamedRange(name) => RefView::NamedRange { name },
1766        }
1767    }
1768}
1769
1770/// Iterator over RefView for an AST, implemented via an explicit stack to avoid recursion allocation.
1771pub struct RefIter<'a> {
1772    stack: smallvec::SmallVec<[&'a ASTNode; 8]>,
1773}
1774
1775impl<'a> Iterator for RefIter<'a> {
1776    type Item = RefView<'a>;
1777    fn next(&mut self) -> Option<Self::Item> {
1778        while let Some(node) = self.stack.pop() {
1779            match &node.node_type {
1780                ASTNodeType::Reference { reference, .. } => return Some(RefView::from(reference)),
1781                ASTNodeType::UnaryOp { expr, .. } => self.stack.push(expr),
1782                ASTNodeType::BinaryOp { left, right, .. } => {
1783                    self.stack.push(right);
1784                    self.stack.push(left);
1785                }
1786                ASTNodeType::Function { args, .. } => {
1787                    for a in args.iter().rev() {
1788                        self.stack.push(a);
1789                    }
1790                }
1791                ASTNodeType::Array(rows) => {
1792                    for r in rows.iter().rev() {
1793                        for item in r.iter().rev() {
1794                            self.stack.push(item);
1795                        }
1796                    }
1797                }
1798                ASTNodeType::Literal(_) => {}
1799            }
1800        }
1801        None
1802    }
1803}
1804
/// Options governing how references are gathered from an AST.
#[derive(Debug, Clone)]
pub struct CollectPolicy {
    /// When set, a fully bounded range whose cell count does not exceed
    /// `range_expansion_limit` is reported as individual cells.
    pub expand_small_ranges: bool,
    /// Largest cell count a range may have and still be expanded.
    pub range_expansion_limit: usize,
    /// Whether named-range references are included in the result.
    pub include_names: bool,
}

impl Default for CollectPolicy {
    /// No range expansion (limit `0`), named ranges included.
    fn default() -> Self {
        CollectPolicy {
            include_names: true,
            expand_small_ranges: false,
            range_expansion_limit: 0,
        }
    }
}
1822
1823impl Display for ASTNode {
1824    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1825        write!(f, "{}", self.node_type)
1826    }
1827}
1828
1829impl std::hash::Hash for ASTNode {
1830    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
1831        let hash = self.calculate_hash();
1832        state.write_u64(hash);
1833    }
1834}
1835
1836/// A parser for converting tokens into an AST.
1837pub struct Parser {
1838    tokens: Arc<[Token]>,
1839    position: usize,
1840    /// Optional classifier to determine whether a function name is volatile.
1841    volatility_classifier: Option<VolatilityClassifierBox>,
1842    dialect: FormulaDialect,
1843}
1844
1845impl TryFrom<&str> for Parser {
1846    type Error = TokenizerError;
1847
1848    fn try_from(formula: &str) -> Result<Self, Self::Error> {
1849        let tokens = Tokenizer::new(formula)?.items;
1850        Ok(Self::new(tokens, false))
1851    }
1852}
1853
1854impl TryFrom<String> for Parser {
1855    type Error = TokenizerError;
1856
1857    fn try_from(formula: String) -> Result<Self, Self::Error> {
1858        Self::try_from(formula.as_str())
1859    }
1860}
1861
1862impl Parser {
1863    pub fn new(tokens: Vec<Token>, include_whitespace: bool) -> Self {
1864        Self::new_with_dialect(tokens, include_whitespace, FormulaDialect::Excel)
1865    }
1866
1867    pub fn new_with_dialect(
1868        mut tokens: Vec<Token>,
1869        include_whitespace: bool,
1870        dialect: FormulaDialect,
1871    ) -> Self {
1872        if !include_whitespace {
1873            tokens.retain(|t| t.token_type != TokenType::Whitespace);
1874        }
1875
1876        Parser {
1877            tokens: Arc::from(tokens.into_boxed_slice()),
1878            position: 0,
1879            volatility_classifier: None,
1880            dialect,
1881        }
1882    }
1883
1884    pub fn try_from_formula(formula: &str) -> Result<Self, TokenizerError> {
1885        let tokens = Tokenizer::new(formula)?.items;
1886        Ok(Self::new(tokens, false))
1887    }
1888
1889    /// Provide a function-volatility classifier for this parser.
1890    /// If set, the parser will annotate ASTs with a contains_volatile bit.
1891    pub fn with_volatility_classifier<F>(mut self, f: F) -> Self
1892    where
1893        F: Fn(&str) -> bool + Send + Sync + 'static,
1894    {
1895        self.volatility_classifier = Some(Box::new(f));
1896        self
1897    }
1898
1899    /// Convenience constructor to set a classifier alongside other options.
1900    pub fn new_with_classifier<F>(tokens: Vec<Token>, include_whitespace: bool, f: F) -> Self
1901    where
1902        F: Fn(&str) -> bool + Send + Sync + 'static,
1903    {
1904        Self::new(tokens, include_whitespace).with_volatility_classifier(f)
1905    }
1906
1907    pub fn new_with_classifier_and_dialect<F>(
1908        tokens: Vec<Token>,
1909        include_whitespace: bool,
1910        dialect: FormulaDialect,
1911        f: F,
1912    ) -> Self
1913    where
1914        F: Fn(&str) -> bool + Send + Sync + 'static,
1915    {
1916        Self::new_with_dialect(tokens, include_whitespace, dialect).with_volatility_classifier(f)
1917    }
1918
1919    fn skip_whitespace(&mut self) {
1920        while self.position < self.tokens.len()
1921            && self.tokens[self.position].token_type == TokenType::Whitespace
1922        {
1923            self.position += 1;
1924        }
1925    }
1926
1927    /// Parse the tokens into an AST.
1928    pub fn parse(&mut self) -> Result<ASTNode, ParserError> {
1929        if self.tokens.is_empty() {
1930            return Err(ParserError {
1931                message: "No tokens to parse".to_string(),
1932                position: None,
1933            });
1934        }
1935
1936        self.skip_whitespace();
1937        if self.position >= self.tokens.len() {
1938            return Err(ParserError {
1939                message: "No tokens to parse".to_string(),
1940                position: None,
1941            });
1942        }
1943
1944        // Check for literal formula (doesn't start with '=')
1945        if self.tokens[self.position].token_type == TokenType::Literal {
1946            let token = self.tokens[self.position].clone();
1947            self.position += 1;
1948            self.skip_whitespace();
1949            if self.position < self.tokens.len() {
1950                return Err(ParserError {
1951                    message: format!(
1952                        "Unexpected token at position {}: {:?}",
1953                        self.position, self.tokens[self.position]
1954                    ),
1955                    position: Some(self.position),
1956                });
1957            }
1958            return Ok(ASTNode::new(
1959                ASTNodeType::Literal(LiteralValue::Text(token.value.clone())),
1960                Some(token),
1961            ));
1962        }
1963
1964        let ast = self.parse_expression()?;
1965        self.skip_whitespace();
1966        if self.position < self.tokens.len() {
1967            return Err(ParserError {
1968                message: format!(
1969                    "Unexpected token at position {}: {:?}",
1970                    self.position, self.tokens[self.position]
1971                ),
1972                position: Some(self.position),
1973            });
1974        }
1975        Ok(ast)
1976    }
1977
1978    fn parse_expression(&mut self) -> Result<ASTNode, ParserError> {
1979        self.parse_bp(0)
1980    }
1981
1982    // Pratt-style precedence parser. `min_precedence` is the minimum binding power
1983    // an operator must have to be consumed at this level.
1984    fn parse_bp(&mut self, min_precedence: u8) -> Result<ASTNode, ParserError> {
1985        let mut left = self.parse_prefix()?;
1986
1987        loop {
1988            self.skip_whitespace();
1989            if self.position >= self.tokens.len() {
1990                break;
1991            }
1992
1993            // Postfix operators (e.g. percent).
1994            if self.tokens[self.position].token_type == TokenType::OpPostfix {
1995                let (precedence, _) = self.tokens[self.position]
1996                    .get_precedence()
1997                    .unwrap_or((0, Associativity::Left));
1998                if precedence < min_precedence {
1999                    break;
2000                }
2001
2002                let op_token = self.tokens[self.position].clone();
2003                self.position += 1;
2004                let contains_volatile = left.contains_volatile;
2005                left = ASTNode::new_with_volatile(
2006                    ASTNodeType::UnaryOp {
2007                        op: op_token.value.clone(),
2008                        expr: Box::new(left),
2009                    },
2010                    Some(op_token),
2011                    contains_volatile,
2012                );
2013                continue;
2014            }
2015
2016            let token = &self.tokens[self.position];
2017            if token.token_type != TokenType::OpInfix {
2018                break;
2019            }
2020
2021            let (precedence, associativity) =
2022                token.get_precedence().unwrap_or((0, Associativity::Left));
2023            if precedence < min_precedence {
2024                break;
2025            }
2026
2027            let op_token = self.tokens[self.position].clone();
2028            self.position += 1;
2029
2030            let next_min_precedence = if associativity == Associativity::Left {
2031                precedence + 1
2032            } else {
2033                precedence
2034            };
2035
2036            let right = self.parse_bp(next_min_precedence)?;
2037            let contains_volatile = left.contains_volatile || right.contains_volatile;
2038            left = ASTNode::new_with_volatile(
2039                ASTNodeType::BinaryOp {
2040                    op: op_token.value.clone(),
2041                    left: Box::new(left),
2042                    right: Box::new(right),
2043                },
2044                Some(op_token),
2045                contains_volatile,
2046            );
2047        }
2048
2049        Ok(left)
2050    }
2051
2052    fn parse_prefix(&mut self) -> Result<ASTNode, ParserError> {
2053        self.skip_whitespace();
2054        if self.position < self.tokens.len()
2055            && self.tokens[self.position].token_type == TokenType::OpPrefix
2056        {
2057            let op_token = self.tokens[self.position].clone();
2058            self.position += 1;
2059
2060            // Prefix unary binds looser than exponent, so parse the RHS with
2061            // min_precedence equal to unary's precedence.
2062            let (precedence, _) = op_token
2063                .get_precedence()
2064                .unwrap_or((0, Associativity::Right));
2065
2066            let expr = self.parse_bp(precedence)?;
2067            let contains_volatile = expr.contains_volatile;
2068            return Ok(ASTNode::new_with_volatile(
2069                ASTNodeType::UnaryOp {
2070                    op: op_token.value.clone(),
2071                    expr: Box::new(expr),
2072                },
2073                Some(op_token),
2074                contains_volatile,
2075            ));
2076        }
2077
2078        self.parse_primary()
2079    }
2080
2081    fn parse_primary(&mut self) -> Result<ASTNode, ParserError> {
2082        self.skip_whitespace();
2083        if self.position >= self.tokens.len() {
2084            return Err(ParserError {
2085                message: "Unexpected end of tokens".to_string(),
2086                position: Some(self.position),
2087            });
2088        }
2089
2090        let token = &self.tokens[self.position];
2091        match token.token_type {
2092            TokenType::Operand => {
2093                let operand_token = self.tokens[self.position].clone();
2094                self.position += 1;
2095                self.parse_operand(operand_token)
2096            }
2097            TokenType::Func => {
2098                let func_token = self.tokens[self.position].clone();
2099                self.position += 1;
2100                self.parse_function(func_token)
2101            }
2102            TokenType::Paren if token.subtype == TokenSubType::Open => {
2103                self.position += 1;
2104                let expr = self.parse_expression()?;
2105                if self.position >= self.tokens.len()
2106                    || self.tokens[self.position].token_type != TokenType::Paren
2107                    || self.tokens[self.position].subtype != TokenSubType::Close
2108                {
2109                    return Err(ParserError {
2110                        message: "Expected closing parenthesis".to_string(),
2111                        position: Some(self.position),
2112                    });
2113                }
2114                self.position += 1;
2115                Ok(expr)
2116            }
2117            TokenType::Array if token.subtype == TokenSubType::Open => {
2118                self.position += 1;
2119                self.parse_array()
2120            }
2121            _ => Err(ParserError {
2122                message: format!("Unexpected token: {token:?}"),
2123                position: Some(self.position),
2124            }),
2125        }
2126    }
2127
2128    fn parse_operand(&mut self, token: Token) -> Result<ASTNode, ParserError> {
2129        match token.subtype {
2130            TokenSubType::Number => {
2131                let value = token.value.parse::<f64>().map_err(|_| ParserError {
2132                    message: format!("Invalid number: {}", token.value),
2133                    position: Some(self.position),
2134                })?;
2135                Ok(ASTNode::new(
2136                    ASTNodeType::Literal(LiteralValue::Number(value)),
2137                    Some(token),
2138                ))
2139            }
2140            TokenSubType::Text => {
2141                // Strip surrounding quotes from text literals
2142                let mut text = token.value.clone();
2143                if text.starts_with('"') && text.ends_with('"') && text.len() >= 2 {
2144                    text = text[1..text.len() - 1].to_string();
2145                    // Handle escaped quotes
2146                    text = text.replace("\"\"", "\"");
2147                }
2148                Ok(ASTNode::new(
2149                    ASTNodeType::Literal(LiteralValue::Text(text)),
2150                    Some(token),
2151                ))
2152            }
2153            TokenSubType::Logical => {
2154                let value = token.value.to_uppercase() == "TRUE";
2155                Ok(ASTNode::new(
2156                    ASTNodeType::Literal(LiteralValue::Boolean(value)),
2157                    Some(token),
2158                ))
2159            }
2160            TokenSubType::Error => {
2161                let error = ExcelError::from_error_string(&token.value);
2162                Ok(ASTNode::new(
2163                    ASTNodeType::Literal(LiteralValue::Error(error)),
2164                    Some(token),
2165                ))
2166            }
2167            TokenSubType::Range => {
2168                let reference = ReferenceType::from_string_with_dialect(&token.value, self.dialect)
2169                    .map_err(|e| ParserError {
2170                        message: format!("Invalid reference '{}': {}", token.value, e),
2171                        position: Some(self.position),
2172                    })?;
2173                Ok(ASTNode::new(
2174                    ASTNodeType::Reference {
2175                        original: token.value.clone(),
2176                        reference,
2177                    },
2178                    Some(token),
2179                ))
2180            }
2181            _ => Err(ParserError {
2182                message: format!("Unexpected operand subtype: {:?}", token.subtype),
2183                position: Some(self.position),
2184            }),
2185        }
2186    }
2187
2188    fn parse_function(&mut self, func_token: Token) -> Result<ASTNode, ParserError> {
2189        let name = func_token.value[..func_token.value.len() - 1].to_string();
2190        let args = self.parse_function_arguments()?;
2191        // Determine volatility for this function
2192        let this_is_volatile = self
2193            .volatility_classifier
2194            .as_ref()
2195            .map(|f| f(name.as_str()))
2196            .unwrap_or(false);
2197        let args_volatile = args.iter().any(|a| a.contains_volatile);
2198
2199        Ok(ASTNode::new_with_volatile(
2200            ASTNodeType::Function { name, args },
2201            Some(func_token),
2202            this_is_volatile || args_volatile,
2203        ))
2204    }
2205
2206    /// Parse function arguments.
2207    fn parse_function_arguments(&mut self) -> Result<Vec<ASTNode>, ParserError> {
2208        let mut args = Vec::new();
2209
2210        // Check for closing parenthesis (empty arguments)
2211        if self.position < self.tokens.len()
2212            && self.tokens[self.position].token_type == TokenType::Func
2213            && self.tokens[self.position].subtype == TokenSubType::Close
2214        {
2215            self.position += 1;
2216            return Ok(args);
2217        }
2218
2219        // Handle optional arguments (consecutive separators)
2220        // Check if we start with a separator (empty first argument)
2221        if self.position < self.tokens.len()
2222            && self.tokens[self.position].token_type == TokenType::Sep
2223            && self.tokens[self.position].subtype == TokenSubType::Arg
2224        {
2225            // Empty first argument - represented as empty text literal for compatibility
2226            args.push(ASTNode::new(
2227                ASTNodeType::Literal(LiteralValue::Text("".to_string())),
2228                None,
2229            ));
2230            self.position += 1;
2231        } else {
2232            // Parse first argument
2233            args.push(self.parse_expression()?);
2234        }
2235
2236        // Parse remaining arguments
2237        while self.position < self.tokens.len() {
2238            let token = &self.tokens[self.position];
2239
2240            if token.token_type == TokenType::Sep && token.subtype == TokenSubType::Arg {
2241                self.position += 1;
2242                // Check for consecutive separators (empty argument)
2243                if self.position < self.tokens.len() {
2244                    let next_token = &self.tokens[self.position];
2245                    if next_token.token_type == TokenType::Sep
2246                        && next_token.subtype == TokenSubType::Arg
2247                    {
2248                        // Empty argument - represented as empty text literal for compatibility
2249                        args.push(ASTNode::new(
2250                            ASTNodeType::Literal(LiteralValue::Text("".to_string())),
2251                            None,
2252                        ));
2253                    } else if next_token.token_type == TokenType::Func
2254                        && next_token.subtype == TokenSubType::Close
2255                    {
2256                        // Empty last argument
2257                        args.push(ASTNode::new(
2258                            ASTNodeType::Literal(LiteralValue::Text("".to_string())),
2259                            None,
2260                        ));
2261                        self.position += 1;
2262                        break;
2263                    } else {
2264                        args.push(self.parse_expression()?);
2265                    }
2266                } else {
2267                    // Trailing separator at end of formula
2268                    args.push(ASTNode::new(
2269                        ASTNodeType::Literal(LiteralValue::Text("".to_string())),
2270                        None,
2271                    ));
2272                }
2273            } else if token.token_type == TokenType::Func && token.subtype == TokenSubType::Close {
2274                self.position += 1;
2275                break;
2276            } else {
2277                return Err(ParserError {
2278                    message: format!("Expected ',' or ')' in function arguments, got {token:?}"),
2279                    position: Some(self.position),
2280                });
2281            }
2282        }
2283
2284        Ok(args)
2285    }
2286
2287    fn parse_array(&mut self) -> Result<ASTNode, ParserError> {
2288        let mut rows = Vec::new();
2289        let mut current_row = Vec::new();
2290
2291        // Check for empty array
2292        if self.position < self.tokens.len()
2293            && self.tokens[self.position].token_type == TokenType::Array
2294            && self.tokens[self.position].subtype == TokenSubType::Close
2295        {
2296            self.position += 1;
2297            return Ok(ASTNode::new(ASTNodeType::Array(rows), None));
2298        }
2299
2300        // Parse first element
2301        current_row.push(self.parse_expression()?);
2302
2303        while self.position < self.tokens.len() {
2304            let token = &self.tokens[self.position];
2305
2306            if token.token_type == TokenType::Sep {
2307                if token.subtype == TokenSubType::Arg {
2308                    // Column separator
2309                    self.position += 1;
2310                    current_row.push(self.parse_expression()?);
2311                } else if token.subtype == TokenSubType::Row {
2312                    // Row separator
2313                    self.position += 1;
2314                    rows.push(current_row);
2315                    current_row = vec![self.parse_expression()?];
2316                }
2317            } else if token.token_type == TokenType::Array && token.subtype == TokenSubType::Close {
2318                self.position += 1;
2319                rows.push(current_row);
2320                break;
2321            } else {
2322                return Err(ParserError {
2323                    message: format!("Unexpected token in array: {token:?}"),
2324                    position: Some(self.position),
2325                });
2326            }
2327        }
2328
2329        // Array volatility is the OR of element volatility
2330        let contains_volatile = rows
2331            .iter()
2332            .flat_map(|r| r.iter())
2333            .any(|n| n.contains_volatile);
2334        Ok(ASTNode::new_with_volatile(
2335            ASTNodeType::Array(rows),
2336            None,
2337            contains_volatile,
2338        ))
2339    }
2340}
2341
2342impl From<TokenizerError> for ParserError {
2343    fn from(err: TokenizerError) -> Self {
2344        ParserError {
2345            message: err.message,
2346            position: Some(err.pos),
2347        }
2348    }
2349}
2350
2351struct SpanParser<'a> {
2352    source: &'a str,
2353    tokens: &'a [crate::tokenizer::TokenSpan],
2354    position: usize,
2355    volatility_classifier: Option<VolatilityClassifierBox>,
2356    dialect: FormulaDialect,
2357}
2358
2359impl<'a> SpanParser<'a> {
2360    fn new(
2361        source: &'a str,
2362        tokens: &'a [crate::tokenizer::TokenSpan],
2363        dialect: FormulaDialect,
2364    ) -> Self {
2365        SpanParser {
2366            source,
2367            tokens,
2368            position: 0,
2369            volatility_classifier: None,
2370            dialect,
2371        }
2372    }
2373
2374    fn with_volatility_classifier<F>(mut self, f: F) -> Self
2375    where
2376        F: Fn(&str) -> bool + Send + Sync + 'static,
2377    {
2378        self.volatility_classifier = Some(Box::new(f));
2379        self
2380    }
2381
2382    fn skip_whitespace(&mut self) {
2383        while self.position < self.tokens.len()
2384            && self.tokens[self.position].token_type == TokenType::Whitespace
2385        {
2386            self.position += 1;
2387        }
2388    }
2389
2390    fn span_value(&self, span: &crate::tokenizer::TokenSpan) -> &str {
2391        &self.source[span.start..span.end]
2392    }
2393
2394    fn span_to_token(&self, span: &crate::tokenizer::TokenSpan) -> Token {
2395        Token::new_with_span(
2396            self.span_value(span).to_string(),
2397            span.token_type,
2398            span.subtype,
2399            span.start,
2400            span.end,
2401        )
2402    }
2403
2404    fn span_precedence(&self, span: &crate::tokenizer::TokenSpan) -> Option<(u8, Associativity)> {
2405        if !matches!(
2406            span.token_type,
2407            TokenType::OpPrefix | TokenType::OpInfix | TokenType::OpPostfix
2408        ) {
2409            return None;
2410        }
2411
2412        let op = if span.token_type == TokenType::OpPrefix {
2413            "u"
2414        } else {
2415            self.span_value(span)
2416        };
2417
2418        match op {
2419            ":" | " " | "," => Some((8, Associativity::Left)),
2420            "%" => Some((7, Associativity::Left)),
2421            "^" => Some((6, Associativity::Right)),
2422            "u" => Some((5, Associativity::Right)),
2423            "*" | "/" => Some((4, Associativity::Left)),
2424            "+" | "-" => Some((3, Associativity::Left)),
2425            "&" => Some((2, Associativity::Left)),
2426            "=" | "<" | ">" | "<=" | ">=" | "<>" => Some((1, Associativity::Left)),
2427            _ => None,
2428        }
2429    }
2430
2431    fn parse(&mut self) -> Result<ASTNode, ParserError> {
2432        if self.tokens.is_empty() {
2433            return Err(ParserError {
2434                message: "No tokens to parse".to_string(),
2435                position: None,
2436            });
2437        }
2438
2439        self.skip_whitespace();
2440        if self.position >= self.tokens.len() {
2441            return Err(ParserError {
2442                message: "No tokens to parse".to_string(),
2443                position: None,
2444            });
2445        }
2446
2447        if self.tokens[self.position].token_type == TokenType::Literal {
2448            let span = self.tokens[self.position];
2449            self.position += 1;
2450            self.skip_whitespace();
2451            if self.position < self.tokens.len() {
2452                return Err(ParserError {
2453                    message: format!(
2454                        "Unexpected token at position {}: {:?}",
2455                        self.position, self.tokens[self.position]
2456                    ),
2457                    position: Some(self.position),
2458                });
2459            }
2460
2461            let token = self.span_to_token(&span);
2462            return Ok(ASTNode::new(
2463                ASTNodeType::Literal(LiteralValue::Text(token.value.clone())),
2464                Some(token),
2465            ));
2466        }
2467
2468        let ast = self.parse_expression()?;
2469        self.skip_whitespace();
2470        if self.position < self.tokens.len() {
2471            return Err(ParserError {
2472                message: format!(
2473                    "Unexpected token at position {}: {:?}",
2474                    self.position, self.tokens[self.position]
2475                ),
2476                position: Some(self.position),
2477            });
2478        }
2479        Ok(ast)
2480    }
2481
2482    fn parse_expression(&mut self) -> Result<ASTNode, ParserError> {
2483        self.parse_bp(0)
2484    }
2485
2486    fn parse_bp(&mut self, min_precedence: u8) -> Result<ASTNode, ParserError> {
2487        let mut left = self.parse_prefix()?;
2488
2489        loop {
2490            self.skip_whitespace();
2491            if self.position >= self.tokens.len() {
2492                break;
2493            }
2494
2495            if self.tokens[self.position].token_type == TokenType::OpPostfix {
2496                let (precedence, _) = self
2497                    .span_precedence(&self.tokens[self.position])
2498                    .unwrap_or((0, Associativity::Left));
2499                if precedence < min_precedence {
2500                    break;
2501                }
2502
2503                let op_span = self.tokens[self.position];
2504                self.position += 1;
2505                let op_token = self.span_to_token(&op_span);
2506                let contains_volatile = left.contains_volatile;
2507                left = ASTNode::new_with_volatile(
2508                    ASTNodeType::UnaryOp {
2509                        op: op_token.value.clone(),
2510                        expr: Box::new(left),
2511                    },
2512                    Some(op_token),
2513                    contains_volatile,
2514                );
2515                continue;
2516            }
2517
2518            let token = &self.tokens[self.position];
2519            if token.token_type != TokenType::OpInfix {
2520                break;
2521            }
2522
2523            let (precedence, associativity) = self
2524                .span_precedence(token)
2525                .unwrap_or((0, Associativity::Left));
2526            if precedence < min_precedence {
2527                break;
2528            }
2529
2530            let op_span = self.tokens[self.position];
2531            self.position += 1;
2532
2533            let next_min_precedence = if associativity == Associativity::Left {
2534                precedence + 1
2535            } else {
2536                precedence
2537            };
2538
2539            let right = self.parse_bp(next_min_precedence)?;
2540            let op_token = self.span_to_token(&op_span);
2541            let contains_volatile = left.contains_volatile || right.contains_volatile;
2542            left = ASTNode::new_with_volatile(
2543                ASTNodeType::BinaryOp {
2544                    op: op_token.value.clone(),
2545                    left: Box::new(left),
2546                    right: Box::new(right),
2547                },
2548                Some(op_token),
2549                contains_volatile,
2550            );
2551        }
2552
2553        Ok(left)
2554    }
2555
2556    fn parse_prefix(&mut self) -> Result<ASTNode, ParserError> {
2557        self.skip_whitespace();
2558        if self.position < self.tokens.len()
2559            && self.tokens[self.position].token_type == TokenType::OpPrefix
2560        {
2561            let op_span = self.tokens[self.position];
2562            self.position += 1;
2563
2564            let (precedence, _) = self
2565                .span_precedence(&op_span)
2566                .unwrap_or((0, Associativity::Right));
2567
2568            let expr = self.parse_bp(precedence)?;
2569            let op_token = self.span_to_token(&op_span);
2570            let contains_volatile = expr.contains_volatile;
2571            return Ok(ASTNode::new_with_volatile(
2572                ASTNodeType::UnaryOp {
2573                    op: op_token.value.clone(),
2574                    expr: Box::new(expr),
2575                },
2576                Some(op_token),
2577                contains_volatile,
2578            ));
2579        }
2580
2581        self.parse_primary()
2582    }
2583
2584    fn parse_primary(&mut self) -> Result<ASTNode, ParserError> {
2585        self.skip_whitespace();
2586        if self.position >= self.tokens.len() {
2587            return Err(ParserError {
2588                message: "Unexpected end of tokens".to_string(),
2589                position: Some(self.position),
2590            });
2591        }
2592
2593        let token = &self.tokens[self.position];
2594        match token.token_type {
2595            TokenType::Operand => {
2596                let span = self.tokens[self.position];
2597                self.position += 1;
2598                self.parse_operand(span)
2599            }
2600            TokenType::Func => {
2601                let span = self.tokens[self.position];
2602                self.position += 1;
2603                self.parse_function(span)
2604            }
2605            TokenType::Paren if token.subtype == TokenSubType::Open => {
2606                self.position += 1;
2607                let expr = self.parse_expression()?;
2608                self.skip_whitespace();
2609                if self.position >= self.tokens.len()
2610                    || self.tokens[self.position].token_type != TokenType::Paren
2611                    || self.tokens[self.position].subtype != TokenSubType::Close
2612                {
2613                    return Err(ParserError {
2614                        message: "Expected closing parenthesis".to_string(),
2615                        position: Some(self.position),
2616                    });
2617                }
2618                self.position += 1;
2619                Ok(expr)
2620            }
2621            TokenType::Array if token.subtype == TokenSubType::Open => {
2622                self.position += 1;
2623                self.parse_array()
2624            }
2625            _ => Err(ParserError {
2626                message: format!("Unexpected token: {token:?}"),
2627                position: Some(self.position),
2628            }),
2629        }
2630    }
2631
2632    fn parse_operand(&mut self, span: crate::tokenizer::TokenSpan) -> Result<ASTNode, ParserError> {
2633        let value = self.span_value(&span);
2634        let token = self.span_to_token(&span);
2635
2636        match span.subtype {
2637            TokenSubType::Number => {
2638                let value = value.parse::<f64>().map_err(|_| ParserError {
2639                    message: format!("Invalid number: {value}"),
2640                    position: Some(self.position),
2641                })?;
2642                Ok(ASTNode::new(
2643                    ASTNodeType::Literal(LiteralValue::Number(value)),
2644                    Some(token),
2645                ))
2646            }
2647            TokenSubType::Text => {
2648                let mut text = value.to_string();
2649                if text.starts_with('"') && text.ends_with('"') && text.len() >= 2 {
2650                    text = text[1..text.len() - 1].to_string();
2651                    text = text.replace("\"\"", "\"");
2652                }
2653                Ok(ASTNode::new(
2654                    ASTNodeType::Literal(LiteralValue::Text(text)),
2655                    Some(token),
2656                ))
2657            }
2658            TokenSubType::Logical => {
2659                let v = value.to_uppercase() == "TRUE";
2660                Ok(ASTNode::new(
2661                    ASTNodeType::Literal(LiteralValue::Boolean(v)),
2662                    Some(token),
2663                ))
2664            }
2665            TokenSubType::Error => {
2666                let error = ExcelError::from_error_string(value);
2667                Ok(ASTNode::new(
2668                    ASTNodeType::Literal(LiteralValue::Error(error)),
2669                    Some(token),
2670                ))
2671            }
2672            TokenSubType::Range => {
2673                let reference = ReferenceType::from_string_with_dialect(value, self.dialect)
2674                    .map_err(|e| ParserError {
2675                        message: format!("Invalid reference '{value}': {e}"),
2676                        position: Some(self.position),
2677                    })?;
2678                Ok(ASTNode::new(
2679                    ASTNodeType::Reference {
2680                        original: value.to_string(),
2681                        reference,
2682                    },
2683                    Some(token),
2684                ))
2685            }
2686            _ => Err(ParserError {
2687                message: format!("Unexpected operand subtype: {:?}", span.subtype),
2688                position: Some(self.position),
2689            }),
2690        }
2691    }
2692
2693    fn parse_function(
2694        &mut self,
2695        func_span: crate::tokenizer::TokenSpan,
2696    ) -> Result<ASTNode, ParserError> {
2697        let func_value = self.span_value(&func_span);
2698        if func_value.is_empty() {
2699            return Err(ParserError {
2700                message: "Invalid function token".to_string(),
2701                position: Some(self.position),
2702            });
2703        }
2704        let name = func_value[..func_value.len() - 1].to_string();
2705        let args = self.parse_function_arguments()?;
2706
2707        let this_is_volatile = self
2708            .volatility_classifier
2709            .as_ref()
2710            .map(|f| f(name.as_str()))
2711            .unwrap_or(false);
2712        let args_volatile = args.iter().any(|a| a.contains_volatile);
2713
2714        let func_token = self.span_to_token(&func_span);
2715        Ok(ASTNode::new_with_volatile(
2716            ASTNodeType::Function { name, args },
2717            Some(func_token),
2718            this_is_volatile || args_volatile,
2719        ))
2720    }
2721
2722    fn parse_function_arguments(&mut self) -> Result<Vec<ASTNode>, ParserError> {
2723        let mut args = Vec::new();
2724
2725        self.skip_whitespace();
2726        if self.position < self.tokens.len()
2727            && self.tokens[self.position].token_type == TokenType::Func
2728            && self.tokens[self.position].subtype == TokenSubType::Close
2729        {
2730            self.position += 1;
2731            return Ok(args);
2732        }
2733
2734        self.skip_whitespace();
2735        if self.position < self.tokens.len()
2736            && self.tokens[self.position].token_type == TokenType::Sep
2737            && self.tokens[self.position].subtype == TokenSubType::Arg
2738        {
2739            args.push(ASTNode::new(
2740                ASTNodeType::Literal(LiteralValue::Text("".to_string())),
2741                None,
2742            ));
2743            self.position += 1;
2744        } else {
2745            args.push(self.parse_expression()?);
2746        }
2747
2748        while self.position < self.tokens.len() {
2749            self.skip_whitespace();
2750            if self.position >= self.tokens.len() {
2751                break;
2752            }
2753
2754            let token = &self.tokens[self.position];
2755            if token.token_type == TokenType::Sep && token.subtype == TokenSubType::Arg {
2756                self.position += 1;
2757                self.skip_whitespace();
2758                if self.position < self.tokens.len() {
2759                    let next_token = &self.tokens[self.position];
2760                    if next_token.token_type == TokenType::Sep
2761                        && next_token.subtype == TokenSubType::Arg
2762                    {
2763                        args.push(ASTNode::new(
2764                            ASTNodeType::Literal(LiteralValue::Text("".to_string())),
2765                            None,
2766                        ));
2767                    } else if next_token.token_type == TokenType::Func
2768                        && next_token.subtype == TokenSubType::Close
2769                    {
2770                        args.push(ASTNode::new(
2771                            ASTNodeType::Literal(LiteralValue::Text("".to_string())),
2772                            None,
2773                        ));
2774                        self.position += 1;
2775                        break;
2776                    } else {
2777                        args.push(self.parse_expression()?);
2778                    }
2779                } else {
2780                    args.push(ASTNode::new(
2781                        ASTNodeType::Literal(LiteralValue::Text("".to_string())),
2782                        None,
2783                    ));
2784                }
2785            } else if token.token_type == TokenType::Func && token.subtype == TokenSubType::Close {
2786                self.position += 1;
2787                break;
2788            } else {
2789                return Err(ParserError {
2790                    message: format!("Expected ',' or ')' in function arguments, got {token:?}"),
2791                    position: Some(self.position),
2792                });
2793            }
2794        }
2795
2796        Ok(args)
2797    }
2798
2799    fn parse_array(&mut self) -> Result<ASTNode, ParserError> {
2800        let mut rows = Vec::new();
2801        let mut current_row = Vec::new();
2802
2803        self.skip_whitespace();
2804        if self.position < self.tokens.len()
2805            && self.tokens[self.position].token_type == TokenType::Array
2806            && self.tokens[self.position].subtype == TokenSubType::Close
2807        {
2808            self.position += 1;
2809            return Ok(ASTNode::new(ASTNodeType::Array(rows), None));
2810        }
2811
2812        current_row.push(self.parse_expression()?);
2813
2814        while self.position < self.tokens.len() {
2815            self.skip_whitespace();
2816            if self.position >= self.tokens.len() {
2817                break;
2818            }
2819            let token = &self.tokens[self.position];
2820
2821            if token.token_type == TokenType::Sep {
2822                if token.subtype == TokenSubType::Arg {
2823                    self.position += 1;
2824                    current_row.push(self.parse_expression()?);
2825                } else if token.subtype == TokenSubType::Row {
2826                    self.position += 1;
2827                    rows.push(current_row);
2828                    current_row = vec![self.parse_expression()?];
2829                }
2830            } else if token.token_type == TokenType::Array && token.subtype == TokenSubType::Close {
2831                self.position += 1;
2832                rows.push(current_row);
2833                break;
2834            } else {
2835                return Err(ParserError {
2836                    message: format!("Unexpected token in array: {token:?}"),
2837                    position: Some(self.position),
2838                });
2839            }
2840        }
2841
2842        let contains_volatile = rows
2843            .iter()
2844            .flat_map(|r| r.iter())
2845            .any(|n| n.contains_volatile);
2846
2847        Ok(ASTNode::new_with_volatile(
2848            ASTNodeType::Array(rows),
2849            None,
2850            contains_volatile,
2851        ))
2852    }
2853}
2854
2855/// Normalise a reference string to its canonical form
2856pub fn normalise_reference(reference: &str) -> Result<String, ParsingError> {
2857    let ref_type = ReferenceType::from_string(reference)?;
2858    Ok(ref_type.to_string())
2859}
2860
2861pub fn parse<T: AsRef<str>>(formula: T) -> Result<ASTNode, ParserError> {
2862    parse_with_dialect(formula, FormulaDialect::Excel)
2863}
2864
2865pub fn parse_with_dialect<T: AsRef<str>>(
2866    formula: T,
2867    dialect: FormulaDialect,
2868) -> Result<ASTNode, ParserError> {
2869    let spans = crate::tokenizer::tokenize_spans_with_dialect(formula.as_ref(), dialect)?;
2870    let mut parser = SpanParser::new(formula.as_ref(), &spans, dialect);
2871    parser.parse()
2872}
2873
2874/// Parse a single formula and annotate volatility using the provided classifier.
2875/// This is a convenience wrapper around `Parser::new_with_classifier`.
2876pub fn parse_with_volatility_classifier<T, F>(
2877    formula: T,
2878    classifier: F,
2879) -> Result<ASTNode, ParserError>
2880where
2881    T: AsRef<str>,
2882    F: Fn(&str) -> bool + Send + Sync + 'static,
2883{
2884    parse_with_dialect_and_volatility_classifier(formula, FormulaDialect::Excel, classifier)
2885}
2886
2887pub fn parse_with_dialect_and_volatility_classifier<T, F>(
2888    formula: T,
2889    dialect: FormulaDialect,
2890    classifier: F,
2891) -> Result<ASTNode, ParserError>
2892where
2893    T: AsRef<str>,
2894    F: Fn(&str) -> bool + Send + Sync + 'static,
2895{
2896    let spans = crate::tokenizer::tokenize_spans_with_dialect(formula.as_ref(), dialect)?;
2897    let mut parser =
2898        SpanParser::new(formula.as_ref(), &spans, dialect).with_volatility_classifier(classifier);
2899    parser.parse()
2900}
2901
2902/// Efficient batch parser with an internal token cache and optional volatility classifier.
2903///
2904/// The cache is keyed by the original formula string; repeated formulas across a batch
2905/// (very common in spreadsheets) will avoid re-tokenization and whitespace filtering.
2906pub struct BatchParser {
2907    include_whitespace: bool,
2908    volatility_classifier: Option<VolatilityClassifierArc>,
2909    token_cache: std::collections::HashMap<String, Arc<[crate::tokenizer::TokenSpan]>>, // cached tokens
2910    dialect: FormulaDialect,
2911}
2912
2913impl BatchParser {
2914    pub fn builder() -> BatchParserBuilder {
2915        BatchParserBuilder::default()
2916    }
2917
2918    /// Parse a formula using the internal cache and configured classifier.
2919    pub fn parse(&mut self, formula: &str) -> Result<ASTNode, ParserError> {
2920        let spans = if let Some(tokens) = self.token_cache.get(formula) {
2921            Arc::clone(tokens)
2922        } else {
2923            let mut spans = crate::tokenizer::tokenize_spans_with_dialect(formula, self.dialect)?;
2924            if !self.include_whitespace {
2925                spans.retain(|t| t.token_type != TokenType::Whitespace);
2926            }
2927
2928            let spans: Arc<[crate::tokenizer::TokenSpan]> = Arc::from(spans.into_boxed_slice());
2929            self.token_cache
2930                .insert(formula.to_string(), Arc::clone(&spans));
2931            spans
2932        };
2933
2934        let mut parser = SpanParser::new(formula, spans.as_ref(), self.dialect);
2935        if let Some(classifier) = self.volatility_classifier.clone() {
2936            parser = parser.with_volatility_classifier(move |name| classifier(name));
2937        }
2938        parser.parse()
2939    }
2940}
2941
2942#[derive(Default)]
2943pub struct BatchParserBuilder {
2944    include_whitespace: bool,
2945    volatility_classifier: Option<VolatilityClassifierArc>,
2946    dialect: FormulaDialect,
2947}
2948
2949impl BatchParserBuilder {
2950    pub fn include_whitespace(mut self, include: bool) -> Self {
2951        self.include_whitespace = include;
2952        self
2953    }
2954
2955    pub fn with_volatility_classifier<F>(mut self, f: F) -> Self
2956    where
2957        F: Fn(&str) -> bool + Send + Sync + 'static,
2958    {
2959        self.volatility_classifier = Some(Arc::new(f));
2960        self
2961    }
2962
2963    pub fn dialect(mut self, dialect: FormulaDialect) -> Self {
2964        self.dialect = dialect;
2965        self
2966    }
2967
2968    pub fn build(self) -> BatchParser {
2969        BatchParser {
2970            include_whitespace: self.include_whitespace,
2971            volatility_classifier: self.volatility_classifier,
2972            token_cache: std::collections::HashMap::new(),
2973            dialect: self.dialect,
2974        }
2975    }
2976}