Skip to main content

formualizer_parse/
parser.rs

1use crate::tokenizer::{Associativity, Token, TokenSubType, TokenType, Tokenizer, TokenizerError};
2use crate::types::{FormulaDialect, ParsingError};
3use crate::{ExcelError, LiteralValue};
4
5#[cfg(feature = "serde")]
6use serde::{Deserialize, Serialize};
7
8use crate::hasher::FormulaHasher;
9use formualizer_common::coord::{
10    col_index_from_letters_1based, col_letters_from_1based, parse_a1_1based,
11};
12use formualizer_common::{
13    AxisBound, RelativeCoord, SheetCellRef, SheetLocator, SheetRangeRef, SheetRef,
14};
15use once_cell::sync::Lazy;
16use smallvec::SmallVec;
17use std::error::Error;
18use std::fmt::{self, Display};
19use std::hash::{Hash, Hasher};
20use std::str::FromStr;
21use std::sync::Arc;
22
/// Callable shape shared by the volatility classifier aliases below: a
/// `Send + Sync + 'static` predicate over a string slice.
// NOTE(review): the `&str` argument is presumably a function name — confirm against callers.
type VolatilityFn = dyn Fn(&str) -> bool + Send + Sync + 'static;
/// A [`VolatilityFn`] held behind a `Box`.
type VolatilityClassifierBox = Box<VolatilityFn>;
/// A [`VolatilityFn`] held behind an `Arc`.
type VolatilityClassifierArc = Arc<VolatilityFn>;
26
/// Error type used by the parser: a message plus an optional position.
#[derive(Debug)]
pub struct ParserError {
    /// Human-readable description of what went wrong.
    pub message: String,
    /// Position associated with the failure, when one is available.
    pub position: Option<usize>,
}

impl Display for ParserError {
    /// Writes `ParserError at position <pos>: <message>` when a position is set,
    /// and `ParserError: <message>` otherwise.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self.position {
            Some(pos) => write!(f, "ParserError at position {}: {}", pos, self.message),
            None => write!(f, "ParserError: {}", self.message),
        }
    }
}

impl Error for ParserError {}
45
46// Column lookup table for common columns (A-ZZ = 702 columns)
47static COLUMN_LOOKUP: Lazy<Vec<String>> = Lazy::new(|| {
48    let mut cols = Vec::with_capacity(702);
49    // Single letters A-Z
50    for c in b'A'..=b'Z' {
51        cols.push(String::from(c as char));
52    }
53    // Double letters AA-ZZ
54    for c1 in b'A'..=b'Z' {
55        for c2 in b'A'..=b'Z' {
56            cols.push(format!("{}{}", c1 as char, c2 as char));
57        }
58    }
59    cols
60});
61
62/// A structured table reference specifier for accessing specific parts of a table
63#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
64#[derive(Debug, Clone, PartialEq, Hash)]
65pub enum TableSpecifier {
66    /// The entire table
67    All,
68    /// The data area of the table (no headers or totals)
69    Data,
70    /// The headers row
71    Headers,
72    /// The totals row
73    Totals,
74    /// A specific row
75    Row(TableRowSpecifier),
76    /// A specific column
77    Column(String),
78    /// A range of columns
79    ColumnRange(String, String),
80    /// Special items like #Headers, #Data, #Totals, etc.
81    SpecialItem(SpecialItem),
82    /// A combination of specifiers, for complex references
83    Combination(Vec<Box<TableSpecifier>>),
84}
85
/// Identifies the row or rows a table reference applies to.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, PartialEq, Hash)]
pub enum TableRowSpecifier {
    /// The current row; which row that is depends on context (rendered as `@`).
    Current,
    /// Every row of the table.
    All,
    /// The data rows only.
    Data,
    /// The headers row.
    Headers,
    /// The totals row.
    Totals,
    /// One specific row, addressed by its 1-based index.
    Index(u32),
}
103
/// The special items that may appear inside a structured reference.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, PartialEq, Hash)]
pub enum SpecialItem {
    /// `#Headers`
    Headers,
    /// `#Data`
    Data,
    /// `#Totals`
    Totals,
    /// `#All`, i.e. the whole table
    All,
    /// `@`, i.e. the current row
    ThisRow,
}
119
120/// A reference to a table including specifiers
121#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
122#[derive(Debug, Clone, PartialEq, Hash)]
123pub struct TableReference {
124    /// The name of the table
125    pub name: String,
126    /// Optional specifier for which part of the table to use
127    pub specifier: Option<TableSpecifier>,
128}
129
/// Identifies the workbook targeted by an external reference.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, PartialEq, Hash)]
pub enum ExternalBookRef {
    /// The workbook token exactly as it was captured from the reference text.
    Token(String),
}

impl ExternalBookRef {
    /// Borrow the stored workbook token.
    pub fn token(&self) -> &str {
        // `Token` is the only variant, so this destructuring cannot fail.
        let Self::Token(raw) = self;
        raw
    }
}
143
/// The grid target of an external reference: a single cell or a range.
///
/// The `*_abs` flags record, per axis, whether that part of the reference is
/// absolute.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ExternalRefKind {
    /// A single cell.
    Cell {
        row: u32,
        col: u32,
        row_abs: bool,
        col_abs: bool,
    },
    /// A range; any of the four bounds may be absent.
    Range {
        start_row: Option<u32>,
        start_col: Option<u32>,
        end_row: Option<u32>,
        end_col: Option<u32>,
        start_row_abs: bool,
        start_col_abs: bool,
        end_row_abs: bool,
        end_col_abs: bool,
    },
}

impl ExternalRefKind {
    /// Build a cell target with both anchors relative.
    pub fn cell(row: u32, col: u32) -> Self {
        Self::cell_with_abs(row, col, false, false)
    }

    /// Build a cell target with explicit anchors.
    pub fn cell_with_abs(row: u32, col: u32, row_abs: bool, col_abs: bool) -> Self {
        Self::Cell {
            row,
            col,
            row_abs,
            col_abs,
        }
    }

    /// Build a range target with all four anchors relative.
    pub fn range(
        start_row: Option<u32>,
        start_col: Option<u32>,
        end_row: Option<u32>,
        end_col: Option<u32>,
    ) -> Self {
        Self::range_with_abs(
            start_row, start_col, end_row, end_col, false, false, false, false,
        )
    }

    /// Build a range target with explicit anchors.
    // The parameter list mirrors the variant's fields one-to-one; the explicit
    // signature keeps call sites readable, hence the lint exemption.
    #[allow(clippy::too_many_arguments)]
    pub fn range_with_abs(
        start_row: Option<u32>,
        start_col: Option<u32>,
        end_row: Option<u32>,
        end_col: Option<u32>,
        start_row_abs: bool,
        start_col_abs: bool,
        end_row_abs: bool,
        end_col_abs: bool,
    ) -> Self {
        Self::Range {
            start_row,
            start_col,
            end_row,
            end_col,
            start_row_abs,
            start_col_abs,
            end_row_abs,
            end_col_abs,
        }
    }
}
227
228#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
229#[derive(Debug, Clone, PartialEq, Hash)]
230pub struct ExternalReference {
231    pub raw: String,
232    pub book: ExternalBookRef,
233    pub sheet: String,
234    pub kind: ExternalRefKind,
235}
236
237/// A reference to something outside the cell.
238#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
239#[derive(Debug, Clone, PartialEq, Hash)]
240pub enum ReferenceType {
241    Cell {
242        sheet: Option<String>,
243        row: u32,
244        col: u32,
245        row_abs: bool,
246        col_abs: bool,
247    },
248    Range {
249        sheet: Option<String>,
250        start_row: Option<u32>,
251        start_col: Option<u32>,
252        end_row: Option<u32>,
253        end_col: Option<u32>,
254        start_row_abs: bool,
255        start_col_abs: bool,
256        end_row_abs: bool,
257        end_col_abs: bool,
258    },
259    External(ExternalReference),
260    Table(TableReference),
261    NamedRange(String),
262}
263
264impl Display for TableSpecifier {
265    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
266        match self {
267            TableSpecifier::All => write!(f, "#All"),
268            TableSpecifier::Data => write!(f, "#Data"),
269            TableSpecifier::Headers => write!(f, "#Headers"),
270            TableSpecifier::Totals => write!(f, "#Totals"),
271            TableSpecifier::Row(row) => write!(f, "{row}"),
272            TableSpecifier::Column(column) => write!(f, "{column}"),
273            TableSpecifier::ColumnRange(start, end) => write!(f, "{start}:{end}"),
274            TableSpecifier::SpecialItem(item) => write!(f, "{item}"),
275            TableSpecifier::Combination(specs) => {
276                // Emit nested bracketed parts so the surrounding Table formatter prints
277                // canonical structured refs like Table[[#Headers],[Column1]:[Column2]]
278                let parts: Vec<String> = specs.iter().map(|s| format!("[{s}]")).collect();
279                write!(f, "{}", parts.join(","))
280            }
281        }
282    }
283}
284
285impl Display for TableRowSpecifier {
286    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
287        match self {
288            TableRowSpecifier::Current => write!(f, "@"),
289            TableRowSpecifier::All => write!(f, "#All"),
290            TableRowSpecifier::Data => write!(f, "#Data"),
291            TableRowSpecifier::Headers => write!(f, "#Headers"),
292            TableRowSpecifier::Totals => write!(f, "#Totals"),
293            TableRowSpecifier::Index(idx) => write!(f, "{idx}"),
294        }
295    }
296}
297
298impl Display for SpecialItem {
299    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
300        match self {
301            SpecialItem::Headers => write!(f, "#Headers"),
302            SpecialItem::Data => write!(f, "#Data"),
303            SpecialItem::Totals => write!(f, "#Totals"),
304            SpecialItem::All => write!(f, "#All"),
305            SpecialItem::ThisRow => write!(f, "@"),
306        }
307    }
308}
309
/// Decide whether a sheet name has to be wrapped in single quotes when it is
/// written into an Excel formula.
///
/// A name needs quoting when it starts with an ASCII digit, contains a space or
/// any ASCII punctuation other than `_`, or equals one of the reserved words
/// (compared case-insensitively). An empty name never needs quoting.
fn sheet_name_needs_quoting(name: &str) -> bool {
    // Reserved words, uppercased for comparison with the uppercased name.
    const RESERVED: [&str; 9] = [
        "TRUE", "FALSE", "NULL", "REF", "DIV", "NAME", "NUM", "VALUE", "N/A",
    ];

    let leading_digit = match name.as_bytes().first() {
        Some(first) => first.is_ascii_digit(),
        None => return false,
    };
    if leading_digit {
        return true;
    }

    // Space plus every ASCII punctuation character except the underscore:
    // ! " # $ % & ' ( ) * + , - . / : ; < = > ? @ [ \ ] ^ ` { | } ~
    let has_special = name
        .bytes()
        .any(|b| b == b' ' || (b.is_ascii_punctuation() && b != b'_'));
    if has_special {
        return true;
    }

    let upper = name.to_uppercase();
    RESERVED.contains(&upper.as_str())
}
341
/// One piece of an OpenFormula-style reference: an optional sheet name and a
/// coordinate kept as text.
// NOTE(review): how `coord` is interpreted is decided by code outside this
// excerpt — confirm against the OpenFormula parser.
#[derive(Debug, Clone)]
struct OpenFormulaRefPart {
    sheet: Option<String>,
    coord: String,
}

/// One axis of a range endpoint: `(1-based index, is_absolute)`, or `None` when
/// the axis is not given.
type AxisPartWithAbs = Option<(u32, bool)>;
/// A range endpoint as produced by `parse_range_part_with_abs`: `(column, row)`.
type RangePartWithAbs = (AxisPartWithAbs, AxisPartWithAbs);
350
351impl ReferenceType {
352    /// Build a cell reference with relative anchors.
353    pub fn cell(sheet: Option<String>, row: u32, col: u32) -> Self {
354        Self::Cell {
355            sheet,
356            row,
357            col,
358            row_abs: false,
359            col_abs: false,
360        }
361    }
362
363    /// Build a cell reference with explicit anchors.
364    pub fn cell_with_abs(
365        sheet: Option<String>,
366        row: u32,
367        col: u32,
368        row_abs: bool,
369        col_abs: bool,
370    ) -> Self {
371        Self::Cell {
372            sheet,
373            row,
374            col,
375            row_abs,
376            col_abs,
377        }
378    }
379
380    /// Build a range reference with relative anchors.
381    pub fn range(
382        sheet: Option<String>,
383        start_row: Option<u32>,
384        start_col: Option<u32>,
385        end_row: Option<u32>,
386        end_col: Option<u32>,
387    ) -> Self {
388        Self::Range {
389            sheet,
390            start_row,
391            start_col,
392            end_row,
393            end_col,
394            start_row_abs: false,
395            start_col_abs: false,
396            end_row_abs: false,
397            end_col_abs: false,
398        }
399    }
400
401    /// Build a range reference with explicit anchors.
402    // Constructor-style helper mirroring the enum fields.
403    // Keeping the signature explicit makes callers easier to read.
404    #[allow(clippy::too_many_arguments)]
405    pub fn range_with_abs(
406        sheet: Option<String>,
407        start_row: Option<u32>,
408        start_col: Option<u32>,
409        end_row: Option<u32>,
410        end_col: Option<u32>,
411        start_row_abs: bool,
412        start_col_abs: bool,
413        end_row_abs: bool,
414        end_col_abs: bool,
415    ) -> Self {
416        Self::Range {
417            sheet,
418            start_row,
419            start_col,
420            end_row,
421            end_col,
422            start_row_abs,
423            start_col_abs,
424            end_row_abs,
425            end_col_abs,
426        }
427    }
428
429    /// Create a reference from a string. Can be A1, A:A, A1:B2, Table1[Column], etc.
430    pub fn from_string(reference: &str) -> Result<Self, ParsingError> {
431        Self::parse_excel_reference(reference)
432    }
433
434    /// Create a reference from a string using the specified formula dialect.
435    pub fn from_string_with_dialect(
436        reference: &str,
437        dialect: FormulaDialect,
438    ) -> Result<Self, ParsingError> {
439        match dialect {
440            FormulaDialect::Excel => Self::parse_excel_reference(reference),
441            FormulaDialect::OpenFormula => Self::parse_openformula_reference(reference)
442                .or_else(|_| Self::parse_excel_reference(reference)),
443        }
444    }
445
446    /// Parse a grid reference into a shared SheetRef, preserving $ anchors.
447    ///
448    /// Only cell and range references are supported. Table and named ranges return an error.
449    pub fn parse_sheet_ref(reference: &str) -> Result<SheetRef<'static>, ParsingError> {
450        Self::parse_sheet_ref_with_dialect(reference, FormulaDialect::Excel)
451    }
452
453    /// Parse a grid reference into a shared SheetRef using the specified dialect.
454    pub fn parse_sheet_ref_with_dialect(
455        reference: &str,
456        dialect: FormulaDialect,
457    ) -> Result<SheetRef<'static>, ParsingError> {
458        match dialect {
459            FormulaDialect::Excel => Self::parse_excel_sheet_ref(reference),
460            FormulaDialect::OpenFormula => Self::parse_openformula_sheet_ref(reference)
461                .or_else(|_| Self::parse_excel_sheet_ref(reference)),
462        }
463    }
464
465    /// Lossy conversion from parsed ReferenceType into SheetRef.
466    /// External, table, and named ranges are discarded; anchors are preserved.
467    pub fn to_sheet_ref_lossy(&self) -> Option<SheetRef<'_>> {
468        match self {
469            ReferenceType::Cell {
470                sheet,
471                row,
472                col,
473                row_abs,
474                col_abs,
475            } => {
476                let row0 = row.checked_sub(1)?;
477                let col0 = col.checked_sub(1)?;
478                let sheet_loc = match sheet.as_deref() {
479                    Some(name) => SheetLocator::from_name(name),
480                    None => SheetLocator::Current,
481                };
482                let coord = RelativeCoord::new(row0, col0, *row_abs, *col_abs);
483                Some(SheetRef::Cell(SheetCellRef::new(sheet_loc, coord)))
484            }
485            ReferenceType::Range {
486                sheet,
487                start_row,
488                start_col,
489                end_row,
490                end_col,
491                start_row_abs,
492                start_col_abs,
493                end_row_abs,
494                end_col_abs,
495            } => {
496                let sheet_loc = match sheet.as_deref() {
497                    Some(name) => SheetLocator::from_name(name),
498                    None => SheetLocator::Current,
499                };
500                let sr = start_row
501                    .and_then(|v| v.checked_sub(1).map(|i| AxisBound::new(i, *start_row_abs)));
502                if start_row.is_some() && sr.is_none() {
503                    return None;
504                }
505                let sc = start_col
506                    .and_then(|v| v.checked_sub(1).map(|i| AxisBound::new(i, *start_col_abs)));
507                if start_col.is_some() && sc.is_none() {
508                    return None;
509                }
510                let er =
511                    end_row.and_then(|v| v.checked_sub(1).map(|i| AxisBound::new(i, *end_row_abs)));
512                if end_row.is_some() && er.is_none() {
513                    return None;
514                }
515                let ec =
516                    end_col.and_then(|v| v.checked_sub(1).map(|i| AxisBound::new(i, *end_col_abs)));
517                if end_col.is_some() && ec.is_none() {
518                    return None;
519                }
520                let range = SheetRangeRef::from_parts(sheet_loc, sr, sc, er, ec).ok()?;
521                Some(SheetRef::Range(range))
522            }
523            _ => None,
524        }
525    }
526
527    fn parse_excel_sheet_ref(reference: &str) -> Result<SheetRef<'static>, ParsingError> {
528        let (sheet, ref_part) = Self::extract_sheet_name(reference);
529
530        if ref_part.contains('[') {
531            return Err(ParsingError::InvalidReference(
532                "Table references are not supported for SheetRef".to_string(),
533            ));
534        }
535
536        let sheet_loc: SheetLocator<'static> = match sheet {
537            Some(name) => SheetLocator::from_name(name),
538            None => SheetLocator::Current,
539        };
540
541        if ref_part.contains(':') {
542            let mut parts = ref_part.splitn(2, ':');
543            let start = parts.next().unwrap();
544            let end = parts.next().ok_or_else(|| {
545                ParsingError::InvalidReference(format!("Invalid range: {ref_part}"))
546            })?;
547
548            let (start_col, start_row) = Self::parse_range_part_with_abs(start)?;
549            let (end_col, end_row) = Self::parse_range_part_with_abs(end)?;
550
551            let start_col = Self::axis_bound_from_1based(start_col)?;
552            let start_row = Self::axis_bound_from_1based(start_row)?;
553            let end_col = Self::axis_bound_from_1based(end_col)?;
554            let end_row = Self::axis_bound_from_1based(end_row)?;
555
556            let range =
557                SheetRangeRef::from_parts(sheet_loc, start_row, start_col, end_row, end_col)
558                    .map_err(|err| ParsingError::InvalidReference(err.to_string()))?;
559            Ok(SheetRef::Range(range))
560        } else {
561            let (row, col, row_abs, col_abs) = parse_a1_1based(&ref_part)
562                .map_err(|err| ParsingError::InvalidReference(err.to_string()))?;
563            let coord = RelativeCoord::new(row - 1, col - 1, row_abs, col_abs);
564            Ok(SheetRef::Cell(SheetCellRef::new(sheet_loc, coord)))
565        }
566    }
567
568    fn parse_openformula_sheet_ref(reference: &str) -> Result<SheetRef<'static>, ParsingError> {
569        Self::parse_excel_sheet_ref(reference)
570    }
571
572    fn axis_bound_from_1based(
573        bound: Option<(u32, bool)>,
574    ) -> Result<Option<AxisBound>, ParsingError> {
575        match bound {
576            Some((index, abs)) => AxisBound::from_excel_1based(index, abs)
577                .map(Some)
578                .map_err(|err| ParsingError::InvalidReference(err.to_string())),
579            None => Ok(None),
580        }
581    }
582
583    fn parse_range_part_with_abs(part: &str) -> Result<RangePartWithAbs, ParsingError> {
584        if let Ok((row, col, row_abs, col_abs)) = parse_a1_1based(part) {
585            return Ok((Some((col, col_abs)), Some((row, row_abs))));
586        }
587
588        let bytes = part.as_bytes();
589        let len = bytes.len();
590        let mut i = 0usize;
591
592        let mut col_abs = false;
593        let mut row_abs = false;
594
595        if i < len && bytes[i] == b'$' {
596            col_abs = true;
597            i += 1;
598        }
599
600        let col_start = i;
601        while i < len && bytes[i].is_ascii_alphabetic() {
602            i += 1;
603        }
604
605        if i > col_start {
606            let col_str = &part[col_start..i];
607            let col1 = Self::column_to_number(col_str)?;
608
609            if i == len {
610                return Ok((Some((col1, col_abs)), None));
611            }
612
613            if i < len && bytes[i] == b'$' {
614                row_abs = true;
615                i += 1;
616            }
617
618            if i >= len {
619                return Err(ParsingError::InvalidReference(format!(
620                    "Invalid range part: {part}"
621                )));
622            }
623
624            let row_start = i;
625            while i < len && bytes[i].is_ascii_digit() {
626                i += 1;
627            }
628
629            if row_start == i || i != len {
630                return Err(ParsingError::InvalidReference(format!(
631                    "Invalid range part: {part}"
632                )));
633            }
634
635            let row_str = &part[row_start..i];
636            let row1 = row_str
637                .parse::<u32>()
638                .map_err(|_| ParsingError::InvalidReference(format!("Invalid row: {row_str}")))?;
639            if row1 == 0 {
640                return Err(ParsingError::InvalidReference(format!(
641                    "Invalid range part: {part}"
642                )));
643            }
644
645            return Ok((Some((col1, col_abs)), Some((row1, row_abs))));
646        }
647
648        i = 0;
649        if i < len && bytes[i] == b'$' {
650            row_abs = true;
651            i += 1;
652        }
653
654        let row_start = i;
655        while i < len && bytes[i].is_ascii_digit() {
656            i += 1;
657        }
658
659        if row_start == i || i != len {
660            return Err(ParsingError::InvalidReference(format!(
661                "Invalid range part: {part}"
662            )));
663        }
664
665        let row_str = &part[row_start..i];
666        let row1 = row_str
667            .parse::<u32>()
668            .map_err(|_| ParsingError::InvalidReference(format!("Invalid row: {row_str}")))?;
669        if row1 == 0 {
670            return Err(ParsingError::InvalidReference(format!(
671                "Invalid range part: {part}"
672            )));
673        }
674
675        Ok((None, Some((row1, row_abs))))
676    }
677
678    fn parse_excel_reference(reference: &str) -> Result<Self, ParsingError> {
679        // Excel structured reference shorthands that appear as a single bracketed token.
680        //
681        // We use these forms to avoid ambiguity with cell refs / named ranges:
682        // - `[TableName]` resolves to the table's data body (equivalent to `TableName[#Data]`).
683        // - `[@Column]` / `[@[Column Name]]` is a "This Row" selector; it requires table-aware
684        //   context during resolution and will be rewritten by the evaluator/graph builder.
685        if reference.starts_with('[') && reference.ends_with(']') && !reference.contains('!') {
686            return Self::parse_bracketed_structured_reference(reference);
687        }
688
689        // Extract sheet name if present
690        let (sheet, ref_part) = Self::extract_sheet_name(reference);
691
692        // Table references live in the ref_part (e.g., "Table1[Column]").
693        // Sheet names can contain '[' for external workbook refs (e.g., "[1]Sheet1!A1").
694        if ref_part.contains('[') {
695            return Self::parse_table_reference(&ref_part);
696        }
697
698        let external_sheet = sheet.as_deref().and_then(|s| {
699            // Excel external workbook refs embed a "[...]" token inside the sheet segment.
700            // Use the last '[' to allow paths/URIs that may contain earlier brackets, then
701            // take the first ']' after it to avoid being confused by ']' in the sheet name.
702            let lb = s.rfind('[')?;
703            let rb_rel = s[lb..].find(']')?;
704            let rb = lb + rb_rel;
705            if lb >= rb {
706                return None;
707            }
708
709            let token = &s[..=rb];
710            let sheet_name = &s[rb + 1..];
711            if sheet_name.is_empty() {
712                None
713            } else {
714                Some((token, sheet_name))
715            }
716        });
717
718        if ref_part.contains(':') {
719            // Range reference
720            let mut parts = ref_part.splitn(2, ':');
721            let start = parts.next().unwrap();
722            let end = parts.next().ok_or_else(|| {
723                ParsingError::InvalidReference(format!("Invalid range: {ref_part}"))
724            })?;
725            let (start_col, start_row) = Self::parse_range_part_with_abs(start)?;
726            let (end_col, end_row) = Self::parse_range_part_with_abs(end)?;
727
728            let split = |bound: Option<(u32, bool)>| match bound {
729                Some((index, abs)) => (Some(index), abs),
730                None => (None, false),
731            };
732            let (start_col, start_col_abs) = split(start_col);
733            let (start_row, start_row_abs) = split(start_row);
734            let (end_col, end_col_abs) = split(end_col);
735            let (end_row, end_row_abs) = split(end_row);
736
737            if let Some((book_token, sheet_name)) = external_sheet {
738                Ok(ReferenceType::External(ExternalReference {
739                    raw: reference.to_string(),
740                    book: ExternalBookRef::Token(book_token.to_string()),
741                    sheet: sheet_name.to_string(),
742                    kind: ExternalRefKind::Range {
743                        start_row,
744                        start_col,
745                        end_row,
746                        end_col,
747                        start_row_abs,
748                        start_col_abs,
749                        end_row_abs,
750                        end_col_abs,
751                    },
752                }))
753            } else {
754                Ok(ReferenceType::Range {
755                    sheet,
756                    start_row,
757                    start_col,
758                    end_row,
759                    end_col,
760                    start_row_abs,
761                    start_col_abs,
762                    end_row_abs,
763                    end_col_abs,
764                })
765            }
766        } else {
767            // Try to parse as a single cell reference
768            match Self::parse_cell_reference(&ref_part) {
769                Ok((col, row, col_abs, row_abs)) => {
770                    if let Some((book_token, sheet_name)) = external_sheet {
771                        Ok(ReferenceType::External(ExternalReference {
772                            raw: reference.to_string(),
773                            book: ExternalBookRef::Token(book_token.to_string()),
774                            sheet: sheet_name.to_string(),
775                            kind: ExternalRefKind::Cell {
776                                row,
777                                col,
778                                row_abs,
779                                col_abs,
780                            },
781                        }))
782                    } else {
783                        Ok(ReferenceType::Cell {
784                            sheet,
785                            row,
786                            col,
787                            row_abs,
788                            col_abs,
789                        })
790                    }
791                }
792                Err(_) => {
793                    // Treat it as a named range
794                    Ok(ReferenceType::NamedRange(reference.to_string()))
795                }
796            }
797        }
798    }
799
800    /// Parse a cell reference like "A1" into (column, row) using byte-based parsing.
801    fn parse_cell_reference(reference: &str) -> Result<(u32, u32, bool, bool), ParsingError> {
802        parse_a1_1based(reference)
803            .map(|(row, col, row_abs, col_abs)| (col, row, col_abs, row_abs))
804            .map_err(|_| {
805                ParsingError::InvalidReference(format!("Invalid cell reference: {reference}"))
806            })
807    }
808
809    /// Convert a column letter (e.g., "A", "BC") to a column number (1-based) using byte operations.
810    pub(crate) fn column_to_number(column: &str) -> Result<u32, ParsingError> {
811        col_index_from_letters_1based(column)
812            .map_err(|_| ParsingError::InvalidReference(format!("Invalid column: {column}")))
813    }
814
815    /// Convert a column number to a column letter using lookup table for common values.
816    pub(crate) fn number_to_column(num: u32) -> String {
817        if num == 0 {
818            return String::new();
819        }
820        // Use lookup table for common columns (1-702 covers A-ZZ)
821        if num > 0 && num <= 702 {
822            return COLUMN_LOOKUP[(num - 1) as usize].clone();
823        }
824
825        col_letters_from_1based(num).unwrap_or_default()
826    }
827
828    fn format_col(col: u32, abs: bool) -> String {
829        if abs {
830            format!("${}", Self::number_to_column(col))
831        } else {
832            Self::number_to_column(col)
833        }
834    }
835
836    fn format_row(row: u32, abs: bool) -> String {
837        if abs {
838            format!("${row}")
839        } else {
840            row.to_string()
841        }
842    }
843}
844
845impl Display for ReferenceType {
846    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
847        write!(
848            f,
849            "{}",
850            match self {
851                ReferenceType::Cell {
852                    sheet,
853                    row,
854                    col,
855                    row_abs,
856                    col_abs,
857                } => {
858                    let col_str = Self::format_col(*col, *col_abs);
859                    let row_str = Self::format_row(*row, *row_abs);
860
861                    if let Some(sheet_name) = sheet {
862                        if sheet_name_needs_quoting(sheet_name) {
863                            // Escape any single quotes in the sheet name by doubling them
864                            let escaped_name = sheet_name.replace('\'', "''");
865                            format!("'{escaped_name}'!{col_str}{row_str}")
866                        } else {
867                            format!("{sheet_name}!{col_str}{row_str}")
868                        }
869                    } else {
870                        format!("{col_str}{row_str}")
871                    }
872                }
873                ReferenceType::Range {
874                    sheet,
875                    start_row,
876                    start_col,
877                    end_row,
878                    end_col,
879                    start_row_abs,
880                    start_col_abs,
881                    end_row_abs,
882                    end_col_abs,
883                } => {
884                    // Format start reference
885                    let start_ref = match (start_col, start_row) {
886                        (Some(col), Some(row)) => format!(
887                            "{}{}",
888                            Self::format_col(*col, *start_col_abs),
889                            Self::format_row(*row, *start_row_abs)
890                        ),
891                        (Some(col), None) => Self::format_col(*col, *start_col_abs),
892                        (None, Some(row)) => Self::format_row(*row, *start_row_abs),
893                        (None, None) => "".to_string(), // Should not happen in normal usage
894                    };
895
896                    // Format end reference
897                    let end_ref = match (end_col, end_row) {
898                        (Some(col), Some(row)) => format!(
899                            "{}{}",
900                            Self::format_col(*col, *end_col_abs),
901                            Self::format_row(*row, *end_row_abs)
902                        ),
903                        (Some(col), None) => Self::format_col(*col, *end_col_abs),
904                        (None, Some(row)) => Self::format_row(*row, *end_row_abs),
905                        (None, None) => "".to_string(), // Should not happen in normal usage
906                    };
907
908                    let range_part = format!("{start_ref}:{end_ref}");
909
910                    if let Some(sheet_name) = sheet {
911                        if sheet_name_needs_quoting(sheet_name) {
912                            // Escape any single quotes in the sheet name by doubling them
913                            let escaped_name = sheet_name.replace('\'', "''");
914                            format!("'{escaped_name}'!{range_part}")
915                        } else {
916                            format!("{sheet_name}!{range_part}")
917                        }
918                    } else {
919                        range_part
920                    }
921                }
922                ReferenceType::External(ext) => ext.raw.clone(),
923                ReferenceType::Table(table_ref) => {
924                    if let Some(specifier) = &table_ref.specifier {
925                        // For table references, we need to handle column specifiers specially
926                        // to remove leading/trailing whitespace
927                        match specifier {
928                            TableSpecifier::Column(column) => {
929                                format!("{}[{}]", table_ref.name, column.trim())
930                            }
931                            TableSpecifier::ColumnRange(start, end) => {
932                                format!("{}[{}:{}]", table_ref.name, start.trim(), end.trim())
933                            }
934                            _ => {
935                                // For other specifiers, use the standard formatting
936                                format!("{}[{}]", table_ref.name, specifier)
937                            }
938                        }
939                    } else {
940                        table_ref.name.clone()
941                    }
942                }
943                ReferenceType::NamedRange(name) => name.clone(),
944            }
945        )
946    }
947}
948
949impl TryFrom<&str> for ReferenceType {
950    type Error = ParsingError;
951
952    fn try_from(value: &str) -> Result<Self, Self::Error> {
953        ReferenceType::from_string(value)
954    }
955}
956
957impl FromStr for ReferenceType {
958    type Err = ParsingError;
959
960    fn from_str(s: &str) -> Result<Self, Self::Err> {
961        ReferenceType::from_string(s)
962    }
963}
964
965impl ReferenceType {
966    /// Normalise the reference string (convert to canonical form)
967    pub fn normalise(&self) -> String {
968        format!("{self}")
969    }
970
971    /// Extract a sheet name from a reference using byte operations.
972    fn extract_sheet_name(reference: &str) -> (Option<String>, String) {
973        let bytes = reference.as_bytes();
974        let mut i = 0;
975
976        // Handle quoted sheet names.
977        // Excel escapes a single quote inside a quoted sheet name by doubling it.
978        // Example: 'Bob''s Sheet'!A1
979        if i < bytes.len() && bytes[i] == b'\'' {
980            i += 1;
981            let start = i;
982
983            while i < bytes.len() {
984                if bytes[i] == b'\'' {
985                    // Escaped quote inside sheet name: ''
986                    if i + 1 < bytes.len() && bytes[i + 1] == b'\'' {
987                        i += 2;
988                        continue;
989                    }
990
991                    // Closing quote followed by '!'
992                    if i + 1 < bytes.len() && bytes[i + 1] == b'!' {
993                        let raw = &reference[start..i];
994                        let sheet = raw.replace("''", "'");
995                        let ref_part = String::from(&reference[i + 2..]);
996                        return (Some(sheet), ref_part);
997                    }
998                }
999
1000                i += 1;
1001            }
1002        }
1003
1004        // Handle unquoted sheet names
1005        i = 0;
1006        while i < bytes.len() {
1007            if bytes[i] == b'!' && i > 0 {
1008                let sheet = String::from(&reference[0..i]);
1009                let ref_part = String::from(&reference[i + 1..]);
1010                return (Some(sheet), ref_part);
1011            }
1012            i += 1;
1013        }
1014
1015        (None, reference.to_string())
1016    }
1017
1018    /// Parse a table reference like "Table1[Column1]" or more complex ones like "Table1[[#All],[Column1]:[Column2]]".
1019    fn parse_table_reference(reference: &str) -> Result<Self, ParsingError> {
1020        // Find the first '[' to separate table name from specifier
1021        if let Some(bracket_pos) = reference.find('[') {
1022            let table_name = reference[..bracket_pos].trim();
1023            if table_name.is_empty() {
1024                return Err(ParsingError::InvalidReference(reference.to_string()));
1025            }
1026
1027            let specifier_str = &reference[bracket_pos..];
1028            let specifier = Self::parse_table_specifier(specifier_str)?;
1029
1030            Ok(ReferenceType::Table(TableReference {
1031                name: table_name.to_string(),
1032                specifier,
1033            }))
1034        } else {
1035            Err(ParsingError::InvalidReference(reference.to_string()))
1036        }
1037    }
1038
1039    fn parse_bracketed_structured_reference(reference: &str) -> Result<Self, ParsingError> {
1040        debug_assert!(reference.starts_with('[') && reference.ends_with(']'));
1041        let inner = reference[1..reference.len().saturating_sub(1)].trim();
1042        if inner.is_empty() {
1043            return Err(ParsingError::InvalidReference(reference.to_string()));
1044        }
1045
1046        // This-row column selector: [@Column] or [@[Column Name]]
1047        if let Some(rest) = inner.strip_prefix('@') {
1048            let mut col = rest.trim();
1049            if col.starts_with('[') && col.ends_with(']') && col.len() >= 2 {
1050                col = col[1..col.len() - 1].trim();
1051            }
1052            if col.is_empty() {
1053                return Err(ParsingError::InvalidReference(format!(
1054                    "This-row structured reference missing column: {reference}"
1055                )));
1056            }
1057
1058            let spec = TableSpecifier::Combination(vec![
1059                Box::new(TableSpecifier::SpecialItem(SpecialItem::ThisRow)),
1060                Box::new(TableSpecifier::Column(col.to_string())),
1061            ]);
1062            return Ok(ReferenceType::Table(TableReference {
1063                name: String::new(),
1064                specifier: Some(spec),
1065            }));
1066        }
1067
1068        // Table shorthand: [TableName] means data body.
1069        Ok(ReferenceType::Table(TableReference {
1070            name: inner.to_string(),
1071            specifier: Some(TableSpecifier::SpecialItem(SpecialItem::Data)),
1072        }))
1073    }
1074
1075    /// Parse a table specifier like "[Column1]" or "[[#All],[Column1]:[Column2]]"
1076    fn parse_table_specifier(specifier_str: &str) -> Result<Option<TableSpecifier>, ParsingError> {
1077        if specifier_str.is_empty() || !specifier_str.starts_with('[') {
1078            return Ok(None);
1079        }
1080
1081        // Find balanced closing bracket
1082        let mut depth = 0;
1083        let mut end_pos = 0;
1084
1085        for (i, c) in specifier_str.char_indices() {
1086            if c == '[' {
1087                depth += 1;
1088            } else if c == ']' {
1089                depth -= 1;
1090                if depth == 0 {
1091                    end_pos = i;
1092                    break;
1093                }
1094            }
1095        }
1096
1097        if depth != 0 || end_pos == 0 {
1098            return Err(ParsingError::InvalidReference(format!(
1099                "Unbalanced brackets in table specifier: {specifier_str}"
1100            )));
1101        }
1102
1103        // Extract content between outermost brackets
1104        let content = &specifier_str[1..end_pos];
1105
1106        // Handle different types of specifiers
1107        if content.is_empty() {
1108            // Empty brackets means the whole table
1109            return Ok(Some(TableSpecifier::All));
1110        }
1111
1112        // Handle special items
1113        if content.starts_with("#") {
1114            return Self::parse_special_item(content);
1115        }
1116
1117        // Handle column references
1118        if !content.contains('[') && !content.contains('#') {
1119            // Check for column range using iterator instead of split().collect()
1120            if let Some(colon_pos) = content.find(':') {
1121                let start = content[..colon_pos].trim();
1122                let end = content[colon_pos + 1..].trim();
1123                return Ok(Some(TableSpecifier::ColumnRange(
1124                    start.to_string(),
1125                    end.to_string(),
1126                )));
1127            } else {
1128                // Single column
1129                return Ok(Some(TableSpecifier::Column(content.trim().to_string())));
1130            }
1131        }
1132
1133        // Handle complex structured references with nested brackets
1134        if content.contains('[') {
1135            return Self::parse_complex_table_specifier(content);
1136        }
1137
1138        // If we can't determine the type, just use the raw specifier
1139        Ok(Some(TableSpecifier::Column(content.trim().to_string())))
1140    }
1141
1142    fn parse_openformula_reference(reference: &str) -> Result<Self, ParsingError> {
1143        if reference.starts_with('[') && reference.ends_with(']') {
1144            let inner = &reference[1..reference.len() - 1];
1145            if inner.is_empty() {
1146                return Err(ParsingError::InvalidReference(
1147                    "Empty OpenFormula reference".to_string(),
1148                ));
1149            }
1150
1151            let mut parts = inner.splitn(2, ':');
1152            let start_part_str = parts.next().unwrap();
1153            let end_part_str = parts.next();
1154
1155            let start_part = Self::parse_openformula_part(start_part_str)?;
1156            let end_part = if let Some(part) = end_part_str {
1157                Some(Self::parse_openformula_part(part)?)
1158            } else {
1159                None
1160            };
1161
1162            let sheet = match (&start_part.sheet, &end_part) {
1163                (Some(sheet), Some(end)) => {
1164                    if let Some(end_sheet) = &end.sheet {
1165                        if end_sheet != sheet {
1166                            return Err(ParsingError::InvalidReference(format!(
1167                                "Mismatched sheets in reference: {sheet} vs {end_sheet}"
1168                            )));
1169                        }
1170                    }
1171                    Some(sheet.clone())
1172                }
1173                (Some(sheet), None) => Some(sheet.clone()),
1174                (None, Some(end)) => end.sheet.clone(),
1175                (None, None) => None,
1176            };
1177
1178            let mut excel_like = String::new();
1179            if let Some(sheet_name) = sheet {
1180                if sheet_name_needs_quoting(&sheet_name) {
1181                    let escaped = sheet_name.replace('\'', "''");
1182                    excel_like.push('\'');
1183                    excel_like.push_str(&escaped);
1184                    excel_like.push('\'');
1185                } else {
1186                    excel_like.push_str(&sheet_name);
1187                }
1188                excel_like.push('!');
1189            }
1190
1191            excel_like.push_str(&start_part.coord);
1192            if let Some(end) = end_part {
1193                excel_like.push(':');
1194                excel_like.push_str(&end.coord);
1195            }
1196
1197            return Self::parse_excel_reference(&excel_like);
1198        }
1199
1200        Err(ParsingError::InvalidReference(format!(
1201            "Unsupported OpenFormula reference: {reference}"
1202        )))
1203    }
1204
1205    fn parse_openformula_part(part: &str) -> Result<OpenFormulaRefPart, ParsingError> {
1206        let trimmed = part.trim();
1207        if trimmed.is_empty() {
1208            return Err(ParsingError::InvalidReference(
1209                "Empty component in OpenFormula reference".to_string(),
1210            ));
1211        }
1212
1213        if trimmed == "." {
1214            return Err(ParsingError::InvalidReference(
1215                "Incomplete OpenFormula reference component".to_string(),
1216            ));
1217        }
1218
1219        if trimmed.starts_with('[') {
1220            // Nested brackets are not expected here
1221            return Err(ParsingError::InvalidReference(format!(
1222                "Unexpected '[' in OpenFormula reference component: {trimmed}"
1223            )));
1224        }
1225
1226        let (sheet, coord_slice) = if let Some(stripped) = trimmed.strip_prefix('.') {
1227            (None, stripped.trim())
1228        } else if let Some(dot_idx) = Self::find_openformula_sheet_separator(trimmed) {
1229            let sheet_part = trimmed[..dot_idx].trim();
1230            let coord_part = trimmed[dot_idx + 1..].trim();
1231            if coord_part.is_empty() {
1232                return Err(ParsingError::InvalidReference(format!(
1233                    "Missing coordinate in OpenFormula reference component: {trimmed}"
1234                )));
1235            }
1236            let sheet_name = Self::normalise_openformula_sheet(sheet_part)?;
1237            (Some(sheet_name), coord_part)
1238        } else {
1239            (None, trimmed)
1240        };
1241
1242        let coord = coord_slice.trim_start_matches('.').trim().to_string();
1243
1244        if coord.is_empty() {
1245            return Err(ParsingError::InvalidReference(format!(
1246                "Missing coordinate in OpenFormula reference component: {trimmed}"
1247            )));
1248        }
1249
1250        Ok(OpenFormulaRefPart { sheet, coord })
1251    }
1252
1253    fn normalise_openformula_sheet(sheet: &str) -> Result<String, ParsingError> {
1254        let without_abs = sheet.trim().trim_start_matches('$');
1255
1256        if without_abs.starts_with('\'') {
1257            if without_abs.len() < 2 || !without_abs.ends_with('\'') {
1258                return Err(ParsingError::InvalidReference(format!(
1259                    "Unterminated sheet name in OpenFormula reference: {sheet}"
1260                )));
1261            }
1262            let inner = &without_abs[1..without_abs.len() - 1];
1263            Ok(inner.replace("''", "'"))
1264        } else {
1265            Ok(without_abs.to_string())
1266        }
1267    }
1268
1269    fn find_openformula_sheet_separator(part: &str) -> Option<usize> {
1270        let bytes = part.as_bytes();
1271        let mut i = 0;
1272        let mut in_quotes = false;
1273
1274        while i < bytes.len() {
1275            match bytes[i] {
1276                b'\'' => {
1277                    if i + 1 < bytes.len() && bytes[i + 1] == b'\'' {
1278                        i += 2;
1279                        continue;
1280                    }
1281                    in_quotes = !in_quotes;
1282                    i += 1;
1283                }
1284                b'.' if !in_quotes => return Some(i),
1285                _ => i += 1,
1286            }
1287        }
1288
1289        None
1290    }
1291
1292    /// Parse a special item specifier like "#Headers", "#Data", etc.
1293    fn parse_special_item(content: &str) -> Result<Option<TableSpecifier>, ParsingError> {
1294        match content {
1295            "#All" => Ok(Some(TableSpecifier::SpecialItem(SpecialItem::All))),
1296            "#Headers" => Ok(Some(TableSpecifier::SpecialItem(SpecialItem::Headers))),
1297            "#Data" => Ok(Some(TableSpecifier::SpecialItem(SpecialItem::Data))),
1298            "#Totals" => Ok(Some(TableSpecifier::SpecialItem(SpecialItem::Totals))),
1299            "@" => Ok(Some(TableSpecifier::Row(TableRowSpecifier::Current))),
1300            _ => Err(ParsingError::InvalidReference(format!(
1301                "Unknown special item: {content}"
1302            ))),
1303        }
1304    }
1305
1306    /// Parse complex table specifiers with nested brackets
1307    fn parse_complex_table_specifier(
1308        content: &str,
1309    ) -> Result<Option<TableSpecifier>, ParsingError> {
1310        // This is a more complex case like [[#Headers],[Column1]:[Column2]]
1311        // For now, we'll just store the raw specifier and enhance this in the future
1312
1313        // Try to identify common patterns
1314        if content.contains("[#Headers]")
1315            || content.contains("[#All]")
1316            || content.contains("[#Data]")
1317            || content.contains("[#Totals]")
1318            || content.contains("[@]")
1319        {
1320            // This is a combination of specifiers
1321            // Parse them into a vector
1322            let mut specifiers = Vec::new();
1323
1324            // Simple parsing - this would need enhancement for full support
1325            if content.contains("[#Headers]") {
1326                specifiers.push(Box::new(TableSpecifier::SpecialItem(SpecialItem::Headers)));
1327            }
1328            if content.contains("[#Data]") {
1329                specifiers.push(Box::new(TableSpecifier::SpecialItem(SpecialItem::Data)));
1330            }
1331            if content.contains("[#Totals]") {
1332                specifiers.push(Box::new(TableSpecifier::SpecialItem(SpecialItem::Totals)));
1333            }
1334            if content.contains("[#All]") {
1335                specifiers.push(Box::new(TableSpecifier::SpecialItem(SpecialItem::All)));
1336            }
1337
1338            if !specifiers.is_empty() {
1339                return Ok(Some(TableSpecifier::Combination(specifiers)));
1340            }
1341        }
1342
1343        // Fallback to storing as a column specifier
1344        Ok(Some(TableSpecifier::Column(content.trim().to_string())))
1345    }
1346
1347    /// Get the Excel-style string representation of this reference
1348    pub fn to_excel_string(&self) -> String {
1349        match self {
1350            ReferenceType::Cell {
1351                sheet,
1352                row,
1353                col,
1354                row_abs,
1355                col_abs,
1356            } => {
1357                let col_str = Self::format_col(*col, *col_abs);
1358                let row_str = Self::format_row(*row, *row_abs);
1359                if let Some(s) = sheet {
1360                    if sheet_name_needs_quoting(s) {
1361                        let escaped_name = s.replace('\'', "''");
1362                        format!("'{}'!{}{}", escaped_name, col_str, row_str)
1363                    } else {
1364                        format!("{}!{}{}", s, col_str, row_str)
1365                    }
1366                } else {
1367                    format!("{}{}", col_str, row_str)
1368                }
1369            }
1370            ReferenceType::Range {
1371                sheet,
1372                start_row,
1373                start_col,
1374                end_row,
1375                end_col,
1376                start_row_abs,
1377                start_col_abs,
1378                end_row_abs,
1379                end_col_abs,
1380            } => {
1381                // Format start reference
1382                let start_ref = match (start_col, start_row) {
1383                    (Some(col), Some(row)) => format!(
1384                        "{}{}",
1385                        Self::format_col(*col, *start_col_abs),
1386                        Self::format_row(*row, *start_row_abs)
1387                    ),
1388                    (Some(col), None) => Self::format_col(*col, *start_col_abs),
1389                    (None, Some(row)) => Self::format_row(*row, *start_row_abs),
1390                    (None, None) => "".to_string(), // Should not happen in normal usage
1391                };
1392
1393                // Format end reference
1394                let end_ref = match (end_col, end_row) {
1395                    (Some(col), Some(row)) => format!(
1396                        "{}{}",
1397                        Self::format_col(*col, *end_col_abs),
1398                        Self::format_row(*row, *end_row_abs)
1399                    ),
1400                    (Some(col), None) => Self::format_col(*col, *end_col_abs),
1401                    (None, Some(row)) => Self::format_row(*row, *end_row_abs),
1402                    (None, None) => "".to_string(), // Should not happen in normal usage
1403                };
1404
1405                let range_part = format!("{start_ref}:{end_ref}");
1406
1407                if let Some(s) = sheet {
1408                    if sheet_name_needs_quoting(s) {
1409                        let escaped_name = s.replace('\'', "''");
1410                        format!("'{escaped_name}'!{range_part}")
1411                    } else {
1412                        format!("{s}!{range_part}")
1413                    }
1414                } else {
1415                    range_part
1416                }
1417            }
1418            ReferenceType::External(ext) => ext.raw.clone(),
1419            ReferenceType::Table(table_ref) => {
1420                if let Some(specifier) = &table_ref.specifier {
1421                    format!("{}[{}]", table_ref.name, specifier)
1422                } else {
1423                    table_ref.name.clone()
1424                }
1425            }
1426            ReferenceType::NamedRange(name) => name.clone(),
1427        }
1428    }
1429}
1430
/// The different types of AST nodes.
///
/// Each variant is one syntactic form that a parsed formula element can take.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, PartialEq, Hash)]
pub enum ASTNodeType {
    /// A literal value.
    Literal(LiteralValue),
    /// A reference, kept both as originally written and in parsed form.
    Reference {
        original: String, // Original reference string (preserved for display/debugging)
        reference: ReferenceType, // Parsed reference
    },
    /// An operator applied to a single operand.
    UnaryOp {
        op: String,
        expr: Box<ASTNode>,
    },
    /// An operator applied to a left and a right operand.
    BinaryOp {
        op: String,
        left: Box<ASTNode>,
        right: Box<ASTNode>,
    },
    /// A function call with its arguments in order.
    Function {
        name: String,
        args: Vec<ASTNode>, // Argument nodes, in call order
    },
    /// An array of nodes stored as a list of rows.
    Array(Vec<Vec<ASTNode>>), // Outer Vec: rows; inner Vec: items of one row
}
1455
1456impl Display for ASTNodeType {
1457    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1458        match self {
1459            ASTNodeType::Literal(value) => write!(f, "Literal({value})"),
1460            ASTNodeType::Reference { reference, .. } => write!(f, "Reference({reference:?})"),
1461            ASTNodeType::UnaryOp { op, expr } => write!(f, "UnaryOp({op}, {expr})"),
1462            ASTNodeType::BinaryOp { op, left, right } => {
1463                write!(f, "BinaryOp({op}, {left}, {right})")
1464            }
1465            ASTNodeType::Function { name, args } => write!(f, "Function({name}, {args:?})"),
1466            ASTNodeType::Array(rows) => write!(f, "Array({rows:?})"),
1467        }
1468    }
1469}
1470
/// An AST node represents a parsed formula element
///
/// It pairs the node's kind with the token it was built from, if any, and a
/// flag recording whether the tree contains volatile function calls.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, PartialEq)]
pub struct ASTNode {
    /// What kind of node this is, including any child nodes.
    pub node_type: ASTNodeType,
    /// The token this node was created from, when one is available.
    pub source_token: Option<Token>,
    /// True if this AST contains any volatile function calls.
    ///
    /// This is set by the parser when a volatility classifier is provided.
    /// For ASTs constructed manually (e.g., in tests), this defaults to false.
    pub contains_volatile: bool,
}
1483
1484impl ASTNode {
1485    pub fn new(node_type: ASTNodeType, source_token: Option<Token>) -> Self {
1486        ASTNode {
1487            node_type,
1488            source_token,
1489            contains_volatile: false,
1490        }
1491    }
1492
1493    /// Create an ASTNode while explicitly setting contains_volatile.
1494    pub fn new_with_volatile(
1495        node_type: ASTNodeType,
1496        source_token: Option<Token>,
1497        contains_volatile: bool,
1498    ) -> Self {
1499        ASTNode {
1500            node_type,
1501            source_token,
1502            contains_volatile,
1503        }
1504    }
1505
    /// Whether this AST contains any volatile functions.
    ///
    /// Returns the stored `contains_volatile` flag; nothing is recomputed.
    pub fn contains_volatile(&self) -> bool {
        self.contains_volatile
    }
1510
    /// Return a 64-bit fingerprint of this node.
    ///
    /// This is the same value as [`ASTNode::calculate_hash`].
    pub fn fingerprint(&self) -> u64 {
        self.calculate_hash()
    }
1514
1515    /// Calculate a hash for this ASTNode
1516    pub fn calculate_hash(&self) -> u64 {
1517        let mut hasher = FormulaHasher::new();
1518        self.hash_node(&mut hasher);
1519        hasher.finish()
1520    }
1521
1522    fn hash_node(&self, hasher: &mut FormulaHasher) {
1523        match &self.node_type {
1524            ASTNodeType::Literal(value) => {
1525                hasher.write(&[1]); // Discriminant for Literal
1526                value.hash(hasher);
1527            }
1528            ASTNodeType::Reference { reference, .. } => {
1529                hasher.write(&[2]); // Discriminant for Reference
1530                reference.hash(hasher);
1531            }
1532            ASTNodeType::UnaryOp { op, expr } => {
1533                hasher.write(&[3]); // Discriminant for UnaryOp
1534                hasher.write(op.as_bytes());
1535                expr.hash_node(hasher);
1536            }
1537            ASTNodeType::BinaryOp { op, left, right } => {
1538                hasher.write(&[4]); // Discriminant for BinaryOp
1539                hasher.write(op.as_bytes());
1540                left.hash_node(hasher);
1541                right.hash_node(hasher);
1542            }
1543            ASTNodeType::Function { name, args } => {
1544                hasher.write(&[5]); // Discriminant for Function
1545                // Use lowercase function name to be case-insensitive
1546                let name_lower = name.to_lowercase();
1547                hasher.write(name_lower.as_bytes());
1548                hasher.write_usize(args.len());
1549                for arg in args {
1550                    arg.hash_node(hasher);
1551                }
1552            }
1553            ASTNodeType::Array(rows) => {
1554                hasher.write(&[6]); // Discriminant for Array
1555                hasher.write_usize(rows.len());
1556                for row in rows {
1557                    hasher.write_usize(row.len());
1558                    for item in row {
1559                        item.hash_node(hasher);
1560                    }
1561                }
1562            }
1563        }
1564    }
1565
1566    pub fn get_dependencies(&self) -> Vec<&ReferenceType> {
1567        let mut dependencies = Vec::new();
1568        self.collect_dependencies(&mut dependencies);
1569        dependencies
1570    }
1571
1572    pub fn get_dependency_strings(&self) -> Vec<String> {
1573        self.get_dependencies()
1574            .into_iter()
1575            .map(|dep| format!("{dep}"))
1576            .collect()
1577    }
1578
1579    fn collect_dependencies<'a>(&'a self, dependencies: &mut Vec<&'a ReferenceType>) {
1580        match &self.node_type {
1581            ASTNodeType::Reference { reference, .. } => {
1582                dependencies.push(reference);
1583            }
1584            ASTNodeType::UnaryOp { expr, .. } => {
1585                expr.collect_dependencies(dependencies);
1586            }
1587            ASTNodeType::BinaryOp { left, right, .. } => {
1588                left.collect_dependencies(dependencies);
1589                right.collect_dependencies(dependencies);
1590            }
1591            ASTNodeType::Function { args, .. } => {
1592                for arg in args {
1593                    arg.collect_dependencies(dependencies);
1594                }
1595            }
1596            ASTNodeType::Array(rows) => {
1597                for row in rows {
1598                    for item in row {
1599                        item.collect_dependencies(dependencies);
1600                    }
1601                }
1602            }
1603            _ => {}
1604        }
1605    }
1606
1607    /// Lightweight borrowed view of a reference encountered during AST traversal.
1608    /// This mirrors ReferenceType variants but borrows sheet/name strings to avoid allocation.
1609    pub fn refs(&self) -> RefIter<'_> {
1610        RefIter {
1611            stack: smallvec::smallvec![self],
1612        }
1613    }
1614
1615    /// Visit all references in this AST without allocating intermediates.
1616    pub fn visit_refs<V: FnMut(RefView<'_>)>(&self, mut visitor: V) {
1617        let mut stack: Vec<&ASTNode> = Vec::with_capacity(8);
1618        stack.push(self);
1619        while let Some(node) = stack.pop() {
1620            match &node.node_type {
1621                ASTNodeType::Reference { reference, .. } => visitor(RefView::from(reference)),
1622                ASTNodeType::UnaryOp { expr, .. } => stack.push(expr),
1623                ASTNodeType::BinaryOp { left, right, .. } => {
1624                    // Push right first so left is visited first (stable-ish order)
1625                    stack.push(right);
1626                    stack.push(left);
1627                }
1628                ASTNodeType::Function { args, .. } => {
1629                    for a in args.iter().rev() {
1630                        stack.push(a);
1631                    }
1632                }
1633                ASTNodeType::Array(rows) => {
1634                    for r in rows.iter().rev() {
1635                        for item in r.iter().rev() {
1636                            stack.push(item);
1637                        }
1638                    }
1639                }
1640                ASTNodeType::Literal(_) => {}
1641            }
1642        }
1643    }
1644
1645    /// Convenience: collect references into a small, inline vector based on a policy.
1646    pub fn collect_references(&self, policy: &CollectPolicy) -> SmallVec<[ReferenceType; 4]> {
1647        let mut out: SmallVec<[ReferenceType; 4]> = SmallVec::new();
1648        self.visit_refs(|rv| match rv {
1649            RefView::Cell {
1650                sheet,
1651                row,
1652                col,
1653                row_abs,
1654                col_abs,
1655            } => out.push(ReferenceType::Cell {
1656                sheet: sheet.map(|s| s.to_string()),
1657                row,
1658                col,
1659                row_abs,
1660                col_abs,
1661            }),
1662            RefView::Range {
1663                sheet,
1664                start_row,
1665                start_col,
1666                end_row,
1667                end_col,
1668                start_row_abs,
1669                start_col_abs,
1670                end_row_abs,
1671                end_col_abs,
1672            } => {
1673                // Optionally expand very small finite ranges into individual cells
1674                if policy.expand_small_ranges {
1675                    if let (Some(sr), Some(sc), Some(er), Some(ec)) =
1676                        (start_row, start_col, end_row, end_col)
1677                    {
1678                        let rows = er.saturating_sub(sr) + 1;
1679                        let cols = ec.saturating_sub(sc) + 1;
1680                        let area = rows.saturating_mul(cols);
1681                        if area as usize <= policy.range_expansion_limit {
1682                            let row_abs = start_row_abs && end_row_abs;
1683                            let col_abs = start_col_abs && end_col_abs;
1684                            for r in sr..=er {
1685                                for c in sc..=ec {
1686                                    out.push(ReferenceType::Cell {
1687                                        sheet: sheet.map(|s| s.to_string()),
1688                                        row: r,
1689                                        col: c,
1690                                        row_abs,
1691                                        col_abs,
1692                                    });
1693                                }
1694                            }
1695                            return; // handled
1696                        }
1697                    }
1698                }
1699                out.push(ReferenceType::Range {
1700                    sheet: sheet.map(|s| s.to_string()),
1701                    start_row,
1702                    start_col,
1703                    end_row,
1704                    end_col,
1705                    start_row_abs,
1706                    start_col_abs,
1707                    end_row_abs,
1708                    end_col_abs,
1709                });
1710            }
1711            RefView::External {
1712                raw,
1713                book,
1714                sheet,
1715                kind,
1716            } => out.push(ReferenceType::External(ExternalReference {
1717                raw: raw.to_string(),
1718                book: ExternalBookRef::Token(book.to_string()),
1719                sheet: sheet.to_string(),
1720                kind,
1721            })),
1722            RefView::Table { name, specifier } => out.push(ReferenceType::Table(TableReference {
1723                name: name.to_string(),
1724                specifier: specifier.cloned(),
1725            })),
1726            RefView::NamedRange { name } => {
1727                if policy.include_names {
1728                    out.push(ReferenceType::NamedRange(name.to_string()));
1729                }
1730            }
1731        });
1732        out
1733    }
1734    /// Recursively updates sheet references within the AST.
1735    ///
1736    /// If `target_name` is provided, only references matching that sheet name are updated.
1737    /// This is used for "healing" specific broken references (Tombstone rescue).
1738    /// If `target_name` is None, it acts as a global rename (standard sheet rename).
1739    pub fn update_sheet_references(&mut self, target_name: Option<&str>, new_name: &str) {
1740        match &mut self.node_type {
1741            ASTNodeType::Reference {
1742                reference: ReferenceType::Cell { sheet, .. } | ReferenceType::Range { sheet, .. },
1743                ..
1744            } => {
1745                if let Some(current_sheet) = sheet
1746                    && (target_name.is_none() || target_name == Some(current_sheet.as_str()))
1747                {
1748                    *sheet = Some(new_name.to_string());
1749                }
1750            }
1751            ASTNodeType::UnaryOp { expr, .. } => {
1752                expr.update_sheet_references(target_name, new_name);
1753            }
1754            ASTNodeType::BinaryOp { left, right, .. } => {
1755                left.update_sheet_references(target_name, new_name);
1756                right.update_sheet_references(target_name, new_name);
1757            }
1758            ASTNodeType::Function { args, .. } => {
1759                for arg in args {
1760                    arg.update_sheet_references(target_name, new_name);
1761                }
1762            }
1763            ASTNodeType::Array(rows) => {
1764                for row in rows {
1765                    for cell in row {
1766                        cell.update_sheet_references(target_name, new_name);
1767                    }
1768                }
1769            }
1770            _ => {}
1771        }
1772    }
1773}
1774
/// A borrowing view over a ReferenceType. Avoids cloning sheet/names while walking.
///
/// Each variant mirrors the `ReferenceType` variant of the same name, with owned
/// strings replaced by `&str` borrows and plain values copied. The view is `Copy`,
/// so it can be passed around freely while the AST is borrowed.
#[derive(Clone, Copy, Debug)]
pub enum RefView<'a> {
    /// View of a single-cell reference.
    Cell {
        /// Sheet qualifier, if the reference carries one.
        sheet: Option<&'a str>,
        row: u32,
        col: u32,
        // NOTE(review): presumably the `$` (absolute) markers of the row/column — confirm.
        row_abs: bool,
        col_abs: bool,
    },
    /// View of a range reference; a `None` bound means that side of the range is absent.
    Range {
        /// Sheet qualifier, if the reference carries one.
        sheet: Option<&'a str>,
        start_row: Option<u32>,
        start_col: Option<u32>,
        end_row: Option<u32>,
        end_col: Option<u32>,
        start_row_abs: bool,
        start_col_abs: bool,
        end_row_abs: bool,
        end_col_abs: bool,
    },
    /// View of an external reference.
    External {
        /// The `raw` string of the underlying external reference.
        raw: &'a str,
        /// The book token of the underlying external reference.
        book: &'a str,
        sheet: &'a str,
        kind: ExternalRefKind,
    },
    /// View of a table reference.
    Table {
        name: &'a str,
        /// Table specifier, if one was given.
        specifier: Option<&'a TableSpecifier>,
    },
    /// View of a named-range reference.
    NamedRange {
        name: &'a str,
    },
}
1810
1811impl<'a> From<&'a ReferenceType> for RefView<'a> {
1812    fn from(r: &'a ReferenceType) -> Self {
1813        match r {
1814            ReferenceType::Cell {
1815                sheet,
1816                row,
1817                col,
1818                row_abs,
1819                col_abs,
1820            } => RefView::Cell {
1821                sheet: sheet.as_deref(),
1822                row: *row,
1823                col: *col,
1824                row_abs: *row_abs,
1825                col_abs: *col_abs,
1826            },
1827            ReferenceType::Range {
1828                sheet,
1829                start_row,
1830                start_col,
1831                end_row,
1832                end_col,
1833                start_row_abs,
1834                start_col_abs,
1835                end_row_abs,
1836                end_col_abs,
1837            } => RefView::Range {
1838                sheet: sheet.as_deref(),
1839                start_row: *start_row,
1840                start_col: *start_col,
1841                end_row: *end_row,
1842                end_col: *end_col,
1843                start_row_abs: *start_row_abs,
1844                start_col_abs: *start_col_abs,
1845                end_row_abs: *end_row_abs,
1846                end_col_abs: *end_col_abs,
1847            },
1848            ReferenceType::External(ext) => RefView::External {
1849                raw: ext.raw.as_str(),
1850                book: ext.book.token(),
1851                sheet: ext.sheet.as_str(),
1852                kind: ext.kind,
1853            },
1854            ReferenceType::Table(tr) => RefView::Table {
1855                name: tr.name.as_str(),
1856                specifier: tr.specifier.as_ref(),
1857            },
1858            ReferenceType::NamedRange(name) => RefView::NamedRange { name },
1859        }
1860    }
1861}
1862
/// Iterator over RefView for an AST, implemented via an explicit stack to avoid recursion allocation.
///
/// Created by `ASTNode::refs`. Nodes are popped from the back of `stack`, so
/// children are pushed in reverse to be visited in source order.
pub struct RefIter<'a> {
    /// Nodes still waiting to be visited; the next node to process is at the back.
    stack: smallvec::SmallVec<[&'a ASTNode; 8]>,
}
1867
1868impl<'a> Iterator for RefIter<'a> {
1869    type Item = RefView<'a>;
1870    fn next(&mut self) -> Option<Self::Item> {
1871        while let Some(node) = self.stack.pop() {
1872            match &node.node_type {
1873                ASTNodeType::Reference { reference, .. } => return Some(RefView::from(reference)),
1874                ASTNodeType::UnaryOp { expr, .. } => self.stack.push(expr),
1875                ASTNodeType::BinaryOp { left, right, .. } => {
1876                    self.stack.push(right);
1877                    self.stack.push(left);
1878                }
1879                ASTNodeType::Function { args, .. } => {
1880                    for a in args.iter().rev() {
1881                        self.stack.push(a);
1882                    }
1883                }
1884                ASTNodeType::Array(rows) => {
1885                    for r in rows.iter().rev() {
1886                        for item in r.iter().rev() {
1887                            self.stack.push(item);
1888                        }
1889                    }
1890                }
1891                ASTNodeType::Literal(_) => {}
1892            }
1893        }
1894        None
1895    }
1896}
1897
/// Policy controlling how references are collected.
///
/// Consumed by `ASTNode::collect_references`.
#[derive(Debug, Clone)]
pub struct CollectPolicy {
    /// When true, fully bounded ranges that are small enough are replaced by their
    /// individual cells.
    pub expand_small_ranges: bool,
    /// Maximum number of cells a range may cover and still be expanded.
    /// Only consulted when `expand_small_ranges` is true.
    pub range_expansion_limit: usize,
    /// When true, named-range references are included in the collected output.
    pub include_names: bool,
}
1905
1906impl Default for CollectPolicy {
1907    fn default() -> Self {
1908        Self {
1909            expand_small_ranges: false,
1910            range_expansion_limit: 0,
1911            include_names: true,
1912        }
1913    }
1914}
1915
1916impl Display for ASTNode {
1917    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1918        write!(f, "{}", self.node_type)
1919    }
1920}
1921
1922impl std::hash::Hash for ASTNode {
1923    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
1924        let hash = self.calculate_hash();
1925        state.write_u64(hash);
1926    }
1927}
1928
/// A parser for converting tokens into an AST.
///
/// The parser walks `tokens` with a cursor (`position`) and builds `ASTNode`s
/// using precedence climbing.
pub struct Parser {
    /// Token stream to parse. Whitespace tokens are removed at construction
    /// unless the caller asked to keep them.
    tokens: Arc<[Token]>,
    /// Index into `tokens` of the next token to examine.
    position: usize,
    /// Optional classifier to determine whether a function name is volatile.
    volatility_classifier: Option<VolatilityClassifierBox>,
    /// Dialect passed to reference parsing for range operands.
    dialect: FormulaDialect,
}
1937
1938impl TryFrom<&str> for Parser {
1939    type Error = TokenizerError;
1940
1941    fn try_from(formula: &str) -> Result<Self, Self::Error> {
1942        let tokens = Tokenizer::new(formula)?.items;
1943        Ok(Self::new(tokens, false))
1944    }
1945}
1946
1947impl TryFrom<String> for Parser {
1948    type Error = TokenizerError;
1949
1950    fn try_from(formula: String) -> Result<Self, Self::Error> {
1951        Self::try_from(formula.as_str())
1952    }
1953}
1954
1955impl Parser {
1956    pub fn new(tokens: Vec<Token>, include_whitespace: bool) -> Self {
1957        Self::new_with_dialect(tokens, include_whitespace, FormulaDialect::Excel)
1958    }
1959
1960    pub fn new_with_dialect(
1961        mut tokens: Vec<Token>,
1962        include_whitespace: bool,
1963        dialect: FormulaDialect,
1964    ) -> Self {
1965        if !include_whitespace {
1966            tokens.retain(|t| t.token_type != TokenType::Whitespace);
1967        }
1968
1969        Parser {
1970            tokens: Arc::from(tokens.into_boxed_slice()),
1971            position: 0,
1972            volatility_classifier: None,
1973            dialect,
1974        }
1975    }
1976
1977    pub fn try_from_formula(formula: &str) -> Result<Self, TokenizerError> {
1978        let tokens = Tokenizer::new(formula)?.items;
1979        Ok(Self::new(tokens, false))
1980    }
1981
1982    /// Provide a function-volatility classifier for this parser.
1983    /// If set, the parser will annotate ASTs with a contains_volatile bit.
1984    pub fn with_volatility_classifier<F>(mut self, f: F) -> Self
1985    where
1986        F: Fn(&str) -> bool + Send + Sync + 'static,
1987    {
1988        self.volatility_classifier = Some(Box::new(f));
1989        self
1990    }
1991
1992    /// Convenience constructor to set a classifier alongside other options.
1993    pub fn new_with_classifier<F>(tokens: Vec<Token>, include_whitespace: bool, f: F) -> Self
1994    where
1995        F: Fn(&str) -> bool + Send + Sync + 'static,
1996    {
1997        Self::new(tokens, include_whitespace).with_volatility_classifier(f)
1998    }
1999
2000    pub fn new_with_classifier_and_dialect<F>(
2001        tokens: Vec<Token>,
2002        include_whitespace: bool,
2003        dialect: FormulaDialect,
2004        f: F,
2005    ) -> Self
2006    where
2007        F: Fn(&str) -> bool + Send + Sync + 'static,
2008    {
2009        Self::new_with_dialect(tokens, include_whitespace, dialect).with_volatility_classifier(f)
2010    }
2011
2012    fn skip_whitespace(&mut self) {
2013        while self.position < self.tokens.len()
2014            && self.tokens[self.position].token_type == TokenType::Whitespace
2015        {
2016            self.position += 1;
2017        }
2018    }
2019
2020    /// Parse the tokens into an AST.
2021    pub fn parse(&mut self) -> Result<ASTNode, ParserError> {
2022        if self.tokens.is_empty() {
2023            return Err(ParserError {
2024                message: "No tokens to parse".to_string(),
2025                position: None,
2026            });
2027        }
2028
2029        self.skip_whitespace();
2030        if self.position >= self.tokens.len() {
2031            return Err(ParserError {
2032                message: "No tokens to parse".to_string(),
2033                position: None,
2034            });
2035        }
2036
2037        // Check for literal formula (doesn't start with '=')
2038        if self.tokens[self.position].token_type == TokenType::Literal {
2039            let token = self.tokens[self.position].clone();
2040            self.position += 1;
2041            self.skip_whitespace();
2042            if self.position < self.tokens.len() {
2043                return Err(ParserError {
2044                    message: format!(
2045                        "Unexpected token at position {}: {:?}",
2046                        self.position, self.tokens[self.position]
2047                    ),
2048                    position: Some(self.position),
2049                });
2050            }
2051            return Ok(ASTNode::new(
2052                ASTNodeType::Literal(LiteralValue::Text(token.value.clone())),
2053                Some(token),
2054            ));
2055        }
2056
2057        let ast = self.parse_expression()?;
2058        self.skip_whitespace();
2059        if self.position < self.tokens.len() {
2060            return Err(ParserError {
2061                message: format!(
2062                    "Unexpected token at position {}: {:?}",
2063                    self.position, self.tokens[self.position]
2064                ),
2065                position: Some(self.position),
2066            });
2067        }
2068        Ok(ast)
2069    }
2070
2071    fn parse_expression(&mut self) -> Result<ASTNode, ParserError> {
2072        self.parse_bp(0)
2073    }
2074
2075    // Pratt-style precedence parser. `min_precedence` is the minimum binding power
2076    // an operator must have to be consumed at this level.
2077    fn parse_bp(&mut self, min_precedence: u8) -> Result<ASTNode, ParserError> {
2078        let mut left = self.parse_prefix()?;
2079
2080        loop {
2081            self.skip_whitespace();
2082            if self.position >= self.tokens.len() {
2083                break;
2084            }
2085
2086            // Postfix operators (e.g. percent).
2087            if self.tokens[self.position].token_type == TokenType::OpPostfix {
2088                let (precedence, _) = self.tokens[self.position]
2089                    .get_precedence()
2090                    .unwrap_or((0, Associativity::Left));
2091                if precedence < min_precedence {
2092                    break;
2093                }
2094
2095                let op_token = self.tokens[self.position].clone();
2096                self.position += 1;
2097                let contains_volatile = left.contains_volatile;
2098                left = ASTNode::new_with_volatile(
2099                    ASTNodeType::UnaryOp {
2100                        op: op_token.value.clone(),
2101                        expr: Box::new(left),
2102                    },
2103                    Some(op_token),
2104                    contains_volatile,
2105                );
2106                continue;
2107            }
2108
2109            let token = &self.tokens[self.position];
2110            if token.token_type != TokenType::OpInfix {
2111                break;
2112            }
2113
2114            let (precedence, associativity) =
2115                token.get_precedence().unwrap_or((0, Associativity::Left));
2116            if precedence < min_precedence {
2117                break;
2118            }
2119
2120            let op_token = self.tokens[self.position].clone();
2121            self.position += 1;
2122
2123            let next_min_precedence = if associativity == Associativity::Left {
2124                precedence + 1
2125            } else {
2126                precedence
2127            };
2128
2129            let right = self.parse_bp(next_min_precedence)?;
2130            let contains_volatile = left.contains_volatile || right.contains_volatile;
2131            left = ASTNode::new_with_volatile(
2132                ASTNodeType::BinaryOp {
2133                    op: op_token.value.clone(),
2134                    left: Box::new(left),
2135                    right: Box::new(right),
2136                },
2137                Some(op_token),
2138                contains_volatile,
2139            );
2140        }
2141
2142        Ok(left)
2143    }
2144
2145    fn parse_prefix(&mut self) -> Result<ASTNode, ParserError> {
2146        self.skip_whitespace();
2147        if self.position < self.tokens.len()
2148            && self.tokens[self.position].token_type == TokenType::OpPrefix
2149        {
2150            let op_token = self.tokens[self.position].clone();
2151            self.position += 1;
2152
2153            // Prefix unary binds tighter than exponent (Excel semantics),
2154            // so parse the RHS with min_precedence equal to unary's precedence.
2155            let (precedence, _) = op_token
2156                .get_precedence()
2157                .unwrap_or((0, Associativity::Right));
2158
2159            let expr = self.parse_bp(precedence)?;
2160            let contains_volatile = expr.contains_volatile;
2161            return Ok(ASTNode::new_with_volatile(
2162                ASTNodeType::UnaryOp {
2163                    op: op_token.value.clone(),
2164                    expr: Box::new(expr),
2165                },
2166                Some(op_token),
2167                contains_volatile,
2168            ));
2169        }
2170
2171        self.parse_primary()
2172    }
2173
2174    fn parse_primary(&mut self) -> Result<ASTNode, ParserError> {
2175        self.skip_whitespace();
2176        if self.position >= self.tokens.len() {
2177            return Err(ParserError {
2178                message: "Unexpected end of tokens".to_string(),
2179                position: Some(self.position),
2180            });
2181        }
2182
2183        let token = &self.tokens[self.position];
2184        match token.token_type {
2185            TokenType::Operand => {
2186                let operand_token = self.tokens[self.position].clone();
2187                self.position += 1;
2188                self.parse_operand(operand_token)
2189            }
2190            TokenType::Func => {
2191                let func_token = self.tokens[self.position].clone();
2192                self.position += 1;
2193                self.parse_function(func_token)
2194            }
2195            TokenType::Paren if token.subtype == TokenSubType::Open => {
2196                self.position += 1;
2197                let expr = self.parse_expression()?;
2198                if self.position >= self.tokens.len()
2199                    || self.tokens[self.position].token_type != TokenType::Paren
2200                    || self.tokens[self.position].subtype != TokenSubType::Close
2201                {
2202                    return Err(ParserError {
2203                        message: "Expected closing parenthesis".to_string(),
2204                        position: Some(self.position),
2205                    });
2206                }
2207                self.position += 1;
2208                Ok(expr)
2209            }
2210            TokenType::Array if token.subtype == TokenSubType::Open => {
2211                self.position += 1;
2212                self.parse_array()
2213            }
2214            _ => Err(ParserError {
2215                message: format!("Unexpected token: {token:?}"),
2216                position: Some(self.position),
2217            }),
2218        }
2219    }
2220
2221    fn parse_operand(&mut self, token: Token) -> Result<ASTNode, ParserError> {
2222        match token.subtype {
2223            TokenSubType::Number => {
2224                let value = token.value.parse::<f64>().map_err(|_| ParserError {
2225                    message: format!("Invalid number: {}", token.value),
2226                    position: Some(self.position),
2227                })?;
2228                Ok(ASTNode::new(
2229                    ASTNodeType::Literal(LiteralValue::Number(value)),
2230                    Some(token),
2231                ))
2232            }
2233            TokenSubType::Text => {
2234                // Strip surrounding quotes from text literals
2235                let mut text = token.value.clone();
2236                if text.starts_with('"') && text.ends_with('"') && text.len() >= 2 {
2237                    text = text[1..text.len() - 1].to_string();
2238                    // Handle escaped quotes
2239                    text = text.replace("\"\"", "\"");
2240                }
2241                Ok(ASTNode::new(
2242                    ASTNodeType::Literal(LiteralValue::Text(text)),
2243                    Some(token),
2244                ))
2245            }
2246            TokenSubType::Logical => {
2247                let value = token.value.to_uppercase() == "TRUE";
2248                Ok(ASTNode::new(
2249                    ASTNodeType::Literal(LiteralValue::Boolean(value)),
2250                    Some(token),
2251                ))
2252            }
2253            TokenSubType::Error => {
2254                let error = ExcelError::from_error_string(&token.value);
2255                Ok(ASTNode::new(
2256                    ASTNodeType::Literal(LiteralValue::Error(error)),
2257                    Some(token),
2258                ))
2259            }
2260            TokenSubType::Range => {
2261                let reference = ReferenceType::from_string_with_dialect(&token.value, self.dialect)
2262                    .map_err(|e| ParserError {
2263                        message: format!("Invalid reference '{}': {}", token.value, e),
2264                        position: Some(self.position),
2265                    })?;
2266                Ok(ASTNode::new(
2267                    ASTNodeType::Reference {
2268                        original: token.value.clone(),
2269                        reference,
2270                    },
2271                    Some(token),
2272                ))
2273            }
2274            _ => Err(ParserError {
2275                message: format!("Unexpected operand subtype: {:?}", token.subtype),
2276                position: Some(self.position),
2277            }),
2278        }
2279    }
2280
2281    fn parse_function(&mut self, func_token: Token) -> Result<ASTNode, ParserError> {
2282        let name = func_token.value[..func_token.value.len() - 1].to_string();
2283        let args = self.parse_function_arguments()?;
2284        // Determine volatility for this function
2285        let this_is_volatile = self
2286            .volatility_classifier
2287            .as_ref()
2288            .map(|f| f(name.as_str()))
2289            .unwrap_or(false);
2290        let args_volatile = args.iter().any(|a| a.contains_volatile);
2291
2292        Ok(ASTNode::new_with_volatile(
2293            ASTNodeType::Function { name, args },
2294            Some(func_token),
2295            this_is_volatile || args_volatile,
2296        ))
2297    }
2298
2299    /// Parse function arguments.
2300    fn parse_function_arguments(&mut self) -> Result<Vec<ASTNode>, ParserError> {
2301        let mut args = Vec::new();
2302
2303        // Check for closing parenthesis (empty arguments)
2304        if self.position < self.tokens.len()
2305            && self.tokens[self.position].token_type == TokenType::Func
2306            && self.tokens[self.position].subtype == TokenSubType::Close
2307        {
2308            self.position += 1;
2309            return Ok(args);
2310        }
2311
2312        // Handle optional arguments (consecutive separators)
2313        // Check if we start with a separator (empty first argument)
2314        if self.position < self.tokens.len()
2315            && self.tokens[self.position].token_type == TokenType::Sep
2316            && self.tokens[self.position].subtype == TokenSubType::Arg
2317        {
2318            // Empty first argument - represented as empty text literal for compatibility
2319            args.push(ASTNode::new(
2320                ASTNodeType::Literal(LiteralValue::Text("".to_string())),
2321                None,
2322            ));
2323            self.position += 1;
2324        } else {
2325            // Parse first argument
2326            args.push(self.parse_expression()?);
2327        }
2328
2329        // Parse remaining arguments
2330        while self.position < self.tokens.len() {
2331            let token = &self.tokens[self.position];
2332
2333            if token.token_type == TokenType::Sep && token.subtype == TokenSubType::Arg {
2334                self.position += 1;
2335                // Check for consecutive separators (empty argument)
2336                if self.position < self.tokens.len() {
2337                    let next_token = &self.tokens[self.position];
2338                    if next_token.token_type == TokenType::Sep
2339                        && next_token.subtype == TokenSubType::Arg
2340                    {
2341                        // Empty argument - represented as empty text literal for compatibility
2342                        args.push(ASTNode::new(
2343                            ASTNodeType::Literal(LiteralValue::Text("".to_string())),
2344                            None,
2345                        ));
2346                    } else if next_token.token_type == TokenType::Func
2347                        && next_token.subtype == TokenSubType::Close
2348                    {
2349                        // Empty last argument
2350                        args.push(ASTNode::new(
2351                            ASTNodeType::Literal(LiteralValue::Text("".to_string())),
2352                            None,
2353                        ));
2354                        self.position += 1;
2355                        break;
2356                    } else {
2357                        args.push(self.parse_expression()?);
2358                    }
2359                } else {
2360                    // Trailing separator at end of formula
2361                    args.push(ASTNode::new(
2362                        ASTNodeType::Literal(LiteralValue::Text("".to_string())),
2363                        None,
2364                    ));
2365                }
2366            } else if token.token_type == TokenType::Func && token.subtype == TokenSubType::Close {
2367                self.position += 1;
2368                break;
2369            } else {
2370                return Err(ParserError {
2371                    message: format!("Expected ',' or ')' in function arguments, got {token:?}"),
2372                    position: Some(self.position),
2373                });
2374            }
2375        }
2376
2377        Ok(args)
2378    }
2379
2380    fn parse_array(&mut self) -> Result<ASTNode, ParserError> {
2381        let mut rows = Vec::new();
2382        let mut current_row = Vec::new();
2383
2384        // Check for empty array
2385        if self.position < self.tokens.len()
2386            && self.tokens[self.position].token_type == TokenType::Array
2387            && self.tokens[self.position].subtype == TokenSubType::Close
2388        {
2389            self.position += 1;
2390            return Ok(ASTNode::new(ASTNodeType::Array(rows), None));
2391        }
2392
2393        // Parse first element
2394        current_row.push(self.parse_expression()?);
2395
2396        while self.position < self.tokens.len() {
2397            let token = &self.tokens[self.position];
2398
2399            if token.token_type == TokenType::Sep {
2400                if token.subtype == TokenSubType::Arg {
2401                    // Column separator
2402                    self.position += 1;
2403                    current_row.push(self.parse_expression()?);
2404                } else if token.subtype == TokenSubType::Row {
2405                    // Row separator
2406                    self.position += 1;
2407                    rows.push(current_row);
2408                    current_row = vec![self.parse_expression()?];
2409                }
2410            } else if token.token_type == TokenType::Array && token.subtype == TokenSubType::Close {
2411                self.position += 1;
2412                rows.push(current_row);
2413                break;
2414            } else {
2415                return Err(ParserError {
2416                    message: format!("Unexpected token in array: {token:?}"),
2417                    position: Some(self.position),
2418                });
2419            }
2420        }
2421
2422        // Array volatility is the OR of element volatility
2423        let contains_volatile = rows
2424            .iter()
2425            .flat_map(|r| r.iter())
2426            .any(|n| n.contains_volatile);
2427        Ok(ASTNode::new_with_volatile(
2428            ASTNodeType::Array(rows),
2429            None,
2430            contains_volatile,
2431        ))
2432    }
2433}
2434
2435impl From<TokenizerError> for ParserError {
2436    fn from(err: TokenizerError) -> Self {
2437        ParserError {
2438            message: err.message,
2439            position: Some(err.pos),
2440        }
2441    }
2442}
2443
/// Parser that works on token spans plus the original source text instead of
/// owned `Token` values; token text is sliced out of `source` on demand.
struct SpanParser<'a> {
    /// Source text that the spans index into.
    source: &'a str,
    /// Token spans to parse.
    tokens: &'a [crate::tokenizer::TokenSpan],
    /// Index into `tokens` of the next span to examine.
    position: usize,
    /// Optional classifier to determine whether a function name is volatile.
    volatility_classifier: Option<VolatilityClassifierBox>,
    /// Formula dialect this parser was created for.
    dialect: FormulaDialect,
}
2451
2452impl<'a> SpanParser<'a> {
2453    fn new(
2454        source: &'a str,
2455        tokens: &'a [crate::tokenizer::TokenSpan],
2456        dialect: FormulaDialect,
2457    ) -> Self {
2458        SpanParser {
2459            source,
2460            tokens,
2461            position: 0,
2462            volatility_classifier: None,
2463            dialect,
2464        }
2465    }
2466
2467    fn with_volatility_classifier<F>(mut self, f: F) -> Self
2468    where
2469        F: Fn(&str) -> bool + Send + Sync + 'static,
2470    {
2471        self.volatility_classifier = Some(Box::new(f));
2472        self
2473    }
2474
2475    fn skip_whitespace(&mut self) {
2476        while self.position < self.tokens.len()
2477            && self.tokens[self.position].token_type == TokenType::Whitespace
2478        {
2479            self.position += 1;
2480        }
2481    }
2482
2483    fn span_value(&self, span: &crate::tokenizer::TokenSpan) -> &str {
2484        &self.source[span.start..span.end]
2485    }
2486
2487    fn span_to_token(&self, span: &crate::tokenizer::TokenSpan) -> Token {
2488        Token::new_with_span(
2489            self.span_value(span).to_string(),
2490            span.token_type,
2491            span.subtype,
2492            span.start,
2493            span.end,
2494        )
2495    }
2496
2497    fn span_precedence(&self, span: &crate::tokenizer::TokenSpan) -> Option<(u8, Associativity)> {
2498        if !matches!(
2499            span.token_type,
2500            TokenType::OpPrefix | TokenType::OpInfix | TokenType::OpPostfix
2501        ) {
2502            return None;
2503        }
2504
2505        let op = if span.token_type == TokenType::OpPrefix {
2506            "u"
2507        } else {
2508            self.span_value(span)
2509        };
2510
2511        match op {
2512            ":" | " " | "," => Some((8, Associativity::Left)),
2513            "%" => Some((7, Associativity::Left)),
2514            "u" => Some((6, Associativity::Right)),
2515            "^" => Some((5, Associativity::Right)),
2516            "*" | "/" => Some((4, Associativity::Left)),
2517            "+" | "-" => Some((3, Associativity::Left)),
2518            "&" => Some((2, Associativity::Left)),
2519            "=" | "<" | ">" | "<=" | ">=" | "<>" => Some((1, Associativity::Left)),
2520            _ => None,
2521        }
2522    }
2523
2524    fn parse(&mut self) -> Result<ASTNode, ParserError> {
2525        if self.tokens.is_empty() {
2526            return Err(ParserError {
2527                message: "No tokens to parse".to_string(),
2528                position: None,
2529            });
2530        }
2531
2532        self.skip_whitespace();
2533        if self.position >= self.tokens.len() {
2534            return Err(ParserError {
2535                message: "No tokens to parse".to_string(),
2536                position: None,
2537            });
2538        }
2539
2540        if self.tokens[self.position].token_type == TokenType::Literal {
2541            let span = self.tokens[self.position];
2542            self.position += 1;
2543            self.skip_whitespace();
2544            if self.position < self.tokens.len() {
2545                return Err(ParserError {
2546                    message: format!(
2547                        "Unexpected token at position {}: {:?}",
2548                        self.position, self.tokens[self.position]
2549                    ),
2550                    position: Some(self.position),
2551                });
2552            }
2553
2554            let token = self.span_to_token(&span);
2555            return Ok(ASTNode::new(
2556                ASTNodeType::Literal(LiteralValue::Text(token.value.clone())),
2557                Some(token),
2558            ));
2559        }
2560
2561        let ast = self.parse_expression()?;
2562        self.skip_whitespace();
2563        if self.position < self.tokens.len() {
2564            return Err(ParserError {
2565                message: format!(
2566                    "Unexpected token at position {}: {:?}",
2567                    self.position, self.tokens[self.position]
2568                ),
2569                position: Some(self.position),
2570            });
2571        }
2572        Ok(ast)
2573    }
2574
2575    fn parse_expression(&mut self) -> Result<ASTNode, ParserError> {
2576        self.parse_bp(0)
2577    }
2578
2579    fn parse_bp(&mut self, min_precedence: u8) -> Result<ASTNode, ParserError> {
2580        let mut left = self.parse_prefix()?;
2581
2582        loop {
2583            self.skip_whitespace();
2584            if self.position >= self.tokens.len() {
2585                break;
2586            }
2587
2588            if self.tokens[self.position].token_type == TokenType::OpPostfix {
2589                let (precedence, _) = self
2590                    .span_precedence(&self.tokens[self.position])
2591                    .unwrap_or((0, Associativity::Left));
2592                if precedence < min_precedence {
2593                    break;
2594                }
2595
2596                let op_span = self.tokens[self.position];
2597                self.position += 1;
2598                let op_token = self.span_to_token(&op_span);
2599                let contains_volatile = left.contains_volatile;
2600                left = ASTNode::new_with_volatile(
2601                    ASTNodeType::UnaryOp {
2602                        op: op_token.value.clone(),
2603                        expr: Box::new(left),
2604                    },
2605                    Some(op_token),
2606                    contains_volatile,
2607                );
2608                continue;
2609            }
2610
2611            let token = &self.tokens[self.position];
2612            if token.token_type != TokenType::OpInfix {
2613                break;
2614            }
2615
2616            let (precedence, associativity) = self
2617                .span_precedence(token)
2618                .unwrap_or((0, Associativity::Left));
2619            if precedence < min_precedence {
2620                break;
2621            }
2622
2623            let op_span = self.tokens[self.position];
2624            self.position += 1;
2625
2626            let next_min_precedence = if associativity == Associativity::Left {
2627                precedence + 1
2628            } else {
2629                precedence
2630            };
2631
2632            let right = self.parse_bp(next_min_precedence)?;
2633            let op_token = self.span_to_token(&op_span);
2634            let contains_volatile = left.contains_volatile || right.contains_volatile;
2635            left = ASTNode::new_with_volatile(
2636                ASTNodeType::BinaryOp {
2637                    op: op_token.value.clone(),
2638                    left: Box::new(left),
2639                    right: Box::new(right),
2640                },
2641                Some(op_token),
2642                contains_volatile,
2643            );
2644        }
2645
2646        Ok(left)
2647    }
2648
2649    fn parse_prefix(&mut self) -> Result<ASTNode, ParserError> {
2650        self.skip_whitespace();
2651        if self.position < self.tokens.len()
2652            && self.tokens[self.position].token_type == TokenType::OpPrefix
2653        {
2654            let op_span = self.tokens[self.position];
2655            self.position += 1;
2656
2657            let (precedence, _) = self
2658                .span_precedence(&op_span)
2659                .unwrap_or((0, Associativity::Right));
2660
2661            let expr = self.parse_bp(precedence)?;
2662            let op_token = self.span_to_token(&op_span);
2663            let contains_volatile = expr.contains_volatile;
2664            return Ok(ASTNode::new_with_volatile(
2665                ASTNodeType::UnaryOp {
2666                    op: op_token.value.clone(),
2667                    expr: Box::new(expr),
2668                },
2669                Some(op_token),
2670                contains_volatile,
2671            ));
2672        }
2673
2674        self.parse_primary()
2675    }
2676
2677    fn parse_primary(&mut self) -> Result<ASTNode, ParserError> {
2678        self.skip_whitespace();
2679        if self.position >= self.tokens.len() {
2680            return Err(ParserError {
2681                message: "Unexpected end of tokens".to_string(),
2682                position: Some(self.position),
2683            });
2684        }
2685
2686        let token = &self.tokens[self.position];
2687        match token.token_type {
2688            TokenType::Operand => {
2689                let span = self.tokens[self.position];
2690                self.position += 1;
2691                self.parse_operand(span)
2692            }
2693            TokenType::Func => {
2694                let span = self.tokens[self.position];
2695                self.position += 1;
2696                self.parse_function(span)
2697            }
2698            TokenType::Paren if token.subtype == TokenSubType::Open => {
2699                self.position += 1;
2700                let expr = self.parse_expression()?;
2701                self.skip_whitespace();
2702                if self.position >= self.tokens.len()
2703                    || self.tokens[self.position].token_type != TokenType::Paren
2704                    || self.tokens[self.position].subtype != TokenSubType::Close
2705                {
2706                    return Err(ParserError {
2707                        message: "Expected closing parenthesis".to_string(),
2708                        position: Some(self.position),
2709                    });
2710                }
2711                self.position += 1;
2712                Ok(expr)
2713            }
2714            TokenType::Array if token.subtype == TokenSubType::Open => {
2715                self.position += 1;
2716                self.parse_array()
2717            }
2718            _ => Err(ParserError {
2719                message: format!("Unexpected token: {token:?}"),
2720                position: Some(self.position),
2721            }),
2722        }
2723    }
2724
2725    fn parse_operand(&mut self, span: crate::tokenizer::TokenSpan) -> Result<ASTNode, ParserError> {
2726        let value = self.span_value(&span);
2727        let token = self.span_to_token(&span);
2728
2729        match span.subtype {
2730            TokenSubType::Number => {
2731                let value = value.parse::<f64>().map_err(|_| ParserError {
2732                    message: format!("Invalid number: {value}"),
2733                    position: Some(self.position),
2734                })?;
2735                Ok(ASTNode::new(
2736                    ASTNodeType::Literal(LiteralValue::Number(value)),
2737                    Some(token),
2738                ))
2739            }
2740            TokenSubType::Text => {
2741                let mut text = value.to_string();
2742                if text.starts_with('"') && text.ends_with('"') && text.len() >= 2 {
2743                    text = text[1..text.len() - 1].to_string();
2744                    text = text.replace("\"\"", "\"");
2745                }
2746                Ok(ASTNode::new(
2747                    ASTNodeType::Literal(LiteralValue::Text(text)),
2748                    Some(token),
2749                ))
2750            }
2751            TokenSubType::Logical => {
2752                let v = value.to_uppercase() == "TRUE";
2753                Ok(ASTNode::new(
2754                    ASTNodeType::Literal(LiteralValue::Boolean(v)),
2755                    Some(token),
2756                ))
2757            }
2758            TokenSubType::Error => {
2759                let error = ExcelError::from_error_string(value);
2760                Ok(ASTNode::new(
2761                    ASTNodeType::Literal(LiteralValue::Error(error)),
2762                    Some(token),
2763                ))
2764            }
2765            TokenSubType::Range => {
2766                let reference = ReferenceType::from_string_with_dialect(value, self.dialect)
2767                    .map_err(|e| ParserError {
2768                        message: format!("Invalid reference '{value}': {e}"),
2769                        position: Some(self.position),
2770                    })?;
2771                Ok(ASTNode::new(
2772                    ASTNodeType::Reference {
2773                        original: value.to_string(),
2774                        reference,
2775                    },
2776                    Some(token),
2777                ))
2778            }
2779            _ => Err(ParserError {
2780                message: format!("Unexpected operand subtype: {:?}", span.subtype),
2781                position: Some(self.position),
2782            }),
2783        }
2784    }
2785
2786    fn parse_function(
2787        &mut self,
2788        func_span: crate::tokenizer::TokenSpan,
2789    ) -> Result<ASTNode, ParserError> {
2790        let func_value = self.span_value(&func_span);
2791        if func_value.is_empty() {
2792            return Err(ParserError {
2793                message: "Invalid function token".to_string(),
2794                position: Some(self.position),
2795            });
2796        }
2797        let name = func_value[..func_value.len() - 1].to_string();
2798        let args = self.parse_function_arguments()?;
2799
2800        let this_is_volatile = self
2801            .volatility_classifier
2802            .as_ref()
2803            .map(|f| f(name.as_str()))
2804            .unwrap_or(false);
2805        let args_volatile = args.iter().any(|a| a.contains_volatile);
2806
2807        let func_token = self.span_to_token(&func_span);
2808        Ok(ASTNode::new_with_volatile(
2809            ASTNodeType::Function { name, args },
2810            Some(func_token),
2811            this_is_volatile || args_volatile,
2812        ))
2813    }
2814
2815    fn parse_function_arguments(&mut self) -> Result<Vec<ASTNode>, ParserError> {
2816        let mut args = Vec::new();
2817
2818        self.skip_whitespace();
2819        if self.position < self.tokens.len()
2820            && self.tokens[self.position].token_type == TokenType::Func
2821            && self.tokens[self.position].subtype == TokenSubType::Close
2822        {
2823            self.position += 1;
2824            return Ok(args);
2825        }
2826
2827        self.skip_whitespace();
2828        if self.position < self.tokens.len()
2829            && self.tokens[self.position].token_type == TokenType::Sep
2830            && self.tokens[self.position].subtype == TokenSubType::Arg
2831        {
2832            args.push(ASTNode::new(
2833                ASTNodeType::Literal(LiteralValue::Text("".to_string())),
2834                None,
2835            ));
2836            self.position += 1;
2837        } else {
2838            args.push(self.parse_expression()?);
2839        }
2840
2841        while self.position < self.tokens.len() {
2842            self.skip_whitespace();
2843            if self.position >= self.tokens.len() {
2844                break;
2845            }
2846
2847            let token = &self.tokens[self.position];
2848            if token.token_type == TokenType::Sep && token.subtype == TokenSubType::Arg {
2849                self.position += 1;
2850                self.skip_whitespace();
2851                if self.position < self.tokens.len() {
2852                    let next_token = &self.tokens[self.position];
2853                    if next_token.token_type == TokenType::Sep
2854                        && next_token.subtype == TokenSubType::Arg
2855                    {
2856                        args.push(ASTNode::new(
2857                            ASTNodeType::Literal(LiteralValue::Text("".to_string())),
2858                            None,
2859                        ));
2860                    } else if next_token.token_type == TokenType::Func
2861                        && next_token.subtype == TokenSubType::Close
2862                    {
2863                        args.push(ASTNode::new(
2864                            ASTNodeType::Literal(LiteralValue::Text("".to_string())),
2865                            None,
2866                        ));
2867                        self.position += 1;
2868                        break;
2869                    } else {
2870                        args.push(self.parse_expression()?);
2871                    }
2872                } else {
2873                    args.push(ASTNode::new(
2874                        ASTNodeType::Literal(LiteralValue::Text("".to_string())),
2875                        None,
2876                    ));
2877                }
2878            } else if token.token_type == TokenType::Func && token.subtype == TokenSubType::Close {
2879                self.position += 1;
2880                break;
2881            } else {
2882                return Err(ParserError {
2883                    message: format!("Expected ',' or ')' in function arguments, got {token:?}"),
2884                    position: Some(self.position),
2885                });
2886            }
2887        }
2888
2889        Ok(args)
2890    }
2891
2892    fn parse_array(&mut self) -> Result<ASTNode, ParserError> {
2893        let mut rows = Vec::new();
2894        let mut current_row = Vec::new();
2895
2896        self.skip_whitespace();
2897        if self.position < self.tokens.len()
2898            && self.tokens[self.position].token_type == TokenType::Array
2899            && self.tokens[self.position].subtype == TokenSubType::Close
2900        {
2901            self.position += 1;
2902            return Ok(ASTNode::new(ASTNodeType::Array(rows), None));
2903        }
2904
2905        current_row.push(self.parse_expression()?);
2906
2907        while self.position < self.tokens.len() {
2908            self.skip_whitespace();
2909            if self.position >= self.tokens.len() {
2910                break;
2911            }
2912            let token = &self.tokens[self.position];
2913
2914            if token.token_type == TokenType::Sep {
2915                if token.subtype == TokenSubType::Arg {
2916                    self.position += 1;
2917                    current_row.push(self.parse_expression()?);
2918                } else if token.subtype == TokenSubType::Row {
2919                    self.position += 1;
2920                    rows.push(current_row);
2921                    current_row = vec![self.parse_expression()?];
2922                }
2923            } else if token.token_type == TokenType::Array && token.subtype == TokenSubType::Close {
2924                self.position += 1;
2925                rows.push(current_row);
2926                break;
2927            } else {
2928                return Err(ParserError {
2929                    message: format!("Unexpected token in array: {token:?}"),
2930                    position: Some(self.position),
2931                });
2932            }
2933        }
2934
2935        let contains_volatile = rows
2936            .iter()
2937            .flat_map(|r| r.iter())
2938            .any(|n| n.contains_volatile);
2939
2940        Ok(ASTNode::new_with_volatile(
2941            ASTNodeType::Array(rows),
2942            None,
2943            contains_volatile,
2944        ))
2945    }
2946}
2947
2948/// Normalise a reference string to its canonical form
2949pub fn normalise_reference(reference: &str) -> Result<String, ParsingError> {
2950    let ref_type = ReferenceType::from_string(reference)?;
2951    Ok(ref_type.to_string())
2952}
2953
2954pub fn parse<T: AsRef<str>>(formula: T) -> Result<ASTNode, ParserError> {
2955    parse_with_dialect(formula, FormulaDialect::Excel)
2956}
2957
2958pub fn parse_with_dialect<T: AsRef<str>>(
2959    formula: T,
2960    dialect: FormulaDialect,
2961) -> Result<ASTNode, ParserError> {
2962    let spans = crate::tokenizer::tokenize_spans_with_dialect(formula.as_ref(), dialect)?;
2963    let mut parser = SpanParser::new(formula.as_ref(), &spans, dialect);
2964    parser.parse()
2965}
2966
2967/// Parse a single formula and annotate volatility using the provided classifier.
2968/// This is a convenience wrapper around `Parser::new_with_classifier`.
2969pub fn parse_with_volatility_classifier<T, F>(
2970    formula: T,
2971    classifier: F,
2972) -> Result<ASTNode, ParserError>
2973where
2974    T: AsRef<str>,
2975    F: Fn(&str) -> bool + Send + Sync + 'static,
2976{
2977    parse_with_dialect_and_volatility_classifier(formula, FormulaDialect::Excel, classifier)
2978}
2979
2980pub fn parse_with_dialect_and_volatility_classifier<T, F>(
2981    formula: T,
2982    dialect: FormulaDialect,
2983    classifier: F,
2984) -> Result<ASTNode, ParserError>
2985where
2986    T: AsRef<str>,
2987    F: Fn(&str) -> bool + Send + Sync + 'static,
2988{
2989    let spans = crate::tokenizer::tokenize_spans_with_dialect(formula.as_ref(), dialect)?;
2990    let mut parser =
2991        SpanParser::new(formula.as_ref(), &spans, dialect).with_volatility_classifier(classifier);
2992    parser.parse()
2993}
2994
2995/// Efficient batch parser with an internal token cache and optional volatility classifier.
2996///
2997/// The cache is keyed by the original formula string; repeated formulas across a batch
2998/// (very common in spreadsheets) will avoid re-tokenization and whitespace filtering.
2999pub struct BatchParser {
3000    include_whitespace: bool,
3001    volatility_classifier: Option<VolatilityClassifierArc>,
3002    token_cache: std::collections::HashMap<String, Arc<[crate::tokenizer::TokenSpan]>>, // cached tokens
3003    dialect: FormulaDialect,
3004}
3005
3006impl BatchParser {
3007    pub fn builder() -> BatchParserBuilder {
3008        BatchParserBuilder::default()
3009    }
3010
3011    /// Parse a formula using the internal cache and configured classifier.
3012    pub fn parse(&mut self, formula: &str) -> Result<ASTNode, ParserError> {
3013        let spans = if let Some(tokens) = self.token_cache.get(formula) {
3014            Arc::clone(tokens)
3015        } else {
3016            let mut spans = crate::tokenizer::tokenize_spans_with_dialect(formula, self.dialect)?;
3017            if !self.include_whitespace {
3018                spans.retain(|t| t.token_type != TokenType::Whitespace);
3019            }
3020
3021            let spans: Arc<[crate::tokenizer::TokenSpan]> = Arc::from(spans.into_boxed_slice());
3022            self.token_cache
3023                .insert(formula.to_string(), Arc::clone(&spans));
3024            spans
3025        };
3026
3027        let mut parser = SpanParser::new(formula, spans.as_ref(), self.dialect);
3028        if let Some(classifier) = self.volatility_classifier.clone() {
3029            parser = parser.with_volatility_classifier(move |name| classifier(name));
3030        }
3031        parser.parse()
3032    }
3033}
3034
3035#[derive(Default)]
3036pub struct BatchParserBuilder {
3037    include_whitespace: bool,
3038    volatility_classifier: Option<VolatilityClassifierArc>,
3039    dialect: FormulaDialect,
3040}
3041
3042impl BatchParserBuilder {
3043    pub fn include_whitespace(mut self, include: bool) -> Self {
3044        self.include_whitespace = include;
3045        self
3046    }
3047
3048    pub fn with_volatility_classifier<F>(mut self, f: F) -> Self
3049    where
3050        F: Fn(&str) -> bool + Send + Sync + 'static,
3051    {
3052        self.volatility_classifier = Some(Arc::new(f));
3053        self
3054    }
3055
3056    pub fn dialect(mut self, dialect: FormulaDialect) -> Self {
3057        self.dialect = dialect;
3058        self
3059    }
3060
3061    pub fn build(self) -> BatchParser {
3062        BatchParser {
3063            include_whitespace: self.include_whitespace,
3064            volatility_classifier: self.volatility_classifier,
3065            token_cache: std::collections::HashMap::new(),
3066            dialect: self.dialect,
3067        }
3068    }
3069}