formualizer_parse/
parser.rs

1use crate::tokenizer::{Associativity, Token, TokenSubType, TokenType, Tokenizer, TokenizerError};
2use crate::types::{FormulaDialect, ParsingError};
3use crate::{ExcelError, LiteralValue};
4
5use crate::hasher::FormulaHasher;
6use formualizer_common::coord::{
7    col_index_from_letters_1based, col_letters_from_1based, parse_a1_1based,
8};
9use formualizer_common::{
10    AxisBound, RelativeCoord, SheetCellRef, SheetLocator, SheetRangeRef, SheetRef,
11};
12use once_cell::sync::Lazy;
13use smallvec::SmallVec;
14use std::error::Error;
15use std::fmt::{self, Display};
16use std::hash::{Hash, Hasher};
17use std::str::FromStr;
18use std::sync::Arc;
19
20type VolatilityFn = dyn Fn(&str) -> bool + Send + Sync + 'static;
21type VolatilityClassifierBox = Box<VolatilityFn>;
22type VolatilityClassifierArc = Arc<VolatilityFn>;
23
24/// A custom error type for the parser.
25#[derive(Debug)]
26pub struct ParserError {
27    pub message: String,
28    pub position: Option<usize>,
29}
30
31impl Display for ParserError {
32    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
33        if let Some(pos) = self.position {
34            write!(f, "ParserError at position {}: {}", pos, self.message)
35        } else {
36            write!(f, "ParserError: {}", self.message)
37        }
38    }
39}
40
41impl Error for ParserError {}
42
43// Column lookup table for common columns (A-ZZ = 702 columns)
44static COLUMN_LOOKUP: Lazy<Vec<String>> = Lazy::new(|| {
45    let mut cols = Vec::with_capacity(702);
46    // Single letters A-Z
47    for c in b'A'..=b'Z' {
48        cols.push(String::from(c as char));
49    }
50    // Double letters AA-ZZ
51    for c1 in b'A'..=b'Z' {
52        for c2 in b'A'..=b'Z' {
53            cols.push(format!("{}{}", c1 as char, c2 as char));
54        }
55    }
56    cols
57});
58
59/// A structured table reference specifier for accessing specific parts of a table
60#[derive(Debug, Clone, PartialEq, Hash)]
61pub enum TableSpecifier {
62    /// The entire table
63    All,
64    /// The data area of the table (no headers or totals)
65    Data,
66    /// The headers row
67    Headers,
68    /// The totals row
69    Totals,
70    /// A specific row
71    Row(TableRowSpecifier),
72    /// A specific column
73    Column(String),
74    /// A range of columns
75    ColumnRange(String, String),
76    /// Special items like #Headers, #Data, #Totals, etc.
77    SpecialItem(SpecialItem),
78    /// A combination of specifiers, for complex references
79    Combination(Vec<Box<TableSpecifier>>),
80}
81
82/// Specifies which row(s) to use in a table reference
83#[derive(Debug, Clone, PartialEq, Hash)]
84pub enum TableRowSpecifier {
85    /// The current row (context dependent)
86    Current,
87    /// All rows
88    All,
89    /// Data rows only
90    Data,
91    /// Headers row
92    Headers,
93    /// Totals row
94    Totals,
95    /// Specific row by index (1-based)
96    Index(u32),
97}
98
99/// Special items in structured references
100#[derive(Debug, Clone, PartialEq, Hash)]
101pub enum SpecialItem {
102    /// The #Headers item
103    Headers,
104    /// The #Data item
105    Data,
106    /// The #Totals item
107    Totals,
108    /// The #All item (the whole table)
109    All,
110    /// The @ item (current row)
111    ThisRow,
112}
113
114/// A reference to a table including specifiers
115#[derive(Debug, Clone, PartialEq, Hash)]
116pub struct TableReference {
117    /// The name of the table
118    pub name: String,
119    /// Optional specifier for which part of the table to use
120    pub specifier: Option<TableSpecifier>,
121}
122
123/// A reference to something outside the cell.
124#[derive(Debug, Clone, PartialEq, Hash)]
125pub enum ReferenceType {
126    Cell {
127        sheet: Option<String>,
128        row: u32,
129        col: u32,
130    },
131    Range {
132        sheet: Option<String>,
133        start_row: Option<u32>,
134        start_col: Option<u32>,
135        end_row: Option<u32>,
136        end_col: Option<u32>,
137    },
138    Table(TableReference),
139    NamedRange(String),
140}
141
142impl Display for TableSpecifier {
143    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
144        match self {
145            TableSpecifier::All => write!(f, "#All"),
146            TableSpecifier::Data => write!(f, "#Data"),
147            TableSpecifier::Headers => write!(f, "#Headers"),
148            TableSpecifier::Totals => write!(f, "#Totals"),
149            TableSpecifier::Row(row) => write!(f, "{row}"),
150            TableSpecifier::Column(column) => write!(f, "{column}"),
151            TableSpecifier::ColumnRange(start, end) => write!(f, "{start}:{end}"),
152            TableSpecifier::SpecialItem(item) => write!(f, "{item}"),
153            TableSpecifier::Combination(specs) => {
154                // Emit nested bracketed parts so the surrounding Table formatter prints
155                // canonical structured refs like Table[[#Headers],[Column1]:[Column2]]
156                let parts: Vec<String> = specs.iter().map(|s| format!("[{s}]")).collect();
157                write!(f, "{}", parts.join(","))
158            }
159        }
160    }
161}
162
163impl Display for TableRowSpecifier {
164    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
165        match self {
166            TableRowSpecifier::Current => write!(f, "@"),
167            TableRowSpecifier::All => write!(f, "#All"),
168            TableRowSpecifier::Data => write!(f, "#Data"),
169            TableRowSpecifier::Headers => write!(f, "#Headers"),
170            TableRowSpecifier::Totals => write!(f, "#Totals"),
171            TableRowSpecifier::Index(idx) => write!(f, "{idx}"),
172        }
173    }
174}
175
176impl Display for SpecialItem {
177    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
178        match self {
179            SpecialItem::Headers => write!(f, "#Headers"),
180            SpecialItem::Data => write!(f, "#Data"),
181            SpecialItem::Totals => write!(f, "#Totals"),
182            SpecialItem::All => write!(f, "#All"),
183            SpecialItem::ThisRow => write!(f, "@"),
184        }
185    }
186}
187
188/// Check if a sheet name needs to be quoted in Excel formulas
189fn sheet_name_needs_quoting(name: &str) -> bool {
190    if name.is_empty() {
191        return false;
192    }
193
194    let bytes = name.as_bytes();
195
196    // Check if starts with a digit
197    if bytes[0].is_ascii_digit() {
198        return true;
199    }
200
201    // Check for any special characters that require quoting
202    // This includes: space, !, ", #, $, %, &, ', (, ), *, +, comma, -, ., /, :, ;, <, =, >, ?, @, [, \, ], ^, `, {, |, }, ~
203    for &byte in bytes {
204        match byte {
205            b' ' | b'!' | b'"' | b'#' | b'$' | b'%' | b'&' | b'\'' | b'(' | b')' | b'*' | b'+'
206            | b',' | b'-' | b'.' | b'/' | b':' | b';' | b'<' | b'=' | b'>' | b'?' | b'@' | b'['
207            | b'\\' | b']' | b'^' | b'`' | b'{' | b'|' | b'}' | b'~' => return true,
208            _ => {}
209        }
210    }
211
212    // Check for Excel reserved words (case-insensitive)
213    let upper = name.to_uppercase();
214    matches!(
215        upper.as_str(),
216        "TRUE" | "FALSE" | "NULL" | "REF" | "DIV" | "NAME" | "NUM" | "VALUE" | "N/A"
217    )
218}
219
220#[derive(Debug, Clone)]
221struct OpenFormulaRefPart {
222    sheet: Option<String>,
223    coord: String,
224}
225
226impl ReferenceType {
227    /// Create a reference from a string. Can be A1, A:A, A1:B2, Table1[Column], etc.
228    pub fn from_string(reference: &str) -> Result<Self, ParsingError> {
229        Self::parse_excel_reference(reference)
230    }
231
232    /// Create a reference from a string using the specified formula dialect.
233    pub fn from_string_with_dialect(
234        reference: &str,
235        dialect: FormulaDialect,
236    ) -> Result<Self, ParsingError> {
237        match dialect {
238            FormulaDialect::Excel => Self::parse_excel_reference(reference),
239            FormulaDialect::OpenFormula => Self::parse_openformula_reference(reference)
240                .or_else(|_| Self::parse_excel_reference(reference)),
241        }
242    }
243
244    /// Parse a grid reference into a shared SheetRef, preserving $ anchors.
245    ///
246    /// Only cell and range references are supported. Table and named ranges return an error.
247    pub fn parse_sheet_ref(reference: &str) -> Result<SheetRef<'static>, ParsingError> {
248        Self::parse_sheet_ref_with_dialect(reference, FormulaDialect::Excel)
249    }
250
251    /// Parse a grid reference into a shared SheetRef using the specified dialect.
252    pub fn parse_sheet_ref_with_dialect(
253        reference: &str,
254        dialect: FormulaDialect,
255    ) -> Result<SheetRef<'static>, ParsingError> {
256        match dialect {
257            FormulaDialect::Excel => Self::parse_excel_sheet_ref(reference),
258            FormulaDialect::OpenFormula => Self::parse_openformula_sheet_ref(reference)
259                .or_else(|_| Self::parse_excel_sheet_ref(reference)),
260        }
261    }
262
263    /// Lossy conversion from parsed ReferenceType into SheetRef.
264    /// Absolute anchors are not recoverable from ReferenceType and default to relative.
265    pub fn to_sheet_ref_lossy(&self) -> Option<SheetRef<'_>> {
266        fn bound_from_1based(v: Option<u32>) -> Option<AxisBound> {
267            v.and_then(|x| x.checked_sub(1).map(|i| AxisBound::new(i, false)))
268        }
269
270        match self {
271            ReferenceType::Cell { sheet, row, col } => {
272                let row0 = row.checked_sub(1)?;
273                let col0 = col.checked_sub(1)?;
274                let sheet_loc = match sheet.as_deref() {
275                    Some(name) => SheetLocator::from_name(name),
276                    None => SheetLocator::Current,
277                };
278                let coord = RelativeCoord::new(row0, col0, false, false);
279                Some(SheetRef::Cell(SheetCellRef::new(sheet_loc, coord)))
280            }
281            ReferenceType::Range {
282                sheet,
283                start_row,
284                start_col,
285                end_row,
286                end_col,
287            } => {
288                let sheet_loc = match sheet.as_deref() {
289                    Some(name) => SheetLocator::from_name(name),
290                    None => SheetLocator::Current,
291                };
292                let sr = bound_from_1based(*start_row);
293                if start_row.is_some() && sr.is_none() {
294                    return None;
295                }
296                let sc = bound_from_1based(*start_col);
297                if start_col.is_some() && sc.is_none() {
298                    return None;
299                }
300                let er = bound_from_1based(*end_row);
301                if end_row.is_some() && er.is_none() {
302                    return None;
303                }
304                let ec = bound_from_1based(*end_col);
305                if end_col.is_some() && ec.is_none() {
306                    return None;
307                }
308                let range = SheetRangeRef::from_parts(sheet_loc, sr, sc, er, ec).ok()?;
309                Some(SheetRef::Range(range))
310            }
311            _ => None,
312        }
313    }
314
315    fn parse_excel_sheet_ref(reference: &str) -> Result<SheetRef<'static>, ParsingError> {
316        if reference.contains('[') {
317            return Err(ParsingError::InvalidReference(
318                "Table references are not supported for SheetRef".to_string(),
319            ));
320        }
321
322        let (sheet, ref_part) = Self::extract_sheet_name(reference);
323        let sheet_loc: SheetLocator<'static> = match sheet {
324            Some(name) => SheetLocator::from_name(name),
325            None => SheetLocator::Current,
326        };
327
328        if ref_part.contains(':') {
329            let mut parts = ref_part.splitn(2, ':');
330            let start = parts.next().unwrap();
331            let end = parts.next().ok_or_else(|| {
332                ParsingError::InvalidReference(format!("Invalid range: {ref_part}"))
333            })?;
334
335            let (start_col, start_row) = Self::parse_range_part_with_abs(start)?;
336            let (end_col, end_row) = Self::parse_range_part_with_abs(end)?;
337
338            let range =
339                SheetRangeRef::from_parts(sheet_loc, start_row, start_col, end_row, end_col)
340                    .map_err(|err| ParsingError::InvalidReference(err.to_string()))?;
341            Ok(SheetRef::Range(range))
342        } else {
343            let (row, col, row_abs, col_abs) = parse_a1_1based(&ref_part)
344                .map_err(|err| ParsingError::InvalidReference(err.to_string()))?;
345            let coord = RelativeCoord::new(row - 1, col - 1, row_abs, col_abs);
346            Ok(SheetRef::Cell(SheetCellRef::new(sheet_loc, coord)))
347        }
348    }
349
350    fn parse_openformula_sheet_ref(reference: &str) -> Result<SheetRef<'static>, ParsingError> {
351        Self::parse_excel_sheet_ref(reference)
352    }
353
354    fn parse_range_part_with_abs(
355        part: &str,
356    ) -> Result<(Option<AxisBound>, Option<AxisBound>), ParsingError> {
357        if let Ok((row, col, row_abs, col_abs)) = parse_a1_1based(part) {
358            let row_b = AxisBound::new(row - 1, row_abs);
359            let col_b = AxisBound::new(col - 1, col_abs);
360            return Ok((Some(col_b), Some(row_b)));
361        }
362
363        let bytes = part.as_bytes();
364        let len = bytes.len();
365        let mut i = 0usize;
366
367        let mut col_abs = false;
368        let mut row_abs = false;
369
370        if i < len && bytes[i] == b'$' {
371            col_abs = true;
372            i += 1;
373        }
374
375        let col_start = i;
376        while i < len && bytes[i].is_ascii_alphabetic() {
377            i += 1;
378        }
379
380        if i > col_start {
381            let col_str = &part[col_start..i];
382            let col1 = Self::column_to_number(col_str)?;
383
384            if i == len {
385                let col_b = AxisBound::new(col1 - 1, col_abs);
386                return Ok((Some(col_b), None));
387            }
388
389            if i < len && bytes[i] == b'$' {
390                row_abs = true;
391                i += 1;
392            }
393
394            if i >= len {
395                return Err(ParsingError::InvalidReference(format!(
396                    "Invalid range part: {part}"
397                )));
398            }
399
400            let row_start = i;
401            while i < len && bytes[i].is_ascii_digit() {
402                i += 1;
403            }
404
405            if row_start == i || i != len {
406                return Err(ParsingError::InvalidReference(format!(
407                    "Invalid range part: {part}"
408                )));
409            }
410
411            let row_str = &part[row_start..i];
412            let row1 = row_str
413                .parse::<u32>()
414                .map_err(|_| ParsingError::InvalidReference(format!("Invalid row: {row_str}")))?;
415            if row1 == 0 {
416                return Err(ParsingError::InvalidReference(format!(
417                    "Invalid range part: {part}"
418                )));
419            }
420
421            let col_b = AxisBound::new(col1 - 1, col_abs);
422            let row_b = AxisBound::new(row1 - 1, row_abs);
423            return Ok((Some(col_b), Some(row_b)));
424        }
425
426        i = 0;
427        if i < len && bytes[i] == b'$' {
428            row_abs = true;
429            i += 1;
430        }
431
432        let row_start = i;
433        while i < len && bytes[i].is_ascii_digit() {
434            i += 1;
435        }
436
437        if row_start == i || i != len {
438            return Err(ParsingError::InvalidReference(format!(
439                "Invalid range part: {part}"
440            )));
441        }
442
443        let row_str = &part[row_start..i];
444        let row1 = row_str
445            .parse::<u32>()
446            .map_err(|_| ParsingError::InvalidReference(format!("Invalid row: {row_str}")))?;
447        if row1 == 0 {
448            return Err(ParsingError::InvalidReference(format!(
449                "Invalid range part: {part}"
450            )));
451        }
452
453        Ok((None, Some(AxisBound::new(row1 - 1, row_abs))))
454    }
455
456    fn parse_excel_reference(reference: &str) -> Result<Self, ParsingError> {
457        // First check if this is a table reference (contains '[')
458        if reference.contains('[') {
459            return Self::parse_table_reference(reference);
460        }
461
462        // Extract sheet name if present
463        let (sheet, ref_part) = Self::extract_sheet_name(reference);
464
465        if ref_part.contains(':') {
466            // Range reference
467            Self::parse_range_reference(&ref_part, sheet)
468        } else {
469            // Try to parse as a single cell reference
470            match Self::parse_cell_reference(&ref_part) {
471                Ok((col, row)) => Ok(ReferenceType::Cell { sheet, row, col }),
472                Err(_) => {
473                    // Treat it as a named range
474                    Ok(ReferenceType::NamedRange(reference.to_string()))
475                }
476            }
477        }
478    }
479
480    /// Parse a range reference like "A1:B2", "A:A", or "1:1"
481    fn parse_range_reference(reference: &str, sheet: Option<String>) -> Result<Self, ParsingError> {
482        let mut parts = reference.splitn(2, ':');
483        let start = parts.next().unwrap();
484        let end = parts
485            .next()
486            .ok_or_else(|| ParsingError::InvalidReference(format!("Invalid range: {reference}")))?;
487
488        let (start_col, start_row) = Self::parse_range_part(start)?;
489        let (end_col, end_row) = Self::parse_range_part(end)?;
490
491        Ok(ReferenceType::Range {
492            sheet,
493            start_row,
494            start_col,
495            end_row,
496            end_col,
497        })
498    }
499
500    /// Parse a part of a range reference (either start or end).
501    /// Returns (column, row) where either can be None for infinite ranges.
502    fn parse_range_part(part: &str) -> Result<(Option<u32>, Option<u32>), ParsingError> {
503        // Try to parse as a normal cell reference (A1, B2, etc.)
504        if let Ok((col, row)) = Self::parse_cell_reference(part) {
505            return Ok((Some(col), Some(row)));
506        }
507
508        // Try to parse as column-only or row-only
509        let bytes = part.as_bytes();
510        let mut i = 0;
511
512        // Skip optional $
513        if i < bytes.len() && bytes[i] == b'$' {
514            i += 1;
515        }
516
517        // Check if we have letters (column)
518        let col_start = i;
519        while i < bytes.len() && bytes[i].is_ascii_alphabetic() {
520            i += 1;
521        }
522
523        if i > col_start {
524            // We have a column
525            let col_str = &part[col_start..i];
526            let col = Self::column_to_number(col_str)?;
527
528            // Skip optional $ before row
529            if i < bytes.len() && bytes[i] == b'$' {
530                i += 1;
531            }
532
533            // Check if we have digits (row)
534            let row_start = i;
535            while i < bytes.len() && bytes[i].is_ascii_digit() {
536                i += 1;
537            }
538
539            if i > row_start && i == bytes.len() {
540                // We have both column and row (shouldn't happen as parse_cell_reference should have caught it)
541                let row_str = &part[row_start..i];
542                let row = row_str.parse::<u32>().map_err(|_| {
543                    ParsingError::InvalidReference(format!("Invalid row: {row_str}"))
544                })?;
545                return Ok((Some(col), Some(row)));
546            } else if i == col_start + col_str.len()
547                || (i == col_start + col_str.len() + 1 && bytes[col_start + col_str.len()] == b'$')
548            {
549                // Just a column
550                return Ok((Some(col), None));
551            }
552        } else {
553            // No column, check for row-only reference
554            i = 0;
555            if i < bytes.len() && bytes[i] == b'$' {
556                i += 1;
557            }
558
559            let row_start = i;
560            while i < bytes.len() && bytes[i].is_ascii_digit() {
561                i += 1;
562            }
563
564            if i > row_start && i == bytes.len() {
565                let row_str = &part[row_start..i];
566                let row = row_str.parse::<u32>().map_err(|_| {
567                    ParsingError::InvalidReference(format!("Invalid row: {row_str}"))
568                })?;
569                return Ok((None, Some(row)));
570            }
571        }
572
573        Err(ParsingError::InvalidReference(format!(
574            "Invalid range part: {part}"
575        )))
576    }
577
578    /// Parse a cell reference like "A1" into (column, row) using byte-based parsing.
579    fn parse_cell_reference(reference: &str) -> Result<(u32, u32), ParsingError> {
580        parse_a1_1based(reference)
581            .map(|(row, col, _, _)| (col, row))
582            .map_err(|_| {
583                ParsingError::InvalidReference(format!("Invalid cell reference: {reference}"))
584            })
585    }
586
587    /// Convert a column letter (e.g., "A", "BC") to a column number (1-based) using byte operations.
588    pub(crate) fn column_to_number(column: &str) -> Result<u32, ParsingError> {
589        col_index_from_letters_1based(column)
590            .map_err(|_| ParsingError::InvalidReference(format!("Invalid column: {column}")))
591    }
592
593    /// Convert a column number to a column letter using lookup table for common values.
594    pub(crate) fn number_to_column(num: u32) -> String {
595        if num == 0 {
596            return String::new();
597        }
598        // Use lookup table for common columns (1-702 covers A-ZZ)
599        if num > 0 && num <= 702 {
600            return COLUMN_LOOKUP[(num - 1) as usize].clone();
601        }
602
603        col_letters_from_1based(num).unwrap_or_default()
604    }
605}
606
607impl Display for ReferenceType {
608    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
609        write!(
610            f,
611            "{}",
612            match self {
613                ReferenceType::Cell { sheet, row, col } => {
614                    let col_str = Self::number_to_column(*col);
615                    let row_str = row.to_string();
616
617                    if let Some(sheet_name) = sheet {
618                        if sheet_name_needs_quoting(sheet_name) {
619                            // Escape any single quotes in the sheet name by doubling them
620                            let escaped_name = sheet_name.replace('\'', "''");
621                            format!("'{escaped_name}'!{col_str}{row_str}")
622                        } else {
623                            format!("{sheet_name}!{col_str}{row_str}")
624                        }
625                    } else {
626                        format!("{col_str}{row_str}")
627                    }
628                }
629                ReferenceType::Range {
630                    sheet,
631                    start_row,
632                    start_col,
633                    end_row,
634                    end_col,
635                } => {
636                    // Format start reference
637                    let start_ref = match (start_col, start_row) {
638                        (Some(col), Some(row)) => {
639                            format!("{}{}", Self::number_to_column(*col), row)
640                        }
641                        (Some(col), None) => Self::number_to_column(*col),
642                        (None, Some(row)) => row.to_string(),
643                        (None, None) => "".to_string(), // Should not happen in normal usage
644                    };
645
646                    // Format end reference
647                    let end_ref = match (end_col, end_row) {
648                        (Some(col), Some(row)) => {
649                            format!("{}{}", Self::number_to_column(*col), row)
650                        }
651                        (Some(col), None) => Self::number_to_column(*col),
652                        (None, Some(row)) => row.to_string(),
653                        (None, None) => "".to_string(), // Should not happen in normal usage
654                    };
655
656                    let range_part = format!("{start_ref}:{end_ref}");
657
658                    if let Some(sheet_name) = sheet {
659                        if sheet_name_needs_quoting(sheet_name) {
660                            // Escape any single quotes in the sheet name by doubling them
661                            let escaped_name = sheet_name.replace('\'', "''");
662                            format!("'{escaped_name}'!{range_part}")
663                        } else {
664                            format!("{sheet_name}!{range_part}")
665                        }
666                    } else {
667                        range_part
668                    }
669                }
670                ReferenceType::Table(table_ref) => {
671                    if let Some(specifier) = &table_ref.specifier {
672                        // For table references, we need to handle column specifiers specially
673                        // to remove leading/trailing whitespace
674                        match specifier {
675                            TableSpecifier::Column(column) => {
676                                format!("{}[{}]", table_ref.name, column.trim())
677                            }
678                            TableSpecifier::ColumnRange(start, end) => {
679                                format!("{}[{}:{}]", table_ref.name, start.trim(), end.trim())
680                            }
681                            _ => {
682                                // For other specifiers, use the standard formatting
683                                format!("{}[{}]", table_ref.name, specifier)
684                            }
685                        }
686                    } else {
687                        table_ref.name.clone()
688                    }
689                }
690                ReferenceType::NamedRange(name) => name.clone(),
691            }
692        )
693    }
694}
695
696impl TryFrom<&str> for ReferenceType {
697    type Error = ParsingError;
698
699    fn try_from(value: &str) -> Result<Self, Self::Error> {
700        ReferenceType::from_string(value)
701    }
702}
703
704impl FromStr for ReferenceType {
705    type Err = ParsingError;
706
707    fn from_str(s: &str) -> Result<Self, Self::Err> {
708        ReferenceType::from_string(s)
709    }
710}
711
712impl ReferenceType {
713    /// Normalise the reference string (convert to canonical form)
714    pub fn normalise(&self) -> String {
715        format!("{self}")
716    }
717
718    /// Extract a sheet name from a reference using byte operations.
719    fn extract_sheet_name(reference: &str) -> (Option<String>, String) {
720        let bytes = reference.as_bytes();
721        let mut i = 0;
722
723        // Handle quoted sheet names.
724        // Excel escapes a single quote inside a quoted sheet name by doubling it.
725        // Example: 'Bob''s Sheet'!A1
726        if i < bytes.len() && bytes[i] == b'\'' {
727            i += 1;
728            let start = i;
729
730            while i < bytes.len() {
731                if bytes[i] == b'\'' {
732                    // Escaped quote inside sheet name: ''
733                    if i + 1 < bytes.len() && bytes[i + 1] == b'\'' {
734                        i += 2;
735                        continue;
736                    }
737
738                    // Closing quote followed by '!'
739                    if i + 1 < bytes.len() && bytes[i + 1] == b'!' {
740                        let raw = &reference[start..i];
741                        let sheet = raw.replace("''", "'");
742                        let ref_part = String::from(&reference[i + 2..]);
743                        return (Some(sheet), ref_part);
744                    }
745                }
746
747                i += 1;
748            }
749        }
750
751        // Handle unquoted sheet names
752        i = 0;
753        while i < bytes.len() {
754            if bytes[i] == b'!' && i > 0 {
755                let sheet = String::from(&reference[0..i]);
756                let ref_part = String::from(&reference[i + 1..]);
757                return (Some(sheet), ref_part);
758            }
759            i += 1;
760        }
761
762        (None, reference.to_string())
763    }
764
765    /// Parse a table reference like "Table1[Column1]" or more complex ones like "Table1[[#All],[Column1]:[Column2]]".
766    fn parse_table_reference(reference: &str) -> Result<Self, ParsingError> {
767        // Find the first '[' to separate table name from specifier
768        if let Some(bracket_pos) = reference.find('[') {
769            let table_name = reference[..bracket_pos].trim();
770            if table_name.is_empty() {
771                return Err(ParsingError::InvalidReference(reference.to_string()));
772            }
773
774            let specifier_str = &reference[bracket_pos..];
775            let specifier = Self::parse_table_specifier(specifier_str)?;
776
777            Ok(ReferenceType::Table(TableReference {
778                name: table_name.to_string(),
779                specifier,
780            }))
781        } else {
782            Err(ParsingError::InvalidReference(reference.to_string()))
783        }
784    }
785
786    /// Parse a table specifier like "[Column1]" or "[[#All],[Column1]:[Column2]]"
787    fn parse_table_specifier(specifier_str: &str) -> Result<Option<TableSpecifier>, ParsingError> {
788        if specifier_str.is_empty() || !specifier_str.starts_with('[') {
789            return Ok(None);
790        }
791
792        // Find balanced closing bracket
793        let mut depth = 0;
794        let mut end_pos = 0;
795
796        for (i, c) in specifier_str.chars().enumerate() {
797            if c == '[' {
798                depth += 1;
799            } else if c == ']' {
800                depth -= 1;
801                if depth == 0 {
802                    end_pos = i;
803                    break;
804                }
805            }
806        }
807
808        if depth != 0 || end_pos == 0 {
809            return Err(ParsingError::InvalidReference(format!(
810                "Unbalanced brackets in table specifier: {specifier_str}"
811            )));
812        }
813
814        // Extract content between outermost brackets
815        let content = &specifier_str[1..end_pos];
816
817        // Handle different types of specifiers
818        if content.is_empty() {
819            // Empty brackets means the whole table
820            return Ok(Some(TableSpecifier::All));
821        }
822
823        // Handle special items
824        if content.starts_with("#") {
825            return Self::parse_special_item(content);
826        }
827
828        // Handle column references
829        if !content.contains('[') && !content.contains('#') {
830            // Check for column range using iterator instead of split().collect()
831            if let Some(colon_pos) = content.find(':') {
832                let start = content[..colon_pos].trim();
833                let end = content[colon_pos + 1..].trim();
834                return Ok(Some(TableSpecifier::ColumnRange(
835                    start.to_string(),
836                    end.to_string(),
837                )));
838            } else {
839                // Single column
840                return Ok(Some(TableSpecifier::Column(content.trim().to_string())));
841            }
842        }
843
844        // Handle complex structured references with nested brackets
845        if content.contains('[') {
846            return Self::parse_complex_table_specifier(content);
847        }
848
849        // If we can't determine the type, just use the raw specifier
850        Ok(Some(TableSpecifier::Column(content.trim().to_string())))
851    }
852
853    fn parse_openformula_reference(reference: &str) -> Result<Self, ParsingError> {
854        if reference.starts_with('[') && reference.ends_with(']') {
855            let inner = &reference[1..reference.len() - 1];
856            if inner.is_empty() {
857                return Err(ParsingError::InvalidReference(
858                    "Empty OpenFormula reference".to_string(),
859                ));
860            }
861
862            let mut parts = inner.splitn(2, ':');
863            let start_part_str = parts.next().unwrap();
864            let end_part_str = parts.next();
865
866            let start_part = Self::parse_openformula_part(start_part_str)?;
867            let end_part = if let Some(part) = end_part_str {
868                Some(Self::parse_openformula_part(part)?)
869            } else {
870                None
871            };
872
873            let sheet = match (&start_part.sheet, &end_part) {
874                (Some(sheet), Some(end)) => {
875                    if let Some(end_sheet) = &end.sheet
876                        && end_sheet != sheet
877                    {
878                        return Err(ParsingError::InvalidReference(format!(
879                            "Mismatched sheets in reference: {sheet} vs {end_sheet}"
880                        )));
881                    }
882                    Some(sheet.clone())
883                }
884                (Some(sheet), None) => Some(sheet.clone()),
885                (None, Some(end)) => end.sheet.clone(),
886                (None, None) => None,
887            };
888
889            let mut excel_like = String::new();
890            if let Some(sheet_name) = sheet {
891                if sheet_name_needs_quoting(&sheet_name) {
892                    let escaped = sheet_name.replace('\'', "''");
893                    excel_like.push('\'');
894                    excel_like.push_str(&escaped);
895                    excel_like.push('\'');
896                } else {
897                    excel_like.push_str(&sheet_name);
898                }
899                excel_like.push('!');
900            }
901
902            excel_like.push_str(&start_part.coord);
903            if let Some(end) = end_part {
904                excel_like.push(':');
905                excel_like.push_str(&end.coord);
906            }
907
908            return Self::parse_excel_reference(&excel_like);
909        }
910
911        Err(ParsingError::InvalidReference(format!(
912            "Unsupported OpenFormula reference: {reference}"
913        )))
914    }
915
916    fn parse_openformula_part(part: &str) -> Result<OpenFormulaRefPart, ParsingError> {
917        let trimmed = part.trim();
918        if trimmed.is_empty() {
919            return Err(ParsingError::InvalidReference(
920                "Empty component in OpenFormula reference".to_string(),
921            ));
922        }
923
924        if trimmed == "." {
925            return Err(ParsingError::InvalidReference(
926                "Incomplete OpenFormula reference component".to_string(),
927            ));
928        }
929
930        if trimmed.starts_with('[') {
931            // Nested brackets are not expected here
932            return Err(ParsingError::InvalidReference(format!(
933                "Unexpected '[' in OpenFormula reference component: {trimmed}"
934            )));
935        }
936
937        let (sheet, coord_slice) = if let Some(stripped) = trimmed.strip_prefix('.') {
938            (None, stripped.trim())
939        } else if let Some(dot_idx) = Self::find_openformula_sheet_separator(trimmed) {
940            let sheet_part = trimmed[..dot_idx].trim();
941            let coord_part = trimmed[dot_idx + 1..].trim();
942            if coord_part.is_empty() {
943                return Err(ParsingError::InvalidReference(format!(
944                    "Missing coordinate in OpenFormula reference component: {trimmed}"
945                )));
946            }
947            let sheet_name = Self::normalise_openformula_sheet(sheet_part)?;
948            (Some(sheet_name), coord_part)
949        } else {
950            (None, trimmed)
951        };
952
953        let coord = coord_slice.trim_start_matches('.').trim().to_string();
954
955        if coord.is_empty() {
956            return Err(ParsingError::InvalidReference(format!(
957                "Missing coordinate in OpenFormula reference component: {trimmed}"
958            )));
959        }
960
961        Ok(OpenFormulaRefPart { sheet, coord })
962    }
963
964    fn normalise_openformula_sheet(sheet: &str) -> Result<String, ParsingError> {
965        let without_abs = sheet.trim().trim_start_matches('$');
966
967        if without_abs.starts_with('\'') {
968            if without_abs.len() < 2 || !without_abs.ends_with('\'') {
969                return Err(ParsingError::InvalidReference(format!(
970                    "Unterminated sheet name in OpenFormula reference: {sheet}"
971                )));
972            }
973            let inner = &without_abs[1..without_abs.len() - 1];
974            Ok(inner.replace("''", "'"))
975        } else {
976            Ok(without_abs.to_string())
977        }
978    }
979
980    fn find_openformula_sheet_separator(part: &str) -> Option<usize> {
981        let bytes = part.as_bytes();
982        let mut i = 0;
983        let mut in_quotes = false;
984
985        while i < bytes.len() {
986            match bytes[i] {
987                b'\'' => {
988                    if i + 1 < bytes.len() && bytes[i + 1] == b'\'' {
989                        i += 2;
990                        continue;
991                    }
992                    in_quotes = !in_quotes;
993                    i += 1;
994                }
995                b'.' if !in_quotes => return Some(i),
996                _ => i += 1,
997            }
998        }
999
1000        None
1001    }
1002
1003    /// Parse a special item specifier like "#Headers", "#Data", etc.
1004    fn parse_special_item(content: &str) -> Result<Option<TableSpecifier>, ParsingError> {
1005        match content {
1006            "#All" => Ok(Some(TableSpecifier::SpecialItem(SpecialItem::All))),
1007            "#Headers" => Ok(Some(TableSpecifier::SpecialItem(SpecialItem::Headers))),
1008            "#Data" => Ok(Some(TableSpecifier::SpecialItem(SpecialItem::Data))),
1009            "#Totals" => Ok(Some(TableSpecifier::SpecialItem(SpecialItem::Totals))),
1010            "@" => Ok(Some(TableSpecifier::Row(TableRowSpecifier::Current))),
1011            _ => Err(ParsingError::InvalidReference(format!(
1012                "Unknown special item: {content}"
1013            ))),
1014        }
1015    }
1016
1017    /// Parse complex table specifiers with nested brackets
1018    fn parse_complex_table_specifier(
1019        content: &str,
1020    ) -> Result<Option<TableSpecifier>, ParsingError> {
1021        // This is a more complex case like [[#Headers],[Column1]:[Column2]]
1022        // For now, we'll just store the raw specifier and enhance this in the future
1023
1024        // Try to identify common patterns
1025        if content.contains("[#Headers]")
1026            || content.contains("[#All]")
1027            || content.contains("[#Data]")
1028            || content.contains("[#Totals]")
1029            || content.contains("[@]")
1030        {
1031            // This is a combination of specifiers
1032            // Parse them into a vector
1033            let mut specifiers = Vec::new();
1034
1035            // Simple parsing - this would need enhancement for full support
1036            if content.contains("[#Headers]") {
1037                specifiers.push(Box::new(TableSpecifier::SpecialItem(SpecialItem::Headers)));
1038            }
1039            if content.contains("[#Data]") {
1040                specifiers.push(Box::new(TableSpecifier::SpecialItem(SpecialItem::Data)));
1041            }
1042            if content.contains("[#Totals]") {
1043                specifiers.push(Box::new(TableSpecifier::SpecialItem(SpecialItem::Totals)));
1044            }
1045            if content.contains("[#All]") {
1046                specifiers.push(Box::new(TableSpecifier::SpecialItem(SpecialItem::All)));
1047            }
1048
1049            if !specifiers.is_empty() {
1050                return Ok(Some(TableSpecifier::Combination(specifiers)));
1051            }
1052        }
1053
1054        // Fallback to storing as a column specifier
1055        Ok(Some(TableSpecifier::Column(content.trim().to_string())))
1056    }
1057
1058    /// Get the Excel-style string representation of this reference
1059    pub fn to_excel_string(&self) -> String {
1060        match self {
1061            ReferenceType::Cell { sheet, row, col } => {
1062                if let Some(s) = sheet {
1063                    if sheet_name_needs_quoting(s) {
1064                        let escaped_name = s.replace('\'', "''");
1065                        format!("'{}'!{}{}", escaped_name, Self::number_to_column(*col), row)
1066                    } else {
1067                        format!("{}!{}{}", s, Self::number_to_column(*col), row)
1068                    }
1069                } else {
1070                    format!("{}{}", Self::number_to_column(*col), row)
1071                }
1072            }
1073            ReferenceType::Range {
1074                sheet,
1075                start_row,
1076                start_col,
1077                end_row,
1078                end_col,
1079            } => {
1080                // Format start reference
1081                let start_ref = match (start_col, start_row) {
1082                    (Some(col), Some(row)) => format!("{}{}", Self::number_to_column(*col), row),
1083                    (Some(col), None) => Self::number_to_column(*col),
1084                    (None, Some(row)) => row.to_string(),
1085                    (None, None) => "".to_string(), // Should not happen in normal usage
1086                };
1087
1088                // Format end reference
1089                let end_ref = match (end_col, end_row) {
1090                    (Some(col), Some(row)) => format!("{}{}", Self::number_to_column(*col), row),
1091                    (Some(col), None) => Self::number_to_column(*col),
1092                    (None, Some(row)) => row.to_string(),
1093                    (None, None) => "".to_string(), // Should not happen in normal usage
1094                };
1095
1096                let range_part = format!("{start_ref}:{end_ref}");
1097
1098                if let Some(s) = sheet {
1099                    if sheet_name_needs_quoting(s) {
1100                        let escaped_name = s.replace('\'', "''");
1101                        format!("'{escaped_name}'!{range_part}")
1102                    } else {
1103                        format!("{s}!{range_part}")
1104                    }
1105                } else {
1106                    range_part
1107                }
1108            }
1109            ReferenceType::Table(table_ref) => {
1110                if let Some(specifier) = &table_ref.specifier {
1111                    format!("{}[{}]", table_ref.name, specifier)
1112                } else {
1113                    table_ref.name.clone()
1114                }
1115            }
1116            ReferenceType::NamedRange(name) => name.clone(),
1117        }
1118    }
1119}
1120
1121/// The different types of AST nodes.
1122#[derive(Debug, Clone, PartialEq, Hash)]
1123pub enum ASTNodeType {
1124    Literal(LiteralValue),
1125    Reference {
1126        original: String, // Original reference string (preserved for display/debugging)
1127        reference: ReferenceType, // Parsed reference
1128    },
1129    UnaryOp {
1130        op: String,
1131        expr: Box<ASTNode>,
1132    },
1133    BinaryOp {
1134        op: String,
1135        left: Box<ASTNode>,
1136        right: Box<ASTNode>,
1137    },
1138    Function {
1139        name: String,
1140        args: Vec<ASTNode>, // Most functions have <= 4 args
1141    },
1142    Array(Vec<Vec<ASTNode>>), // Most arrays are small
1143}
1144
1145impl Display for ASTNodeType {
1146    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1147        match self {
1148            ASTNodeType::Literal(value) => write!(f, "Literal({value})"),
1149            ASTNodeType::Reference { reference, .. } => write!(f, "Reference({reference:?})"),
1150            ASTNodeType::UnaryOp { op, expr } => write!(f, "UnaryOp({op}, {expr})"),
1151            ASTNodeType::BinaryOp { op, left, right } => {
1152                write!(f, "BinaryOp({op}, {left}, {right})")
1153            }
1154            ASTNodeType::Function { name, args } => write!(f, "Function({name}, {args:?})"),
1155            ASTNodeType::Array(rows) => write!(f, "Array({rows:?})"),
1156        }
1157    }
1158}
1159
1160/// An AST node represents a parsed formula element
1161#[derive(Debug, Clone, PartialEq)]
1162pub struct ASTNode {
1163    pub node_type: ASTNodeType,
1164    pub source_token: Option<Token>,
1165    /// True if this AST contains any volatile function calls.
1166    ///
1167    /// This is set by the parser when a volatility classifier is provided.
1168    /// For ASTs constructed manually (e.g., in tests), this defaults to false.
1169    pub contains_volatile: bool,
1170}
1171
1172impl ASTNode {
1173    pub fn new(node_type: ASTNodeType, source_token: Option<Token>) -> Self {
1174        ASTNode {
1175            node_type,
1176            source_token,
1177            contains_volatile: false,
1178        }
1179    }
1180
1181    /// Create an ASTNode while explicitly setting contains_volatile.
1182    pub fn new_with_volatile(
1183        node_type: ASTNodeType,
1184        source_token: Option<Token>,
1185        contains_volatile: bool,
1186    ) -> Self {
1187        ASTNode {
1188            node_type,
1189            source_token,
1190            contains_volatile,
1191        }
1192    }
1193
1194    /// Whether this AST contains any volatile functions.
1195    pub fn contains_volatile(&self) -> bool {
1196        self.contains_volatile
1197    }
1198
1199    pub fn fingerprint(&self) -> u64 {
1200        self.calculate_hash()
1201    }
1202
1203    /// Calculate a hash for this ASTNode
1204    pub fn calculate_hash(&self) -> u64 {
1205        let mut hasher = FormulaHasher::new();
1206        self.hash_node(&mut hasher);
1207        hasher.finish()
1208    }
1209
1210    fn hash_node(&self, hasher: &mut FormulaHasher) {
1211        match &self.node_type {
1212            ASTNodeType::Literal(value) => {
1213                hasher.write(&[1]); // Discriminant for Literal
1214                value.hash(hasher);
1215            }
1216            ASTNodeType::Reference { reference, .. } => {
1217                hasher.write(&[2]); // Discriminant for Reference
1218                reference.hash(hasher);
1219            }
1220            ASTNodeType::UnaryOp { op, expr } => {
1221                hasher.write(&[3]); // Discriminant for UnaryOp
1222                hasher.write(op.as_bytes());
1223                expr.hash_node(hasher);
1224            }
1225            ASTNodeType::BinaryOp { op, left, right } => {
1226                hasher.write(&[4]); // Discriminant for BinaryOp
1227                hasher.write(op.as_bytes());
1228                left.hash_node(hasher);
1229                right.hash_node(hasher);
1230            }
1231            ASTNodeType::Function { name, args } => {
1232                hasher.write(&[5]); // Discriminant for Function
1233                // Use lowercase function name to be case-insensitive
1234                let name_lower = name.to_lowercase();
1235                hasher.write(name_lower.as_bytes());
1236                hasher.write_usize(args.len());
1237                for arg in args {
1238                    arg.hash_node(hasher);
1239                }
1240            }
1241            ASTNodeType::Array(rows) => {
1242                hasher.write(&[6]); // Discriminant for Array
1243                hasher.write_usize(rows.len());
1244                for row in rows {
1245                    hasher.write_usize(row.len());
1246                    for item in row {
1247                        item.hash_node(hasher);
1248                    }
1249                }
1250            }
1251        }
1252    }
1253
1254    pub fn get_dependencies(&self) -> Vec<&ReferenceType> {
1255        let mut dependencies = Vec::new();
1256        self.collect_dependencies(&mut dependencies);
1257        dependencies
1258    }
1259
1260    pub fn get_dependency_strings(&self) -> Vec<String> {
1261        self.get_dependencies()
1262            .into_iter()
1263            .map(|dep| format!("{dep}"))
1264            .collect()
1265    }
1266
1267    fn collect_dependencies<'a>(&'a self, dependencies: &mut Vec<&'a ReferenceType>) {
1268        match &self.node_type {
1269            ASTNodeType::Reference { reference, .. } => {
1270                dependencies.push(reference);
1271            }
1272            ASTNodeType::UnaryOp { expr, .. } => {
1273                expr.collect_dependencies(dependencies);
1274            }
1275            ASTNodeType::BinaryOp { left, right, .. } => {
1276                left.collect_dependencies(dependencies);
1277                right.collect_dependencies(dependencies);
1278            }
1279            ASTNodeType::Function { args, .. } => {
1280                for arg in args {
1281                    arg.collect_dependencies(dependencies);
1282                }
1283            }
1284            ASTNodeType::Array(rows) => {
1285                for row in rows {
1286                    for item in row {
1287                        item.collect_dependencies(dependencies);
1288                    }
1289                }
1290            }
1291            _ => {}
1292        }
1293    }
1294
1295    /// Lightweight borrowed view of a reference encountered during AST traversal.
1296    /// This mirrors ReferenceType variants but borrows sheet/name strings to avoid allocation.
1297    pub fn refs(&self) -> RefIter<'_> {
1298        RefIter {
1299            stack: smallvec::smallvec![self],
1300        }
1301    }
1302
1303    /// Visit all references in this AST without allocating intermediates.
1304    pub fn visit_refs<V: FnMut(RefView<'_>)>(&self, mut visitor: V) {
1305        let mut stack: Vec<&ASTNode> = Vec::with_capacity(8);
1306        stack.push(self);
1307        while let Some(node) = stack.pop() {
1308            match &node.node_type {
1309                ASTNodeType::Reference { reference, .. } => visitor(RefView::from(reference)),
1310                ASTNodeType::UnaryOp { expr, .. } => stack.push(expr),
1311                ASTNodeType::BinaryOp { left, right, .. } => {
1312                    // Push right first so left is visited first (stable-ish order)
1313                    stack.push(right);
1314                    stack.push(left);
1315                }
1316                ASTNodeType::Function { args, .. } => {
1317                    for a in args.iter().rev() {
1318                        stack.push(a);
1319                    }
1320                }
1321                ASTNodeType::Array(rows) => {
1322                    for r in rows.iter().rev() {
1323                        for item in r.iter().rev() {
1324                            stack.push(item);
1325                        }
1326                    }
1327                }
1328                ASTNodeType::Literal(_) => {}
1329            }
1330        }
1331    }
1332
1333    /// Convenience: collect references into a small, inline vector based on a policy.
1334    pub fn collect_references(&self, policy: &CollectPolicy) -> SmallVec<[ReferenceType; 4]> {
1335        let mut out: SmallVec<[ReferenceType; 4]> = SmallVec::new();
1336        self.visit_refs(|rv| match rv {
1337            RefView::Cell { sheet, row, col } => out.push(ReferenceType::Cell {
1338                sheet: sheet.map(|s| s.to_string()),
1339                row,
1340                col,
1341            }),
1342            RefView::Range {
1343                sheet,
1344                start_row,
1345                start_col,
1346                end_row,
1347                end_col,
1348            } => {
1349                // Optionally expand very small finite ranges into individual cells
1350                if policy.expand_small_ranges
1351                    && let (Some(sr), Some(sc), Some(er), Some(ec)) =
1352                        (start_row, start_col, end_row, end_col)
1353                {
1354                    let rows = er.saturating_sub(sr) + 1;
1355                    let cols = ec.saturating_sub(sc) + 1;
1356                    let area = rows.saturating_mul(cols);
1357                    if area as usize <= policy.range_expansion_limit {
1358                        for r in sr..=er {
1359                            for c in sc..=ec {
1360                                out.push(ReferenceType::Cell {
1361                                    sheet: sheet.map(|s| s.to_string()),
1362                                    row: r,
1363                                    col: c,
1364                                });
1365                            }
1366                        }
1367                        return; // handled
1368                    }
1369                }
1370                out.push(ReferenceType::Range {
1371                    sheet: sheet.map(|s| s.to_string()),
1372                    start_row,
1373                    start_col,
1374                    end_row,
1375                    end_col,
1376                });
1377            }
1378            RefView::Table { name, specifier } => out.push(ReferenceType::Table(TableReference {
1379                name: name.to_string(),
1380                specifier: specifier.cloned(),
1381            })),
1382            RefView::NamedRange { name } => {
1383                if policy.include_names {
1384                    out.push(ReferenceType::NamedRange(name.to_string()));
1385                }
1386            }
1387        });
1388        out
1389    }
1390}
1391
1392/// A borrowing view over a ReferenceType. Avoids cloning sheet/names while walking.
1393#[derive(Clone, Copy, Debug)]
1394pub enum RefView<'a> {
1395    Cell {
1396        sheet: Option<&'a str>,
1397        row: u32,
1398        col: u32,
1399    },
1400    Range {
1401        sheet: Option<&'a str>,
1402        start_row: Option<u32>,
1403        start_col: Option<u32>,
1404        end_row: Option<u32>,
1405        end_col: Option<u32>,
1406    },
1407    Table {
1408        name: &'a str,
1409        specifier: Option<&'a TableSpecifier>,
1410    },
1411    NamedRange {
1412        name: &'a str,
1413    },
1414}
1415
1416impl<'a> From<&'a ReferenceType> for RefView<'a> {
1417    fn from(r: &'a ReferenceType) -> Self {
1418        match r {
1419            ReferenceType::Cell { sheet, row, col } => RefView::Cell {
1420                sheet: sheet.as_deref(),
1421                row: *row,
1422                col: *col,
1423            },
1424            ReferenceType::Range {
1425                sheet,
1426                start_row,
1427                start_col,
1428                end_row,
1429                end_col,
1430            } => RefView::Range {
1431                sheet: sheet.as_deref(),
1432                start_row: *start_row,
1433                start_col: *start_col,
1434                end_row: *end_row,
1435                end_col: *end_col,
1436            },
1437            ReferenceType::Table(tr) => RefView::Table {
1438                name: tr.name.as_str(),
1439                specifier: tr.specifier.as_ref(),
1440            },
1441            ReferenceType::NamedRange(name) => RefView::NamedRange { name },
1442        }
1443    }
1444}
1445
1446/// Iterator over RefView for an AST, implemented via an explicit stack to avoid recursion allocation.
1447pub struct RefIter<'a> {
1448    stack: smallvec::SmallVec<[&'a ASTNode; 8]>,
1449}
1450
1451impl<'a> Iterator for RefIter<'a> {
1452    type Item = RefView<'a>;
1453    fn next(&mut self) -> Option<Self::Item> {
1454        while let Some(node) = self.stack.pop() {
1455            match &node.node_type {
1456                ASTNodeType::Reference { reference, .. } => return Some(RefView::from(reference)),
1457                ASTNodeType::UnaryOp { expr, .. } => self.stack.push(expr),
1458                ASTNodeType::BinaryOp { left, right, .. } => {
1459                    self.stack.push(right);
1460                    self.stack.push(left);
1461                }
1462                ASTNodeType::Function { args, .. } => {
1463                    for a in args.iter().rev() {
1464                        self.stack.push(a);
1465                    }
1466                }
1467                ASTNodeType::Array(rows) => {
1468                    for r in rows.iter().rev() {
1469                        for item in r.iter().rev() {
1470                            self.stack.push(item);
1471                        }
1472                    }
1473                }
1474                ASTNodeType::Literal(_) => {}
1475            }
1476        }
1477        None
1478    }
1479}
1480
1481/// Policy controlling how references are collected.
1482#[derive(Debug, Clone)]
1483pub struct CollectPolicy {
1484    pub expand_small_ranges: bool,
1485    pub range_expansion_limit: usize,
1486    pub include_names: bool,
1487}
1488
1489impl Default for CollectPolicy {
1490    fn default() -> Self {
1491        Self {
1492            expand_small_ranges: false,
1493            range_expansion_limit: 0,
1494            include_names: true,
1495        }
1496    }
1497}
1498
1499impl Display for ASTNode {
1500    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1501        write!(f, "{}", self.node_type)
1502    }
1503}
1504
1505impl std::hash::Hash for ASTNode {
1506    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
1507        let hash = self.calculate_hash();
1508        state.write_u64(hash);
1509    }
1510}
1511
1512/// A parser for converting tokens into an AST.
1513pub struct Parser {
1514    tokens: Arc<[Token]>,
1515    position: usize,
1516    /// Optional classifier to determine whether a function name is volatile.
1517    volatility_classifier: Option<VolatilityClassifierBox>,
1518    dialect: FormulaDialect,
1519}
1520
1521impl<T> From<T> for Parser
1522where
1523    T: AsRef<str>,
1524{
1525    fn from(formula: T) -> Self {
1526        let tokens = Tokenizer::new(formula.as_ref()).unwrap().items;
1527        Self::new(tokens, false)
1528    }
1529}
1530
1531impl Parser {
1532    pub fn new(tokens: Vec<Token>, include_whitespace: bool) -> Self {
1533        Self::new_with_dialect(tokens, include_whitespace, FormulaDialect::Excel)
1534    }
1535
1536    pub fn new_with_dialect(
1537        mut tokens: Vec<Token>,
1538        include_whitespace: bool,
1539        dialect: FormulaDialect,
1540    ) -> Self {
1541        if !include_whitespace {
1542            tokens.retain(|t| t.token_type != TokenType::Whitespace);
1543        }
1544
1545        Parser {
1546            tokens: Arc::from(tokens.into_boxed_slice()),
1547            position: 0,
1548            volatility_classifier: None,
1549            dialect,
1550        }
1551    }
1552
1553    /// Provide a function-volatility classifier for this parser.
1554    /// If set, the parser will annotate ASTs with a contains_volatile bit.
1555    pub fn with_volatility_classifier<F>(mut self, f: F) -> Self
1556    where
1557        F: Fn(&str) -> bool + Send + Sync + 'static,
1558    {
1559        self.volatility_classifier = Some(Box::new(f));
1560        self
1561    }
1562
1563    /// Convenience constructor to set a classifier alongside other options.
1564    pub fn new_with_classifier<F>(tokens: Vec<Token>, include_whitespace: bool, f: F) -> Self
1565    where
1566        F: Fn(&str) -> bool + Send + Sync + 'static,
1567    {
1568        Self::new(tokens, include_whitespace).with_volatility_classifier(f)
1569    }
1570
1571    pub fn new_with_classifier_and_dialect<F>(
1572        tokens: Vec<Token>,
1573        include_whitespace: bool,
1574        dialect: FormulaDialect,
1575        f: F,
1576    ) -> Self
1577    where
1578        F: Fn(&str) -> bool + Send + Sync + 'static,
1579    {
1580        Self::new_with_dialect(tokens, include_whitespace, dialect).with_volatility_classifier(f)
1581    }
1582
1583    fn skip_whitespace(&mut self) {
1584        while self.position < self.tokens.len()
1585            && self.tokens[self.position].token_type == TokenType::Whitespace
1586        {
1587            self.position += 1;
1588        }
1589    }
1590
1591    /// Parse the tokens into an AST.
1592    pub fn parse(&mut self) -> Result<ASTNode, ParserError> {
1593        if self.tokens.is_empty() {
1594            return Err(ParserError {
1595                message: "No tokens to parse".to_string(),
1596                position: None,
1597            });
1598        }
1599
1600        self.skip_whitespace();
1601        if self.position >= self.tokens.len() {
1602            return Err(ParserError {
1603                message: "No tokens to parse".to_string(),
1604                position: None,
1605            });
1606        }
1607
1608        // Check for literal formula (doesn't start with '=')
1609        if self.tokens[self.position].token_type == TokenType::Literal {
1610            let token = self.tokens[self.position].clone();
1611            self.position += 1;
1612            self.skip_whitespace();
1613            if self.position < self.tokens.len() {
1614                return Err(ParserError {
1615                    message: format!(
1616                        "Unexpected token at position {}: {:?}",
1617                        self.position, self.tokens[self.position]
1618                    ),
1619                    position: Some(self.position),
1620                });
1621            }
1622            return Ok(ASTNode::new(
1623                ASTNodeType::Literal(LiteralValue::Text(token.value.clone())),
1624                Some(token),
1625            ));
1626        }
1627
1628        let ast = self.parse_expression()?;
1629        self.skip_whitespace();
1630        if self.position < self.tokens.len() {
1631            return Err(ParserError {
1632                message: format!(
1633                    "Unexpected token at position {}: {:?}",
1634                    self.position, self.tokens[self.position]
1635                ),
1636                position: Some(self.position),
1637            });
1638        }
1639        Ok(ast)
1640    }
1641
1642    fn parse_expression(&mut self) -> Result<ASTNode, ParserError> {
1643        self.parse_bp(0)
1644    }
1645
1646    // Pratt-style precedence parser. `min_precedence` is the minimum binding power
1647    // an operator must have to be consumed at this level.
1648    fn parse_bp(&mut self, min_precedence: u8) -> Result<ASTNode, ParserError> {
1649        let mut left = self.parse_prefix()?;
1650
1651        loop {
1652            self.skip_whitespace();
1653            if self.position >= self.tokens.len() {
1654                break;
1655            }
1656
1657            // Postfix operators (e.g. percent).
1658            if self.tokens[self.position].token_type == TokenType::OpPostfix {
1659                let (precedence, _) = self.tokens[self.position]
1660                    .get_precedence()
1661                    .unwrap_or((0, Associativity::Left));
1662                if precedence < min_precedence {
1663                    break;
1664                }
1665
1666                let op_token = self.tokens[self.position].clone();
1667                self.position += 1;
1668                let contains_volatile = left.contains_volatile;
1669                left = ASTNode::new_with_volatile(
1670                    ASTNodeType::UnaryOp {
1671                        op: op_token.value.clone(),
1672                        expr: Box::new(left),
1673                    },
1674                    Some(op_token),
1675                    contains_volatile,
1676                );
1677                continue;
1678            }
1679
1680            let token = &self.tokens[self.position];
1681            if token.token_type != TokenType::OpInfix {
1682                break;
1683            }
1684
1685            let (precedence, associativity) =
1686                token.get_precedence().unwrap_or((0, Associativity::Left));
1687            if precedence < min_precedence {
1688                break;
1689            }
1690
1691            let op_token = self.tokens[self.position].clone();
1692            self.position += 1;
1693
1694            let next_min_precedence = if associativity == Associativity::Left {
1695                precedence + 1
1696            } else {
1697                precedence
1698            };
1699
1700            let right = self.parse_bp(next_min_precedence)?;
1701            let contains_volatile = left.contains_volatile || right.contains_volatile;
1702            left = ASTNode::new_with_volatile(
1703                ASTNodeType::BinaryOp {
1704                    op: op_token.value.clone(),
1705                    left: Box::new(left),
1706                    right: Box::new(right),
1707                },
1708                Some(op_token),
1709                contains_volatile,
1710            );
1711        }
1712
1713        Ok(left)
1714    }
1715
1716    fn parse_prefix(&mut self) -> Result<ASTNode, ParserError> {
1717        self.skip_whitespace();
1718        if self.position < self.tokens.len()
1719            && self.tokens[self.position].token_type == TokenType::OpPrefix
1720        {
1721            let op_token = self.tokens[self.position].clone();
1722            self.position += 1;
1723
1724            // Prefix unary binds looser than exponent, so parse the RHS with
1725            // min_precedence equal to unary's precedence.
1726            let (precedence, _) = op_token
1727                .get_precedence()
1728                .unwrap_or((0, Associativity::Right));
1729
1730            let expr = self.parse_bp(precedence)?;
1731            let contains_volatile = expr.contains_volatile;
1732            return Ok(ASTNode::new_with_volatile(
1733                ASTNodeType::UnaryOp {
1734                    op: op_token.value.clone(),
1735                    expr: Box::new(expr),
1736                },
1737                Some(op_token),
1738                contains_volatile,
1739            ));
1740        }
1741
1742        self.parse_primary()
1743    }
1744
1745    fn parse_primary(&mut self) -> Result<ASTNode, ParserError> {
1746        self.skip_whitespace();
1747        if self.position >= self.tokens.len() {
1748            return Err(ParserError {
1749                message: "Unexpected end of tokens".to_string(),
1750                position: Some(self.position),
1751            });
1752        }
1753
1754        let token = &self.tokens[self.position];
1755        match token.token_type {
1756            TokenType::Operand => {
1757                let operand_token = self.tokens[self.position].clone();
1758                self.position += 1;
1759                self.parse_operand(operand_token)
1760            }
1761            TokenType::Func => {
1762                let func_token = self.tokens[self.position].clone();
1763                self.position += 1;
1764                self.parse_function(func_token)
1765            }
1766            TokenType::Paren if token.subtype == TokenSubType::Open => {
1767                self.position += 1;
1768                let expr = self.parse_expression()?;
1769                if self.position >= self.tokens.len()
1770                    || self.tokens[self.position].token_type != TokenType::Paren
1771                    || self.tokens[self.position].subtype != TokenSubType::Close
1772                {
1773                    return Err(ParserError {
1774                        message: "Expected closing parenthesis".to_string(),
1775                        position: Some(self.position),
1776                    });
1777                }
1778                self.position += 1;
1779                Ok(expr)
1780            }
1781            TokenType::Array if token.subtype == TokenSubType::Open => {
1782                self.position += 1;
1783                self.parse_array()
1784            }
1785            _ => Err(ParserError {
1786                message: format!("Unexpected token: {token:?}"),
1787                position: Some(self.position),
1788            }),
1789        }
1790    }
1791
1792    fn parse_operand(&mut self, token: Token) -> Result<ASTNode, ParserError> {
1793        match token.subtype {
1794            TokenSubType::Number => {
1795                let value = token.value.parse::<f64>().map_err(|_| ParserError {
1796                    message: format!("Invalid number: {}", token.value),
1797                    position: Some(self.position),
1798                })?;
1799                Ok(ASTNode::new(
1800                    ASTNodeType::Literal(LiteralValue::Number(value)),
1801                    Some(token),
1802                ))
1803            }
1804            TokenSubType::Text => {
1805                // Strip surrounding quotes from text literals
1806                let mut text = token.value.clone();
1807                if text.starts_with('"') && text.ends_with('"') && text.len() >= 2 {
1808                    text = text[1..text.len() - 1].to_string();
1809                    // Handle escaped quotes
1810                    text = text.replace("\"\"", "\"");
1811                }
1812                Ok(ASTNode::new(
1813                    ASTNodeType::Literal(LiteralValue::Text(text)),
1814                    Some(token),
1815                ))
1816            }
1817            TokenSubType::Logical => {
1818                let value = token.value.to_uppercase() == "TRUE";
1819                Ok(ASTNode::new(
1820                    ASTNodeType::Literal(LiteralValue::Boolean(value)),
1821                    Some(token),
1822                ))
1823            }
1824            TokenSubType::Error => {
1825                let error = ExcelError::from_error_string(&token.value);
1826                Ok(ASTNode::new(
1827                    ASTNodeType::Literal(LiteralValue::Error(error)),
1828                    Some(token),
1829                ))
1830            }
1831            TokenSubType::Range => {
1832                let reference = ReferenceType::from_string_with_dialect(&token.value, self.dialect)
1833                    .map_err(|e| ParserError {
1834                        message: format!("Invalid reference '{}': {}", token.value, e),
1835                        position: Some(self.position),
1836                    })?;
1837                Ok(ASTNode::new(
1838                    ASTNodeType::Reference {
1839                        original: token.value.clone(),
1840                        reference,
1841                    },
1842                    Some(token),
1843                ))
1844            }
1845            _ => Err(ParserError {
1846                message: format!("Unexpected operand subtype: {:?}", token.subtype),
1847                position: Some(self.position),
1848            }),
1849        }
1850    }
1851
1852    fn parse_function(&mut self, func_token: Token) -> Result<ASTNode, ParserError> {
1853        let name = func_token.value[..func_token.value.len() - 1].to_string();
1854        let args = self.parse_function_arguments()?;
1855        // Determine volatility for this function
1856        let this_is_volatile = self
1857            .volatility_classifier
1858            .as_ref()
1859            .map(|f| f(name.as_str()))
1860            .unwrap_or(false);
1861        let args_volatile = args.iter().any(|a| a.contains_volatile);
1862
1863        Ok(ASTNode::new_with_volatile(
1864            ASTNodeType::Function { name, args },
1865            Some(func_token),
1866            this_is_volatile || args_volatile,
1867        ))
1868    }
1869
1870    /// Parse function arguments.
1871    fn parse_function_arguments(&mut self) -> Result<Vec<ASTNode>, ParserError> {
1872        let mut args = Vec::new();
1873
1874        // Check for closing parenthesis (empty arguments)
1875        if self.position < self.tokens.len()
1876            && self.tokens[self.position].token_type == TokenType::Func
1877            && self.tokens[self.position].subtype == TokenSubType::Close
1878        {
1879            self.position += 1;
1880            return Ok(args);
1881        }
1882
1883        // Handle optional arguments (consecutive separators)
1884        // Check if we start with a separator (empty first argument)
1885        if self.position < self.tokens.len()
1886            && self.tokens[self.position].token_type == TokenType::Sep
1887            && self.tokens[self.position].subtype == TokenSubType::Arg
1888        {
1889            // Empty first argument - represented as empty text literal for compatibility
1890            args.push(ASTNode::new(
1891                ASTNodeType::Literal(LiteralValue::Text("".to_string())),
1892                None,
1893            ));
1894            self.position += 1;
1895        } else {
1896            // Parse first argument
1897            args.push(self.parse_expression()?);
1898        }
1899
1900        // Parse remaining arguments
1901        while self.position < self.tokens.len() {
1902            let token = &self.tokens[self.position];
1903
1904            if token.token_type == TokenType::Sep && token.subtype == TokenSubType::Arg {
1905                self.position += 1;
1906                // Check for consecutive separators (empty argument)
1907                if self.position < self.tokens.len() {
1908                    let next_token = &self.tokens[self.position];
1909                    if next_token.token_type == TokenType::Sep
1910                        && next_token.subtype == TokenSubType::Arg
1911                    {
1912                        // Empty argument - represented as empty text literal for compatibility
1913                        args.push(ASTNode::new(
1914                            ASTNodeType::Literal(LiteralValue::Text("".to_string())),
1915                            None,
1916                        ));
1917                    } else if next_token.token_type == TokenType::Func
1918                        && next_token.subtype == TokenSubType::Close
1919                    {
1920                        // Empty last argument
1921                        args.push(ASTNode::new(
1922                            ASTNodeType::Literal(LiteralValue::Text("".to_string())),
1923                            None,
1924                        ));
1925                        self.position += 1;
1926                        break;
1927                    } else {
1928                        args.push(self.parse_expression()?);
1929                    }
1930                } else {
1931                    // Trailing separator at end of formula
1932                    args.push(ASTNode::new(
1933                        ASTNodeType::Literal(LiteralValue::Text("".to_string())),
1934                        None,
1935                    ));
1936                }
1937            } else if token.token_type == TokenType::Func && token.subtype == TokenSubType::Close {
1938                self.position += 1;
1939                break;
1940            } else {
1941                return Err(ParserError {
1942                    message: format!("Expected ',' or ')' in function arguments, got {token:?}"),
1943                    position: Some(self.position),
1944                });
1945            }
1946        }
1947
1948        Ok(args)
1949    }
1950
1951    fn parse_array(&mut self) -> Result<ASTNode, ParserError> {
1952        let mut rows = Vec::new();
1953        let mut current_row = Vec::new();
1954
1955        // Check for empty array
1956        if self.position < self.tokens.len()
1957            && self.tokens[self.position].token_type == TokenType::Array
1958            && self.tokens[self.position].subtype == TokenSubType::Close
1959        {
1960            self.position += 1;
1961            return Ok(ASTNode::new(ASTNodeType::Array(rows), None));
1962        }
1963
1964        // Parse first element
1965        current_row.push(self.parse_expression()?);
1966
1967        while self.position < self.tokens.len() {
1968            let token = &self.tokens[self.position];
1969
1970            if token.token_type == TokenType::Sep {
1971                if token.subtype == TokenSubType::Arg {
1972                    // Column separator
1973                    self.position += 1;
1974                    current_row.push(self.parse_expression()?);
1975                } else if token.subtype == TokenSubType::Row {
1976                    // Row separator
1977                    self.position += 1;
1978                    rows.push(current_row);
1979                    current_row = vec![self.parse_expression()?];
1980                }
1981            } else if token.token_type == TokenType::Array && token.subtype == TokenSubType::Close {
1982                self.position += 1;
1983                rows.push(current_row);
1984                break;
1985            } else {
1986                return Err(ParserError {
1987                    message: format!("Unexpected token in array: {token:?}"),
1988                    position: Some(self.position),
1989                });
1990            }
1991        }
1992
1993        // Array volatility is the OR of element volatility
1994        let contains_volatile = rows
1995            .iter()
1996            .flat_map(|r| r.iter())
1997            .any(|n| n.contains_volatile);
1998        Ok(ASTNode::new_with_volatile(
1999            ASTNodeType::Array(rows),
2000            None,
2001            contains_volatile,
2002        ))
2003    }
2004}
2005
2006impl From<TokenizerError> for ParserError {
2007    fn from(err: TokenizerError) -> Self {
2008        ParserError {
2009            message: err.message,
2010            position: Some(err.pos),
2011        }
2012    }
2013}
2014
2015struct SpanParser<'a> {
2016    source: &'a str,
2017    tokens: &'a [crate::tokenizer::TokenSpan],
2018    position: usize,
2019    volatility_classifier: Option<VolatilityClassifierBox>,
2020    dialect: FormulaDialect,
2021}
2022
2023impl<'a> SpanParser<'a> {
2024    fn new(
2025        source: &'a str,
2026        tokens: &'a [crate::tokenizer::TokenSpan],
2027        dialect: FormulaDialect,
2028    ) -> Self {
2029        SpanParser {
2030            source,
2031            tokens,
2032            position: 0,
2033            volatility_classifier: None,
2034            dialect,
2035        }
2036    }
2037
2038    fn with_volatility_classifier<F>(mut self, f: F) -> Self
2039    where
2040        F: Fn(&str) -> bool + Send + Sync + 'static,
2041    {
2042        self.volatility_classifier = Some(Box::new(f));
2043        self
2044    }
2045
2046    fn skip_whitespace(&mut self) {
2047        while self.position < self.tokens.len()
2048            && self.tokens[self.position].token_type == TokenType::Whitespace
2049        {
2050            self.position += 1;
2051        }
2052    }
2053
2054    fn span_value(&self, span: &crate::tokenizer::TokenSpan) -> &str {
2055        &self.source[span.start..span.end]
2056    }
2057
2058    fn span_to_token(&self, span: &crate::tokenizer::TokenSpan) -> Token {
2059        Token::new_with_span(
2060            self.span_value(span).to_string(),
2061            span.token_type,
2062            span.subtype,
2063            span.start,
2064            span.end,
2065        )
2066    }
2067
2068    fn span_precedence(&self, span: &crate::tokenizer::TokenSpan) -> Option<(u8, Associativity)> {
2069        if !matches!(
2070            span.token_type,
2071            TokenType::OpPrefix | TokenType::OpInfix | TokenType::OpPostfix
2072        ) {
2073            return None;
2074        }
2075
2076        let op = if span.token_type == TokenType::OpPrefix {
2077            "u"
2078        } else {
2079            self.span_value(span)
2080        };
2081
2082        match op {
2083            ":" | " " | "," => Some((8, Associativity::Left)),
2084            "%" => Some((7, Associativity::Left)),
2085            "^" => Some((6, Associativity::Right)),
2086            "u" => Some((5, Associativity::Right)),
2087            "*" | "/" => Some((4, Associativity::Left)),
2088            "+" | "-" => Some((3, Associativity::Left)),
2089            "&" => Some((2, Associativity::Left)),
2090            "=" | "<" | ">" | "<=" | ">=" | "<>" => Some((1, Associativity::Left)),
2091            _ => None,
2092        }
2093    }
2094
2095    fn parse(&mut self) -> Result<ASTNode, ParserError> {
2096        if self.tokens.is_empty() {
2097            return Err(ParserError {
2098                message: "No tokens to parse".to_string(),
2099                position: None,
2100            });
2101        }
2102
2103        self.skip_whitespace();
2104        if self.position >= self.tokens.len() {
2105            return Err(ParserError {
2106                message: "No tokens to parse".to_string(),
2107                position: None,
2108            });
2109        }
2110
2111        if self.tokens[self.position].token_type == TokenType::Literal {
2112            let span = self.tokens[self.position];
2113            self.position += 1;
2114            self.skip_whitespace();
2115            if self.position < self.tokens.len() {
2116                return Err(ParserError {
2117                    message: format!(
2118                        "Unexpected token at position {}: {:?}",
2119                        self.position, self.tokens[self.position]
2120                    ),
2121                    position: Some(self.position),
2122                });
2123            }
2124
2125            let token = self.span_to_token(&span);
2126            return Ok(ASTNode::new(
2127                ASTNodeType::Literal(LiteralValue::Text(token.value.clone())),
2128                Some(token),
2129            ));
2130        }
2131
2132        let ast = self.parse_expression()?;
2133        self.skip_whitespace();
2134        if self.position < self.tokens.len() {
2135            return Err(ParserError {
2136                message: format!(
2137                    "Unexpected token at position {}: {:?}",
2138                    self.position, self.tokens[self.position]
2139                ),
2140                position: Some(self.position),
2141            });
2142        }
2143        Ok(ast)
2144    }
2145
2146    fn parse_expression(&mut self) -> Result<ASTNode, ParserError> {
2147        self.parse_bp(0)
2148    }
2149
2150    fn parse_bp(&mut self, min_precedence: u8) -> Result<ASTNode, ParserError> {
2151        let mut left = self.parse_prefix()?;
2152
2153        loop {
2154            self.skip_whitespace();
2155            if self.position >= self.tokens.len() {
2156                break;
2157            }
2158
2159            if self.tokens[self.position].token_type == TokenType::OpPostfix {
2160                let (precedence, _) = self
2161                    .span_precedence(&self.tokens[self.position])
2162                    .unwrap_or((0, Associativity::Left));
2163                if precedence < min_precedence {
2164                    break;
2165                }
2166
2167                let op_span = self.tokens[self.position];
2168                self.position += 1;
2169                let op_token = self.span_to_token(&op_span);
2170                let contains_volatile = left.contains_volatile;
2171                left = ASTNode::new_with_volatile(
2172                    ASTNodeType::UnaryOp {
2173                        op: op_token.value.clone(),
2174                        expr: Box::new(left),
2175                    },
2176                    Some(op_token),
2177                    contains_volatile,
2178                );
2179                continue;
2180            }
2181
2182            let token = &self.tokens[self.position];
2183            if token.token_type != TokenType::OpInfix {
2184                break;
2185            }
2186
2187            let (precedence, associativity) = self
2188                .span_precedence(token)
2189                .unwrap_or((0, Associativity::Left));
2190            if precedence < min_precedence {
2191                break;
2192            }
2193
2194            let op_span = self.tokens[self.position];
2195            self.position += 1;
2196
2197            let next_min_precedence = if associativity == Associativity::Left {
2198                precedence + 1
2199            } else {
2200                precedence
2201            };
2202
2203            let right = self.parse_bp(next_min_precedence)?;
2204            let op_token = self.span_to_token(&op_span);
2205            let contains_volatile = left.contains_volatile || right.contains_volatile;
2206            left = ASTNode::new_with_volatile(
2207                ASTNodeType::BinaryOp {
2208                    op: op_token.value.clone(),
2209                    left: Box::new(left),
2210                    right: Box::new(right),
2211                },
2212                Some(op_token),
2213                contains_volatile,
2214            );
2215        }
2216
2217        Ok(left)
2218    }
2219
2220    fn parse_prefix(&mut self) -> Result<ASTNode, ParserError> {
2221        self.skip_whitespace();
2222        if self.position < self.tokens.len()
2223            && self.tokens[self.position].token_type == TokenType::OpPrefix
2224        {
2225            let op_span = self.tokens[self.position];
2226            self.position += 1;
2227
2228            let (precedence, _) = self
2229                .span_precedence(&op_span)
2230                .unwrap_or((0, Associativity::Right));
2231
2232            let expr = self.parse_bp(precedence)?;
2233            let op_token = self.span_to_token(&op_span);
2234            let contains_volatile = expr.contains_volatile;
2235            return Ok(ASTNode::new_with_volatile(
2236                ASTNodeType::UnaryOp {
2237                    op: op_token.value.clone(),
2238                    expr: Box::new(expr),
2239                },
2240                Some(op_token),
2241                contains_volatile,
2242            ));
2243        }
2244
2245        self.parse_primary()
2246    }
2247
2248    fn parse_primary(&mut self) -> Result<ASTNode, ParserError> {
2249        self.skip_whitespace();
2250        if self.position >= self.tokens.len() {
2251            return Err(ParserError {
2252                message: "Unexpected end of tokens".to_string(),
2253                position: Some(self.position),
2254            });
2255        }
2256
2257        let token = &self.tokens[self.position];
2258        match token.token_type {
2259            TokenType::Operand => {
2260                let span = self.tokens[self.position];
2261                self.position += 1;
2262                self.parse_operand(span)
2263            }
2264            TokenType::Func => {
2265                let span = self.tokens[self.position];
2266                self.position += 1;
2267                self.parse_function(span)
2268            }
2269            TokenType::Paren if token.subtype == TokenSubType::Open => {
2270                self.position += 1;
2271                let expr = self.parse_expression()?;
2272                self.skip_whitespace();
2273                if self.position >= self.tokens.len()
2274                    || self.tokens[self.position].token_type != TokenType::Paren
2275                    || self.tokens[self.position].subtype != TokenSubType::Close
2276                {
2277                    return Err(ParserError {
2278                        message: "Expected closing parenthesis".to_string(),
2279                        position: Some(self.position),
2280                    });
2281                }
2282                self.position += 1;
2283                Ok(expr)
2284            }
2285            TokenType::Array if token.subtype == TokenSubType::Open => {
2286                self.position += 1;
2287                self.parse_array()
2288            }
2289            _ => Err(ParserError {
2290                message: format!("Unexpected token: {token:?}"),
2291                position: Some(self.position),
2292            }),
2293        }
2294    }
2295
2296    fn parse_operand(&mut self, span: crate::tokenizer::TokenSpan) -> Result<ASTNode, ParserError> {
2297        let value = self.span_value(&span);
2298        let token = self.span_to_token(&span);
2299
2300        match span.subtype {
2301            TokenSubType::Number => {
2302                let value = value.parse::<f64>().map_err(|_| ParserError {
2303                    message: format!("Invalid number: {value}"),
2304                    position: Some(self.position),
2305                })?;
2306                Ok(ASTNode::new(
2307                    ASTNodeType::Literal(LiteralValue::Number(value)),
2308                    Some(token),
2309                ))
2310            }
2311            TokenSubType::Text => {
2312                let mut text = value.to_string();
2313                if text.starts_with('"') && text.ends_with('"') && text.len() >= 2 {
2314                    text = text[1..text.len() - 1].to_string();
2315                    text = text.replace("\"\"", "\"");
2316                }
2317                Ok(ASTNode::new(
2318                    ASTNodeType::Literal(LiteralValue::Text(text)),
2319                    Some(token),
2320                ))
2321            }
2322            TokenSubType::Logical => {
2323                let v = value.to_uppercase() == "TRUE";
2324                Ok(ASTNode::new(
2325                    ASTNodeType::Literal(LiteralValue::Boolean(v)),
2326                    Some(token),
2327                ))
2328            }
2329            TokenSubType::Error => {
2330                let error = ExcelError::from_error_string(value);
2331                Ok(ASTNode::new(
2332                    ASTNodeType::Literal(LiteralValue::Error(error)),
2333                    Some(token),
2334                ))
2335            }
2336            TokenSubType::Range => {
2337                let reference = ReferenceType::from_string_with_dialect(value, self.dialect)
2338                    .map_err(|e| ParserError {
2339                        message: format!("Invalid reference '{value}': {e}"),
2340                        position: Some(self.position),
2341                    })?;
2342                Ok(ASTNode::new(
2343                    ASTNodeType::Reference {
2344                        original: value.to_string(),
2345                        reference,
2346                    },
2347                    Some(token),
2348                ))
2349            }
2350            _ => Err(ParserError {
2351                message: format!("Unexpected operand subtype: {:?}", span.subtype),
2352                position: Some(self.position),
2353            }),
2354        }
2355    }
2356
2357    fn parse_function(
2358        &mut self,
2359        func_span: crate::tokenizer::TokenSpan,
2360    ) -> Result<ASTNode, ParserError> {
2361        let func_value = self.span_value(&func_span);
2362        if func_value.is_empty() {
2363            return Err(ParserError {
2364                message: "Invalid function token".to_string(),
2365                position: Some(self.position),
2366            });
2367        }
2368        let name = func_value[..func_value.len() - 1].to_string();
2369        let args = self.parse_function_arguments()?;
2370
2371        let this_is_volatile = self
2372            .volatility_classifier
2373            .as_ref()
2374            .map(|f| f(name.as_str()))
2375            .unwrap_or(false);
2376        let args_volatile = args.iter().any(|a| a.contains_volatile);
2377
2378        let func_token = self.span_to_token(&func_span);
2379        Ok(ASTNode::new_with_volatile(
2380            ASTNodeType::Function { name, args },
2381            Some(func_token),
2382            this_is_volatile || args_volatile,
2383        ))
2384    }
2385
2386    fn parse_function_arguments(&mut self) -> Result<Vec<ASTNode>, ParserError> {
2387        let mut args = Vec::new();
2388
2389        self.skip_whitespace();
2390        if self.position < self.tokens.len()
2391            && self.tokens[self.position].token_type == TokenType::Func
2392            && self.tokens[self.position].subtype == TokenSubType::Close
2393        {
2394            self.position += 1;
2395            return Ok(args);
2396        }
2397
2398        self.skip_whitespace();
2399        if self.position < self.tokens.len()
2400            && self.tokens[self.position].token_type == TokenType::Sep
2401            && self.tokens[self.position].subtype == TokenSubType::Arg
2402        {
2403            args.push(ASTNode::new(
2404                ASTNodeType::Literal(LiteralValue::Text("".to_string())),
2405                None,
2406            ));
2407            self.position += 1;
2408        } else {
2409            args.push(self.parse_expression()?);
2410        }
2411
2412        while self.position < self.tokens.len() {
2413            self.skip_whitespace();
2414            if self.position >= self.tokens.len() {
2415                break;
2416            }
2417
2418            let token = &self.tokens[self.position];
2419            if token.token_type == TokenType::Sep && token.subtype == TokenSubType::Arg {
2420                self.position += 1;
2421                self.skip_whitespace();
2422                if self.position < self.tokens.len() {
2423                    let next_token = &self.tokens[self.position];
2424                    if next_token.token_type == TokenType::Sep
2425                        && next_token.subtype == TokenSubType::Arg
2426                    {
2427                        args.push(ASTNode::new(
2428                            ASTNodeType::Literal(LiteralValue::Text("".to_string())),
2429                            None,
2430                        ));
2431                    } else if next_token.token_type == TokenType::Func
2432                        && next_token.subtype == TokenSubType::Close
2433                    {
2434                        args.push(ASTNode::new(
2435                            ASTNodeType::Literal(LiteralValue::Text("".to_string())),
2436                            None,
2437                        ));
2438                        self.position += 1;
2439                        break;
2440                    } else {
2441                        args.push(self.parse_expression()?);
2442                    }
2443                } else {
2444                    args.push(ASTNode::new(
2445                        ASTNodeType::Literal(LiteralValue::Text("".to_string())),
2446                        None,
2447                    ));
2448                }
2449            } else if token.token_type == TokenType::Func && token.subtype == TokenSubType::Close {
2450                self.position += 1;
2451                break;
2452            } else {
2453                return Err(ParserError {
2454                    message: format!("Expected ',' or ')' in function arguments, got {token:?}"),
2455                    position: Some(self.position),
2456                });
2457            }
2458        }
2459
2460        Ok(args)
2461    }
2462
2463    fn parse_array(&mut self) -> Result<ASTNode, ParserError> {
2464        let mut rows = Vec::new();
2465        let mut current_row = Vec::new();
2466
2467        self.skip_whitespace();
2468        if self.position < self.tokens.len()
2469            && self.tokens[self.position].token_type == TokenType::Array
2470            && self.tokens[self.position].subtype == TokenSubType::Close
2471        {
2472            self.position += 1;
2473            return Ok(ASTNode::new(ASTNodeType::Array(rows), None));
2474        }
2475
2476        current_row.push(self.parse_expression()?);
2477
2478        while self.position < self.tokens.len() {
2479            self.skip_whitespace();
2480            if self.position >= self.tokens.len() {
2481                break;
2482            }
2483            let token = &self.tokens[self.position];
2484
2485            if token.token_type == TokenType::Sep {
2486                if token.subtype == TokenSubType::Arg {
2487                    self.position += 1;
2488                    current_row.push(self.parse_expression()?);
2489                } else if token.subtype == TokenSubType::Row {
2490                    self.position += 1;
2491                    rows.push(current_row);
2492                    current_row = vec![self.parse_expression()?];
2493                }
2494            } else if token.token_type == TokenType::Array && token.subtype == TokenSubType::Close {
2495                self.position += 1;
2496                rows.push(current_row);
2497                break;
2498            } else {
2499                return Err(ParserError {
2500                    message: format!("Unexpected token in array: {token:?}"),
2501                    position: Some(self.position),
2502                });
2503            }
2504        }
2505
2506        let contains_volatile = rows
2507            .iter()
2508            .flat_map(|r| r.iter())
2509            .any(|n| n.contains_volatile);
2510
2511        Ok(ASTNode::new_with_volatile(
2512            ASTNodeType::Array(rows),
2513            None,
2514            contains_volatile,
2515        ))
2516    }
2517}
2518
2519/// Normalise a reference string to its canonical form
2520pub fn normalise_reference(reference: &str) -> Result<String, ParsingError> {
2521    let ref_type = ReferenceType::from_string(reference)?;
2522    Ok(ref_type.to_string())
2523}
2524
2525pub fn parse<T: AsRef<str>>(formula: T) -> Result<ASTNode, ParserError> {
2526    parse_with_dialect(formula, FormulaDialect::Excel)
2527}
2528
2529pub fn parse_with_dialect<T: AsRef<str>>(
2530    formula: T,
2531    dialect: FormulaDialect,
2532) -> Result<ASTNode, ParserError> {
2533    let spans = crate::tokenizer::tokenize_spans_with_dialect(formula.as_ref(), dialect)?;
2534    let mut parser = SpanParser::new(formula.as_ref(), &spans, dialect);
2535    parser.parse()
2536}
2537
2538/// Parse a single formula and annotate volatility using the provided classifier.
2539/// This is a convenience wrapper around `Parser::new_with_classifier`.
2540pub fn parse_with_volatility_classifier<T, F>(
2541    formula: T,
2542    classifier: F,
2543) -> Result<ASTNode, ParserError>
2544where
2545    T: AsRef<str>,
2546    F: Fn(&str) -> bool + Send + Sync + 'static,
2547{
2548    parse_with_dialect_and_volatility_classifier(formula, FormulaDialect::Excel, classifier)
2549}
2550
2551pub fn parse_with_dialect_and_volatility_classifier<T, F>(
2552    formula: T,
2553    dialect: FormulaDialect,
2554    classifier: F,
2555) -> Result<ASTNode, ParserError>
2556where
2557    T: AsRef<str>,
2558    F: Fn(&str) -> bool + Send + Sync + 'static,
2559{
2560    let spans = crate::tokenizer::tokenize_spans_with_dialect(formula.as_ref(), dialect)?;
2561    let mut parser =
2562        SpanParser::new(formula.as_ref(), &spans, dialect).with_volatility_classifier(classifier);
2563    parser.parse()
2564}
2565
2566/// Efficient batch parser with an internal token cache and optional volatility classifier.
2567///
2568/// The cache is keyed by the original formula string; repeated formulas across a batch
2569/// (very common in spreadsheets) will avoid re-tokenization and whitespace filtering.
2570pub struct BatchParser {
2571    include_whitespace: bool,
2572    volatility_classifier: Option<VolatilityClassifierArc>,
2573    token_cache: std::collections::HashMap<String, Arc<[crate::tokenizer::TokenSpan]>>, // cached tokens
2574    dialect: FormulaDialect,
2575}
2576
2577impl BatchParser {
2578    pub fn builder() -> BatchParserBuilder {
2579        BatchParserBuilder::default()
2580    }
2581
2582    /// Parse a formula using the internal cache and configured classifier.
2583    pub fn parse(&mut self, formula: &str) -> Result<ASTNode, ParserError> {
2584        let spans = if let Some(tokens) = self.token_cache.get(formula) {
2585            Arc::clone(tokens)
2586        } else {
2587            let mut spans = crate::tokenizer::tokenize_spans_with_dialect(formula, self.dialect)?;
2588            if !self.include_whitespace {
2589                spans.retain(|t| t.token_type != TokenType::Whitespace);
2590            }
2591
2592            let spans: Arc<[crate::tokenizer::TokenSpan]> = Arc::from(spans.into_boxed_slice());
2593            self.token_cache
2594                .insert(formula.to_string(), Arc::clone(&spans));
2595            spans
2596        };
2597
2598        let mut parser = SpanParser::new(formula, spans.as_ref(), self.dialect);
2599        if let Some(classifier) = self.volatility_classifier.clone() {
2600            parser = parser.with_volatility_classifier(move |name| classifier(name));
2601        }
2602        parser.parse()
2603    }
2604}
2605
2606#[derive(Default)]
2607pub struct BatchParserBuilder {
2608    include_whitespace: bool,
2609    volatility_classifier: Option<VolatilityClassifierArc>,
2610    dialect: FormulaDialect,
2611}
2612
2613impl BatchParserBuilder {
2614    pub fn include_whitespace(mut self, include: bool) -> Self {
2615        self.include_whitespace = include;
2616        self
2617    }
2618
2619    pub fn with_volatility_classifier<F>(mut self, f: F) -> Self
2620    where
2621        F: Fn(&str) -> bool + Send + Sync + 'static,
2622    {
2623        self.volatility_classifier = Some(Arc::new(f));
2624        self
2625    }
2626
2627    pub fn dialect(mut self, dialect: FormulaDialect) -> Self {
2628        self.dialect = dialect;
2629        self
2630    }
2631
2632    pub fn build(self) -> BatchParser {
2633        BatchParser {
2634            include_whitespace: self.include_whitespace,
2635            volatility_classifier: self.volatility_classifier,
2636            token_cache: std::collections::HashMap::new(),
2637            dialect: self.dialect,
2638        }
2639    }
2640}