formualizer_parse/
parser.rs

1use crate::tokenizer::{Associativity, Token, TokenSubType, TokenType, Tokenizer, TokenizerError};
2use crate::types::{FormulaDialect, ParsingError};
3use crate::{ExcelError, LiteralValue};
4
5use crate::hasher::FormulaHasher;
6use once_cell::sync::Lazy;
7use smallvec::SmallVec;
8use std::error::Error;
9use std::fmt::{self, Display};
10use std::hash::{Hash, Hasher};
11use std::sync::Arc;
12
13type VolatilityFn = dyn Fn(&str) -> bool + Send + Sync + 'static;
14type VolatilityClassifierBox = Box<VolatilityFn>;
15type VolatilityClassifierArc = Arc<VolatilityFn>;
16
17/// A custom error type for the parser.
18#[derive(Debug)]
19pub struct ParserError {
20    pub message: String,
21    pub position: Option<usize>,
22}
23
24impl Display for ParserError {
25    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
26        if let Some(pos) = self.position {
27            write!(f, "ParserError at position {}: {}", pos, self.message)
28        } else {
29            write!(f, "ParserError: {}", self.message)
30        }
31    }
32}
33
34impl Error for ParserError {}
35
36// Column lookup table for common columns (A-ZZ = 702 columns)
37static COLUMN_LOOKUP: Lazy<Vec<String>> = Lazy::new(|| {
38    let mut cols = Vec::with_capacity(702);
39    // Single letters A-Z
40    for c in b'A'..=b'Z' {
41        cols.push(String::from(c as char));
42    }
43    // Double letters AA-ZZ
44    for c1 in b'A'..=b'Z' {
45        for c2 in b'A'..=b'Z' {
46            cols.push(format!("{}{}", c1 as char, c2 as char));
47        }
48    }
49    cols
50});
51
52/// A structured table reference specifier for accessing specific parts of a table
53#[derive(Debug, Clone, PartialEq, Hash)]
54pub enum TableSpecifier {
55    /// The entire table
56    All,
57    /// The data area of the table (no headers or totals)
58    Data,
59    /// The headers row
60    Headers,
61    /// The totals row
62    Totals,
63    /// A specific row
64    Row(TableRowSpecifier),
65    /// A specific column
66    Column(String),
67    /// A range of columns
68    ColumnRange(String, String),
69    /// Special items like #Headers, #Data, #Totals, etc.
70    SpecialItem(SpecialItem),
71    /// A combination of specifiers, for complex references
72    Combination(Vec<Box<TableSpecifier>>),
73}
74
75/// Specifies which row(s) to use in a table reference
76#[derive(Debug, Clone, PartialEq, Hash)]
77pub enum TableRowSpecifier {
78    /// The current row (context dependent)
79    Current,
80    /// All rows
81    All,
82    /// Data rows only
83    Data,
84    /// Headers row
85    Headers,
86    /// Totals row
87    Totals,
88    /// Specific row by index (1-based)
89    Index(u32),
90}
91
92/// Special items in structured references
93#[derive(Debug, Clone, PartialEq, Hash)]
94pub enum SpecialItem {
95    /// The #Headers item
96    Headers,
97    /// The #Data item
98    Data,
99    /// The #Totals item
100    Totals,
101    /// The #All item (the whole table)
102    All,
103    /// The @ item (current row)
104    ThisRow,
105}
106
107/// A reference to a table including specifiers
108#[derive(Debug, Clone, PartialEq, Hash)]
109pub struct TableReference {
110    /// The name of the table
111    pub name: String,
112    /// Optional specifier for which part of the table to use
113    pub specifier: Option<TableSpecifier>,
114}
115
116/// A reference to something outside the cell.
117#[derive(Debug, Clone, PartialEq, Hash)]
118pub enum ReferenceType {
119    Cell {
120        sheet: Option<String>,
121        row: u32,
122        col: u32,
123    },
124    Range {
125        sheet: Option<String>,
126        start_row: Option<u32>,
127        start_col: Option<u32>,
128        end_row: Option<u32>,
129        end_col: Option<u32>,
130    },
131    Table(TableReference),
132    NamedRange(String),
133}
134
135impl Display for TableSpecifier {
136    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
137        match self {
138            TableSpecifier::All => write!(f, "#All"),
139            TableSpecifier::Data => write!(f, "#Data"),
140            TableSpecifier::Headers => write!(f, "#Headers"),
141            TableSpecifier::Totals => write!(f, "#Totals"),
142            TableSpecifier::Row(row) => write!(f, "{row}"),
143            TableSpecifier::Column(column) => write!(f, "{column}"),
144            TableSpecifier::ColumnRange(start, end) => write!(f, "{start}:{end}"),
145            TableSpecifier::SpecialItem(item) => write!(f, "{item}"),
146            TableSpecifier::Combination(specs) => {
147                // Emit nested bracketed parts so the surrounding Table formatter prints
148                // canonical structured refs like Table[[#Headers],[Column1]:[Column2]]
149                let parts: Vec<String> = specs.iter().map(|s| format!("[{s}]")).collect();
150                write!(f, "{}", parts.join(","))
151            }
152        }
153    }
154}
155
156impl Display for TableRowSpecifier {
157    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
158        match self {
159            TableRowSpecifier::Current => write!(f, "@"),
160            TableRowSpecifier::All => write!(f, "#All"),
161            TableRowSpecifier::Data => write!(f, "#Data"),
162            TableRowSpecifier::Headers => write!(f, "#Headers"),
163            TableRowSpecifier::Totals => write!(f, "#Totals"),
164            TableRowSpecifier::Index(idx) => write!(f, "{idx}"),
165        }
166    }
167}
168
169impl Display for SpecialItem {
170    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
171        match self {
172            SpecialItem::Headers => write!(f, "#Headers"),
173            SpecialItem::Data => write!(f, "#Data"),
174            SpecialItem::Totals => write!(f, "#Totals"),
175            SpecialItem::All => write!(f, "#All"),
176            SpecialItem::ThisRow => write!(f, "@"),
177        }
178    }
179}
180
181/// Check if a sheet name needs to be quoted in Excel formulas
182fn sheet_name_needs_quoting(name: &str) -> bool {
183    if name.is_empty() {
184        return false;
185    }
186
187    let bytes = name.as_bytes();
188
189    // Check if starts with a digit
190    if bytes[0].is_ascii_digit() {
191        return true;
192    }
193
194    // Check for any special characters that require quoting
195    // This includes: space, !, ", #, $, %, &, ', (, ), *, +, comma, -, ., /, :, ;, <, =, >, ?, @, [, \, ], ^, `, {, |, }, ~
196    for &byte in bytes {
197        match byte {
198            b' ' | b'!' | b'"' | b'#' | b'$' | b'%' | b'&' | b'\'' | b'(' | b')' | b'*' | b'+'
199            | b',' | b'-' | b'.' | b'/' | b':' | b';' | b'<' | b'=' | b'>' | b'?' | b'@' | b'['
200            | b'\\' | b']' | b'^' | b'`' | b'{' | b'|' | b'}' | b'~' => return true,
201            _ => {}
202        }
203    }
204
205    // Check for Excel reserved words (case-insensitive)
206    let upper = name.to_uppercase();
207    matches!(
208        upper.as_str(),
209        "TRUE" | "FALSE" | "NULL" | "REF" | "DIV" | "NAME" | "NUM" | "VALUE" | "N/A"
210    )
211}
212
213#[derive(Debug, Clone)]
214struct OpenFormulaRefPart {
215    sheet: Option<String>,
216    coord: String,
217}
218
219impl ReferenceType {
220    /// Create a reference from a string. Can be A1, A:A, A1:B2, Table1[Column], etc.
221    pub fn from_string(reference: &str) -> Result<Self, ParsingError> {
222        Self::parse_excel_reference(reference)
223    }
224
225    /// Create a reference from a string using the specified formula dialect.
226    pub fn from_string_with_dialect(
227        reference: &str,
228        dialect: FormulaDialect,
229    ) -> Result<Self, ParsingError> {
230        match dialect {
231            FormulaDialect::Excel => Self::parse_excel_reference(reference),
232            FormulaDialect::OpenFormula => Self::parse_openformula_reference(reference)
233                .or_else(|_| Self::parse_excel_reference(reference)),
234        }
235    }
236
237    fn parse_excel_reference(reference: &str) -> Result<Self, ParsingError> {
238        // First check if this is a table reference (contains '[')
239        if reference.contains('[') {
240            return Self::parse_table_reference(reference);
241        }
242
243        // Extract sheet name if present
244        let (sheet, ref_part) = Self::extract_sheet_name(reference);
245
246        if ref_part.contains(':') {
247            // Range reference
248            Self::parse_range_reference(&ref_part, sheet)
249        } else {
250            // Try to parse as a single cell reference
251            match Self::parse_cell_reference(&ref_part) {
252                Ok((col, row)) => Ok(ReferenceType::Cell { sheet, row, col }),
253                Err(_) => {
254                    // Treat it as a named range
255                    Ok(ReferenceType::NamedRange(reference.to_string()))
256                }
257            }
258        }
259    }
260
261    /// Parse a range reference like "A1:B2", "A:A", or "1:1"
262    fn parse_range_reference(reference: &str, sheet: Option<String>) -> Result<Self, ParsingError> {
263        let mut parts = reference.splitn(2, ':');
264        let start = parts.next().unwrap();
265        let end = parts
266            .next()
267            .ok_or_else(|| ParsingError::InvalidReference(format!("Invalid range: {reference}")))?;
268
269        let (start_col, start_row) = Self::parse_range_part(start)?;
270        let (end_col, end_row) = Self::parse_range_part(end)?;
271
272        Ok(ReferenceType::Range {
273            sheet,
274            start_row,
275            start_col,
276            end_row,
277            end_col,
278        })
279    }
280
281    /// Parse a part of a range reference (either start or end).
282    /// Returns (column, row) where either can be None for infinite ranges.
283    fn parse_range_part(part: &str) -> Result<(Option<u32>, Option<u32>), ParsingError> {
284        // Try to parse as a normal cell reference (A1, B2, etc.)
285        if let Ok((col, row)) = Self::parse_cell_reference(part) {
286            return Ok((Some(col), Some(row)));
287        }
288
289        // Try to parse as column-only or row-only
290        let bytes = part.as_bytes();
291        let mut i = 0;
292
293        // Skip optional $
294        if i < bytes.len() && bytes[i] == b'$' {
295            i += 1;
296        }
297
298        // Check if we have letters (column)
299        let col_start = i;
300        while i < bytes.len() && bytes[i].is_ascii_alphabetic() {
301            i += 1;
302        }
303
304        if i > col_start {
305            // We have a column
306            let col_str = &part[col_start..i];
307            let col = Self::column_to_number(col_str)?;
308
309            // Skip optional $ before row
310            if i < bytes.len() && bytes[i] == b'$' {
311                i += 1;
312            }
313
314            // Check if we have digits (row)
315            let row_start = i;
316            while i < bytes.len() && bytes[i].is_ascii_digit() {
317                i += 1;
318            }
319
320            if i > row_start && i == bytes.len() {
321                // We have both column and row (shouldn't happen as parse_cell_reference should have caught it)
322                let row_str = &part[row_start..i];
323                let row = row_str.parse::<u32>().map_err(|_| {
324                    ParsingError::InvalidReference(format!("Invalid row: {row_str}"))
325                })?;
326                return Ok((Some(col), Some(row)));
327            } else if i == col_start + col_str.len()
328                || (i == col_start + col_str.len() + 1 && bytes[col_start + col_str.len()] == b'$')
329            {
330                // Just a column
331                return Ok((Some(col), None));
332            }
333        } else {
334            // No column, check for row-only reference
335            i = 0;
336            if i < bytes.len() && bytes[i] == b'$' {
337                i += 1;
338            }
339
340            let row_start = i;
341            while i < bytes.len() && bytes[i].is_ascii_digit() {
342                i += 1;
343            }
344
345            if i > row_start && i == bytes.len() {
346                let row_str = &part[row_start..i];
347                let row = row_str.parse::<u32>().map_err(|_| {
348                    ParsingError::InvalidReference(format!("Invalid row: {row_str}"))
349                })?;
350                return Ok((None, Some(row)));
351            }
352        }
353
354        Err(ParsingError::InvalidReference(format!(
355            "Invalid range part: {part}"
356        )))
357    }
358
359    /// Parse a cell reference like "A1" into (column, row) using byte-based parsing.
360    fn parse_cell_reference(reference: &str) -> Result<(u32, u32), ParsingError> {
361        let bytes = reference.as_bytes();
362        let mut i = 0;
363
364        // Skip optional $ for absolute column reference
365        if i < bytes.len() && bytes[i] == b'$' {
366            i += 1;
367        }
368
369        // Parse column letters
370        let col_start = i;
371        while i < bytes.len() && bytes[i].is_ascii_alphabetic() {
372            i += 1;
373        }
374
375        if i == col_start {
376            return Err(ParsingError::InvalidReference(format!(
377                "Invalid cell reference: {reference}"
378            )));
379        }
380
381        let col_str = &reference[col_start..i];
382        let col = Self::column_to_number(col_str)?;
383
384        // Skip optional $ for absolute row reference
385        if i < bytes.len() && bytes[i] == b'$' {
386            i += 1;
387        }
388
389        // Parse row number
390        let row_start = i;
391        while i < bytes.len() && bytes[i].is_ascii_digit() {
392            i += 1;
393        }
394
395        if i == row_start || i != bytes.len() {
396            return Err(ParsingError::InvalidReference(format!(
397                "Invalid cell reference: {reference}"
398            )));
399        }
400
401        let row_str = &reference[row_start..i];
402        let row = row_str
403            .parse::<u32>()
404            .map_err(|_| ParsingError::InvalidReference(format!("Invalid row: {row_str}")))?;
405
406        Ok((col, row))
407    }
408
409    /// Convert a column letter (e.g., "A", "BC") to a column number (1-based) using byte operations.
410    pub(crate) fn column_to_number(column: &str) -> Result<u32, ParsingError> {
411        let bytes = column.as_bytes();
412
413        // Excel column names have a practical limit (XFD = 16384 is the max in Excel)
414        // Anything longer than 3 characters is likely not a column reference
415        if bytes.is_empty() || bytes.len() > 3 {
416            return Err(ParsingError::InvalidReference(format!(
417                "Invalid column: {column}"
418            )));
419        }
420
421        let mut result = 0u32;
422
423        for &b in bytes {
424            if !b.is_ascii_alphabetic() {
425                return Err(ParsingError::InvalidReference(format!(
426                    "Invalid column: {column}"
427                )));
428            }
429            // Use checked arithmetic to prevent overflow
430            result = result
431                .checked_mul(26)
432                .and_then(|r| r.checked_add((b.to_ascii_uppercase() - b'A' + 1) as u32))
433                .ok_or_else(|| {
434                    ParsingError::InvalidReference(format!("Invalid column: {column}"))
435                })?;
436        }
437
438        Ok(result)
439    }
440
441    /// Convert a column number to a column letter using lookup table for common values.
442    pub(crate) fn number_to_column(mut num: u32) -> String {
443        // Use lookup table for common columns (1-702 covers A-ZZ)
444        if num > 0 && num <= 702 {
445            return COLUMN_LOOKUP[(num - 1) as usize].clone();
446        }
447
448        // Fallback for larger column numbers
449        let mut result = String::with_capacity(3);
450        while num > 0 {
451            num -= 1;
452            result.insert(0, ((num % 26) as u8 + b'A') as char);
453            num /= 26;
454        }
455        result
456    }
457}
458
459impl Display for ReferenceType {
460    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
461        write!(
462            f,
463            "{}",
464            match self {
465                ReferenceType::Cell { sheet, row, col } => {
466                    let col_str = Self::number_to_column(*col);
467                    let row_str = row.to_string();
468
469                    if let Some(sheet_name) = sheet {
470                        if sheet_name_needs_quoting(sheet_name) {
471                            // Escape any single quotes in the sheet name by doubling them
472                            let escaped_name = sheet_name.replace('\'', "''");
473                            format!("'{escaped_name}'!{col_str}{row_str}")
474                        } else {
475                            format!("{sheet_name}!{col_str}{row_str}")
476                        }
477                    } else {
478                        format!("{col_str}{row_str}")
479                    }
480                }
481                ReferenceType::Range {
482                    sheet,
483                    start_row,
484                    start_col,
485                    end_row,
486                    end_col,
487                } => {
488                    // Format start reference
489                    let start_ref = match (start_col, start_row) {
490                        (Some(col), Some(row)) => {
491                            format!("{}{}", Self::number_to_column(*col), row)
492                        }
493                        (Some(col), None) => Self::number_to_column(*col),
494                        (None, Some(row)) => row.to_string(),
495                        (None, None) => "".to_string(), // Should not happen in normal usage
496                    };
497
498                    // Format end reference
499                    let end_ref = match (end_col, end_row) {
500                        (Some(col), Some(row)) => {
501                            format!("{}{}", Self::number_to_column(*col), row)
502                        }
503                        (Some(col), None) => Self::number_to_column(*col),
504                        (None, Some(row)) => row.to_string(),
505                        (None, None) => "".to_string(), // Should not happen in normal usage
506                    };
507
508                    let range_part = format!("{start_ref}:{end_ref}");
509
510                    if let Some(sheet_name) = sheet {
511                        if sheet_name_needs_quoting(sheet_name) {
512                            // Escape any single quotes in the sheet name by doubling them
513                            let escaped_name = sheet_name.replace('\'', "''");
514                            format!("'{escaped_name}'!{range_part}")
515                        } else {
516                            format!("{sheet_name}!{range_part}")
517                        }
518                    } else {
519                        range_part
520                    }
521                }
522                ReferenceType::Table(table_ref) => {
523                    if let Some(specifier) = &table_ref.specifier {
524                        // For table references, we need to handle column specifiers specially
525                        // to remove leading/trailing whitespace
526                        match specifier {
527                            TableSpecifier::Column(column) => {
528                                format!("{}[{}]", table_ref.name, column.trim())
529                            }
530                            TableSpecifier::ColumnRange(start, end) => {
531                                format!("{}[{}:{}]", table_ref.name, start.trim(), end.trim())
532                            }
533                            _ => {
534                                // For other specifiers, use the standard formatting
535                                format!("{}[{}]", table_ref.name, specifier)
536                            }
537                        }
538                    } else {
539                        table_ref.name.clone()
540                    }
541                }
542                ReferenceType::NamedRange(name) => name.clone(),
543            }
544        )
545    }
546}
547
548impl ReferenceType {
549    /// Normalise the reference string (convert to canonical form)
550    pub fn normalise(&self) -> String {
551        format!("{self}")
552    }
553
554    /// Extract a sheet name from a reference using byte operations.
555    fn extract_sheet_name(reference: &str) -> (Option<String>, String) {
556        let bytes = reference.as_bytes();
557        let mut i = 0;
558
559        // Handle quoted sheet names
560        if i < bytes.len() && bytes[i] == b'\'' {
561            i += 1;
562            let start = i;
563
564            // Find closing quote
565            while i < bytes.len() {
566                if bytes[i] == b'\'' {
567                    // Check if next char is '!'
568                    if i + 1 < bytes.len() && bytes[i + 1] == b'!' {
569                        let sheet = String::from(&reference[start..i]);
570                        let ref_part = String::from(&reference[i + 2..]);
571                        return (Some(sheet), ref_part);
572                    }
573                }
574                i += 1;
575            }
576        }
577
578        // Handle unquoted sheet names
579        i = 0;
580        while i < bytes.len() {
581            if bytes[i] == b'!' && i > 0 {
582                let sheet = String::from(&reference[0..i]);
583                let ref_part = String::from(&reference[i + 1..]);
584                return (Some(sheet), ref_part);
585            }
586            i += 1;
587        }
588
589        (None, reference.to_string())
590    }
591
592    /// Parse a table reference like "Table1[Column1]" or more complex ones like "Table1[[#All],[Column1]:[Column2]]".
593    fn parse_table_reference(reference: &str) -> Result<Self, ParsingError> {
594        // Find the first '[' to separate table name from specifier
595        if let Some(bracket_pos) = reference.find('[') {
596            let table_name = reference[..bracket_pos].trim();
597            if table_name.is_empty() {
598                return Err(ParsingError::InvalidReference(reference.to_string()));
599            }
600
601            let specifier_str = &reference[bracket_pos..];
602            let specifier = Self::parse_table_specifier(specifier_str)?;
603
604            Ok(ReferenceType::Table(TableReference {
605                name: table_name.to_string(),
606                specifier,
607            }))
608        } else {
609            Err(ParsingError::InvalidReference(reference.to_string()))
610        }
611    }
612
613    /// Parse a table specifier like "[Column1]" or "[[#All],[Column1]:[Column2]]"
614    fn parse_table_specifier(specifier_str: &str) -> Result<Option<TableSpecifier>, ParsingError> {
615        if specifier_str.is_empty() || !specifier_str.starts_with('[') {
616            return Ok(None);
617        }
618
619        // Find balanced closing bracket
620        let mut depth = 0;
621        let mut end_pos = 0;
622
623        for (i, c) in specifier_str.chars().enumerate() {
624            if c == '[' {
625                depth += 1;
626            } else if c == ']' {
627                depth -= 1;
628                if depth == 0 {
629                    end_pos = i;
630                    break;
631                }
632            }
633        }
634
635        if depth != 0 || end_pos == 0 {
636            return Err(ParsingError::InvalidReference(format!(
637                "Unbalanced brackets in table specifier: {specifier_str}"
638            )));
639        }
640
641        // Extract content between outermost brackets
642        let content = &specifier_str[1..end_pos];
643
644        // Handle different types of specifiers
645        if content.is_empty() {
646            // Empty brackets means the whole table
647            return Ok(Some(TableSpecifier::All));
648        }
649
650        // Handle special items
651        if content.starts_with("#") {
652            return Self::parse_special_item(content);
653        }
654
655        // Handle column references
656        if !content.contains('[') && !content.contains('#') {
657            // Check for column range using iterator instead of split().collect()
658            if let Some(colon_pos) = content.find(':') {
659                let start = content[..colon_pos].trim();
660                let end = content[colon_pos + 1..].trim();
661                return Ok(Some(TableSpecifier::ColumnRange(
662                    start.to_string(),
663                    end.to_string(),
664                )));
665            } else {
666                // Single column
667                return Ok(Some(TableSpecifier::Column(content.trim().to_string())));
668            }
669        }
670
671        // Handle complex structured references with nested brackets
672        if content.contains('[') {
673            return Self::parse_complex_table_specifier(content);
674        }
675
676        // If we can't determine the type, just use the raw specifier
677        Ok(Some(TableSpecifier::Column(content.trim().to_string())))
678    }
679
680    fn parse_openformula_reference(reference: &str) -> Result<Self, ParsingError> {
681        if reference.starts_with('[') && reference.ends_with(']') {
682            let inner = &reference[1..reference.len() - 1];
683            if inner.is_empty() {
684                return Err(ParsingError::InvalidReference(
685                    "Empty OpenFormula reference".to_string(),
686                ));
687            }
688
689            let mut parts = inner.splitn(2, ':');
690            let start_part_str = parts.next().unwrap();
691            let end_part_str = parts.next();
692
693            let start_part = Self::parse_openformula_part(start_part_str)?;
694            let end_part = if let Some(part) = end_part_str {
695                Some(Self::parse_openformula_part(part)?)
696            } else {
697                None
698            };
699
700            let sheet = match (&start_part.sheet, &end_part) {
701                (Some(sheet), Some(end)) => {
702                    if let Some(end_sheet) = &end.sheet {
703                        if end_sheet != sheet {
704                            return Err(ParsingError::InvalidReference(format!(
705                                "Mismatched sheets in reference: {sheet} vs {end_sheet}"
706                            )));
707                        }
708                    }
709                    Some(sheet.clone())
710                }
711                (Some(sheet), None) => Some(sheet.clone()),
712                (None, Some(end)) => end.sheet.clone(),
713                (None, None) => None,
714            };
715
716            let mut excel_like = String::new();
717            if let Some(sheet_name) = sheet {
718                if sheet_name_needs_quoting(&sheet_name) {
719                    let escaped = sheet_name.replace('\'', "''");
720                    excel_like.push('\'');
721                    excel_like.push_str(&escaped);
722                    excel_like.push('\'');
723                } else {
724                    excel_like.push_str(&sheet_name);
725                }
726                excel_like.push('!');
727            }
728
729            excel_like.push_str(&start_part.coord);
730            if let Some(end) = end_part {
731                excel_like.push(':');
732                excel_like.push_str(&end.coord);
733            }
734
735            return Self::parse_excel_reference(&excel_like);
736        }
737
738        Err(ParsingError::InvalidReference(format!(
739            "Unsupported OpenFormula reference: {reference}"
740        )))
741    }
742
743    fn parse_openformula_part(part: &str) -> Result<OpenFormulaRefPart, ParsingError> {
744        let trimmed = part.trim();
745        if trimmed.is_empty() {
746            return Err(ParsingError::InvalidReference(
747                "Empty component in OpenFormula reference".to_string(),
748            ));
749        }
750
751        if trimmed == "." {
752            return Err(ParsingError::InvalidReference(
753                "Incomplete OpenFormula reference component".to_string(),
754            ));
755        }
756
757        if trimmed.starts_with('[') {
758            // Nested brackets are not expected here
759            return Err(ParsingError::InvalidReference(format!(
760                "Unexpected '[' in OpenFormula reference component: {trimmed}"
761            )));
762        }
763
764        let (sheet, coord_slice) = if let Some(stripped) = trimmed.strip_prefix('.') {
765            (None, stripped.trim())
766        } else if let Some(dot_idx) = Self::find_openformula_sheet_separator(trimmed) {
767            let sheet_part = trimmed[..dot_idx].trim();
768            let coord_part = trimmed[dot_idx + 1..].trim();
769            if coord_part.is_empty() {
770                return Err(ParsingError::InvalidReference(format!(
771                    "Missing coordinate in OpenFormula reference component: {trimmed}"
772                )));
773            }
774            let sheet_name = Self::normalise_openformula_sheet(sheet_part)?;
775            (Some(sheet_name), coord_part)
776        } else {
777            (None, trimmed)
778        };
779
780        let coord = coord_slice.trim_start_matches('.').trim().to_string();
781
782        if coord.is_empty() {
783            return Err(ParsingError::InvalidReference(format!(
784                "Missing coordinate in OpenFormula reference component: {trimmed}"
785            )));
786        }
787
788        Ok(OpenFormulaRefPart { sheet, coord })
789    }
790
791    fn normalise_openformula_sheet(sheet: &str) -> Result<String, ParsingError> {
792        let without_abs = sheet.trim().trim_start_matches('$');
793
794        if without_abs.starts_with('\'') {
795            if without_abs.len() < 2 || !without_abs.ends_with('\'') {
796                return Err(ParsingError::InvalidReference(format!(
797                    "Unterminated sheet name in OpenFormula reference: {sheet}"
798                )));
799            }
800            let inner = &without_abs[1..without_abs.len() - 1];
801            Ok(inner.replace("''", "'"))
802        } else {
803            Ok(without_abs.to_string())
804        }
805    }
806
807    fn find_openformula_sheet_separator(part: &str) -> Option<usize> {
808        let bytes = part.as_bytes();
809        let mut i = 0;
810        let mut in_quotes = false;
811
812        while i < bytes.len() {
813            match bytes[i] {
814                b'\'' => {
815                    if i + 1 < bytes.len() && bytes[i + 1] == b'\'' {
816                        i += 2;
817                        continue;
818                    }
819                    in_quotes = !in_quotes;
820                    i += 1;
821                }
822                b'.' if !in_quotes => return Some(i),
823                _ => i += 1,
824            }
825        }
826
827        None
828    }
829
830    /// Parse a special item specifier like "#Headers", "#Data", etc.
831    fn parse_special_item(content: &str) -> Result<Option<TableSpecifier>, ParsingError> {
832        match content {
833            "#All" => Ok(Some(TableSpecifier::SpecialItem(SpecialItem::All))),
834            "#Headers" => Ok(Some(TableSpecifier::SpecialItem(SpecialItem::Headers))),
835            "#Data" => Ok(Some(TableSpecifier::SpecialItem(SpecialItem::Data))),
836            "#Totals" => Ok(Some(TableSpecifier::SpecialItem(SpecialItem::Totals))),
837            "@" => Ok(Some(TableSpecifier::Row(TableRowSpecifier::Current))),
838            _ => Err(ParsingError::InvalidReference(format!(
839                "Unknown special item: {content}"
840            ))),
841        }
842    }
843
844    /// Parse complex table specifiers with nested brackets
845    fn parse_complex_table_specifier(
846        content: &str,
847    ) -> Result<Option<TableSpecifier>, ParsingError> {
848        // This is a more complex case like [[#Headers],[Column1]:[Column2]]
849        // For now, we'll just store the raw specifier and enhance this in the future
850
851        // Try to identify common patterns
852        if content.contains("[#Headers]")
853            || content.contains("[#All]")
854            || content.contains("[#Data]")
855            || content.contains("[#Totals]")
856            || content.contains("[@]")
857        {
858            // This is a combination of specifiers
859            // Parse them into a vector
860            let mut specifiers = Vec::new();
861
862            // Simple parsing - this would need enhancement for full support
863            if content.contains("[#Headers]") {
864                specifiers.push(Box::new(TableSpecifier::SpecialItem(SpecialItem::Headers)));
865            }
866            if content.contains("[#Data]") {
867                specifiers.push(Box::new(TableSpecifier::SpecialItem(SpecialItem::Data)));
868            }
869            if content.contains("[#Totals]") {
870                specifiers.push(Box::new(TableSpecifier::SpecialItem(SpecialItem::Totals)));
871            }
872            if content.contains("[#All]") {
873                specifiers.push(Box::new(TableSpecifier::SpecialItem(SpecialItem::All)));
874            }
875
876            if !specifiers.is_empty() {
877                return Ok(Some(TableSpecifier::Combination(specifiers)));
878            }
879        }
880
881        // Fallback to storing as a column specifier
882        Ok(Some(TableSpecifier::Column(content.trim().to_string())))
883    }
884
885    /// Get the Excel-style string representation of this reference
886    pub fn to_excel_string(&self) -> String {
887        match self {
888            ReferenceType::Cell { sheet, row, col } => {
889                if let Some(s) = sheet {
890                    if sheet_name_needs_quoting(s) {
891                        let escaped_name = s.replace('\'', "''");
892                        format!("'{}'!{}{}", escaped_name, Self::number_to_column(*col), row)
893                    } else {
894                        format!("{}!{}{}", s, Self::number_to_column(*col), row)
895                    }
896                } else {
897                    format!("{}{}", Self::number_to_column(*col), row)
898                }
899            }
900            ReferenceType::Range {
901                sheet,
902                start_row,
903                start_col,
904                end_row,
905                end_col,
906            } => {
907                // Format start reference
908                let start_ref = match (start_col, start_row) {
909                    (Some(col), Some(row)) => format!("{}{}", Self::number_to_column(*col), row),
910                    (Some(col), None) => Self::number_to_column(*col),
911                    (None, Some(row)) => row.to_string(),
912                    (None, None) => "".to_string(), // Should not happen in normal usage
913                };
914
915                // Format end reference
916                let end_ref = match (end_col, end_row) {
917                    (Some(col), Some(row)) => format!("{}{}", Self::number_to_column(*col), row),
918                    (Some(col), None) => Self::number_to_column(*col),
919                    (None, Some(row)) => row.to_string(),
920                    (None, None) => "".to_string(), // Should not happen in normal usage
921                };
922
923                let range_part = format!("{start_ref}:{end_ref}");
924
925                if let Some(s) = sheet {
926                    if sheet_name_needs_quoting(s) {
927                        let escaped_name = s.replace('\'', "''");
928                        format!("'{escaped_name}'!{range_part}")
929                    } else {
930                        format!("{s}!{range_part}")
931                    }
932                } else {
933                    range_part
934                }
935            }
936            ReferenceType::Table(table_ref) => {
937                if let Some(specifier) = &table_ref.specifier {
938                    format!("{}[{}]", table_ref.name, specifier)
939                } else {
940                    table_ref.name.clone()
941                }
942            }
943            ReferenceType::NamedRange(name) => name.clone(),
944        }
945    }
946}
947
948/// The different types of AST nodes.
949#[derive(Debug, Clone, PartialEq, Hash)]
950pub enum ASTNodeType {
951    Literal(LiteralValue),
952    Reference {
953        original: String, // Original reference string (preserved for display/debugging)
954        reference: ReferenceType, // Parsed reference
955    },
956    UnaryOp {
957        op: String,
958        expr: Box<ASTNode>,
959    },
960    BinaryOp {
961        op: String,
962        left: Box<ASTNode>,
963        right: Box<ASTNode>,
964    },
965    Function {
966        name: String,
967        args: Vec<ASTNode>, // Most functions have <= 4 args
968    },
969    Array(Vec<Vec<ASTNode>>), // Most arrays are small
970}
971
972impl Display for ASTNodeType {
973    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
974        match self {
975            ASTNodeType::Literal(value) => write!(f, "Literal({value})"),
976            ASTNodeType::Reference { reference, .. } => write!(f, "Reference({reference:?})"),
977            ASTNodeType::UnaryOp { op, expr } => write!(f, "UnaryOp({op}, {expr})"),
978            ASTNodeType::BinaryOp { op, left, right } => {
979                write!(f, "BinaryOp({op}, {left}, {right})")
980            }
981            ASTNodeType::Function { name, args } => write!(f, "Function({name}, {args:?})"),
982            ASTNodeType::Array(rows) => write!(f, "Array({rows:?})"),
983        }
984    }
985}
986
987/// An AST node represents a parsed formula element
988#[derive(Debug, Clone, PartialEq)]
989pub struct ASTNode {
990    pub node_type: ASTNodeType,
991    pub source_token: Option<Token>,
992    /// True if this AST contains any volatile function calls.
993    ///
994    /// This is set by the parser when a volatility classifier is provided.
995    /// For ASTs constructed manually (e.g., in tests), this defaults to false.
996    pub contains_volatile: bool,
997}
998
999impl ASTNode {
1000    pub fn new(node_type: ASTNodeType, source_token: Option<Token>) -> Self {
1001        ASTNode {
1002            node_type,
1003            source_token,
1004            contains_volatile: false,
1005        }
1006    }
1007
1008    /// Create an ASTNode while explicitly setting contains_volatile.
1009    pub fn new_with_volatile(
1010        node_type: ASTNodeType,
1011        source_token: Option<Token>,
1012        contains_volatile: bool,
1013    ) -> Self {
1014        ASTNode {
1015            node_type,
1016            source_token,
1017            contains_volatile,
1018        }
1019    }
1020
1021    /// Whether this AST contains any volatile functions.
1022    pub fn contains_volatile(&self) -> bool {
1023        self.contains_volatile
1024    }
1025
1026    pub fn fingerprint(&self) -> u64 {
1027        self.calculate_hash()
1028    }
1029
1030    /// Calculate a hash for this ASTNode
1031    pub fn calculate_hash(&self) -> u64 {
1032        let mut hasher = FormulaHasher::new();
1033        self.hash_node(&mut hasher);
1034        hasher.finish()
1035    }
1036
1037    fn hash_node(&self, hasher: &mut FormulaHasher) {
1038        match &self.node_type {
1039            ASTNodeType::Literal(value) => {
1040                hasher.write(&[1]); // Discriminant for Literal
1041                value.hash(hasher);
1042            }
1043            ASTNodeType::Reference { reference, .. } => {
1044                hasher.write(&[2]); // Discriminant for Reference
1045                reference.hash(hasher);
1046            }
1047            ASTNodeType::UnaryOp { op, expr } => {
1048                hasher.write(&[3]); // Discriminant for UnaryOp
1049                hasher.write(op.as_bytes());
1050                expr.hash_node(hasher);
1051            }
1052            ASTNodeType::BinaryOp { op, left, right } => {
1053                hasher.write(&[4]); // Discriminant for BinaryOp
1054                hasher.write(op.as_bytes());
1055                left.hash_node(hasher);
1056                right.hash_node(hasher);
1057            }
1058            ASTNodeType::Function { name, args } => {
1059                hasher.write(&[5]); // Discriminant for Function
1060                // Use lowercase function name to be case-insensitive
1061                let name_lower = name.to_lowercase();
1062                hasher.write(name_lower.as_bytes());
1063                hasher.write_usize(args.len());
1064                for arg in args {
1065                    arg.hash_node(hasher);
1066                }
1067            }
1068            ASTNodeType::Array(rows) => {
1069                hasher.write(&[6]); // Discriminant for Array
1070                hasher.write_usize(rows.len());
1071                for row in rows {
1072                    hasher.write_usize(row.len());
1073                    for item in row {
1074                        item.hash_node(hasher);
1075                    }
1076                }
1077            }
1078        }
1079    }
1080
1081    pub fn get_dependencies(&self) -> Vec<&ReferenceType> {
1082        let mut dependencies = Vec::new();
1083        self.collect_dependencies(&mut dependencies);
1084        dependencies
1085    }
1086
1087    pub fn get_dependency_strings(&self) -> Vec<String> {
1088        self.get_dependencies()
1089            .into_iter()
1090            .map(|dep| format!("{dep}"))
1091            .collect()
1092    }
1093
1094    fn collect_dependencies<'a>(&'a self, dependencies: &mut Vec<&'a ReferenceType>) {
1095        match &self.node_type {
1096            ASTNodeType::Reference { reference, .. } => {
1097                dependencies.push(reference);
1098            }
1099            ASTNodeType::UnaryOp { expr, .. } => {
1100                expr.collect_dependencies(dependencies);
1101            }
1102            ASTNodeType::BinaryOp { left, right, .. } => {
1103                left.collect_dependencies(dependencies);
1104                right.collect_dependencies(dependencies);
1105            }
1106            ASTNodeType::Function { args, .. } => {
1107                for arg in args {
1108                    arg.collect_dependencies(dependencies);
1109                }
1110            }
1111            ASTNodeType::Array(rows) => {
1112                for row in rows {
1113                    for item in row {
1114                        item.collect_dependencies(dependencies);
1115                    }
1116                }
1117            }
1118            _ => {}
1119        }
1120    }
1121
1122    /// Lightweight borrowed view of a reference encountered during AST traversal.
1123    /// This mirrors ReferenceType variants but borrows sheet/name strings to avoid allocation.
1124    pub fn refs(&self) -> RefIter<'_> {
1125        RefIter {
1126            stack: smallvec::smallvec![self],
1127        }
1128    }
1129
1130    /// Visit all references in this AST without allocating intermediates.
1131    pub fn visit_refs<V: FnMut(RefView<'_>)>(&self, mut visitor: V) {
1132        let mut stack: Vec<&ASTNode> = Vec::with_capacity(8);
1133        stack.push(self);
1134        while let Some(node) = stack.pop() {
1135            match &node.node_type {
1136                ASTNodeType::Reference { reference, .. } => visitor(RefView::from(reference)),
1137                ASTNodeType::UnaryOp { expr, .. } => stack.push(expr),
1138                ASTNodeType::BinaryOp { left, right, .. } => {
1139                    // Push right first so left is visited first (stable-ish order)
1140                    stack.push(right);
1141                    stack.push(left);
1142                }
1143                ASTNodeType::Function { args, .. } => {
1144                    for a in args.iter().rev() {
1145                        stack.push(a);
1146                    }
1147                }
1148                ASTNodeType::Array(rows) => {
1149                    for r in rows.iter().rev() {
1150                        for item in r.iter().rev() {
1151                            stack.push(item);
1152                        }
1153                    }
1154                }
1155                ASTNodeType::Literal(_) => {}
1156            }
1157        }
1158    }
1159
1160    /// Convenience: collect references into a small, inline vector based on a policy.
1161    pub fn collect_references(&self, policy: &CollectPolicy) -> SmallVec<[ReferenceType; 4]> {
1162        let mut out: SmallVec<[ReferenceType; 4]> = SmallVec::new();
1163        self.visit_refs(|rv| match rv {
1164            RefView::Cell { sheet, row, col } => out.push(ReferenceType::Cell {
1165                sheet: sheet.map(|s| s.to_string()),
1166                row,
1167                col,
1168            }),
1169            RefView::Range {
1170                sheet,
1171                start_row,
1172                start_col,
1173                end_row,
1174                end_col,
1175            } => {
1176                // Optionally expand very small finite ranges into individual cells
1177                if policy.expand_small_ranges {
1178                    if let (Some(sr), Some(sc), Some(er), Some(ec)) =
1179                        (start_row, start_col, end_row, end_col)
1180                    {
1181                        let rows = er.saturating_sub(sr) + 1;
1182                        let cols = ec.saturating_sub(sc) + 1;
1183                        let area = rows.saturating_mul(cols);
1184                        if area as usize <= policy.range_expansion_limit {
1185                            for r in sr..=er {
1186                                for c in sc..=ec {
1187                                    out.push(ReferenceType::Cell {
1188                                        sheet: sheet.map(|s| s.to_string()),
1189                                        row: r,
1190                                        col: c,
1191                                    });
1192                                }
1193                            }
1194                            return; // handled
1195                        }
1196                    }
1197                }
1198                out.push(ReferenceType::Range {
1199                    sheet: sheet.map(|s| s.to_string()),
1200                    start_row,
1201                    start_col,
1202                    end_row,
1203                    end_col,
1204                });
1205            }
1206            RefView::Table { name, specifier } => out.push(ReferenceType::Table(TableReference {
1207                name: name.to_string(),
1208                specifier: specifier.cloned(),
1209            })),
1210            RefView::NamedRange { name } => {
1211                if policy.include_names {
1212                    out.push(ReferenceType::NamedRange(name.to_string()));
1213                }
1214            }
1215        });
1216        out
1217    }
1218}
1219
1220/// A borrowing view over a ReferenceType. Avoids cloning sheet/names while walking.
1221#[derive(Clone, Copy, Debug)]
1222pub enum RefView<'a> {
1223    Cell {
1224        sheet: Option<&'a str>,
1225        row: u32,
1226        col: u32,
1227    },
1228    Range {
1229        sheet: Option<&'a str>,
1230        start_row: Option<u32>,
1231        start_col: Option<u32>,
1232        end_row: Option<u32>,
1233        end_col: Option<u32>,
1234    },
1235    Table {
1236        name: &'a str,
1237        specifier: Option<&'a TableSpecifier>,
1238    },
1239    NamedRange {
1240        name: &'a str,
1241    },
1242}
1243
1244impl<'a> From<&'a ReferenceType> for RefView<'a> {
1245    fn from(r: &'a ReferenceType) -> Self {
1246        match r {
1247            ReferenceType::Cell { sheet, row, col } => RefView::Cell {
1248                sheet: sheet.as_deref(),
1249                row: *row,
1250                col: *col,
1251            },
1252            ReferenceType::Range {
1253                sheet,
1254                start_row,
1255                start_col,
1256                end_row,
1257                end_col,
1258            } => RefView::Range {
1259                sheet: sheet.as_deref(),
1260                start_row: *start_row,
1261                start_col: *start_col,
1262                end_row: *end_row,
1263                end_col: *end_col,
1264            },
1265            ReferenceType::Table(tr) => RefView::Table {
1266                name: tr.name.as_str(),
1267                specifier: tr.specifier.as_ref(),
1268            },
1269            ReferenceType::NamedRange(name) => RefView::NamedRange { name },
1270        }
1271    }
1272}
1273
1274/// Iterator over RefView for an AST, implemented via an explicit stack to avoid recursion allocation.
1275pub struct RefIter<'a> {
1276    stack: smallvec::SmallVec<[&'a ASTNode; 8]>,
1277}
1278
1279impl<'a> Iterator for RefIter<'a> {
1280    type Item = RefView<'a>;
1281    fn next(&mut self) -> Option<Self::Item> {
1282        while let Some(node) = self.stack.pop() {
1283            match &node.node_type {
1284                ASTNodeType::Reference { reference, .. } => return Some(RefView::from(reference)),
1285                ASTNodeType::UnaryOp { expr, .. } => self.stack.push(expr),
1286                ASTNodeType::BinaryOp { left, right, .. } => {
1287                    self.stack.push(right);
1288                    self.stack.push(left);
1289                }
1290                ASTNodeType::Function { args, .. } => {
1291                    for a in args.iter().rev() {
1292                        self.stack.push(a);
1293                    }
1294                }
1295                ASTNodeType::Array(rows) => {
1296                    for r in rows.iter().rev() {
1297                        for item in r.iter().rev() {
1298                            self.stack.push(item);
1299                        }
1300                    }
1301                }
1302                ASTNodeType::Literal(_) => {}
1303            }
1304        }
1305        None
1306    }
1307}
1308
1309/// Policy controlling how references are collected.
1310#[derive(Debug, Clone)]
1311pub struct CollectPolicy {
1312    pub expand_small_ranges: bool,
1313    pub range_expansion_limit: usize,
1314    pub include_names: bool,
1315}
1316
1317impl Default for CollectPolicy {
1318    fn default() -> Self {
1319        Self {
1320            expand_small_ranges: false,
1321            range_expansion_limit: 0,
1322            include_names: true,
1323        }
1324    }
1325}
1326
1327impl Display for ASTNode {
1328    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1329        write!(f, "{}", self.node_type)
1330    }
1331}
1332
1333impl std::hash::Hash for ASTNode {
1334    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
1335        let hash = self.calculate_hash();
1336        state.write_u64(hash);
1337    }
1338}
1339
1340/// A parser for converting tokens into an AST.
1341pub struct Parser {
1342    tokens: Vec<Token>,
1343    position: usize,
1344    /// Optional classifier to determine whether a function name is volatile.
1345    volatility_classifier: Option<VolatilityClassifierBox>,
1346    dialect: FormulaDialect,
1347}
1348
1349impl<T> From<T> for Parser
1350where
1351    T: AsRef<str>,
1352{
1353    fn from(formula: T) -> Self {
1354        let tokens = Tokenizer::new(formula.as_ref()).unwrap().items;
1355        Self::new(tokens, false)
1356    }
1357}
1358
1359impl Parser {
1360    pub fn new(tokens: Vec<Token>, include_whitespace: bool) -> Self {
1361        Self::new_with_dialect(tokens, include_whitespace, FormulaDialect::Excel)
1362    }
1363
1364    pub fn new_with_dialect(
1365        tokens: Vec<Token>,
1366        include_whitespace: bool,
1367        dialect: FormulaDialect,
1368    ) -> Self {
1369        let filtered_tokens = if include_whitespace {
1370            tokens
1371        } else {
1372            tokens
1373                .into_iter()
1374                .filter(|t| t.token_type != TokenType::Whitespace)
1375                .collect()
1376        };
1377        Parser {
1378            tokens: filtered_tokens,
1379            position: 0,
1380            volatility_classifier: None,
1381            dialect,
1382        }
1383    }
1384
1385    /// Provide a function-volatility classifier for this parser.
1386    /// If set, the parser will annotate ASTs with a contains_volatile bit.
1387    pub fn with_volatility_classifier<F>(mut self, f: F) -> Self
1388    where
1389        F: Fn(&str) -> bool + Send + Sync + 'static,
1390    {
1391        self.volatility_classifier = Some(Box::new(f));
1392        self
1393    }
1394
1395    /// Convenience constructor to set a classifier alongside other options.
1396    pub fn new_with_classifier<F>(tokens: Vec<Token>, include_whitespace: bool, f: F) -> Self
1397    where
1398        F: Fn(&str) -> bool + Send + Sync + 'static,
1399    {
1400        Self::new(tokens, include_whitespace).with_volatility_classifier(f)
1401    }
1402
1403    pub fn new_with_classifier_and_dialect<F>(
1404        tokens: Vec<Token>,
1405        include_whitespace: bool,
1406        dialect: FormulaDialect,
1407        f: F,
1408    ) -> Self
1409    where
1410        F: Fn(&str) -> bool + Send + Sync + 'static,
1411    {
1412        Self::new_with_dialect(tokens, include_whitespace, dialect).with_volatility_classifier(f)
1413    }
1414
1415    /// Parse the tokens into an AST.
1416    pub fn parse(&mut self) -> Result<ASTNode, ParserError> {
1417        if self.tokens.is_empty() {
1418            return Err(ParserError {
1419                message: "No tokens to parse".to_string(),
1420                position: None,
1421            });
1422        }
1423
1424        // Check for literal formula (doesn't start with '=')
1425        if self.tokens[0].token_type == TokenType::Literal {
1426            let token = self.tokens[0].clone();
1427            return Ok(ASTNode::new(
1428                ASTNodeType::Literal(LiteralValue::Text(token.value.clone())),
1429                Some(token),
1430            ));
1431        }
1432
1433        let ast = self.parse_expression()?;
1434        if self.position < self.tokens.len() {
1435            return Err(ParserError {
1436                message: format!(
1437                    "Unexpected token at position {}: {:?}",
1438                    self.position, self.tokens[self.position]
1439                ),
1440                position: Some(self.position),
1441            });
1442        }
1443        Ok(ast)
1444    }
1445
1446    fn parse_expression(&mut self) -> Result<ASTNode, ParserError> {
1447        self.parse_binary_op(0)
1448    }
1449
1450    fn parse_binary_op(&mut self, min_precedence: u8) -> Result<ASTNode, ParserError> {
1451        let mut left = self.parse_unary_op()?;
1452
1453        while self.position < self.tokens.len() {
1454            let token = &self.tokens[self.position];
1455            if token.token_type != TokenType::OpInfix {
1456                break;
1457            }
1458
1459            let (precedence, associativity) =
1460                token.get_precedence().unwrap_or((0, Associativity::Left));
1461            if precedence < min_precedence {
1462                break;
1463            }
1464
1465            let op_token = self.tokens[self.position].clone();
1466            self.position += 1;
1467
1468            let next_min_precedence = if associativity == Associativity::Left {
1469                precedence + 1
1470            } else {
1471                precedence
1472            };
1473
1474            let right = self.parse_binary_op(next_min_precedence)?;
1475            let contains_volatile = left.contains_volatile || right.contains_volatile;
1476            left = ASTNode::new_with_volatile(
1477                ASTNodeType::BinaryOp {
1478                    op: op_token.value.clone(),
1479                    left: Box::new(left),
1480                    right: Box::new(right),
1481                },
1482                Some(op_token),
1483                contains_volatile,
1484            );
1485        }
1486
1487        Ok(left)
1488    }
1489
1490    fn parse_unary_op(&mut self) -> Result<ASTNode, ParserError> {
1491        if self.position < self.tokens.len()
1492            && self.tokens[self.position].token_type == TokenType::OpPrefix
1493        {
1494            let op_token = self.tokens[self.position].clone();
1495            self.position += 1;
1496            let expr = self.parse_unary_op()?;
1497            let contains_volatile = expr.contains_volatile;
1498            return Ok(ASTNode::new_with_volatile(
1499                ASTNodeType::UnaryOp {
1500                    op: op_token.value.clone(),
1501                    expr: Box::new(expr),
1502                },
1503                Some(op_token),
1504                contains_volatile,
1505            ));
1506        }
1507        self.parse_postfix_op()
1508    }
1509
1510    fn parse_postfix_op(&mut self) -> Result<ASTNode, ParserError> {
1511        let mut expr = self.parse_primary()?;
1512
1513        while self.position < self.tokens.len()
1514            && self.tokens[self.position].token_type == TokenType::OpPostfix
1515        {
1516            let op_token = self.tokens[self.position].clone();
1517            self.position += 1;
1518            let contains_volatile = expr.contains_volatile;
1519            expr = ASTNode::new_with_volatile(
1520                ASTNodeType::UnaryOp {
1521                    op: op_token.value.clone(),
1522                    expr: Box::new(expr),
1523                },
1524                Some(op_token),
1525                contains_volatile,
1526            );
1527        }
1528
1529        Ok(expr)
1530    }
1531
1532    fn parse_primary(&mut self) -> Result<ASTNode, ParserError> {
1533        if self.position >= self.tokens.len() {
1534            return Err(ParserError {
1535                message: "Unexpected end of tokens".to_string(),
1536                position: Some(self.position),
1537            });
1538        }
1539
1540        let token = &self.tokens[self.position];
1541        match token.token_type {
1542            TokenType::Operand => {
1543                let operand_token = self.tokens[self.position].clone();
1544                self.position += 1;
1545                self.parse_operand(operand_token)
1546            }
1547            TokenType::Func => {
1548                let func_token = self.tokens[self.position].clone();
1549                self.position += 1;
1550                self.parse_function(func_token)
1551            }
1552            TokenType::Paren if token.subtype == TokenSubType::Open => {
1553                self.position += 1;
1554                let expr = self.parse_expression()?;
1555                if self.position >= self.tokens.len()
1556                    || self.tokens[self.position].token_type != TokenType::Paren
1557                    || self.tokens[self.position].subtype != TokenSubType::Close
1558                {
1559                    return Err(ParserError {
1560                        message: "Expected closing parenthesis".to_string(),
1561                        position: Some(self.position),
1562                    });
1563                }
1564                self.position += 1;
1565                Ok(expr)
1566            }
1567            TokenType::Array if token.subtype == TokenSubType::Open => {
1568                self.position += 1;
1569                self.parse_array()
1570            }
1571            _ => Err(ParserError {
1572                message: format!("Unexpected token: {token:?}"),
1573                position: Some(self.position),
1574            }),
1575        }
1576    }
1577
1578    fn parse_operand(&mut self, token: Token) -> Result<ASTNode, ParserError> {
1579        match token.subtype {
1580            TokenSubType::Number => {
1581                let value = token.value.parse::<f64>().map_err(|_| ParserError {
1582                    message: format!("Invalid number: {}", token.value),
1583                    position: Some(self.position),
1584                })?;
1585                Ok(ASTNode::new(
1586                    ASTNodeType::Literal(LiteralValue::Number(value)),
1587                    Some(token),
1588                ))
1589            }
1590            TokenSubType::Text => {
1591                // Strip surrounding quotes from text literals
1592                let mut text = token.value.clone();
1593                if text.starts_with('"') && text.ends_with('"') && text.len() >= 2 {
1594                    text = text[1..text.len() - 1].to_string();
1595                    // Handle escaped quotes
1596                    text = text.replace("\"\"", "\"");
1597                }
1598                Ok(ASTNode::new(
1599                    ASTNodeType::Literal(LiteralValue::Text(text)),
1600                    Some(token),
1601                ))
1602            }
1603            TokenSubType::Logical => {
1604                let value = token.value.to_uppercase() == "TRUE";
1605                Ok(ASTNode::new(
1606                    ASTNodeType::Literal(LiteralValue::Boolean(value)),
1607                    Some(token),
1608                ))
1609            }
1610            TokenSubType::Error => {
1611                let error = ExcelError::from_error_string(&token.value);
1612                Ok(ASTNode::new(
1613                    ASTNodeType::Literal(LiteralValue::Error(error)),
1614                    Some(token),
1615                ))
1616            }
1617            TokenSubType::Range => {
1618                let reference = ReferenceType::from_string_with_dialect(&token.value, self.dialect)
1619                    .map_err(|e| ParserError {
1620                        message: format!("Invalid reference '{}': {}", token.value, e),
1621                        position: Some(self.position),
1622                    })?;
1623                Ok(ASTNode::new(
1624                    ASTNodeType::Reference {
1625                        original: token.value.clone(),
1626                        reference,
1627                    },
1628                    Some(token),
1629                ))
1630            }
1631            _ => Err(ParserError {
1632                message: format!("Unexpected operand subtype: {:?}", token.subtype),
1633                position: Some(self.position),
1634            }),
1635        }
1636    }
1637
1638    fn parse_function(&mut self, func_token: Token) -> Result<ASTNode, ParserError> {
1639        let name = func_token.value[..func_token.value.len() - 1].to_string();
1640        let args = self.parse_function_arguments()?;
1641        // Determine volatility for this function
1642        let this_is_volatile = self
1643            .volatility_classifier
1644            .as_ref()
1645            .map(|f| f(name.as_str()))
1646            .unwrap_or(false);
1647        let args_volatile = args.iter().any(|a| a.contains_volatile);
1648
1649        Ok(ASTNode::new_with_volatile(
1650            ASTNodeType::Function { name, args },
1651            Some(func_token),
1652            this_is_volatile || args_volatile,
1653        ))
1654    }
1655
1656    /// Parse function arguments.
1657    fn parse_function_arguments(&mut self) -> Result<Vec<ASTNode>, ParserError> {
1658        let mut args = Vec::new();
1659
1660        // Check for closing parenthesis (empty arguments)
1661        if self.position < self.tokens.len()
1662            && self.tokens[self.position].token_type == TokenType::Func
1663            && self.tokens[self.position].subtype == TokenSubType::Close
1664        {
1665            self.position += 1;
1666            return Ok(args);
1667        }
1668
1669        // Handle optional arguments (consecutive separators)
1670        // Check if we start with a separator (empty first argument)
1671        if self.position < self.tokens.len()
1672            && self.tokens[self.position].token_type == TokenType::Sep
1673            && self.tokens[self.position].subtype == TokenSubType::Arg
1674        {
1675            // Empty first argument - represented as empty text literal for compatibility
1676            args.push(ASTNode::new(
1677                ASTNodeType::Literal(LiteralValue::Text("".to_string())),
1678                None,
1679            ));
1680            self.position += 1;
1681        } else {
1682            // Parse first argument
1683            args.push(self.parse_expression()?);
1684        }
1685
1686        // Parse remaining arguments
1687        while self.position < self.tokens.len() {
1688            let token = &self.tokens[self.position];
1689
1690            if token.token_type == TokenType::Sep && token.subtype == TokenSubType::Arg {
1691                self.position += 1;
1692                // Check for consecutive separators (empty argument)
1693                if self.position < self.tokens.len() {
1694                    let next_token = &self.tokens[self.position];
1695                    if next_token.token_type == TokenType::Sep
1696                        && next_token.subtype == TokenSubType::Arg
1697                    {
1698                        // Empty argument - represented as empty text literal for compatibility
1699                        args.push(ASTNode::new(
1700                            ASTNodeType::Literal(LiteralValue::Text("".to_string())),
1701                            None,
1702                        ));
1703                    } else if next_token.token_type == TokenType::Func
1704                        && next_token.subtype == TokenSubType::Close
1705                    {
1706                        // Empty last argument
1707                        args.push(ASTNode::new(
1708                            ASTNodeType::Literal(LiteralValue::Text("".to_string())),
1709                            None,
1710                        ));
1711                        self.position += 1;
1712                        break;
1713                    } else {
1714                        args.push(self.parse_expression()?);
1715                    }
1716                } else {
1717                    // Trailing separator at end of formula
1718                    args.push(ASTNode::new(
1719                        ASTNodeType::Literal(LiteralValue::Text("".to_string())),
1720                        None,
1721                    ));
1722                }
1723            } else if token.token_type == TokenType::Func && token.subtype == TokenSubType::Close {
1724                self.position += 1;
1725                break;
1726            } else {
1727                return Err(ParserError {
1728                    message: format!("Expected ',' or ')' in function arguments, got {token:?}"),
1729                    position: Some(self.position),
1730                });
1731            }
1732        }
1733
1734        Ok(args)
1735    }
1736
1737    fn parse_array(&mut self) -> Result<ASTNode, ParserError> {
1738        let mut rows = Vec::new();
1739        let mut current_row = Vec::new();
1740
1741        // Check for empty array
1742        if self.position < self.tokens.len()
1743            && self.tokens[self.position].token_type == TokenType::Array
1744            && self.tokens[self.position].subtype == TokenSubType::Close
1745        {
1746            self.position += 1;
1747            return Ok(ASTNode::new(ASTNodeType::Array(rows), None));
1748        }
1749
1750        // Parse first element
1751        current_row.push(self.parse_expression()?);
1752
1753        while self.position < self.tokens.len() {
1754            let token = &self.tokens[self.position];
1755
1756            if token.token_type == TokenType::Sep {
1757                if token.subtype == TokenSubType::Arg {
1758                    // Column separator
1759                    self.position += 1;
1760                    current_row.push(self.parse_expression()?);
1761                } else if token.subtype == TokenSubType::Row {
1762                    // Row separator
1763                    self.position += 1;
1764                    rows.push(current_row);
1765                    current_row = vec![self.parse_expression()?];
1766                }
1767            } else if token.token_type == TokenType::Array && token.subtype == TokenSubType::Close {
1768                self.position += 1;
1769                rows.push(current_row);
1770                break;
1771            } else {
1772                return Err(ParserError {
1773                    message: format!("Unexpected token in array: {token:?}"),
1774                    position: Some(self.position),
1775                });
1776            }
1777        }
1778
1779        // Array volatility is the OR of element volatility
1780        let contains_volatile = rows
1781            .iter()
1782            .flat_map(|r| r.iter())
1783            .any(|n| n.contains_volatile);
1784        Ok(ASTNode::new_with_volatile(
1785            ASTNodeType::Array(rows),
1786            None,
1787            contains_volatile,
1788        ))
1789    }
1790}
1791
1792impl From<TokenizerError> for ParserError {
1793    fn from(err: TokenizerError) -> Self {
1794        ParserError {
1795            message: err.message,
1796            position: Some(err.pos),
1797        }
1798    }
1799}
1800
1801/// Normalise a reference string to its canonical form
1802pub fn normalise_reference(reference: &str) -> Result<String, ParsingError> {
1803    let ref_type = ReferenceType::from_string(reference)?;
1804    Ok(ref_type.to_string())
1805}
1806
1807pub fn parse<T: AsRef<str>>(formula: T) -> Result<ASTNode, ParserError> {
1808    parse_with_dialect(formula, FormulaDialect::Excel)
1809}
1810
1811pub fn parse_with_dialect<T: AsRef<str>>(
1812    formula: T,
1813    dialect: FormulaDialect,
1814) -> Result<ASTNode, ParserError> {
1815    let tokenizer = Tokenizer::new_with_dialect(formula.as_ref(), dialect)?;
1816    let mut parser = Parser::new_with_dialect(tokenizer.items, false, dialect);
1817    parser.parse()
1818}
1819
1820/// Parse a single formula and annotate volatility using the provided classifier.
1821/// This is a convenience wrapper around `Parser::new_with_classifier`.
1822pub fn parse_with_volatility_classifier<T, F>(
1823    formula: T,
1824    classifier: F,
1825) -> Result<ASTNode, ParserError>
1826where
1827    T: AsRef<str>,
1828    F: Fn(&str) -> bool + Send + Sync + 'static,
1829{
1830    parse_with_dialect_and_volatility_classifier(formula, FormulaDialect::Excel, classifier)
1831}
1832
1833pub fn parse_with_dialect_and_volatility_classifier<T, F>(
1834    formula: T,
1835    dialect: FormulaDialect,
1836    classifier: F,
1837) -> Result<ASTNode, ParserError>
1838where
1839    T: AsRef<str>,
1840    F: Fn(&str) -> bool + Send + Sync + 'static,
1841{
1842    let tokenizer = Tokenizer::new_with_dialect(formula.as_ref(), dialect)?;
1843    let mut parser =
1844        Parser::new_with_classifier_and_dialect(tokenizer.items, false, dialect, classifier);
1845    parser.parse()
1846}
1847
1848/// Efficient batch parser with an internal token cache and optional volatility classifier.
1849///
1850/// The cache is keyed by the original formula string; repeated formulas across a batch
1851/// (very common in spreadsheets) will avoid re-tokenization and whitespace filtering.
1852pub struct BatchParser {
1853    include_whitespace: bool,
1854    volatility_classifier: Option<VolatilityClassifierArc>,
1855    token_cache: std::collections::HashMap<String, Vec<Token>>, // filtered tokens
1856    dialect: FormulaDialect,
1857}
1858
1859impl BatchParser {
1860    pub fn builder() -> BatchParserBuilder {
1861        BatchParserBuilder::default()
1862    }
1863
1864    /// Parse a formula using the internal cache and configured classifier.
1865    pub fn parse(&mut self, formula: &str) -> Result<ASTNode, ParserError> {
1866        // Get or build filtered tokens
1867        let filtered = if let Some(tokens) = self.token_cache.get(formula) {
1868            tokens.clone()
1869        } else {
1870            let mut tokens = Tokenizer::new_with_dialect(formula, self.dialect)?.items;
1871            if !self.include_whitespace {
1872                tokens.retain(|t| t.token_type != TokenType::Whitespace);
1873            }
1874            self.token_cache.insert(formula.to_string(), tokens.clone());
1875            tokens
1876        };
1877
1878        let mut parser = Parser::new_with_dialect(filtered, true, self.dialect); // already filtered per include_whitespace
1879        if let Some(classifier) = self.volatility_classifier.clone() {
1880            parser = parser.with_volatility_classifier(move |name| classifier(name));
1881        }
1882        parser.parse()
1883    }
1884}
1885
1886#[derive(Default)]
1887pub struct BatchParserBuilder {
1888    include_whitespace: bool,
1889    volatility_classifier: Option<VolatilityClassifierArc>,
1890    dialect: FormulaDialect,
1891}
1892
1893impl BatchParserBuilder {
1894    pub fn include_whitespace(mut self, include: bool) -> Self {
1895        self.include_whitespace = include;
1896        self
1897    }
1898
1899    pub fn with_volatility_classifier<F>(mut self, f: F) -> Self
1900    where
1901        F: Fn(&str) -> bool + Send + Sync + 'static,
1902    {
1903        self.volatility_classifier = Some(Arc::new(f));
1904        self
1905    }
1906
1907    pub fn dialect(mut self, dialect: FormulaDialect) -> Self {
1908        self.dialect = dialect;
1909        self
1910    }
1911
1912    pub fn build(self) -> BatchParser {
1913        BatchParser {
1914            include_whitespace: self.include_whitespace,
1915            volatility_classifier: self.volatility_classifier,
1916            token_cache: std::collections::HashMap::new(),
1917            dialect: self.dialect,
1918        }
1919    }
1920}