1use crate::structured_ref;
2use crate::tokenizer::{Associativity, Token, TokenSubType, TokenType, Tokenizer, TokenizerError};
3use crate::types::{FormulaDialect, ParsingError};
4use crate::{ExcelError, LiteralValue};
5
6#[cfg(feature = "serde")]
7use serde::{Deserialize, Serialize};
8
9use crate::hasher::FormulaHasher;
10use formualizer_common::coord::{
11 col_index_from_letters_1based, col_letters_from_1based, parse_a1_1based,
12};
13use formualizer_common::{
14 AxisBound, RelativeCoord, SheetCellRef, SheetLocator, SheetRangeRef, SheetRef,
15};
16use once_cell::sync::Lazy;
17use smallvec::SmallVec;
18use std::error::Error;
19use std::fmt::{self, Display};
20use std::hash::{Hash, Hasher};
21use std::str::FromStr;
22use std::sync::Arc;
23
/// Callable shape shared by the volatility classifiers: a thread-safe,
/// `'static` predicate over a string slice.
///
/// NOTE(review): the `&str` argument is presumably a function name — confirm
/// against the code that invokes these classifiers.
type VolatilityFn = dyn Fn(&str) -> bool + Send + Sync + 'static;
/// Uniquely owned, heap-allocated [`VolatilityFn`].
type VolatilityClassifierBox = Box<VolatilityFn>;
/// Reference-counted [`VolatilityFn`] that can be cloned cheaply and shared.
type VolatilityClassifierArc = Arc<VolatilityFn>;
27
/// Error produced by the parser, optionally tagged with a position.
#[derive(Debug)]
pub struct ParserError {
    /// Human-readable description of the failure.
    pub message: String,
    /// Position the error relates to, when one is known.
    ///
    /// NOTE(review): the unit (byte offset vs. token index) is not visible
    /// here — confirm against the code that constructs this error.
    pub position: Option<usize>,
}

impl Display for ParserError {
    /// Renders `ParserError at position N: <message>` when a position is set,
    /// and `ParserError: <message>` otherwise.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self.position {
            Some(pos) => write!(f, "ParserError at position {pos}: {}", self.message),
            None => write!(f, "ParserError: {}", self.message),
        }
    }
}

impl Error for ParserError {}
46
47static COLUMN_LOOKUP: Lazy<Vec<String>> = Lazy::new(|| {
49 let mut cols = Vec::with_capacity(702);
50 for c in b'A'..=b'Z' {
52 cols.push(String::from(c as char));
53 }
54 for c1 in b'A'..=b'Z' {
56 for c2 in b'A'..=b'Z' {
57 cols.push(format!("{}{}", c1 as char, c2 as char));
58 }
59 }
60 cols
61});
62
63#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
65#[derive(Debug, Clone, PartialEq, Hash)]
66pub enum TableSpecifier {
67 All,
69 Data,
71 Headers,
73 Totals,
75 Row(TableRowSpecifier),
77 Column(String),
79 ColumnRange(String, String),
81 SpecialItem(SpecialItem),
83 Combination(Vec<Box<TableSpecifier>>),
85}
86
/// Row selector used inside a table specifier.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, PartialEq, Hash)]
pub enum TableRowSpecifier {
    /// The current row; displayed as `@`.
    Current,
    /// All rows; displayed as `#All`.
    All,
    /// The data rows; displayed as `#Data`.
    Data,
    /// The header row; displayed as `#Headers`.
    Headers,
    /// The totals row; displayed as `#Totals`.
    Totals,
    /// A specific row selected by number; displayed as the bare number.
    Index(u32),
}
104
/// Special items that may appear in a structured (table) reference.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, PartialEq, Hash)]
pub enum SpecialItem {
    /// Displayed as `#Headers`.
    Headers,
    /// Displayed as `#Data`.
    Data,
    /// Displayed as `#Totals`.
    Totals,
    /// Displayed as `#All`.
    All,
    /// The current row; displayed as `@`.
    ThisRow,
}
120
121#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
123#[derive(Debug, Clone, PartialEq, Hash)]
124pub struct TableReference {
125 pub name: String,
127 pub specifier: Option<TableSpecifier>,
129}
130
/// Identifies the workbook targeted by an external reference.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, PartialEq, Hash)]
pub enum ExternalBookRef {
    /// The workbook token exactly as captured by the parser: the sheet
    /// qualifier text up to and including the closing `]`.
    Token(String),
}

impl ExternalBookRef {
    /// Returns the stored workbook token as a string slice.
    pub fn token(&self) -> &str {
        // Single-variant enum, so the pattern is irrefutable.
        let Self::Token(book) = self;
        book.as_str()
    }
}
144
/// Coordinates addressed by an external-workbook reference.
///
/// The `*_abs` flags record whether the corresponding axis was written as
/// absolute (`$`-prefixed).
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ExternalRefKind {
    /// A single cell.
    Cell {
        row: u32,
        col: u32,
        row_abs: bool,
        col_abs: bool,
    },
    /// A range; a bound is `None` when that axis was not given
    /// (for example whole-column or whole-row ranges).
    Range {
        start_row: Option<u32>,
        start_col: Option<u32>,
        end_row: Option<u32>,
        end_col: Option<u32>,
        start_row_abs: bool,
        start_col_abs: bool,
        end_row_abs: bool,
        end_col_abs: bool,
    },
}

impl ExternalRefKind {
    /// Builds a fully relative cell (both absolute flags `false`).
    pub fn cell(row: u32, col: u32) -> Self {
        Self::cell_with_abs(row, col, false, false)
    }

    /// Builds a cell with explicit absolute flags.
    pub fn cell_with_abs(row: u32, col: u32, row_abs: bool, col_abs: bool) -> Self {
        ExternalRefKind::Cell {
            row,
            col,
            row_abs,
            col_abs,
        }
    }

    /// Builds a fully relative range (all absolute flags `false`).
    pub fn range(
        start_row: Option<u32>,
        start_col: Option<u32>,
        end_row: Option<u32>,
        end_col: Option<u32>,
    ) -> Self {
        Self::range_with_abs(
            start_row, start_col, end_row, end_col, false, false, false, false,
        )
    }

    /// Builds a range with explicit absolute flags for every bound.
    // One flag per bound is the natural shape of this constructor.
    #[allow(clippy::too_many_arguments)]
    pub fn range_with_abs(
        start_row: Option<u32>,
        start_col: Option<u32>,
        end_row: Option<u32>,
        end_col: Option<u32>,
        start_row_abs: bool,
        start_col_abs: bool,
        end_row_abs: bool,
        end_col_abs: bool,
    ) -> Self {
        ExternalRefKind::Range {
            start_row,
            start_col,
            end_row,
            end_col,
            start_row_abs,
            start_col_abs,
            end_row_abs,
            end_col_abs,
        }
    }
}
228
229#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
230#[derive(Debug, Clone, PartialEq, Hash)]
231pub struct ExternalReference {
232 pub raw: String,
233 pub book: ExternalBookRef,
234 pub sheet: String,
235 pub kind: ExternalRefKind,
236}
237
238#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
240#[derive(Debug, Clone, PartialEq, Hash)]
241pub enum ReferenceType {
242 Cell {
243 sheet: Option<String>,
244 row: u32,
245 col: u32,
246 row_abs: bool,
247 col_abs: bool,
248 },
249 Range {
250 sheet: Option<String>,
251 start_row: Option<u32>,
252 start_col: Option<u32>,
253 end_row: Option<u32>,
254 end_col: Option<u32>,
255 start_row_abs: bool,
256 start_col_abs: bool,
257 end_row_abs: bool,
258 end_col_abs: bool,
259 },
260 Cell3D {
265 sheet_first: String,
266 sheet_last: String,
267 row: u32,
268 col: u32,
269 row_abs: bool,
270 col_abs: bool,
271 },
272 Range3D {
274 sheet_first: String,
275 sheet_last: String,
276 start_row: Option<u32>,
277 start_col: Option<u32>,
278 end_row: Option<u32>,
279 end_col: Option<u32>,
280 start_row_abs: bool,
281 start_col_abs: bool,
282 end_row_abs: bool,
283 end_col_abs: bool,
284 },
285 External(ExternalReference),
286 Table(TableReference),
287 NamedRange(String),
288}
289
290impl Display for TableSpecifier {
291 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
292 match self {
293 TableSpecifier::All => write!(f, "#All"),
294 TableSpecifier::Data => write!(f, "#Data"),
295 TableSpecifier::Headers => write!(f, "#Headers"),
296 TableSpecifier::Totals => write!(f, "#Totals"),
297 TableSpecifier::Row(row) => write!(f, "{row}"),
298 TableSpecifier::Column(column) => write!(f, "{column}"),
299 TableSpecifier::ColumnRange(start, end) => write!(f, "{start}:{end}"),
300 TableSpecifier::SpecialItem(item) => write!(f, "{item}"),
301 TableSpecifier::Combination(specs) => {
302 let mut first = true;
308 for spec in specs {
309 if !first {
310 write!(f, ",")?;
311 }
312 first = false;
313 match spec.as_ref() {
314 TableSpecifier::ColumnRange(start, end) => {
315 write!(f, "[{start}]:[{end}]")?;
316 }
317 other => write!(f, "[{other}]")?,
318 }
319 }
320 Ok(())
321 }
322 }
323 }
324}
325
326impl Display for TableRowSpecifier {
327 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
328 match self {
329 TableRowSpecifier::Current => write!(f, "@"),
330 TableRowSpecifier::All => write!(f, "#All"),
331 TableRowSpecifier::Data => write!(f, "#Data"),
332 TableRowSpecifier::Headers => write!(f, "#Headers"),
333 TableRowSpecifier::Totals => write!(f, "#Totals"),
334 TableRowSpecifier::Index(idx) => write!(f, "{idx}"),
335 }
336 }
337}
338
339impl Display for SpecialItem {
340 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
341 match self {
342 SpecialItem::Headers => write!(f, "#Headers"),
343 SpecialItem::Data => write!(f, "#Data"),
344 SpecialItem::Totals => write!(f, "#Totals"),
345 SpecialItem::All => write!(f, "#All"),
346 SpecialItem::ThisRow => write!(f, "@"),
347 }
348 }
349}
350
/// Decides whether a sheet name must be wrapped in single quotes when written
/// as part of a reference.
///
/// Quoting is required when the name starts with an ASCII digit, contains a
/// space or any ASCII punctuation other than `_`, or equals (after
/// upper-casing) one of a small set of reserved words. An empty name is
/// reported as not needing quotes.
fn sheet_name_needs_quoting(name: &str) -> bool {
    const RESERVED: [&str; 9] = [
        "TRUE", "FALSE", "NULL", "REF", "DIV", "NAME", "NUM", "VALUE", "N/A",
    ];

    let Some(lead) = name.bytes().next() else {
        return false;
    };
    if lead.is_ascii_digit() {
        return true;
    }

    // Space plus every ASCII punctuation character except the underscore.
    let has_special = name
        .bytes()
        .any(|byte| byte == b' ' || (byte.is_ascii_punctuation() && byte != b'_'));
    if has_special {
        return true;
    }

    RESERVED.contains(&name.to_uppercase().as_str())
}
382
/// One endpoint of a reference: an optional sheet name plus coordinate text.
///
/// NOTE(review): presumably produced while parsing OpenFormula references —
/// the code that builds and consumes it is not visible here.
#[derive(Debug, Clone)]
struct OpenFormulaRefPart {
    /// Sheet name, when this part carries one.
    sheet: Option<String>,
    /// Coordinate text of this part.
    coord: String,
}
388
/// One axis of a range endpoint: `(1-based index, is_absolute)`, or `None`
/// when that axis is absent.
type AxisPartWithAbs = Option<(u32, bool)>;
/// A parsed range endpoint as `(column, row)`.
type RangePartWithAbs = (AxisPartWithAbs, AxisPartWithAbs);
391
/// The sheet qualifier found in front of a reference.
#[derive(Debug, Clone)]
enum SheetSpec {
    /// No sheet qualifier.
    None,
    /// A single sheet (`Sheet1!…`).
    Single(String),
    /// A span of sheets for a 3D reference (`first:last!…`).
    Range { first: String, last: String },
}
402
403impl ReferenceType {
404 pub fn cell(sheet: Option<String>, row: u32, col: u32) -> Self {
406 Self::Cell {
407 sheet,
408 row,
409 col,
410 row_abs: false,
411 col_abs: false,
412 }
413 }
414
415 pub fn cell_with_abs(
417 sheet: Option<String>,
418 row: u32,
419 col: u32,
420 row_abs: bool,
421 col_abs: bool,
422 ) -> Self {
423 Self::Cell {
424 sheet,
425 row,
426 col,
427 row_abs,
428 col_abs,
429 }
430 }
431
432 pub fn range(
434 sheet: Option<String>,
435 start_row: Option<u32>,
436 start_col: Option<u32>,
437 end_row: Option<u32>,
438 end_col: Option<u32>,
439 ) -> Self {
440 Self::Range {
441 sheet,
442 start_row,
443 start_col,
444 end_row,
445 end_col,
446 start_row_abs: false,
447 start_col_abs: false,
448 end_row_abs: false,
449 end_col_abs: false,
450 }
451 }
452
453 #[allow(clippy::too_many_arguments)]
457 pub fn range_with_abs(
458 sheet: Option<String>,
459 start_row: Option<u32>,
460 start_col: Option<u32>,
461 end_row: Option<u32>,
462 end_col: Option<u32>,
463 start_row_abs: bool,
464 start_col_abs: bool,
465 end_row_abs: bool,
466 end_col_abs: bool,
467 ) -> Self {
468 Self::Range {
469 sheet,
470 start_row,
471 start_col,
472 end_row,
473 end_col,
474 start_row_abs,
475 start_col_abs,
476 end_row_abs,
477 end_col_abs,
478 }
479 }
480
481 pub fn from_string(reference: &str) -> Result<Self, ParsingError> {
483 Self::parse_excel_reference(reference)
484 }
485
486 pub fn from_string_with_dialect(
488 reference: &str,
489 dialect: FormulaDialect,
490 ) -> Result<Self, ParsingError> {
491 match dialect {
492 FormulaDialect::Excel => Self::parse_excel_reference(reference),
493 FormulaDialect::OpenFormula => Self::parse_openformula_reference(reference)
494 .or_else(|_| Self::parse_excel_reference(reference)),
495 }
496 }
497
498 pub fn parse_sheet_ref(reference: &str) -> Result<SheetRef<'static>, ParsingError> {
502 Self::parse_sheet_ref_with_dialect(reference, FormulaDialect::Excel)
503 }
504
505 pub fn parse_sheet_ref_with_dialect(
507 reference: &str,
508 dialect: FormulaDialect,
509 ) -> Result<SheetRef<'static>, ParsingError> {
510 match dialect {
511 FormulaDialect::Excel => Self::parse_excel_sheet_ref(reference),
512 FormulaDialect::OpenFormula => Self::parse_openformula_sheet_ref(reference)
513 .or_else(|_| Self::parse_excel_sheet_ref(reference)),
514 }
515 }
516
517 pub fn to_sheet_ref_lossy(&self) -> Option<SheetRef<'_>> {
520 match self {
521 ReferenceType::Cell {
522 sheet,
523 row,
524 col,
525 row_abs,
526 col_abs,
527 } => {
528 let row0 = row.checked_sub(1)?;
529 let col0 = col.checked_sub(1)?;
530 let sheet_loc = match sheet.as_deref() {
531 Some(name) => SheetLocator::from_name(name),
532 None => SheetLocator::Current,
533 };
534 let coord = RelativeCoord::new(row0, col0, *row_abs, *col_abs);
535 Some(SheetRef::Cell(SheetCellRef::new(sheet_loc, coord)))
536 }
537 ReferenceType::Range {
538 sheet,
539 start_row,
540 start_col,
541 end_row,
542 end_col,
543 start_row_abs,
544 start_col_abs,
545 end_row_abs,
546 end_col_abs,
547 } => {
548 let sheet_loc = match sheet.as_deref() {
549 Some(name) => SheetLocator::from_name(name),
550 None => SheetLocator::Current,
551 };
552 let sr = start_row
553 .and_then(|v| v.checked_sub(1).map(|i| AxisBound::new(i, *start_row_abs)));
554 if start_row.is_some() && sr.is_none() {
555 return None;
556 }
557 let sc = start_col
558 .and_then(|v| v.checked_sub(1).map(|i| AxisBound::new(i, *start_col_abs)));
559 if start_col.is_some() && sc.is_none() {
560 return None;
561 }
562 let er =
563 end_row.and_then(|v| v.checked_sub(1).map(|i| AxisBound::new(i, *end_row_abs)));
564 if end_row.is_some() && er.is_none() {
565 return None;
566 }
567 let ec =
568 end_col.and_then(|v| v.checked_sub(1).map(|i| AxisBound::new(i, *end_col_abs)));
569 if end_col.is_some() && ec.is_none() {
570 return None;
571 }
572 let range = SheetRangeRef::from_parts(sheet_loc, sr, sc, er, ec).ok()?;
573 Some(SheetRef::Range(range))
574 }
575 _ => None,
576 }
577 }
578
579 fn parse_excel_sheet_ref(reference: &str) -> Result<SheetRef<'static>, ParsingError> {
580 let (spec, ref_part) = Self::extract_sheet_spec(reference);
581 if matches!(spec, SheetSpec::Range { .. }) {
582 return Err(ParsingError::InvalidReference(
583 "3D references are not supported for SheetRef".to_string(),
584 ));
585 }
586 let sheet = match spec {
587 SheetSpec::None => None,
588 SheetSpec::Single(name) => Some(name),
589 SheetSpec::Range { .. } => unreachable!(),
590 };
591
592 if ref_part.contains('[') {
593 return Err(ParsingError::InvalidReference(
594 "Table references are not supported for SheetRef".to_string(),
595 ));
596 }
597
598 let sheet_loc: SheetLocator<'static> = match sheet {
599 Some(name) => SheetLocator::from_name(name),
600 None => SheetLocator::Current,
601 };
602
603 if ref_part.contains(':') {
604 let mut parts = ref_part.splitn(2, ':');
605 let start = parts.next().unwrap();
606 let end = parts.next().ok_or_else(|| {
607 ParsingError::InvalidReference(format!("Invalid range: {ref_part}"))
608 })?;
609
610 let (start_col, start_row) = Self::parse_range_part_with_abs(start)?;
611 let (end_col, end_row) = Self::parse_range_part_with_abs(end)?;
612
613 let start_col = Self::axis_bound_from_1based(start_col)?;
614 let start_row = Self::axis_bound_from_1based(start_row)?;
615 let end_col = Self::axis_bound_from_1based(end_col)?;
616 let end_row = Self::axis_bound_from_1based(end_row)?;
617
618 let range =
619 SheetRangeRef::from_parts(sheet_loc, start_row, start_col, end_row, end_col)
620 .map_err(|err| ParsingError::InvalidReference(err.to_string()))?;
621 Ok(SheetRef::Range(range))
622 } else {
623 let (row, col, row_abs, col_abs) = parse_a1_1based(&ref_part)
624 .map_err(|err| ParsingError::InvalidReference(err.to_string()))?;
625 let coord = RelativeCoord::new(row - 1, col - 1, row_abs, col_abs);
626 Ok(SheetRef::Cell(SheetCellRef::new(sheet_loc, coord)))
627 }
628 }
629
630 fn parse_openformula_sheet_ref(reference: &str) -> Result<SheetRef<'static>, ParsingError> {
631 Self::parse_excel_sheet_ref(reference)
632 }
633
634 fn axis_bound_from_1based(
635 bound: Option<(u32, bool)>,
636 ) -> Result<Option<AxisBound>, ParsingError> {
637 match bound {
638 Some((index, abs)) => AxisBound::from_excel_1based(index, abs)
639 .map(Some)
640 .map_err(|err| ParsingError::InvalidReference(err.to_string())),
641 None => Ok(None),
642 }
643 }
644
645 fn parse_range_part_with_abs(part: &str) -> Result<RangePartWithAbs, ParsingError> {
646 if let Ok((row, col, row_abs, col_abs)) = parse_a1_1based(part) {
647 return Ok((Some((col, col_abs)), Some((row, row_abs))));
648 }
649
650 let bytes = part.as_bytes();
651 let len = bytes.len();
652 let mut i = 0usize;
653
654 let mut col_abs = false;
655 let mut row_abs = false;
656
657 if i < len && bytes[i] == b'$' {
658 col_abs = true;
659 i += 1;
660 }
661
662 let col_start = i;
663 while i < len && bytes[i].is_ascii_alphabetic() {
664 i += 1;
665 }
666
667 if i > col_start {
668 let col_str = &part[col_start..i];
669 let col1 = Self::column_to_number(col_str)?;
670
671 if i == len {
672 return Ok((Some((col1, col_abs)), None));
673 }
674
675 if i < len && bytes[i] == b'$' {
676 row_abs = true;
677 i += 1;
678 }
679
680 if i >= len {
681 return Err(ParsingError::InvalidReference(format!(
682 "Invalid range part: {part}"
683 )));
684 }
685
686 let row_start = i;
687 while i < len && bytes[i].is_ascii_digit() {
688 i += 1;
689 }
690
691 if row_start == i || i != len {
692 return Err(ParsingError::InvalidReference(format!(
693 "Invalid range part: {part}"
694 )));
695 }
696
697 let row_str = &part[row_start..i];
698 let row1 = row_str
699 .parse::<u32>()
700 .map_err(|_| ParsingError::InvalidReference(format!("Invalid row: {row_str}")))?;
701 if row1 == 0 {
702 return Err(ParsingError::InvalidReference(format!(
703 "Invalid range part: {part}"
704 )));
705 }
706
707 return Ok((Some((col1, col_abs)), Some((row1, row_abs))));
708 }
709
710 i = 0;
711 if i < len && bytes[i] == b'$' {
712 row_abs = true;
713 i += 1;
714 }
715
716 let row_start = i;
717 while i < len && bytes[i].is_ascii_digit() {
718 i += 1;
719 }
720
721 if row_start == i || i != len {
722 return Err(ParsingError::InvalidReference(format!(
723 "Invalid range part: {part}"
724 )));
725 }
726
727 let row_str = &part[row_start..i];
728 let row1 = row_str
729 .parse::<u32>()
730 .map_err(|_| ParsingError::InvalidReference(format!("Invalid row: {row_str}")))?;
731 if row1 == 0 {
732 return Err(ParsingError::InvalidReference(format!(
733 "Invalid range part: {part}"
734 )));
735 }
736
737 Ok((None, Some((row1, row_abs))))
738 }
739
740 fn parse_3d_reference(first: &str, last: &str, ref_part: &str) -> Result<Self, ParsingError> {
741 if first.is_empty() || last.is_empty() {
742 return Err(ParsingError::InvalidReference(format!(
743 "3D reference requires two sheet names: {first}:{last}!{ref_part}"
744 )));
745 }
746 if ref_part.is_empty() {
747 return Err(ParsingError::InvalidReference(format!(
748 "3D reference {first}:{last}! is missing a cell or range"
749 )));
750 }
751 if ref_part.contains('[') {
753 return Err(ParsingError::InvalidReference(format!(
754 "3D reference {first}:{last}!{ref_part} cannot target a table"
755 )));
756 }
757
758 if ref_part.contains(':') {
759 let mut parts = ref_part.splitn(2, ':');
760 let start = parts.next().unwrap();
761 let end = parts.next().ok_or_else(|| {
762 ParsingError::InvalidReference(format!("Invalid range: {ref_part}"))
763 })?;
764 let (start_col, start_row) = Self::parse_range_part_with_abs(start)?;
765 let (end_col, end_row) = Self::parse_range_part_with_abs(end)?;
766
767 let split = |bound: Option<(u32, bool)>| match bound {
768 Some((index, abs)) => (Some(index), abs),
769 None => (None, false),
770 };
771 let (start_col, start_col_abs) = split(start_col);
772 let (start_row, start_row_abs) = split(start_row);
773 let (end_col, end_col_abs) = split(end_col);
774 let (end_row, end_row_abs) = split(end_row);
775
776 Ok(ReferenceType::Range3D {
777 sheet_first: first.to_string(),
778 sheet_last: last.to_string(),
779 start_row,
780 start_col,
781 end_row,
782 end_col,
783 start_row_abs,
784 start_col_abs,
785 end_row_abs,
786 end_col_abs,
787 })
788 } else {
789 let (col, row, col_abs, row_abs) =
790 Self::parse_cell_reference(ref_part).map_err(|_| {
791 ParsingError::InvalidReference(format!(
792 "Invalid 3D reference target: {ref_part}"
793 ))
794 })?;
795 Ok(ReferenceType::Cell3D {
796 sheet_first: first.to_string(),
797 sheet_last: last.to_string(),
798 row,
799 col,
800 row_abs,
801 col_abs,
802 })
803 }
804 }
805
806 fn parse_excel_reference(reference: &str) -> Result<Self, ParsingError> {
807 if reference.starts_with('[') && reference.ends_with(']') && !reference.contains('!') {
814 return Self::parse_bracketed_structured_reference(reference);
815 }
816
817 let (sheet_spec, ref_part) = Self::extract_sheet_spec(reference);
819
820 if let SheetSpec::Range { first, last, .. } = &sheet_spec {
823 return Self::parse_3d_reference(first, last, &ref_part);
824 }
825
826 let sheet = match sheet_spec {
827 SheetSpec::None => None,
828 SheetSpec::Single(name) => Some(name),
829 SheetSpec::Range { .. } => unreachable!(),
831 };
832
833 if ref_part.contains('[') {
836 if Self::is_r1c1_shape(&ref_part) {
844 return Ok(ReferenceType::NamedRange(reference.to_string()));
845 }
846 return Self::parse_table_reference(&ref_part);
847 }
848
849 let external_sheet = sheet.as_deref().and_then(|s| {
850 let lb = s.rfind('[')?;
854 let rb_rel = s[lb..].find(']')?;
855 let rb = lb + rb_rel;
856 if lb >= rb {
857 return None;
858 }
859
860 let token = &s[..=rb];
861 let sheet_name = &s[rb + 1..];
862 if sheet_name.is_empty() {
863 None
864 } else {
865 Some((token, sheet_name))
866 }
867 });
868
869 if ref_part.contains(':') {
870 let mut parts = ref_part.splitn(2, ':');
872 let start = parts.next().unwrap();
873 let end = parts.next().ok_or_else(|| {
874 ParsingError::InvalidReference(format!("Invalid range: {ref_part}"))
875 })?;
876 let (start_col, start_row) = Self::parse_range_part_with_abs(start)?;
877 let (end_col, end_row) = Self::parse_range_part_with_abs(end)?;
878
879 let split = |bound: Option<(u32, bool)>| match bound {
880 Some((index, abs)) => (Some(index), abs),
881 None => (None, false),
882 };
883 let (start_col, start_col_abs) = split(start_col);
884 let (start_row, start_row_abs) = split(start_row);
885 let (end_col, end_col_abs) = split(end_col);
886 let (end_row, end_row_abs) = split(end_row);
887
888 if let Some((book_token, sheet_name)) = external_sheet {
889 Ok(ReferenceType::External(ExternalReference {
890 raw: reference.to_string(),
891 book: ExternalBookRef::Token(book_token.to_string()),
892 sheet: sheet_name.to_string(),
893 kind: ExternalRefKind::Range {
894 start_row,
895 start_col,
896 end_row,
897 end_col,
898 start_row_abs,
899 start_col_abs,
900 end_row_abs,
901 end_col_abs,
902 },
903 }))
904 } else {
905 Ok(ReferenceType::Range {
906 sheet,
907 start_row,
908 start_col,
909 end_row,
910 end_col,
911 start_row_abs,
912 start_col_abs,
913 end_row_abs,
914 end_col_abs,
915 })
916 }
917 } else {
918 match Self::parse_cell_reference(&ref_part) {
920 Ok((col, row, col_abs, row_abs)) => {
921 if let Some((book_token, sheet_name)) = external_sheet {
922 Ok(ReferenceType::External(ExternalReference {
923 raw: reference.to_string(),
924 book: ExternalBookRef::Token(book_token.to_string()),
925 sheet: sheet_name.to_string(),
926 kind: ExternalRefKind::Cell {
927 row,
928 col,
929 row_abs,
930 col_abs,
931 },
932 }))
933 } else {
934 Ok(ReferenceType::Cell {
935 sheet,
936 row,
937 col,
938 row_abs,
939 col_abs,
940 })
941 }
942 }
943 Err(_) => {
944 Ok(ReferenceType::NamedRange(reference.to_string()))
946 }
947 }
948 }
949 }
950
951 fn parse_cell_reference(reference: &str) -> Result<(u32, u32, bool, bool), ParsingError> {
953 parse_a1_1based(reference)
954 .map(|(row, col, row_abs, col_abs)| (col, row, col_abs, row_abs))
955 .map_err(|_| {
956 ParsingError::InvalidReference(format!("Invalid cell reference: {reference}"))
957 })
958 }
959
960 pub(crate) fn column_to_number(column: &str) -> Result<u32, ParsingError> {
962 col_index_from_letters_1based(column)
963 .map_err(|_| ParsingError::InvalidReference(format!("Invalid column: {column}")))
964 }
965
966 pub(crate) fn number_to_column(num: u32) -> String {
968 if num == 0 {
969 return String::new();
970 }
971 if num > 0 && num <= 702 {
973 return COLUMN_LOOKUP[(num - 1) as usize].clone();
974 }
975
976 col_letters_from_1based(num).unwrap_or_default()
977 }
978
979 fn format_col(col: u32, abs: bool) -> String {
980 if abs {
981 format!("${}", Self::number_to_column(col))
982 } else {
983 Self::number_to_column(col)
984 }
985 }
986
/// Formats a row number in decimal, prefixed with `$` when `abs` is set.
fn format_row(row: u32, abs: bool) -> String {
    let marker = if abs { "$" } else { "" };
    format!("{marker}{row}")
}
994}
995
996impl Display for ReferenceType {
997 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
998 write!(
999 f,
1000 "{}",
1001 match self {
1002 ReferenceType::Cell {
1003 sheet,
1004 row,
1005 col,
1006 row_abs,
1007 col_abs,
1008 } => {
1009 let col_str = Self::format_col(*col, *col_abs);
1010 let row_str = Self::format_row(*row, *row_abs);
1011
1012 if let Some(sheet_name) = sheet {
1013 if sheet_name_needs_quoting(sheet_name) {
1014 let escaped_name = sheet_name.replace('\'', "''");
1016 format!("'{escaped_name}'!{col_str}{row_str}")
1017 } else {
1018 format!("{sheet_name}!{col_str}{row_str}")
1019 }
1020 } else {
1021 format!("{col_str}{row_str}")
1022 }
1023 }
1024 ReferenceType::Range {
1025 sheet,
1026 start_row,
1027 start_col,
1028 end_row,
1029 end_col,
1030 start_row_abs,
1031 start_col_abs,
1032 end_row_abs,
1033 end_col_abs,
1034 } => {
1035 let start_ref = match (start_col, start_row) {
1037 (Some(col), Some(row)) => format!(
1038 "{}{}",
1039 Self::format_col(*col, *start_col_abs),
1040 Self::format_row(*row, *start_row_abs)
1041 ),
1042 (Some(col), None) => Self::format_col(*col, *start_col_abs),
1043 (None, Some(row)) => Self::format_row(*row, *start_row_abs),
1044 (None, None) => "".to_string(), };
1046
1047 let end_ref = match (end_col, end_row) {
1049 (Some(col), Some(row)) => format!(
1050 "{}{}",
1051 Self::format_col(*col, *end_col_abs),
1052 Self::format_row(*row, *end_row_abs)
1053 ),
1054 (Some(col), None) => Self::format_col(*col, *end_col_abs),
1055 (None, Some(row)) => Self::format_row(*row, *end_row_abs),
1056 (None, None) => "".to_string(), };
1058
1059 let range_part = format!("{start_ref}:{end_ref}");
1060
1061 if let Some(sheet_name) = sheet {
1062 if sheet_name_needs_quoting(sheet_name) {
1063 let escaped_name = sheet_name.replace('\'', "''");
1065 format!("'{escaped_name}'!{range_part}")
1066 } else {
1067 format!("{sheet_name}!{range_part}")
1068 }
1069 } else {
1070 range_part
1071 }
1072 }
1073 ReferenceType::Cell3D {
1074 sheet_first,
1075 sheet_last,
1076 row,
1077 col,
1078 row_abs,
1079 col_abs,
1080 } => {
1081 let col_str = Self::format_col(*col, *col_abs);
1082 let row_str = Self::format_row(*row, *row_abs);
1083 let prefix = format_3d_sheet_prefix(sheet_first, sheet_last);
1084 format!("{prefix}!{col_str}{row_str}")
1085 }
1086 ReferenceType::Range3D {
1087 sheet_first,
1088 sheet_last,
1089 start_row,
1090 start_col,
1091 end_row,
1092 end_col,
1093 start_row_abs,
1094 start_col_abs,
1095 end_row_abs,
1096 end_col_abs,
1097 } => {
1098 let start_ref = match (start_col, start_row) {
1099 (Some(col), Some(row)) => format!(
1100 "{}{}",
1101 Self::format_col(*col, *start_col_abs),
1102 Self::format_row(*row, *start_row_abs)
1103 ),
1104 (Some(col), None) => Self::format_col(*col, *start_col_abs),
1105 (None, Some(row)) => Self::format_row(*row, *start_row_abs),
1106 (None, None) => "".to_string(),
1107 };
1108 let end_ref = match (end_col, end_row) {
1109 (Some(col), Some(row)) => format!(
1110 "{}{}",
1111 Self::format_col(*col, *end_col_abs),
1112 Self::format_row(*row, *end_row_abs)
1113 ),
1114 (Some(col), None) => Self::format_col(*col, *end_col_abs),
1115 (None, Some(row)) => Self::format_row(*row, *end_row_abs),
1116 (None, None) => "".to_string(),
1117 };
1118 let range_part = format!("{start_ref}:{end_ref}");
1119 let prefix = format_3d_sheet_prefix(sheet_first, sheet_last);
1120 format!("{prefix}!{range_part}")
1121 }
1122 ReferenceType::External(ext) => ext.raw.clone(),
1123 ReferenceType::Table(table_ref) => {
1124 if let Some(specifier) = &table_ref.specifier {
1125 match specifier {
1128 TableSpecifier::Column(column) => {
1129 format!("{}[{}]", table_ref.name, column.trim())
1130 }
1131 TableSpecifier::ColumnRange(start, end) => {
1132 format!("{}[{}:{}]", table_ref.name, start.trim(), end.trim())
1133 }
1134 _ => {
1135 format!("{}[{}]", table_ref.name, specifier)
1137 }
1138 }
1139 } else {
1140 table_ref.name.clone()
1141 }
1142 }
1143 ReferenceType::NamedRange(name) => name.clone(),
1144 }
1145 )
1146 }
1147}
1148
1149fn format_3d_sheet_prefix(first: &str, last: &str) -> String {
1153 let format_one = |name: &str| -> String {
1154 if sheet_name_needs_quoting(name) {
1155 let escaped = name.replace('\'', "''");
1156 format!("'{escaped}'")
1157 } else {
1158 name.to_string()
1159 }
1160 };
1161 format!("{}:{}", format_one(first), format_one(last))
1162}
1163
1164impl TryFrom<&str> for ReferenceType {
1165 type Error = ParsingError;
1166
1167 fn try_from(value: &str) -> Result<Self, Self::Error> {
1168 ReferenceType::from_string(value)
1169 }
1170}
1171
1172impl FromStr for ReferenceType {
1173 type Err = ParsingError;
1174
1175 fn from_str(s: &str) -> Result<Self, Self::Err> {
1176 ReferenceType::from_string(s)
1177 }
1178}
1179
1180impl ReferenceType {
1181 pub fn normalise(&self) -> String {
1183 format!("{self}")
1184 }
1185
/// Reads one sheet-name segment of `reference`, beginning at byte offset `start`.
///
/// Returns `(name, end, quoted)`, where `end` is the byte offset just past
/// the segment:
/// * A segment opening with `'` runs to the matching closing quote; a doubled
///   quote (`''`) inside stands for one literal quote and is unescaped in the
///   returned name. `end` points past the closing quote.
/// * Otherwise the segment runs until `:`, `!`, `'`, space, tab, CR, or LF
///   (or the end of the input).
///
/// Returns `None` when `start` is at or past the end, the quoted form is
/// never closed, or the unquoted segment would be empty.
fn read_sheet_segment(reference: &str, start: usize) -> Option<(String, usize, bool)> {
    let bytes = reference.as_bytes();
    let lead = *bytes.get(start)?;

    if lead != b'\'' {
        let stop = bytes[start..]
            .iter()
            .position(|&b| matches!(b, b':' | b'!' | b'\'' | b' ' | b'\t' | b'\n' | b'\r'))
            .map_or(bytes.len(), |offset| start + offset);
        return (stop > start).then(|| (reference[start..stop].to_string(), stop, false));
    }

    let body_start = start + 1;
    let mut cursor = body_start;
    while let Some(&byte) = bytes.get(cursor) {
        if byte != b'\'' {
            cursor += 1;
            continue;
        }
        // `''` is an escaped quote inside the name, not the terminator.
        if bytes.get(cursor + 1) == Some(&b'\'') {
            cursor += 2;
            continue;
        }
        let name = reference[body_start..cursor].replace("''", "'");
        return Some((name, cursor + 1, true));
    }
    None
}
1231
1232 fn extract_sheet_spec(reference: &str) -> (SheetSpec, String) {
1235 let Some((first_name, after_first, first_quoted)) = Self::read_sheet_segment(reference, 0)
1236 else {
1237 return Self::extract_sheet_spec_fallback(reference);
1240 };
1241 let _ = first_quoted;
1242
1243 let bytes = reference.as_bytes();
1244
1245 if after_first < bytes.len() && bytes[after_first] == b':' {
1247 let second_start = after_first + 1;
1248 if let Some((second_name, after_second, _)) =
1249 Self::read_sheet_segment(reference, second_start)
1250 && after_second < bytes.len()
1251 && bytes[after_second] == b'!'
1252 {
1253 let ref_part = reference[after_second + 1..].to_string();
1254 return (
1255 SheetSpec::Range {
1256 first: first_name,
1257 last: second_name,
1258 },
1259 ref_part,
1260 );
1261 }
1262
1263 if second_start < bytes.len() {
1269 if let Some(bang) = reference[second_start..].find('!') {
1270 let ref_part = reference[second_start + bang + 1..].to_string();
1271 return (
1272 SheetSpec::Range {
1273 first: first_name,
1274 last: String::new(),
1275 },
1276 ref_part,
1277 );
1278 }
1279 }
1280 }
1281
1282 if after_first < bytes.len() && bytes[after_first] == b'!' {
1284 let ref_part = reference[after_first + 1..].to_string();
1285 return (SheetSpec::Single(first_name), ref_part);
1286 }
1287
1288 Self::extract_sheet_spec_fallback(reference)
1291 }
1292
1293 fn extract_sheet_spec_fallback(reference: &str) -> (SheetSpec, String) {
1294 let bytes = reference.as_bytes();
1295 let mut i = 0;
1300 while i < bytes.len() {
1301 if bytes[i] == b'!' && i > 0 {
1302 let sheet = reference[..i].to_string();
1303 let ref_part = reference[i + 1..].to_string();
1304 return (SheetSpec::Single(sheet), ref_part);
1305 }
1306 i += 1;
1307 }
1308
1309 (SheetSpec::None, reference.to_string())
1310 }
1311
/// Reports whether `s` has the shape of an R1C1-style reference.
///
/// The accepted shape is `R`, then optional digits, then an optional
/// bracketed offset (`[n]` or `[-n]`), followed by `C` with the same optional
/// parts. The whole string must be consumed, and at least one digit run or
/// bracketed offset must be present, so a bare `RC` is rejected. Matching is
/// case-sensitive.
fn is_r1c1_shape(s: &str) -> bool {
    // Consumes `<letter>[digits][\[[-]digits\]]` starting at `pos`.
    // Returns the position after the axis and whether it carried any digits
    // or an offset; `None` when the shape is violated.
    fn axis(bytes: &[u8], mut pos: usize, letter: u8) -> Option<(usize, bool)> {
        if bytes.get(pos) != Some(&letter) {
            return None;
        }
        pos += 1;

        let digits_from = pos;
        while bytes.get(pos).is_some_and(|b| b.is_ascii_digit()) {
            pos += 1;
        }
        let mut anchored = pos > digits_from;

        if bytes.get(pos) == Some(&b'[') {
            pos += 1;
            if bytes.get(pos) == Some(&b'-') {
                pos += 1;
            }
            let offset_from = pos;
            while bytes.get(pos).is_some_and(|b| b.is_ascii_digit()) {
                pos += 1;
            }
            // An offset needs at least one digit and a closing bracket.
            if pos == offset_from || bytes.get(pos) != Some(&b']') {
                return None;
            }
            pos += 1;
            anchored = true;
        }

        Some((pos, anchored))
    }

    let bytes = s.as_bytes();
    let Some((after_row, row_anchored)) = axis(bytes, 0, b'R') else {
        return false;
    };
    let Some((after_col, col_anchored)) = axis(bytes, after_row, b'C') else {
        return false;
    };

    after_col == bytes.len() && (row_anchored || col_anchored)
}
1387
1388 fn parse_table_reference(reference: &str) -> Result<Self, ParsingError> {
1394 let bracket_pos = reference.find('[').ok_or_else(|| {
1395 ParsingError::InvalidReference(format!("Missing '[' in table reference: {reference}"))
1396 })?;
1397 let table_name = reference[..bracket_pos].trim();
1398 if table_name.is_empty() {
1399 return Err(ParsingError::InvalidReference(reference.to_string()));
1400 }
1401
1402 let specifier_str = &reference[bracket_pos..];
1403 let specifier = structured_ref::parse_full_specifier(specifier_str)?;
1404
1405 Ok(ReferenceType::Table(TableReference {
1406 name: table_name.to_string(),
1407 specifier,
1408 }))
1409 }
1410
1411 fn parse_bracketed_structured_reference(reference: &str) -> Result<Self, ParsingError> {
1418 debug_assert!(reference.starts_with('[') && reference.ends_with(']'));
1419 let specifier = structured_ref::parse_full_specifier(reference)?;
1420
1421 match specifier {
1422 Some(TableSpecifier::Column(name)) => Ok(ReferenceType::Table(TableReference {
1423 name,
1424 specifier: Some(TableSpecifier::SpecialItem(SpecialItem::Data)),
1425 })),
1426 other => Ok(ReferenceType::Table(TableReference {
1427 name: String::new(),
1428 specifier: other,
1429 })),
1430 }
1431 }
1432
1433 fn parse_openformula_reference(reference: &str) -> Result<Self, ParsingError> {
1434 if reference.starts_with('[') && reference.ends_with(']') {
1435 let inner = &reference[1..reference.len() - 1];
1436 if inner.is_empty() {
1437 return Err(ParsingError::InvalidReference(
1438 "Empty OpenFormula reference".to_string(),
1439 ));
1440 }
1441
1442 let mut parts = inner.splitn(2, ':');
1443 let start_part_str = parts.next().unwrap();
1444 let end_part_str = parts.next();
1445
1446 let start_part = Self::parse_openformula_part(start_part_str)?;
1447 let end_part = if let Some(part) = end_part_str {
1448 Some(Self::parse_openformula_part(part)?)
1449 } else {
1450 None
1451 };
1452
1453 let sheet = match (&start_part.sheet, &end_part) {
1454 (Some(sheet), Some(end)) => {
1455 if let Some(end_sheet) = &end.sheet {
1456 if end_sheet != sheet {
1457 return Err(ParsingError::InvalidReference(format!(
1458 "Mismatched sheets in reference: {sheet} vs {end_sheet}"
1459 )));
1460 }
1461 }
1462 Some(sheet.clone())
1463 }
1464 (Some(sheet), None) => Some(sheet.clone()),
1465 (None, Some(end)) => end.sheet.clone(),
1466 (None, None) => None,
1467 };
1468
1469 let mut excel_like = String::new();
1470 if let Some(sheet_name) = sheet {
1471 if sheet_name_needs_quoting(&sheet_name) {
1472 let escaped = sheet_name.replace('\'', "''");
1473 excel_like.push('\'');
1474 excel_like.push_str(&escaped);
1475 excel_like.push('\'');
1476 } else {
1477 excel_like.push_str(&sheet_name);
1478 }
1479 excel_like.push('!');
1480 }
1481
1482 excel_like.push_str(&start_part.coord);
1483 if let Some(end) = end_part {
1484 excel_like.push(':');
1485 excel_like.push_str(&end.coord);
1486 }
1487
1488 return Self::parse_excel_reference(&excel_like);
1489 }
1490
1491 Err(ParsingError::InvalidReference(format!(
1492 "Unsupported OpenFormula reference: {reference}"
1493 )))
1494 }
1495
1496 fn parse_openformula_part(part: &str) -> Result<OpenFormulaRefPart, ParsingError> {
1497 let trimmed = part.trim();
1498 if trimmed.is_empty() {
1499 return Err(ParsingError::InvalidReference(
1500 "Empty component in OpenFormula reference".to_string(),
1501 ));
1502 }
1503
1504 if trimmed == "." {
1505 return Err(ParsingError::InvalidReference(
1506 "Incomplete OpenFormula reference component".to_string(),
1507 ));
1508 }
1509
1510 if trimmed.starts_with('[') {
1511 return Err(ParsingError::InvalidReference(format!(
1513 "Unexpected '[' in OpenFormula reference component: {trimmed}"
1514 )));
1515 }
1516
1517 let (sheet, coord_slice) = if let Some(stripped) = trimmed.strip_prefix('.') {
1518 (None, stripped.trim())
1519 } else if let Some(dot_idx) = Self::find_openformula_sheet_separator(trimmed) {
1520 let sheet_part = trimmed[..dot_idx].trim();
1521 let coord_part = trimmed[dot_idx + 1..].trim();
1522 if coord_part.is_empty() {
1523 return Err(ParsingError::InvalidReference(format!(
1524 "Missing coordinate in OpenFormula reference component: {trimmed}"
1525 )));
1526 }
1527 let sheet_name = Self::normalise_openformula_sheet(sheet_part)?;
1528 (Some(sheet_name), coord_part)
1529 } else {
1530 (None, trimmed)
1531 };
1532
1533 let coord = coord_slice.trim_start_matches('.').trim().to_string();
1534
1535 if coord.is_empty() {
1536 return Err(ParsingError::InvalidReference(format!(
1537 "Missing coordinate in OpenFormula reference component: {trimmed}"
1538 )));
1539 }
1540
1541 Ok(OpenFormulaRefPart { sheet, coord })
1542 }
1543
1544 fn normalise_openformula_sheet(sheet: &str) -> Result<String, ParsingError> {
1545 let without_abs = sheet.trim().trim_start_matches('$');
1546
1547 if without_abs.starts_with('\'') {
1548 if without_abs.len() < 2 || !without_abs.ends_with('\'') {
1549 return Err(ParsingError::InvalidReference(format!(
1550 "Unterminated sheet name in OpenFormula reference: {sheet}"
1551 )));
1552 }
1553 let inner = &without_abs[1..without_abs.len() - 1];
1554 Ok(inner.replace("''", "'"))
1555 } else {
1556 Ok(without_abs.to_string())
1557 }
1558 }
1559
1560 fn find_openformula_sheet_separator(part: &str) -> Option<usize> {
1561 let bytes = part.as_bytes();
1562 let mut i = 0;
1563 let mut in_quotes = false;
1564
1565 while i < bytes.len() {
1566 match bytes[i] {
1567 b'\'' => {
1568 if i + 1 < bytes.len() && bytes[i + 1] == b'\'' {
1569 i += 2;
1570 continue;
1571 }
1572 in_quotes = !in_quotes;
1573 i += 1;
1574 }
1575 b'.' if !in_quotes => return Some(i),
1576 _ => i += 1,
1577 }
1578 }
1579
1580 None
1581 }
1582
1583 pub fn to_excel_string(&self) -> String {
1590 match self {
1591 ReferenceType::Cell {
1592 sheet,
1593 row,
1594 col,
1595 row_abs,
1596 col_abs,
1597 } => {
1598 let col_str = Self::format_col(*col, *col_abs);
1599 let row_str = Self::format_row(*row, *row_abs);
1600 if let Some(s) = sheet {
1601 if sheet_name_needs_quoting(s) {
1602 let escaped_name = s.replace('\'', "''");
1603 format!("'{}'!{}{}", escaped_name, col_str, row_str)
1604 } else {
1605 format!("{}!{}{}", s, col_str, row_str)
1606 }
1607 } else {
1608 format!("{}{}", col_str, row_str)
1609 }
1610 }
1611 ReferenceType::Range {
1612 sheet,
1613 start_row,
1614 start_col,
1615 end_row,
1616 end_col,
1617 start_row_abs,
1618 start_col_abs,
1619 end_row_abs,
1620 end_col_abs,
1621 } => {
1622 let start_ref = match (start_col, start_row) {
1624 (Some(col), Some(row)) => format!(
1625 "{}{}",
1626 Self::format_col(*col, *start_col_abs),
1627 Self::format_row(*row, *start_row_abs)
1628 ),
1629 (Some(col), None) => Self::format_col(*col, *start_col_abs),
1630 (None, Some(row)) => Self::format_row(*row, *start_row_abs),
1631 (None, None) => "".to_string(), };
1633
1634 let end_ref = match (end_col, end_row) {
1636 (Some(col), Some(row)) => format!(
1637 "{}{}",
1638 Self::format_col(*col, *end_col_abs),
1639 Self::format_row(*row, *end_row_abs)
1640 ),
1641 (Some(col), None) => Self::format_col(*col, *end_col_abs),
1642 (None, Some(row)) => Self::format_row(*row, *end_row_abs),
1643 (None, None) => "".to_string(), };
1645
1646 let range_part = format!("{start_ref}:{end_ref}");
1647
1648 if let Some(s) = sheet {
1649 if sheet_name_needs_quoting(s) {
1650 let escaped_name = s.replace('\'', "''");
1651 format!("'{escaped_name}'!{range_part}")
1652 } else {
1653 format!("{s}!{range_part}")
1654 }
1655 } else {
1656 range_part
1657 }
1658 }
1659 ReferenceType::Cell3D { .. } | ReferenceType::Range3D { .. } => format!("{self}"),
1660 ReferenceType::External(ext) => ext.raw.clone(),
1661 ReferenceType::Table(table_ref) => {
1662 if let Some(specifier) = &table_ref.specifier {
1663 format!("{}[{}]", table_ref.name, specifier)
1664 } else {
1665 table_ref.name.clone()
1666 }
1667 }
1668 ReferenceType::NamedRange(name) => name.clone(),
1669 }
1670 }
1671}
1672
/// The kind of a single node in a parsed formula tree, together with its children.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, PartialEq, Hash)]
pub enum ASTNodeType {
    /// A constant value (the parser produces number, text, boolean and error literals).
    Literal(LiteralValue),
    /// A reference operand.
    Reference {
        /// Reference text exactly as it appeared in the source token.
        original: String,
        /// Parsed form of `original`.
        reference: ReferenceType,
    },
    /// A prefix or postfix operator applied to one operand.
    UnaryOp {
        /// Operator text taken from the operator token.
        op: String,
        /// The operand.
        expr: Box<ASTNode>,
    },
    /// An infix operator applied to two operands.
    BinaryOp {
        /// Operator text taken from the operator token.
        op: String,
        /// Left-hand operand.
        left: Box<ASTNode>,
        /// Right-hand operand.
        right: Box<ASTNode>,
    },
    /// A call to a named function.
    Function {
        /// Function name.
        name: String,
        /// Arguments in source order.
        args: Vec<ASTNode>,
    },
    /// A call whose callee is itself an expression; the parser builds this when an
    /// already-parsed expression is directly followed by an opening parenthesis.
    Call {
        /// Expression being invoked.
        callee: Box<ASTNode>,
        /// Arguments in source order.
        args: Vec<ASTNode>,
    },
    /// An array literal stored as a list of rows, each row a list of items.
    Array(Vec<Vec<ASTNode>>),
}
1703
1704impl Display for ASTNodeType {
1705 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1706 match self {
1707 ASTNodeType::Literal(value) => write!(f, "Literal({value})"),
1708 ASTNodeType::Reference { reference, .. } => write!(f, "Reference({reference:?})"),
1709 ASTNodeType::UnaryOp { op, expr } => write!(f, "UnaryOp({op}, {expr})"),
1710 ASTNodeType::BinaryOp { op, left, right } => {
1711 write!(f, "BinaryOp({op}, {left}, {right})")
1712 }
1713 ASTNodeType::Function { name, args } => write!(f, "Function({name}, {args:?})"),
1714 ASTNodeType::Call { callee, args } => write!(f, "Call({callee}, {args:?})"),
1715 ASTNodeType::Array(rows) => write!(f, "Array({rows:?})"),
1716 }
1717 }
1718}
1719
/// A node of the parsed formula tree.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, PartialEq)]
pub struct ASTNode {
    /// What this node is, including its children.
    pub node_type: ASTNodeType,
    /// Token this node was built from, when there is one (call nodes built by the
    /// parser carry `None`).
    pub source_token: Option<Token>,
    /// Volatility flag. `ASTNode::new` sets it to `false`; the parser ORs the flags
    /// of child nodes into operator and call nodes.
    pub contains_volatile: bool,
}
1732
1733impl ASTNode {
1734 pub fn new(node_type: ASTNodeType, source_token: Option<Token>) -> Self {
1735 ASTNode {
1736 node_type,
1737 source_token,
1738 contains_volatile: false,
1739 }
1740 }
1741
1742 pub fn new_with_volatile(
1744 node_type: ASTNodeType,
1745 source_token: Option<Token>,
1746 contains_volatile: bool,
1747 ) -> Self {
1748 ASTNode {
1749 node_type,
1750 source_token,
1751 contains_volatile,
1752 }
1753 }
1754
    /// Returns the stored `contains_volatile` flag of this node.
    pub fn contains_volatile(&self) -> bool {
        self.contains_volatile
    }
1759
    /// Structural fingerprint of this subtree; identical to [`Self::calculate_hash`].
    pub fn fingerprint(&self) -> u64 {
        self.calculate_hash()
    }
1763
1764 pub fn calculate_hash(&self) -> u64 {
1766 let mut hasher = FormulaHasher::new();
1767 self.hash_node(&mut hasher);
1768 hasher.finish()
1769 }
1770
1771 fn hash_node(&self, hasher: &mut FormulaHasher) {
1772 match &self.node_type {
1773 ASTNodeType::Literal(value) => {
1774 hasher.write(&[1]); value.hash(hasher);
1776 }
1777 ASTNodeType::Reference { reference, .. } => {
1778 hasher.write(&[2]); reference.hash(hasher);
1780 }
1781 ASTNodeType::UnaryOp { op, expr } => {
1782 hasher.write(&[3]); hasher.write(op.as_bytes());
1784 expr.hash_node(hasher);
1785 }
1786 ASTNodeType::BinaryOp { op, left, right } => {
1787 hasher.write(&[4]); hasher.write(op.as_bytes());
1789 left.hash_node(hasher);
1790 right.hash_node(hasher);
1791 }
1792 ASTNodeType::Function { name, args } => {
1793 hasher.write(&[5]); let name_lower = name.to_lowercase();
1796 hasher.write(name_lower.as_bytes());
1797 hasher.write_usize(args.len());
1798 for arg in args {
1799 arg.hash_node(hasher);
1800 }
1801 }
1802 ASTNodeType::Call { callee, args } => {
1803 hasher.write(&[7]); callee.hash_node(hasher);
1805 hasher.write_usize(args.len());
1806 for arg in args {
1807 arg.hash_node(hasher);
1808 }
1809 }
1810 ASTNodeType::Array(rows) => {
1811 hasher.write(&[6]); hasher.write_usize(rows.len());
1813 for row in rows {
1814 hasher.write_usize(row.len());
1815 for item in row {
1816 item.hash_node(hasher);
1817 }
1818 }
1819 }
1820 }
1821 }
1822
1823 pub fn get_dependencies(&self) -> Vec<&ReferenceType> {
1824 let mut dependencies = Vec::new();
1825 self.collect_dependencies(&mut dependencies);
1826 dependencies
1827 }
1828
1829 pub fn get_dependency_strings(&self) -> Vec<String> {
1830 self.get_dependencies()
1831 .into_iter()
1832 .map(|dep| format!("{dep}"))
1833 .collect()
1834 }
1835
1836 fn collect_dependencies<'a>(&'a self, dependencies: &mut Vec<&'a ReferenceType>) {
1837 match &self.node_type {
1838 ASTNodeType::Reference { reference, .. } => {
1839 dependencies.push(reference);
1840 }
1841 ASTNodeType::UnaryOp { expr, .. } => {
1842 expr.collect_dependencies(dependencies);
1843 }
1844 ASTNodeType::BinaryOp { left, right, .. } => {
1845 left.collect_dependencies(dependencies);
1846 right.collect_dependencies(dependencies);
1847 }
1848 ASTNodeType::Function { args, .. } => {
1849 for arg in args {
1850 arg.collect_dependencies(dependencies);
1851 }
1852 }
1853 ASTNodeType::Call { callee, args } => {
1854 callee.collect_dependencies(dependencies);
1855 for arg in args {
1856 arg.collect_dependencies(dependencies);
1857 }
1858 }
1859 ASTNodeType::Array(rows) => {
1860 for row in rows {
1861 for item in row {
1862 item.collect_dependencies(dependencies);
1863 }
1864 }
1865 }
1866 _ => {}
1867 }
1868 }
1869
    /// Returns an iterator over the references in this subtree as borrowed
    /// [`RefView`]s. The iterator starts with only this node on its work stack and
    /// walks the tree lazily as it is advanced.
    pub fn refs(&self) -> RefIter<'_> {
        RefIter {
            stack: smallvec::smallvec![self],
        }
    }
1877
1878 pub fn visit_refs<V: FnMut(RefView<'_>)>(&self, mut visitor: V) {
1880 let mut stack: Vec<&ASTNode> = Vec::with_capacity(8);
1881 stack.push(self);
1882 while let Some(node) = stack.pop() {
1883 match &node.node_type {
1884 ASTNodeType::Reference { reference, .. } => visitor(RefView::from(reference)),
1885 ASTNodeType::UnaryOp { expr, .. } => stack.push(expr),
1886 ASTNodeType::BinaryOp { left, right, .. } => {
1887 stack.push(right);
1889 stack.push(left);
1890 }
1891 ASTNodeType::Function { args, .. } => {
1892 for a in args.iter().rev() {
1893 stack.push(a);
1894 }
1895 }
1896 ASTNodeType::Call { callee, args } => {
1897 for a in args.iter().rev() {
1898 stack.push(a);
1899 }
1900 stack.push(callee);
1901 }
1902 ASTNodeType::Array(rows) => {
1903 for r in rows.iter().rev() {
1904 for item in r.iter().rev() {
1905 stack.push(item);
1906 }
1907 }
1908 }
1909 ASTNodeType::Literal(_) => {}
1910 }
1911 }
1912 }
1913
1914 pub fn collect_references(&self, policy: &CollectPolicy) -> SmallVec<[ReferenceType; 4]> {
1916 let mut out: SmallVec<[ReferenceType; 4]> = SmallVec::new();
1917 self.visit_refs(|rv| match rv {
1918 RefView::Cell {
1919 sheet,
1920 row,
1921 col,
1922 row_abs,
1923 col_abs,
1924 } => out.push(ReferenceType::Cell {
1925 sheet: sheet.map(|s| s.to_string()),
1926 row,
1927 col,
1928 row_abs,
1929 col_abs,
1930 }),
1931 RefView::Range {
1932 sheet,
1933 start_row,
1934 start_col,
1935 end_row,
1936 end_col,
1937 start_row_abs,
1938 start_col_abs,
1939 end_row_abs,
1940 end_col_abs,
1941 } => {
1942 if policy.expand_small_ranges {
1944 if let (Some(sr), Some(sc), Some(er), Some(ec)) =
1945 (start_row, start_col, end_row, end_col)
1946 {
1947 let rows = er.saturating_sub(sr) + 1;
1948 let cols = ec.saturating_sub(sc) + 1;
1949 let area = rows.saturating_mul(cols);
1950 if area as usize <= policy.range_expansion_limit {
1951 let row_abs = start_row_abs && end_row_abs;
1952 let col_abs = start_col_abs && end_col_abs;
1953 for r in sr..=er {
1954 for c in sc..=ec {
1955 out.push(ReferenceType::Cell {
1956 sheet: sheet.map(|s| s.to_string()),
1957 row: r,
1958 col: c,
1959 row_abs,
1960 col_abs,
1961 });
1962 }
1963 }
1964 return; }
1966 }
1967 }
1968 out.push(ReferenceType::Range {
1969 sheet: sheet.map(|s| s.to_string()),
1970 start_row,
1971 start_col,
1972 end_row,
1973 end_col,
1974 start_row_abs,
1975 start_col_abs,
1976 end_row_abs,
1977 end_col_abs,
1978 });
1979 }
1980 RefView::Cell3D {
1981 sheet_first,
1982 sheet_last,
1983 row,
1984 col,
1985 row_abs,
1986 col_abs,
1987 } => out.push(ReferenceType::Cell3D {
1988 sheet_first: sheet_first.to_string(),
1989 sheet_last: sheet_last.to_string(),
1990 row,
1991 col,
1992 row_abs,
1993 col_abs,
1994 }),
1995 RefView::Range3D {
1996 sheet_first,
1997 sheet_last,
1998 start_row,
1999 start_col,
2000 end_row,
2001 end_col,
2002 start_row_abs,
2003 start_col_abs,
2004 end_row_abs,
2005 end_col_abs,
2006 } => out.push(ReferenceType::Range3D {
2007 sheet_first: sheet_first.to_string(),
2008 sheet_last: sheet_last.to_string(),
2009 start_row,
2010 start_col,
2011 end_row,
2012 end_col,
2013 start_row_abs,
2014 start_col_abs,
2015 end_row_abs,
2016 end_col_abs,
2017 }),
2018 RefView::External {
2019 raw,
2020 book,
2021 sheet,
2022 kind,
2023 } => out.push(ReferenceType::External(ExternalReference {
2024 raw: raw.to_string(),
2025 book: ExternalBookRef::Token(book.to_string()),
2026 sheet: sheet.to_string(),
2027 kind,
2028 })),
2029 RefView::Table { name, specifier } => out.push(ReferenceType::Table(TableReference {
2030 name: name.to_string(),
2031 specifier: specifier.cloned(),
2032 })),
2033 RefView::NamedRange { name } => {
2034 if policy.include_names {
2035 out.push(ReferenceType::NamedRange(name.to_string()));
2036 }
2037 }
2038 });
2039 out
2040 }
2041 pub fn update_sheet_references(&mut self, target_name: Option<&str>, new_name: &str) {
2047 match &mut self.node_type {
2048 ASTNodeType::Reference {
2049 reference: ReferenceType::Cell { sheet, .. } | ReferenceType::Range { sheet, .. },
2050 ..
2051 } => {
2052 if let Some(current_sheet) = sheet
2053 && (target_name.is_none() || target_name == Some(current_sheet.as_str()))
2054 {
2055 *sheet = Some(new_name.to_string());
2056 }
2057 }
2058 ASTNodeType::Reference {
2059 reference:
2060 ReferenceType::Cell3D {
2061 sheet_first,
2062 sheet_last,
2063 ..
2064 }
2065 | ReferenceType::Range3D {
2066 sheet_first,
2067 sheet_last,
2068 ..
2069 },
2070 ..
2071 } => {
2072 if target_name.is_none() || target_name == Some(sheet_first.as_str()) {
2073 *sheet_first = new_name.to_string();
2074 }
2075 if target_name.is_none() || target_name == Some(sheet_last.as_str()) {
2076 *sheet_last = new_name.to_string();
2077 }
2078 }
2079 ASTNodeType::UnaryOp { expr, .. } => {
2080 expr.update_sheet_references(target_name, new_name);
2081 }
2082 ASTNodeType::BinaryOp { left, right, .. } => {
2083 left.update_sheet_references(target_name, new_name);
2084 right.update_sheet_references(target_name, new_name);
2085 }
2086 ASTNodeType::Function { args, .. } => {
2087 for arg in args {
2088 arg.update_sheet_references(target_name, new_name);
2089 }
2090 }
2091 ASTNodeType::Call { callee, args } => {
2092 callee.update_sheet_references(target_name, new_name);
2093 for arg in args {
2094 arg.update_sheet_references(target_name, new_name);
2095 }
2096 }
2097 ASTNodeType::Array(rows) => {
2098 for row in rows {
2099 for cell in row {
2100 cell.update_sheet_references(target_name, new_name);
2101 }
2102 }
2103 }
2104 _ => {}
2105 }
2106 }
2107}
2108
/// Borrowed, `Copy` view of a [`ReferenceType`]: string data is exposed as `&str`
/// slices into the original reference instead of owned `String`s.
#[derive(Clone, Copy, Debug)]
pub enum RefView<'a> {
    /// View of `ReferenceType::Cell`.
    Cell {
        /// Sheet name, if the reference carries one.
        sheet: Option<&'a str>,
        row: u32,
        col: u32,
        /// Absolute flag for the row.
        row_abs: bool,
        /// Absolute flag for the column.
        col_abs: bool,
    },
    /// View of `ReferenceType::Range`; a bound that is `None` is absent on that side.
    Range {
        /// Sheet name, if the reference carries one.
        sheet: Option<&'a str>,
        start_row: Option<u32>,
        start_col: Option<u32>,
        end_row: Option<u32>,
        end_col: Option<u32>,
        start_row_abs: bool,
        start_col_abs: bool,
        end_row_abs: bool,
        end_col_abs: bool,
    },
    /// View of `ReferenceType::Cell3D` (a cell addressed across a span of sheets).
    Cell3D {
        /// First sheet of the span.
        sheet_first: &'a str,
        /// Last sheet of the span.
        sheet_last: &'a str,
        row: u32,
        col: u32,
        row_abs: bool,
        col_abs: bool,
    },
    /// View of `ReferenceType::Range3D` (a range addressed across a span of sheets).
    Range3D {
        /// First sheet of the span.
        sheet_first: &'a str,
        /// Last sheet of the span.
        sheet_last: &'a str,
        start_row: Option<u32>,
        start_col: Option<u32>,
        end_row: Option<u32>,
        end_col: Option<u32>,
        start_row_abs: bool,
        start_col_abs: bool,
        end_row_abs: bool,
        end_col_abs: bool,
    },
    /// View of `ReferenceType::External`.
    External {
        /// Raw text stored on the external reference.
        raw: &'a str,
        /// Book token as returned by the external reference's `book.token()`.
        book: &'a str,
        /// Sheet name stored on the external reference.
        sheet: &'a str,
        kind: ExternalRefKind,
    },
    /// View of `ReferenceType::Table`.
    Table {
        /// Table name (may be empty).
        name: &'a str,
        /// Structured-reference specifier, if any.
        specifier: Option<&'a TableSpecifier>,
    },
    /// View of `ReferenceType::NamedRange`.
    NamedRange {
        name: &'a str,
    },
}
2166
2167impl<'a> From<&'a ReferenceType> for RefView<'a> {
2168 fn from(r: &'a ReferenceType) -> Self {
2169 match r {
2170 ReferenceType::Cell {
2171 sheet,
2172 row,
2173 col,
2174 row_abs,
2175 col_abs,
2176 } => RefView::Cell {
2177 sheet: sheet.as_deref(),
2178 row: *row,
2179 col: *col,
2180 row_abs: *row_abs,
2181 col_abs: *col_abs,
2182 },
2183 ReferenceType::Range {
2184 sheet,
2185 start_row,
2186 start_col,
2187 end_row,
2188 end_col,
2189 start_row_abs,
2190 start_col_abs,
2191 end_row_abs,
2192 end_col_abs,
2193 } => RefView::Range {
2194 sheet: sheet.as_deref(),
2195 start_row: *start_row,
2196 start_col: *start_col,
2197 end_row: *end_row,
2198 end_col: *end_col,
2199 start_row_abs: *start_row_abs,
2200 start_col_abs: *start_col_abs,
2201 end_row_abs: *end_row_abs,
2202 end_col_abs: *end_col_abs,
2203 },
2204 ReferenceType::Cell3D {
2205 sheet_first,
2206 sheet_last,
2207 row,
2208 col,
2209 row_abs,
2210 col_abs,
2211 } => RefView::Cell3D {
2212 sheet_first: sheet_first.as_str(),
2213 sheet_last: sheet_last.as_str(),
2214 row: *row,
2215 col: *col,
2216 row_abs: *row_abs,
2217 col_abs: *col_abs,
2218 },
2219 ReferenceType::Range3D {
2220 sheet_first,
2221 sheet_last,
2222 start_row,
2223 start_col,
2224 end_row,
2225 end_col,
2226 start_row_abs,
2227 start_col_abs,
2228 end_row_abs,
2229 end_col_abs,
2230 } => RefView::Range3D {
2231 sheet_first: sheet_first.as_str(),
2232 sheet_last: sheet_last.as_str(),
2233 start_row: *start_row,
2234 start_col: *start_col,
2235 end_row: *end_row,
2236 end_col: *end_col,
2237 start_row_abs: *start_row_abs,
2238 start_col_abs: *start_col_abs,
2239 end_row_abs: *end_row_abs,
2240 end_col_abs: *end_col_abs,
2241 },
2242 ReferenceType::External(ext) => RefView::External {
2243 raw: ext.raw.as_str(),
2244 book: ext.book.token(),
2245 sheet: ext.sheet.as_str(),
2246 kind: ext.kind,
2247 },
2248 ReferenceType::Table(tr) => RefView::Table {
2249 name: tr.name.as_str(),
2250 specifier: tr.specifier.as_ref(),
2251 },
2252 ReferenceType::NamedRange(name) => RefView::NamedRange { name },
2253 }
2254 }
2255}
2256
/// Iterator over the references of an AST subtree, created by `ASTNode::refs`.
pub struct RefIter<'a> {
    /// Nodes still to be visited; the next node to process is popped from the end.
    stack: smallvec::SmallVec<[&'a ASTNode; 8]>,
}
2261
2262impl<'a> Iterator for RefIter<'a> {
2263 type Item = RefView<'a>;
2264 fn next(&mut self) -> Option<Self::Item> {
2265 while let Some(node) = self.stack.pop() {
2266 match &node.node_type {
2267 ASTNodeType::Reference { reference, .. } => return Some(RefView::from(reference)),
2268 ASTNodeType::UnaryOp { expr, .. } => self.stack.push(expr),
2269 ASTNodeType::BinaryOp { left, right, .. } => {
2270 self.stack.push(right);
2271 self.stack.push(left);
2272 }
2273 ASTNodeType::Function { args, .. } => {
2274 for a in args.iter().rev() {
2275 self.stack.push(a);
2276 }
2277 }
2278 ASTNodeType::Call { callee, args } => {
2279 for a in args.iter().rev() {
2280 self.stack.push(a);
2281 }
2282 self.stack.push(callee);
2283 }
2284 ASTNodeType::Array(rows) => {
2285 for r in rows.iter().rev() {
2286 for item in r.iter().rev() {
2287 self.stack.push(item);
2288 }
2289 }
2290 }
2291 ASTNodeType::Literal(_) => {}
2292 }
2293 }
2294 None
2295 }
2296}
2297
/// Options that control `ASTNode::collect_references`.
#[derive(Debug, Clone)]
pub struct CollectPolicy {
    /// When `true`, fully bounded ranges that are small enough are replaced by
    /// their individual cells.
    pub expand_small_ranges: bool,
    /// Largest cell count (rows x columns) a range may have and still be expanded.
    pub range_expansion_limit: usize,
    /// When `false`, named ranges are left out of the result.
    pub include_names: bool,
}
2305
2306impl Default for CollectPolicy {
2307 fn default() -> Self {
2308 Self {
2309 expand_small_ranges: false,
2310 range_expansion_limit: 0,
2311 include_names: true,
2312 }
2313 }
2314}
2315
2316impl Display for ASTNode {
2317 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2318 write!(f, "{}", self.node_type)
2319 }
2320}
2321
2322impl std::hash::Hash for ASTNode {
2323 fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
2324 let hash = self.calculate_hash();
2325 state.write_u64(hash);
2326 }
2327}
2328
/// Parser that turns a token sequence into an [`ASTNode`] tree.
pub struct Parser {
    /// Tokens being parsed.
    tokens: Arc<[Token]>,
    /// Index into `tokens` of the next token to consume.
    position: usize,
    /// Optional predicate installed with `with_volatility_classifier`; it receives
    /// a `&str` and returns a `bool`.
    /// NOTE(review): presumably called with function names to flag volatile
    /// functions; the call site is not in this part of the file, so confirm.
    volatility_classifier: Option<VolatilityClassifierBox>,
    /// Dialect passed to reference parsing for range operands.
    dialect: FormulaDialect,
    /// While greater than zero, the binary-operator loop stops at a `,` infix
    /// token instead of consuming it as an operator.
    in_call_args_depth: usize,
}
2340
2341impl TryFrom<&str> for Parser {
2342 type Error = TokenizerError;
2343
2344 fn try_from(formula: &str) -> Result<Self, Self::Error> {
2345 let tokens = Tokenizer::new(formula)?.items;
2346 Ok(Self::new(tokens, false))
2347 }
2348}
2349
2350impl TryFrom<String> for Parser {
2351 type Error = TokenizerError;
2352
2353 fn try_from(formula: String) -> Result<Self, Self::Error> {
2354 Self::try_from(formula.as_str())
2355 }
2356}
2357
2358impl Parser {
    /// Creates a parser for `tokens` using `FormulaDialect::Excel`.
    ///
    /// `include_whitespace` is forwarded to [`Self::new_with_dialect`]; when it is
    /// `false`, whitespace tokens are dropped before parsing.
    pub fn new(tokens: Vec<Token>, include_whitespace: bool) -> Self {
        Self::new_with_dialect(tokens, include_whitespace, FormulaDialect::Excel)
    }
2362
2363 pub fn new_with_dialect(
2364 mut tokens: Vec<Token>,
2365 include_whitespace: bool,
2366 dialect: FormulaDialect,
2367 ) -> Self {
2368 if !include_whitespace {
2369 tokens.retain(|t| t.token_type != TokenType::Whitespace);
2370 }
2371
2372 Parser {
2373 tokens: Arc::from(tokens.into_boxed_slice()),
2374 position: 0,
2375 volatility_classifier: None,
2376 dialect,
2377 in_call_args_depth: 0,
2378 }
2379 }
2380
2381 pub fn try_from_formula(formula: &str) -> Result<Self, TokenizerError> {
2382 let tokens = Tokenizer::new(formula)?.items;
2383 Ok(Self::new(tokens, false))
2384 }
2385
    /// Builder-style setter: stores `f` (boxed) as the parser's volatility
    /// classifier, replacing any previous one, and returns the parser.
    pub fn with_volatility_classifier<F>(mut self, f: F) -> Self
    where
        F: Fn(&str) -> bool + Send + Sync + 'static,
    {
        self.volatility_classifier = Some(Box::new(f));
        self
    }
2395
2396 pub fn new_with_classifier<F>(tokens: Vec<Token>, include_whitespace: bool, f: F) -> Self
2398 where
2399 F: Fn(&str) -> bool + Send + Sync + 'static,
2400 {
2401 Self::new(tokens, include_whitespace).with_volatility_classifier(f)
2402 }
2403
2404 pub fn new_with_classifier_and_dialect<F>(
2405 tokens: Vec<Token>,
2406 include_whitespace: bool,
2407 dialect: FormulaDialect,
2408 f: F,
2409 ) -> Self
2410 where
2411 F: Fn(&str) -> bool + Send + Sync + 'static,
2412 {
2413 Self::new_with_dialect(tokens, include_whitespace, dialect).with_volatility_classifier(f)
2414 }
2415
2416 fn skip_whitespace(&mut self) {
2417 while self.position < self.tokens.len()
2418 && self.tokens[self.position].token_type == TokenType::Whitespace
2419 {
2420 self.position += 1;
2421 }
2422 }
2423
2424 pub fn parse(&mut self) -> Result<ASTNode, ParserError> {
2426 if self.tokens.is_empty() {
2427 return Err(ParserError {
2428 message: "No tokens to parse".to_string(),
2429 position: None,
2430 });
2431 }
2432
2433 self.skip_whitespace();
2434 if self.position >= self.tokens.len() {
2435 return Err(ParserError {
2436 message: "No tokens to parse".to_string(),
2437 position: None,
2438 });
2439 }
2440
2441 if self.tokens[self.position].token_type == TokenType::Literal {
2443 let token = self.tokens[self.position].clone();
2444 self.position += 1;
2445 self.skip_whitespace();
2446 if self.position < self.tokens.len() {
2447 return Err(ParserError {
2448 message: format!(
2449 "Unexpected token at position {}: {:?}",
2450 self.position, self.tokens[self.position]
2451 ),
2452 position: Some(self.position),
2453 });
2454 }
2455 return Ok(ASTNode::new(
2456 ASTNodeType::Literal(LiteralValue::Text(token.value.clone())),
2457 Some(token),
2458 ));
2459 }
2460
2461 let ast = self.parse_expression()?;
2462 self.skip_whitespace();
2463 if self.position < self.tokens.len() {
2464 return Err(ParserError {
2465 message: format!(
2466 "Unexpected token at position {}: {:?}",
2467 self.position, self.tokens[self.position]
2468 ),
2469 position: Some(self.position),
2470 });
2471 }
2472 Ok(ast)
2473 }
2474
    /// Parses a complete expression, i.e. `parse_bp` with a minimum precedence of 0.
    fn parse_expression(&mut self) -> Result<ASTNode, ParserError> {
        self.parse_bp(0)
    }
2478
    /// Precedence-climbing core of the expression parser.
    ///
    /// Parses one prefix/primary expression, then keeps extending it to the right
    /// for as long as the next token is a call parenthesis, a postfix operator, or
    /// an infix operator whose precedence is at least `min_precedence`.
    /// The volatility flag of each new node is the OR of its children's flags.
    ///
    /// # Errors
    /// Propagates errors from `parse_prefix`, `parse_call_arguments` and the
    /// recursive right-hand-side parse.
    fn parse_bp(&mut self, min_precedence: u8) -> Result<ASTNode, ParserError> {
        let mut left = self.parse_prefix()?;

        loop {
            self.skip_whitespace();
            if self.position >= self.tokens.len() {
                break;
            }

            // An opening parenthesis directly after an expression is a call on it.
            if self.tokens[self.position].token_type == TokenType::Paren
                && self.tokens[self.position].subtype == TokenSubType::Open
            {
                self.position += 1;
                let args = self.parse_call_arguments()?;
                let call_volatile =
                    left.contains_volatile || args.iter().any(|a| a.contains_volatile);
                left = ASTNode::new_with_volatile(
                    ASTNodeType::Call {
                        callee: Box::new(left),
                        args,
                    },
                    None,
                    call_volatile,
                );
                continue;
            }

            // Postfix operator: wraps what has been parsed so far.
            if self.tokens[self.position].token_type == TokenType::OpPostfix {
                let (precedence, _) = self.tokens[self.position]
                    .get_precedence()
                    .unwrap_or((0, Associativity::Left));
                if precedence < min_precedence {
                    break;
                }

                let op_token = self.tokens[self.position].clone();
                self.position += 1;
                let contains_volatile = left.contains_volatile;
                left = ASTNode::new_with_volatile(
                    ASTNodeType::UnaryOp {
                        op: op_token.value.clone(),
                        expr: Box::new(left),
                    },
                    Some(op_token),
                    contains_volatile,
                );
                continue;
            }

            // Anything that is not an infix operator ends this expression.
            let token = &self.tokens[self.position];
            if token.token_type != TokenType::OpInfix {
                break;
            }

            // Inside call arguments a `,` is left for the argument parser.
            if self.in_call_args_depth > 0 && token.value == "," {
                break;
            }

            let (precedence, associativity) =
                token.get_precedence().unwrap_or((0, Associativity::Left));
            if precedence < min_precedence {
                break;
            }

            let op_token = self.tokens[self.position].clone();
            self.position += 1;

            // Left-associative operators require strictly higher precedence on the
            // right; other operators allow the same precedence again.
            let next_min_precedence = if associativity == Associativity::Left {
                precedence + 1
            } else {
                precedence
            };

            let right = self.parse_bp(next_min_precedence)?;
            let contains_volatile = left.contains_volatile || right.contains_volatile;
            left = ASTNode::new_with_volatile(
                ASTNodeType::BinaryOp {
                    op: op_token.value.clone(),
                    left: Box::new(left),
                    right: Box::new(right),
                },
                Some(op_token),
                contains_volatile,
            );
        }

        Ok(left)
    }
2574
2575 fn parse_prefix(&mut self) -> Result<ASTNode, ParserError> {
2576 self.skip_whitespace();
2577 if self.position < self.tokens.len()
2578 && self.tokens[self.position].token_type == TokenType::OpPrefix
2579 {
2580 let op_token = self.tokens[self.position].clone();
2581 self.position += 1;
2582
2583 let (precedence, _) = op_token
2586 .get_precedence()
2587 .unwrap_or((0, Associativity::Right));
2588
2589 let expr = self.parse_bp(precedence)?;
2590 let contains_volatile = expr.contains_volatile;
2591 return Ok(ASTNode::new_with_volatile(
2592 ASTNodeType::UnaryOp {
2593 op: op_token.value.clone(),
2594 expr: Box::new(expr),
2595 },
2596 Some(op_token),
2597 contains_volatile,
2598 ));
2599 }
2600
2601 self.parse_primary()
2602 }
2603
2604 fn parse_primary(&mut self) -> Result<ASTNode, ParserError> {
2605 self.skip_whitespace();
2606 if self.position >= self.tokens.len() {
2607 return Err(ParserError {
2608 message: "Unexpected end of tokens".to_string(),
2609 position: Some(self.position),
2610 });
2611 }
2612
2613 let token = &self.tokens[self.position];
2614 match token.token_type {
2615 TokenType::Operand => {
2616 let operand_token = self.tokens[self.position].clone();
2617 self.position += 1;
2618 self.parse_operand(operand_token)
2619 }
2620 TokenType::Func => {
2621 let func_token = self.tokens[self.position].clone();
2622 self.position += 1;
2623 self.parse_function(func_token)
2624 }
2625 TokenType::Paren if token.subtype == TokenSubType::Open => {
2626 self.position += 1;
2627 let expr = self.parse_expression()?;
2628 if self.position >= self.tokens.len()
2629 || self.tokens[self.position].token_type != TokenType::Paren
2630 || self.tokens[self.position].subtype != TokenSubType::Close
2631 {
2632 return Err(ParserError {
2633 message: "Expected closing parenthesis".to_string(),
2634 position: Some(self.position),
2635 });
2636 }
2637 self.position += 1;
2638 Ok(expr)
2639 }
2640 TokenType::Array if token.subtype == TokenSubType::Open => {
2641 self.position += 1;
2642 self.parse_array()
2643 }
2644 _ => Err(ParserError {
2645 message: format!("Unexpected token: {token:?}"),
2646 position: Some(self.position),
2647 }),
2648 }
2649 }
2650
2651 fn parse_operand(&mut self, token: Token) -> Result<ASTNode, ParserError> {
2652 match token.subtype {
2653 TokenSubType::Number => {
2654 let value = token.value.parse::<f64>().map_err(|_| ParserError {
2655 message: format!("Invalid number: {}", token.value),
2656 position: Some(self.position),
2657 })?;
2658 Ok(ASTNode::new(
2659 ASTNodeType::Literal(LiteralValue::Number(value)),
2660 Some(token),
2661 ))
2662 }
2663 TokenSubType::Text => {
2664 let mut text = token.value.clone();
2666 if text.starts_with('"') && text.ends_with('"') && text.len() >= 2 {
2667 text = text[1..text.len() - 1].to_string();
2668 text = text.replace("\"\"", "\"");
2670 }
2671 Ok(ASTNode::new(
2672 ASTNodeType::Literal(LiteralValue::Text(text)),
2673 Some(token),
2674 ))
2675 }
2676 TokenSubType::Logical => {
2677 let value = token.value.eq_ignore_ascii_case("TRUE");
2678 Ok(ASTNode::new(
2679 ASTNodeType::Literal(LiteralValue::Boolean(value)),
2680 Some(token),
2681 ))
2682 }
2683 TokenSubType::Error => {
2684 let error = ExcelError::from_error_string(&token.value);
2685 Ok(ASTNode::new(
2686 ASTNodeType::Literal(LiteralValue::Error(error)),
2687 Some(token),
2688 ))
2689 }
2690 TokenSubType::Range => {
2691 let reference = ReferenceType::from_string_with_dialect(&token.value, self.dialect)
2692 .map_err(|e| ParserError {
2693 message: format!("Invalid reference '{}': {}", token.value, e),
2694 position: Some(self.position),
2695 })?;
2696 Ok(ASTNode::new(
2697 ASTNodeType::Reference {
2698 original: token.value.clone(),
2699 reference,
2700 },
2701 Some(token),
2702 ))
2703 }
2704 _ => Err(ParserError {
2705 message: format!("Unexpected operand subtype: {:?}", token.subtype),
2706 position: Some(self.position),
2707 }),
2708 }
2709 }
2710
2711 fn parse_function(&mut self, func_token: Token) -> Result<ASTNode, ParserError> {
2712 let name = func_token.value[..func_token.value.len() - 1].to_string();
2713 let args = self.parse_function_arguments()?;
2714 let this_is_volatile = self
2716 .volatility_classifier
2717 .as_ref()
2718 .map(|f| f(name.as_str()))
2719 .unwrap_or(false);
2720 let args_volatile = args.iter().any(|a| a.contains_volatile);
2721
2722 Ok(ASTNode::new_with_volatile(
2723 ASTNodeType::Function { name, args },
2724 Some(func_token),
2725 this_is_volatile || args_volatile,
2726 ))
2727 }
2728
2729 fn parse_call_arguments(&mut self) -> Result<Vec<ASTNode>, ParserError> {
2737 let mut args: Vec<ASTNode> = Vec::new();
2738
2739 self.skip_whitespace();
2740 if self.position < self.tokens.len()
2742 && self.tokens[self.position].token_type == TokenType::Paren
2743 && self.tokens[self.position].subtype == TokenSubType::Close
2744 {
2745 self.position += 1;
2746 return Ok(args);
2747 }
2748
2749 self.in_call_args_depth += 1;
2750 let result = (|| -> Result<Vec<ASTNode>, ParserError> {
2751 args.push(self.parse_expression()?);
2752 loop {
2753 self.skip_whitespace();
2754 if self.position >= self.tokens.len() {
2755 return Err(ParserError {
2756 message: "Unterminated call argument list".to_string(),
2757 position: Some(self.position),
2758 });
2759 }
2760 let token = &self.tokens[self.position];
2761 let is_separator = (token.token_type == TokenType::Sep
2762 && token.subtype == TokenSubType::Arg)
2763 || (token.token_type == TokenType::OpInfix && token.value == ",");
2764 if is_separator {
2765 self.position += 1;
2766 args.push(self.parse_expression()?);
2767 } else if token.token_type == TokenType::Paren
2768 && token.subtype == TokenSubType::Close
2769 {
2770 self.position += 1;
2771 return Ok(std::mem::take(&mut args));
2772 } else {
2773 return Err(ParserError {
2774 message: format!("Expected ',' or ')' in call arguments, got {token:?}"),
2775 position: Some(self.position),
2776 });
2777 }
2778 }
2779 })();
2780 self.in_call_args_depth -= 1;
2781 result
2782 }
2783
2784 fn parse_function_arguments(&mut self) -> Result<Vec<ASTNode>, ParserError> {
2786 let mut args = Vec::new();
2787
2788 if self.position < self.tokens.len()
2790 && self.tokens[self.position].token_type == TokenType::Func
2791 && self.tokens[self.position].subtype == TokenSubType::Close
2792 {
2793 self.position += 1;
2794 return Ok(args);
2795 }
2796
2797 if self.position < self.tokens.len()
2800 && self.tokens[self.position].token_type == TokenType::Sep
2801 && self.tokens[self.position].subtype == TokenSubType::Arg
2802 {
2803 args.push(ASTNode::new(
2805 ASTNodeType::Literal(LiteralValue::Text("".to_string())),
2806 None,
2807 ));
2808 self.position += 1;
2809 } else {
2810 args.push(self.parse_expression()?);
2812 }
2813
2814 while self.position < self.tokens.len() {
2816 let token = &self.tokens[self.position];
2817
2818 if token.token_type == TokenType::Sep && token.subtype == TokenSubType::Arg {
2819 self.position += 1;
2820 if self.position < self.tokens.len() {
2822 let next_token = &self.tokens[self.position];
2823 if next_token.token_type == TokenType::Sep
2824 && next_token.subtype == TokenSubType::Arg
2825 {
2826 args.push(ASTNode::new(
2828 ASTNodeType::Literal(LiteralValue::Text("".to_string())),
2829 None,
2830 ));
2831 } else if next_token.token_type == TokenType::Func
2832 && next_token.subtype == TokenSubType::Close
2833 {
2834 args.push(ASTNode::new(
2836 ASTNodeType::Literal(LiteralValue::Text("".to_string())),
2837 None,
2838 ));
2839 self.position += 1;
2840 break;
2841 } else {
2842 args.push(self.parse_expression()?);
2843 }
2844 } else {
2845 args.push(ASTNode::new(
2847 ASTNodeType::Literal(LiteralValue::Text("".to_string())),
2848 None,
2849 ));
2850 }
2851 } else if token.token_type == TokenType::Func && token.subtype == TokenSubType::Close {
2852 self.position += 1;
2853 break;
2854 } else {
2855 return Err(ParserError {
2856 message: format!("Expected ',' or ')' in function arguments, got {token:?}"),
2857 position: Some(self.position),
2858 });
2859 }
2860 }
2861
2862 Ok(args)
2863 }
2864
2865 fn parse_array(&mut self) -> Result<ASTNode, ParserError> {
2866 let mut rows = Vec::new();
2867 let mut current_row = Vec::new();
2868
2869 if self.position < self.tokens.len()
2871 && self.tokens[self.position].token_type == TokenType::Array
2872 && self.tokens[self.position].subtype == TokenSubType::Close
2873 {
2874 self.position += 1;
2875 return Ok(ASTNode::new(ASTNodeType::Array(rows), None));
2876 }
2877
2878 current_row.push(self.parse_expression()?);
2880
2881 while self.position < self.tokens.len() {
2882 let token = &self.tokens[self.position];
2883
2884 if token.token_type == TokenType::Sep {
2885 if token.subtype == TokenSubType::Arg {
2886 self.position += 1;
2888 current_row.push(self.parse_expression()?);
2889 } else if token.subtype == TokenSubType::Row {
2890 self.position += 1;
2892 rows.push(current_row);
2893 current_row = vec![self.parse_expression()?];
2894 }
2895 } else if token.token_type == TokenType::Array && token.subtype == TokenSubType::Close {
2896 self.position += 1;
2897 rows.push(current_row);
2898 break;
2899 } else {
2900 return Err(ParserError {
2901 message: format!("Unexpected token in array: {token:?}"),
2902 position: Some(self.position),
2903 });
2904 }
2905 }
2906
2907 let contains_volatile = rows
2909 .iter()
2910 .flat_map(|r| r.iter())
2911 .any(|n| n.contains_volatile);
2912 Ok(ASTNode::new_with_volatile(
2913 ASTNodeType::Array(rows),
2914 None,
2915 contains_volatile,
2916 ))
2917 }
2918}
2919
2920impl From<TokenizerError> for ParserError {
2921 fn from(err: TokenizerError) -> Self {
2922 ParserError {
2923 message: err.message,
2924 position: Some(err.pos),
2925 }
2926 }
2927}
2928
2929struct SpanParser<'a> {
2930 source: &'a str,
2931 tokens: &'a [crate::tokenizer::TokenSpan],
2932 position: usize,
2933 volatility_classifier: Option<VolatilityClassifierBox>,
2934 dialect: FormulaDialect,
2935 in_call_args_depth: usize,
2937}
2938
2939impl<'a> SpanParser<'a> {
2940 fn new(
2941 source: &'a str,
2942 tokens: &'a [crate::tokenizer::TokenSpan],
2943 dialect: FormulaDialect,
2944 ) -> Self {
2945 SpanParser {
2946 source,
2947 tokens,
2948 position: 0,
2949 volatility_classifier: None,
2950 dialect,
2951 in_call_args_depth: 0,
2952 }
2953 }
2954
2955 fn with_volatility_classifier<F>(mut self, f: F) -> Self
2956 where
2957 F: Fn(&str) -> bool + Send + Sync + 'static,
2958 {
2959 self.volatility_classifier = Some(Box::new(f));
2960 self
2961 }
2962
2963 fn skip_whitespace(&mut self) {
2964 while self.position < self.tokens.len()
2965 && self.tokens[self.position].token_type == TokenType::Whitespace
2966 {
2967 self.position += 1;
2968 }
2969 }
2970
2971 fn span_value(&self, span: &crate::tokenizer::TokenSpan) -> &str {
2972 &self.source[span.start..span.end]
2973 }
2974
2975 fn span_to_token(&self, span: &crate::tokenizer::TokenSpan) -> Token {
2976 Token::new_with_span(
2977 self.span_value(span).to_string(),
2978 span.token_type,
2979 span.subtype,
2980 span.start,
2981 span.end,
2982 )
2983 }
2984
2985 fn span_precedence(&self, span: &crate::tokenizer::TokenSpan) -> Option<(u8, Associativity)> {
2986 if !matches!(
2987 span.token_type,
2988 TokenType::OpPrefix | TokenType::OpInfix | TokenType::OpPostfix
2989 ) {
2990 return None;
2991 }
2992
2993 let op = if span.token_type == TokenType::OpPrefix {
2994 "u"
2995 } else {
2996 self.span_value(span)
2997 };
2998
2999 match op {
3000 "#" => Some((11, Associativity::Left)),
3001 ":" => Some((10, Associativity::Left)),
3002 " " => Some((9, Associativity::Left)),
3003 "," => Some((8, Associativity::Left)),
3004 "%" => Some((7, Associativity::Left)),
3005 "u" => Some((6, Associativity::Right)),
3006 "^" => Some((5, Associativity::Right)),
3007 "*" | "/" => Some((4, Associativity::Left)),
3008 "+" | "-" => Some((3, Associativity::Left)),
3009 "&" => Some((2, Associativity::Left)),
3010 "=" | "<" | ">" | "<=" | ">=" | "<>" => Some((1, Associativity::Left)),
3011 _ => None,
3012 }
3013 }
3014
3015 fn parse(&mut self) -> Result<ASTNode, ParserError> {
3016 if self.tokens.is_empty() {
3017 return Err(ParserError {
3018 message: "No tokens to parse".to_string(),
3019 position: None,
3020 });
3021 }
3022
3023 self.skip_whitespace();
3024 if self.position >= self.tokens.len() {
3025 return Err(ParserError {
3026 message: "No tokens to parse".to_string(),
3027 position: None,
3028 });
3029 }
3030
3031 if self.tokens[self.position].token_type == TokenType::Literal {
3032 let span = self.tokens[self.position];
3033 self.position += 1;
3034 self.skip_whitespace();
3035 if self.position < self.tokens.len() {
3036 return Err(ParserError {
3037 message: format!(
3038 "Unexpected token at position {}: {:?}",
3039 self.position, self.tokens[self.position]
3040 ),
3041 position: Some(self.position),
3042 });
3043 }
3044
3045 let token = self.span_to_token(&span);
3046 return Ok(ASTNode::new(
3047 ASTNodeType::Literal(LiteralValue::Text(token.value.clone())),
3048 Some(token),
3049 ));
3050 }
3051
3052 let ast = self.parse_expression()?;
3053 self.skip_whitespace();
3054 if self.position < self.tokens.len() {
3055 return Err(ParserError {
3056 message: format!(
3057 "Unexpected token at position {}: {:?}",
3058 self.position, self.tokens[self.position]
3059 ),
3060 position: Some(self.position),
3061 });
3062 }
3063 Ok(ast)
3064 }
3065
3066 fn parse_expression(&mut self) -> Result<ASTNode, ParserError> {
3067 self.parse_bp(0)
3068 }
3069
3070 fn parse_bp(&mut self, min_precedence: u8) -> Result<ASTNode, ParserError> {
3071 let mut left = self.parse_prefix()?;
3072
3073 loop {
3074 self.skip_whitespace();
3075 if self.position >= self.tokens.len() {
3076 break;
3077 }
3078
3079 if self.tokens[self.position].token_type == TokenType::Paren
3082 && self.tokens[self.position].subtype == TokenSubType::Open
3083 {
3084 self.position += 1;
3085 let args = self.parse_call_arguments()?;
3086 let call_volatile =
3087 left.contains_volatile || args.iter().any(|a| a.contains_volatile);
3088 left = ASTNode::new_with_volatile(
3089 ASTNodeType::Call {
3090 callee: Box::new(left),
3091 args,
3092 },
3093 None,
3094 call_volatile,
3095 );
3096 continue;
3097 }
3098
3099 if self.tokens[self.position].token_type == TokenType::OpPostfix {
3100 let (precedence, _) = self
3101 .span_precedence(&self.tokens[self.position])
3102 .unwrap_or((0, Associativity::Left));
3103 if precedence < min_precedence {
3104 break;
3105 }
3106
3107 let op_span = self.tokens[self.position];
3108 self.position += 1;
3109 let op_token = self.span_to_token(&op_span);
3110 let contains_volatile = left.contains_volatile;
3111 left = ASTNode::new_with_volatile(
3112 ASTNodeType::UnaryOp {
3113 op: op_token.value.clone(),
3114 expr: Box::new(left),
3115 },
3116 Some(op_token),
3117 contains_volatile,
3118 );
3119 continue;
3120 }
3121
3122 let token = &self.tokens[self.position];
3123 if token.token_type != TokenType::OpInfix {
3124 break;
3125 }
3126
3127 if self.in_call_args_depth > 0 && self.span_value(token) == "," {
3130 break;
3131 }
3132
3133 let (precedence, associativity) = self
3134 .span_precedence(token)
3135 .unwrap_or((0, Associativity::Left));
3136 if precedence < min_precedence {
3137 break;
3138 }
3139
3140 let op_span = self.tokens[self.position];
3141 self.position += 1;
3142
3143 let next_min_precedence = if associativity == Associativity::Left {
3144 precedence + 1
3145 } else {
3146 precedence
3147 };
3148
3149 let right = self.parse_bp(next_min_precedence)?;
3150 let op_token = self.span_to_token(&op_span);
3151 let contains_volatile = left.contains_volatile || right.contains_volatile;
3152 left = ASTNode::new_with_volatile(
3153 ASTNodeType::BinaryOp {
3154 op: op_token.value.clone(),
3155 left: Box::new(left),
3156 right: Box::new(right),
3157 },
3158 Some(op_token),
3159 contains_volatile,
3160 );
3161 }
3162
3163 Ok(left)
3164 }
3165
3166 fn parse_prefix(&mut self) -> Result<ASTNode, ParserError> {
3167 self.skip_whitespace();
3168 if self.position < self.tokens.len()
3169 && self.tokens[self.position].token_type == TokenType::OpPrefix
3170 {
3171 let op_span = self.tokens[self.position];
3172 self.position += 1;
3173
3174 let (precedence, _) = self
3175 .span_precedence(&op_span)
3176 .unwrap_or((0, Associativity::Right));
3177
3178 let expr = self.parse_bp(precedence)?;
3179 let op_token = self.span_to_token(&op_span);
3180 let contains_volatile = expr.contains_volatile;
3181 return Ok(ASTNode::new_with_volatile(
3182 ASTNodeType::UnaryOp {
3183 op: op_token.value.clone(),
3184 expr: Box::new(expr),
3185 },
3186 Some(op_token),
3187 contains_volatile,
3188 ));
3189 }
3190
3191 self.parse_primary()
3192 }
3193
3194 fn parse_primary(&mut self) -> Result<ASTNode, ParserError> {
3195 self.skip_whitespace();
3196 if self.position >= self.tokens.len() {
3197 return Err(ParserError {
3198 message: "Unexpected end of tokens".to_string(),
3199 position: Some(self.position),
3200 });
3201 }
3202
3203 let token = &self.tokens[self.position];
3204 match token.token_type {
3205 TokenType::Operand => {
3206 let span = self.tokens[self.position];
3207 self.position += 1;
3208 self.parse_operand(span)
3209 }
3210 TokenType::Func => {
3211 let span = self.tokens[self.position];
3212 self.position += 1;
3213 self.parse_function(span)
3214 }
3215 TokenType::Paren if token.subtype == TokenSubType::Open => {
3216 self.position += 1;
3217 let expr = self.parse_expression()?;
3218 self.skip_whitespace();
3219 if self.position >= self.tokens.len()
3220 || self.tokens[self.position].token_type != TokenType::Paren
3221 || self.tokens[self.position].subtype != TokenSubType::Close
3222 {
3223 return Err(ParserError {
3224 message: "Expected closing parenthesis".to_string(),
3225 position: Some(self.position),
3226 });
3227 }
3228 self.position += 1;
3229 Ok(expr)
3230 }
3231 TokenType::Array if token.subtype == TokenSubType::Open => {
3232 self.position += 1;
3233 self.parse_array()
3234 }
3235 _ => Err(ParserError {
3236 message: format!("Unexpected token: {token:?}"),
3237 position: Some(self.position),
3238 }),
3239 }
3240 }
3241
3242 fn parse_operand(&mut self, span: crate::tokenizer::TokenSpan) -> Result<ASTNode, ParserError> {
3243 let value = self.span_value(&span);
3244 let token = self.span_to_token(&span);
3245
3246 match span.subtype {
3247 TokenSubType::Number => {
3248 let value = value.parse::<f64>().map_err(|_| ParserError {
3249 message: format!("Invalid number: {value}"),
3250 position: Some(self.position),
3251 })?;
3252 Ok(ASTNode::new(
3253 ASTNodeType::Literal(LiteralValue::Number(value)),
3254 Some(token),
3255 ))
3256 }
3257 TokenSubType::Text => {
3258 let mut text = value.to_string();
3259 if text.starts_with('"') && text.ends_with('"') && text.len() >= 2 {
3260 text = text[1..text.len() - 1].to_string();
3261 text = text.replace("\"\"", "\"");
3262 }
3263 Ok(ASTNode::new(
3264 ASTNodeType::Literal(LiteralValue::Text(text)),
3265 Some(token),
3266 ))
3267 }
3268 TokenSubType::Logical => {
3269 let v = value.eq_ignore_ascii_case("TRUE");
3270 Ok(ASTNode::new(
3271 ASTNodeType::Literal(LiteralValue::Boolean(v)),
3272 Some(token),
3273 ))
3274 }
3275 TokenSubType::Error => {
3276 let error = ExcelError::from_error_string(value);
3277 Ok(ASTNode::new(
3278 ASTNodeType::Literal(LiteralValue::Error(error)),
3279 Some(token),
3280 ))
3281 }
3282 TokenSubType::Range => {
3283 let reference = ReferenceType::from_string_with_dialect(value, self.dialect)
3284 .map_err(|e| ParserError {
3285 message: format!("Invalid reference '{value}': {e}"),
3286 position: Some(self.position),
3287 })?;
3288 Ok(ASTNode::new(
3289 ASTNodeType::Reference {
3290 original: value.to_string(),
3291 reference,
3292 },
3293 Some(token),
3294 ))
3295 }
3296 _ => Err(ParserError {
3297 message: format!("Unexpected operand subtype: {:?}", span.subtype),
3298 position: Some(self.position),
3299 }),
3300 }
3301 }
3302
3303 fn parse_function(
3304 &mut self,
3305 func_span: crate::tokenizer::TokenSpan,
3306 ) -> Result<ASTNode, ParserError> {
3307 let func_value = self.span_value(&func_span);
3308 if func_value.is_empty() {
3309 return Err(ParserError {
3310 message: "Invalid function token".to_string(),
3311 position: Some(self.position),
3312 });
3313 }
3314 let name = func_value[..func_value.len() - 1].to_string();
3315 let args = self.parse_function_arguments()?;
3316
3317 let this_is_volatile = self
3318 .volatility_classifier
3319 .as_ref()
3320 .map(|f| f(name.as_str()))
3321 .unwrap_or(false);
3322 let args_volatile = args.iter().any(|a| a.contains_volatile);
3323
3324 let func_token = self.span_to_token(&func_span);
3325 Ok(ASTNode::new_with_volatile(
3326 ASTNodeType::Function { name, args },
3327 Some(func_token),
3328 this_is_volatile || args_volatile,
3329 ))
3330 }
3331
3332 fn parse_call_arguments(&mut self) -> Result<Vec<ASTNode>, ParserError> {
3337 let mut args: Vec<ASTNode> = Vec::new();
3338
3339 self.skip_whitespace();
3340 if self.position < self.tokens.len()
3341 && self.tokens[self.position].token_type == TokenType::Paren
3342 && self.tokens[self.position].subtype == TokenSubType::Close
3343 {
3344 self.position += 1;
3345 return Ok(args);
3346 }
3347
3348 self.in_call_args_depth += 1;
3349 let result = (|| -> Result<Vec<ASTNode>, ParserError> {
3350 args.push(self.parse_expression()?);
3351 loop {
3352 self.skip_whitespace();
3353 if self.position >= self.tokens.len() {
3354 return Err(ParserError {
3355 message: "Unterminated call argument list".to_string(),
3356 position: Some(self.position),
3357 });
3358 }
3359 let token = &self.tokens[self.position];
3360 let is_separator = (token.token_type == TokenType::Sep
3361 && token.subtype == TokenSubType::Arg)
3362 || (token.token_type == TokenType::OpInfix && self.span_value(token) == ",");
3363 if is_separator {
3364 self.position += 1;
3365 args.push(self.parse_expression()?);
3366 } else if token.token_type == TokenType::Paren
3367 && token.subtype == TokenSubType::Close
3368 {
3369 self.position += 1;
3370 return Ok(std::mem::take(&mut args));
3371 } else {
3372 return Err(ParserError {
3373 message: format!("Expected ',' or ')' in call arguments, got {token:?}"),
3374 position: Some(self.position),
3375 });
3376 }
3377 }
3378 })();
3379 self.in_call_args_depth -= 1;
3380 result
3381 }
3382
3383 fn parse_function_arguments(&mut self) -> Result<Vec<ASTNode>, ParserError> {
3384 let mut args = Vec::new();
3385
3386 self.skip_whitespace();
3387 if self.position < self.tokens.len()
3388 && self.tokens[self.position].token_type == TokenType::Func
3389 && self.tokens[self.position].subtype == TokenSubType::Close
3390 {
3391 self.position += 1;
3392 return Ok(args);
3393 }
3394
3395 self.skip_whitespace();
3396 if self.position < self.tokens.len()
3397 && self.tokens[self.position].token_type == TokenType::Sep
3398 && self.tokens[self.position].subtype == TokenSubType::Arg
3399 {
3400 args.push(ASTNode::new(
3401 ASTNodeType::Literal(LiteralValue::Text("".to_string())),
3402 None,
3403 ));
3404 self.position += 1;
3405 } else {
3406 args.push(self.parse_expression()?);
3407 }
3408
3409 while self.position < self.tokens.len() {
3410 self.skip_whitespace();
3411 if self.position >= self.tokens.len() {
3412 break;
3413 }
3414
3415 let token = &self.tokens[self.position];
3416 if token.token_type == TokenType::Sep && token.subtype == TokenSubType::Arg {
3417 self.position += 1;
3418 self.skip_whitespace();
3419 if self.position < self.tokens.len() {
3420 let next_token = &self.tokens[self.position];
3421 if next_token.token_type == TokenType::Sep
3422 && next_token.subtype == TokenSubType::Arg
3423 {
3424 args.push(ASTNode::new(
3425 ASTNodeType::Literal(LiteralValue::Text("".to_string())),
3426 None,
3427 ));
3428 } else if next_token.token_type == TokenType::Func
3429 && next_token.subtype == TokenSubType::Close
3430 {
3431 args.push(ASTNode::new(
3432 ASTNodeType::Literal(LiteralValue::Text("".to_string())),
3433 None,
3434 ));
3435 self.position += 1;
3436 break;
3437 } else {
3438 args.push(self.parse_expression()?);
3439 }
3440 } else {
3441 args.push(ASTNode::new(
3442 ASTNodeType::Literal(LiteralValue::Text("".to_string())),
3443 None,
3444 ));
3445 }
3446 } else if token.token_type == TokenType::Func && token.subtype == TokenSubType::Close {
3447 self.position += 1;
3448 break;
3449 } else {
3450 return Err(ParserError {
3451 message: format!("Expected ',' or ')' in function arguments, got {token:?}"),
3452 position: Some(self.position),
3453 });
3454 }
3455 }
3456
3457 Ok(args)
3458 }
3459
3460 fn parse_array(&mut self) -> Result<ASTNode, ParserError> {
3461 let mut rows = Vec::new();
3462 let mut current_row = Vec::new();
3463
3464 self.skip_whitespace();
3465 if self.position < self.tokens.len()
3466 && self.tokens[self.position].token_type == TokenType::Array
3467 && self.tokens[self.position].subtype == TokenSubType::Close
3468 {
3469 self.position += 1;
3470 return Ok(ASTNode::new(ASTNodeType::Array(rows), None));
3471 }
3472
3473 current_row.push(self.parse_expression()?);
3474
3475 while self.position < self.tokens.len() {
3476 self.skip_whitespace();
3477 if self.position >= self.tokens.len() {
3478 break;
3479 }
3480 let token = &self.tokens[self.position];
3481
3482 if token.token_type == TokenType::Sep {
3483 if token.subtype == TokenSubType::Arg {
3484 self.position += 1;
3485 current_row.push(self.parse_expression()?);
3486 } else if token.subtype == TokenSubType::Row {
3487 self.position += 1;
3488 rows.push(current_row);
3489 current_row = vec![self.parse_expression()?];
3490 }
3491 } else if token.token_type == TokenType::Array && token.subtype == TokenSubType::Close {
3492 self.position += 1;
3493 rows.push(current_row);
3494 break;
3495 } else {
3496 return Err(ParserError {
3497 message: format!("Unexpected token in array: {token:?}"),
3498 position: Some(self.position),
3499 });
3500 }
3501 }
3502
3503 let contains_volatile = rows
3504 .iter()
3505 .flat_map(|r| r.iter())
3506 .any(|n| n.contains_volatile);
3507
3508 Ok(ASTNode::new_with_volatile(
3509 ASTNodeType::Array(rows),
3510 None,
3511 contains_volatile,
3512 ))
3513 }
3514}
3515
3516pub fn normalise_reference(reference: &str) -> Result<String, ParsingError> {
3518 let ref_type = ReferenceType::from_string(reference)?;
3519 Ok(ref_type.to_string())
3520}
3521
3522pub fn parse<T: AsRef<str>>(formula: T) -> Result<ASTNode, ParserError> {
3523 parse_with_dialect(formula, FormulaDialect::Excel)
3524}
3525
3526pub fn parse_with_dialect<T: AsRef<str>>(
3527 formula: T,
3528 dialect: FormulaDialect,
3529) -> Result<ASTNode, ParserError> {
3530 let spans = crate::tokenizer::tokenize_spans_with_dialect(formula.as_ref(), dialect)?;
3531 let mut parser = SpanParser::new(formula.as_ref(), &spans, dialect);
3532 parser.parse()
3533}
3534
3535pub fn parse_with_volatility_classifier<T, F>(
3538 formula: T,
3539 classifier: F,
3540) -> Result<ASTNode, ParserError>
3541where
3542 T: AsRef<str>,
3543 F: Fn(&str) -> bool + Send + Sync + 'static,
3544{
3545 parse_with_dialect_and_volatility_classifier(formula, FormulaDialect::Excel, classifier)
3546}
3547
3548pub fn parse_with_dialect_and_volatility_classifier<T, F>(
3549 formula: T,
3550 dialect: FormulaDialect,
3551 classifier: F,
3552) -> Result<ASTNode, ParserError>
3553where
3554 T: AsRef<str>,
3555 F: Fn(&str) -> bool + Send + Sync + 'static,
3556{
3557 let spans = crate::tokenizer::tokenize_spans_with_dialect(formula.as_ref(), dialect)?;
3558 let mut parser =
3559 SpanParser::new(formula.as_ref(), &spans, dialect).with_volatility_classifier(classifier);
3560 parser.parse()
3561}
3562
3563pub struct BatchParser {
3568 include_whitespace: bool,
3569 volatility_classifier: Option<VolatilityClassifierArc>,
3570 token_cache: std::collections::HashMap<String, Arc<[crate::tokenizer::TokenSpan]>>, dialect: FormulaDialect,
3572}
3573
3574impl BatchParser {
3575 pub fn builder() -> BatchParserBuilder {
3576 BatchParserBuilder::default()
3577 }
3578
3579 pub fn parse(&mut self, formula: &str) -> Result<ASTNode, ParserError> {
3581 let spans = if let Some(tokens) = self.token_cache.get(formula) {
3582 Arc::clone(tokens)
3583 } else {
3584 let mut spans = crate::tokenizer::tokenize_spans_with_dialect(formula, self.dialect)?;
3585 if !self.include_whitespace {
3586 spans.retain(|t| t.token_type != TokenType::Whitespace);
3587 }
3588
3589 let spans: Arc<[crate::tokenizer::TokenSpan]> = Arc::from(spans.into_boxed_slice());
3590 self.token_cache
3591 .insert(formula.to_string(), Arc::clone(&spans));
3592 spans
3593 };
3594
3595 let mut parser = SpanParser::new(formula, spans.as_ref(), self.dialect);
3596 if let Some(classifier) = self.volatility_classifier.clone() {
3597 parser = parser.with_volatility_classifier(move |name| classifier(name));
3598 }
3599 parser.parse()
3600 }
3601}
3602
3603#[derive(Default)]
3604pub struct BatchParserBuilder {
3605 include_whitespace: bool,
3606 volatility_classifier: Option<VolatilityClassifierArc>,
3607 dialect: FormulaDialect,
3608}
3609
3610impl BatchParserBuilder {
3611 pub fn include_whitespace(mut self, include: bool) -> Self {
3612 self.include_whitespace = include;
3613 self
3614 }
3615
3616 pub fn with_volatility_classifier<F>(mut self, f: F) -> Self
3617 where
3618 F: Fn(&str) -> bool + Send + Sync + 'static,
3619 {
3620 self.volatility_classifier = Some(Arc::new(f));
3621 self
3622 }
3623
3624 pub fn dialect(mut self, dialect: FormulaDialect) -> Self {
3625 self.dialect = dialect;
3626 self
3627 }
3628
3629 pub fn build(self) -> BatchParser {
3630 BatchParser {
3631 include_whitespace: self.include_whitespace,
3632 volatility_classifier: self.volatility_classifier,
3633 token_cache: std::collections::HashMap::new(),
3634 dialect: self.dialect,
3635 }
3636 }
3637}