1use crate::tokenizer::{Associativity, Token, TokenSubType, TokenType, Tokenizer, TokenizerError};
2use crate::types::{FormulaDialect, ParsingError};
3use crate::{ExcelError, LiteralValue};
4
5#[cfg(feature = "serde")]
6use serde::{Deserialize, Serialize};
7
8use crate::hasher::FormulaHasher;
9use formualizer_common::coord::{
10 col_index_from_letters_1based, col_letters_from_1based, parse_a1_1based,
11};
12use formualizer_common::{
13 AxisBound, RelativeCoord, SheetCellRef, SheetLocator, SheetRangeRef, SheetRef,
14};
15use once_cell::sync::Lazy;
16use smallvec::SmallVec;
17use std::error::Error;
18use std::fmt::{self, Display};
19use std::hash::{Hash, Hasher};
20use std::str::FromStr;
21use std::sync::Arc;
22
/// Unsized signature of a volatility classifier: a thread-safe predicate
/// that takes a `&str` and answers `bool`.
// NOTE(review): the `&str` is presumably a function name — confirm at the call sites.
type VolatilityFn = dyn Fn(&str) -> bool + Send + Sync + 'static;
/// Uniquely owned, boxed [`VolatilityFn`].
type VolatilityClassifierBox = Box<VolatilityFn>;
/// Shared, reference-counted [`VolatilityFn`].
type VolatilityClassifierArc = Arc<VolatilityFn>;
26
/// Error produced by the parser, optionally tagged with a position.
#[derive(Debug)]
pub struct ParserError {
    /// Human-readable description of what went wrong.
    pub message: String,
    /// Position associated with the failure, when one is known.
    pub position: Option<usize>,
}

impl Display for ParserError {
    /// Renders `ParserError at position N: msg`, or `ParserError: msg` when
    /// no position is attached.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self.position {
            Some(pos) => write!(f, "ParserError at position {}: {}", pos, self.message),
            None => write!(f, "ParserError: {}", self.message),
        }
    }
}

impl Error for ParserError {}
45
46static COLUMN_LOOKUP: Lazy<Vec<String>> = Lazy::new(|| {
48 let mut cols = Vec::with_capacity(702);
49 for c in b'A'..=b'Z' {
51 cols.push(String::from(c as char));
52 }
53 for c1 in b'A'..=b'Z' {
55 for c2 in b'A'..=b'Z' {
56 cols.push(format!("{}{}", c1 as char, c2 as char));
57 }
58 }
59 cols
60});
61
62#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
64#[derive(Debug, Clone, PartialEq, Hash)]
65pub enum TableSpecifier {
66 All,
68 Data,
70 Headers,
72 Totals,
74 Row(TableRowSpecifier),
76 Column(String),
78 ColumnRange(String, String),
80 SpecialItem(SpecialItem),
82 Combination(Vec<Box<TableSpecifier>>),
84}
85
/// Row selector used inside a table specifier.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Clone, Debug, Hash, PartialEq)]
pub enum TableRowSpecifier {
    /// Rendered as `@`.
    Current,
    /// Rendered as `#All`.
    All,
    /// Rendered as `#Data`.
    Data,
    /// Rendered as `#Headers`.
    Headers,
    /// Rendered as `#Totals`.
    Totals,
    /// A specific row, rendered as its number.
    Index(u32),
}
103
/// Special items that can appear in a structured table reference.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Clone, Debug, Hash, PartialEq)]
pub enum SpecialItem {
    /// Rendered as `#Headers`.
    Headers,
    /// Rendered as `#Data`.
    Data,
    /// Rendered as `#Totals`.
    Totals,
    /// Rendered as `#All`.
    All,
    /// Rendered as `@`.
    ThisRow,
}
119
120#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
122#[derive(Debug, Clone, PartialEq, Hash)]
123pub struct TableReference {
124 pub name: String,
126 pub specifier: Option<TableSpecifier>,
128}
129
/// Identifies the workbook an external reference points into.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Clone, Debug, Hash, PartialEq)]
pub enum ExternalBookRef {
    /// The workbook token exactly as it appeared in the reference text.
    Token(String),
}

impl ExternalBookRef {
    /// Returns the stored workbook token.
    pub fn token(&self) -> &str {
        let ExternalBookRef::Token(text) = self;
        text
    }
}
143
/// Shape of the target addressed by an external reference.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum ExternalRefKind {
    /// A single cell with per-axis absolute flags.
    Cell {
        row: u32,
        col: u32,
        row_abs: bool,
        col_abs: bool,
    },
    /// A range whose bounds may each be absent, with per-bound absolute flags.
    Range {
        start_row: Option<u32>,
        start_col: Option<u32>,
        end_row: Option<u32>,
        end_col: Option<u32>,
        start_row_abs: bool,
        start_col_abs: bool,
        end_row_abs: bool,
        end_col_abs: bool,
    },
}

impl ExternalRefKind {
    /// Builds a cell target with both axes relative.
    pub fn cell(row: u32, col: u32) -> Self {
        Self::cell_with_abs(row, col, false, false)
    }

    /// Builds a cell target with explicit absolute flags.
    pub fn cell_with_abs(row: u32, col: u32, row_abs: bool, col_abs: bool) -> Self {
        ExternalRefKind::Cell {
            row,
            col,
            row_abs,
            col_abs,
        }
    }

    /// Builds a range target with every bound relative.
    pub fn range(
        start_row: Option<u32>,
        start_col: Option<u32>,
        end_row: Option<u32>,
        end_col: Option<u32>,
    ) -> Self {
        Self::range_with_abs(
            start_row, start_col, end_row, end_col, false, false, false, false,
        )
    }

    /// Builds a range target with explicit absolute flags for each bound.
    // One argument per enum field; grouping them would change the public signature.
    #[allow(clippy::too_many_arguments)]
    pub fn range_with_abs(
        start_row: Option<u32>,
        start_col: Option<u32>,
        end_row: Option<u32>,
        end_col: Option<u32>,
        start_row_abs: bool,
        start_col_abs: bool,
        end_row_abs: bool,
        end_col_abs: bool,
    ) -> Self {
        ExternalRefKind::Range {
            start_row,
            start_col,
            end_row,
            end_col,
            start_row_abs,
            start_col_abs,
            end_row_abs,
            end_col_abs,
        }
    }
}
227
228#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
229#[derive(Debug, Clone, PartialEq, Hash)]
230pub struct ExternalReference {
231 pub raw: String,
232 pub book: ExternalBookRef,
233 pub sheet: String,
234 pub kind: ExternalRefKind,
235}
236
237#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
239#[derive(Debug, Clone, PartialEq, Hash)]
240pub enum ReferenceType {
241 Cell {
242 sheet: Option<String>,
243 row: u32,
244 col: u32,
245 row_abs: bool,
246 col_abs: bool,
247 },
248 Range {
249 sheet: Option<String>,
250 start_row: Option<u32>,
251 start_col: Option<u32>,
252 end_row: Option<u32>,
253 end_col: Option<u32>,
254 start_row_abs: bool,
255 start_col_abs: bool,
256 end_row_abs: bool,
257 end_col_abs: bool,
258 },
259 External(ExternalReference),
260 Table(TableReference),
261 NamedRange(String),
262}
263
264impl Display for TableSpecifier {
265 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
266 match self {
267 TableSpecifier::All => write!(f, "#All"),
268 TableSpecifier::Data => write!(f, "#Data"),
269 TableSpecifier::Headers => write!(f, "#Headers"),
270 TableSpecifier::Totals => write!(f, "#Totals"),
271 TableSpecifier::Row(row) => write!(f, "{row}"),
272 TableSpecifier::Column(column) => write!(f, "{column}"),
273 TableSpecifier::ColumnRange(start, end) => write!(f, "{start}:{end}"),
274 TableSpecifier::SpecialItem(item) => write!(f, "{item}"),
275 TableSpecifier::Combination(specs) => {
276 let parts: Vec<String> = specs.iter().map(|s| format!("[{s}]")).collect();
279 write!(f, "{}", parts.join(","))
280 }
281 }
282 }
283}
284
285impl Display for TableRowSpecifier {
286 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
287 match self {
288 TableRowSpecifier::Current => write!(f, "@"),
289 TableRowSpecifier::All => write!(f, "#All"),
290 TableRowSpecifier::Data => write!(f, "#Data"),
291 TableRowSpecifier::Headers => write!(f, "#Headers"),
292 TableRowSpecifier::Totals => write!(f, "#Totals"),
293 TableRowSpecifier::Index(idx) => write!(f, "{idx}"),
294 }
295 }
296}
297
298impl Display for SpecialItem {
299 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
300 match self {
301 SpecialItem::Headers => write!(f, "#Headers"),
302 SpecialItem::Data => write!(f, "#Data"),
303 SpecialItem::Totals => write!(f, "#Totals"),
304 SpecialItem::All => write!(f, "#All"),
305 SpecialItem::ThisRow => write!(f, "@"),
306 }
307 }
308}
309
/// Reports whether a sheet name must be wrapped in single quotes when a
/// reference is rendered.
///
/// Quoting is required when the name starts with an ASCII digit, contains any
/// of the listed ASCII punctuation/space bytes, or upper-cases to one of the
/// reserved words. The empty name is reported as not needing quotes.
fn sheet_name_needs_quoting(name: &str) -> bool {
    /// Bytes that force quoting wherever they occur in the name.
    const FORCING_BYTES: &[u8] = b" !\"#$%&'()*+,-./:;<=>?@[\\]^`{|}~";
    /// Words that collide with literals / error names (compared upper-cased).
    const RESERVED_WORDS: [&str; 9] = [
        "TRUE", "FALSE", "NULL", "REF", "DIV", "NAME", "NUM", "VALUE", "N/A",
    ];

    match name.bytes().next() {
        None => return false,
        Some(first) if first.is_ascii_digit() => return true,
        Some(_) => {}
    }

    if name.bytes().any(|byte| FORCING_BYTES.contains(&byte)) {
        return true;
    }

    let upper = name.to_uppercase();
    RESERVED_WORDS.contains(&upper.as_str())
}
341
/// One side of an OpenFormula reference after it has been split apart.
#[derive(Clone, Debug)]
struct OpenFormulaRefPart {
    /// Sheet name, when the component carried one.
    sheet: Option<String>,
    /// Coordinate text with the sheet separator removed.
    coord: String,
}
347
/// One axis of a range endpoint: `(index, is_absolute)`, or `None` when the
/// axis is not specified.
type AxisPartWithAbs = Option<(u32, bool)>;
/// A parsed range endpoint as `(column, row)`.
type RangePartWithAbs = (AxisPartWithAbs, AxisPartWithAbs);
350
351impl ReferenceType {
352 pub fn cell(sheet: Option<String>, row: u32, col: u32) -> Self {
354 Self::Cell {
355 sheet,
356 row,
357 col,
358 row_abs: false,
359 col_abs: false,
360 }
361 }
362
363 pub fn cell_with_abs(
365 sheet: Option<String>,
366 row: u32,
367 col: u32,
368 row_abs: bool,
369 col_abs: bool,
370 ) -> Self {
371 Self::Cell {
372 sheet,
373 row,
374 col,
375 row_abs,
376 col_abs,
377 }
378 }
379
380 pub fn range(
382 sheet: Option<String>,
383 start_row: Option<u32>,
384 start_col: Option<u32>,
385 end_row: Option<u32>,
386 end_col: Option<u32>,
387 ) -> Self {
388 Self::Range {
389 sheet,
390 start_row,
391 start_col,
392 end_row,
393 end_col,
394 start_row_abs: false,
395 start_col_abs: false,
396 end_row_abs: false,
397 end_col_abs: false,
398 }
399 }
400
401 #[allow(clippy::too_many_arguments)]
405 pub fn range_with_abs(
406 sheet: Option<String>,
407 start_row: Option<u32>,
408 start_col: Option<u32>,
409 end_row: Option<u32>,
410 end_col: Option<u32>,
411 start_row_abs: bool,
412 start_col_abs: bool,
413 end_row_abs: bool,
414 end_col_abs: bool,
415 ) -> Self {
416 Self::Range {
417 sheet,
418 start_row,
419 start_col,
420 end_row,
421 end_col,
422 start_row_abs,
423 start_col_abs,
424 end_row_abs,
425 end_col_abs,
426 }
427 }
428
429 pub fn from_string(reference: &str) -> Result<Self, ParsingError> {
431 Self::parse_excel_reference(reference)
432 }
433
434 pub fn from_string_with_dialect(
436 reference: &str,
437 dialect: FormulaDialect,
438 ) -> Result<Self, ParsingError> {
439 match dialect {
440 FormulaDialect::Excel => Self::parse_excel_reference(reference),
441 FormulaDialect::OpenFormula => Self::parse_openformula_reference(reference)
442 .or_else(|_| Self::parse_excel_reference(reference)),
443 }
444 }
445
446 pub fn parse_sheet_ref(reference: &str) -> Result<SheetRef<'static>, ParsingError> {
450 Self::parse_sheet_ref_with_dialect(reference, FormulaDialect::Excel)
451 }
452
453 pub fn parse_sheet_ref_with_dialect(
455 reference: &str,
456 dialect: FormulaDialect,
457 ) -> Result<SheetRef<'static>, ParsingError> {
458 match dialect {
459 FormulaDialect::Excel => Self::parse_excel_sheet_ref(reference),
460 FormulaDialect::OpenFormula => Self::parse_openformula_sheet_ref(reference)
461 .or_else(|_| Self::parse_excel_sheet_ref(reference)),
462 }
463 }
464
465 pub fn to_sheet_ref_lossy(&self) -> Option<SheetRef<'_>> {
468 match self {
469 ReferenceType::Cell {
470 sheet,
471 row,
472 col,
473 row_abs,
474 col_abs,
475 } => {
476 let row0 = row.checked_sub(1)?;
477 let col0 = col.checked_sub(1)?;
478 let sheet_loc = match sheet.as_deref() {
479 Some(name) => SheetLocator::from_name(name),
480 None => SheetLocator::Current,
481 };
482 let coord = RelativeCoord::new(row0, col0, *row_abs, *col_abs);
483 Some(SheetRef::Cell(SheetCellRef::new(sheet_loc, coord)))
484 }
485 ReferenceType::Range {
486 sheet,
487 start_row,
488 start_col,
489 end_row,
490 end_col,
491 start_row_abs,
492 start_col_abs,
493 end_row_abs,
494 end_col_abs,
495 } => {
496 let sheet_loc = match sheet.as_deref() {
497 Some(name) => SheetLocator::from_name(name),
498 None => SheetLocator::Current,
499 };
500 let sr = start_row
501 .and_then(|v| v.checked_sub(1).map(|i| AxisBound::new(i, *start_row_abs)));
502 if start_row.is_some() && sr.is_none() {
503 return None;
504 }
505 let sc = start_col
506 .and_then(|v| v.checked_sub(1).map(|i| AxisBound::new(i, *start_col_abs)));
507 if start_col.is_some() && sc.is_none() {
508 return None;
509 }
510 let er =
511 end_row.and_then(|v| v.checked_sub(1).map(|i| AxisBound::new(i, *end_row_abs)));
512 if end_row.is_some() && er.is_none() {
513 return None;
514 }
515 let ec =
516 end_col.and_then(|v| v.checked_sub(1).map(|i| AxisBound::new(i, *end_col_abs)));
517 if end_col.is_some() && ec.is_none() {
518 return None;
519 }
520 let range = SheetRangeRef::from_parts(sheet_loc, sr, sc, er, ec).ok()?;
521 Some(SheetRef::Range(range))
522 }
523 _ => None,
524 }
525 }
526
527 fn parse_excel_sheet_ref(reference: &str) -> Result<SheetRef<'static>, ParsingError> {
528 let (sheet, ref_part) = Self::extract_sheet_name(reference);
529
530 if ref_part.contains('[') {
531 return Err(ParsingError::InvalidReference(
532 "Table references are not supported for SheetRef".to_string(),
533 ));
534 }
535
536 let sheet_loc: SheetLocator<'static> = match sheet {
537 Some(name) => SheetLocator::from_name(name),
538 None => SheetLocator::Current,
539 };
540
541 if ref_part.contains(':') {
542 let mut parts = ref_part.splitn(2, ':');
543 let start = parts.next().unwrap();
544 let end = parts.next().ok_or_else(|| {
545 ParsingError::InvalidReference(format!("Invalid range: {ref_part}"))
546 })?;
547
548 let (start_col, start_row) = Self::parse_range_part_with_abs(start)?;
549 let (end_col, end_row) = Self::parse_range_part_with_abs(end)?;
550
551 let start_col = Self::axis_bound_from_1based(start_col)?;
552 let start_row = Self::axis_bound_from_1based(start_row)?;
553 let end_col = Self::axis_bound_from_1based(end_col)?;
554 let end_row = Self::axis_bound_from_1based(end_row)?;
555
556 let range =
557 SheetRangeRef::from_parts(sheet_loc, start_row, start_col, end_row, end_col)
558 .map_err(|err| ParsingError::InvalidReference(err.to_string()))?;
559 Ok(SheetRef::Range(range))
560 } else {
561 let (row, col, row_abs, col_abs) = parse_a1_1based(&ref_part)
562 .map_err(|err| ParsingError::InvalidReference(err.to_string()))?;
563 let coord = RelativeCoord::new(row - 1, col - 1, row_abs, col_abs);
564 Ok(SheetRef::Cell(SheetCellRef::new(sheet_loc, coord)))
565 }
566 }
567
    /// OpenFormula entry point for [`SheetRef`] parsing.
    ///
    /// Currently this simply delegates to the Excel parser.
    fn parse_openformula_sheet_ref(reference: &str) -> Result<SheetRef<'static>, ParsingError> {
        Self::parse_excel_sheet_ref(reference)
    }
571
572 fn axis_bound_from_1based(
573 bound: Option<(u32, bool)>,
574 ) -> Result<Option<AxisBound>, ParsingError> {
575 match bound {
576 Some((index, abs)) => AxisBound::from_excel_1based(index, abs)
577 .map(Some)
578 .map_err(|err| ParsingError::InvalidReference(err.to_string())),
579 None => Ok(None),
580 }
581 }
582
583 fn parse_range_part_with_abs(part: &str) -> Result<RangePartWithAbs, ParsingError> {
584 if let Ok((row, col, row_abs, col_abs)) = parse_a1_1based(part) {
585 return Ok((Some((col, col_abs)), Some((row, row_abs))));
586 }
587
588 let bytes = part.as_bytes();
589 let len = bytes.len();
590 let mut i = 0usize;
591
592 let mut col_abs = false;
593 let mut row_abs = false;
594
595 if i < len && bytes[i] == b'$' {
596 col_abs = true;
597 i += 1;
598 }
599
600 let col_start = i;
601 while i < len && bytes[i].is_ascii_alphabetic() {
602 i += 1;
603 }
604
605 if i > col_start {
606 let col_str = &part[col_start..i];
607 let col1 = Self::column_to_number(col_str)?;
608
609 if i == len {
610 return Ok((Some((col1, col_abs)), None));
611 }
612
613 if i < len && bytes[i] == b'$' {
614 row_abs = true;
615 i += 1;
616 }
617
618 if i >= len {
619 return Err(ParsingError::InvalidReference(format!(
620 "Invalid range part: {part}"
621 )));
622 }
623
624 let row_start = i;
625 while i < len && bytes[i].is_ascii_digit() {
626 i += 1;
627 }
628
629 if row_start == i || i != len {
630 return Err(ParsingError::InvalidReference(format!(
631 "Invalid range part: {part}"
632 )));
633 }
634
635 let row_str = &part[row_start..i];
636 let row1 = row_str
637 .parse::<u32>()
638 .map_err(|_| ParsingError::InvalidReference(format!("Invalid row: {row_str}")))?;
639 if row1 == 0 {
640 return Err(ParsingError::InvalidReference(format!(
641 "Invalid range part: {part}"
642 )));
643 }
644
645 return Ok((Some((col1, col_abs)), Some((row1, row_abs))));
646 }
647
648 i = 0;
649 if i < len && bytes[i] == b'$' {
650 row_abs = true;
651 i += 1;
652 }
653
654 let row_start = i;
655 while i < len && bytes[i].is_ascii_digit() {
656 i += 1;
657 }
658
659 if row_start == i || i != len {
660 return Err(ParsingError::InvalidReference(format!(
661 "Invalid range part: {part}"
662 )));
663 }
664
665 let row_str = &part[row_start..i];
666 let row1 = row_str
667 .parse::<u32>()
668 .map_err(|_| ParsingError::InvalidReference(format!("Invalid row: {row_str}")))?;
669 if row1 == 0 {
670 return Err(ParsingError::InvalidReference(format!(
671 "Invalid range part: {part}"
672 )));
673 }
674
675 Ok((None, Some((row1, row_abs))))
676 }
677
678 fn parse_excel_reference(reference: &str) -> Result<Self, ParsingError> {
679 if reference.starts_with('[') && reference.ends_with(']') && !reference.contains('!') {
686 return Self::parse_bracketed_structured_reference(reference);
687 }
688
689 let (sheet, ref_part) = Self::extract_sheet_name(reference);
691
692 if ref_part.contains('[') {
695 return Self::parse_table_reference(&ref_part);
696 }
697
698 let external_sheet = sheet.as_deref().and_then(|s| {
699 let lb = s.rfind('[')?;
703 let rb_rel = s[lb..].find(']')?;
704 let rb = lb + rb_rel;
705 if lb >= rb {
706 return None;
707 }
708
709 let token = &s[..=rb];
710 let sheet_name = &s[rb + 1..];
711 if sheet_name.is_empty() {
712 None
713 } else {
714 Some((token, sheet_name))
715 }
716 });
717
718 if ref_part.contains(':') {
719 let mut parts = ref_part.splitn(2, ':');
721 let start = parts.next().unwrap();
722 let end = parts.next().ok_or_else(|| {
723 ParsingError::InvalidReference(format!("Invalid range: {ref_part}"))
724 })?;
725 let (start_col, start_row) = Self::parse_range_part_with_abs(start)?;
726 let (end_col, end_row) = Self::parse_range_part_with_abs(end)?;
727
728 let split = |bound: Option<(u32, bool)>| match bound {
729 Some((index, abs)) => (Some(index), abs),
730 None => (None, false),
731 };
732 let (start_col, start_col_abs) = split(start_col);
733 let (start_row, start_row_abs) = split(start_row);
734 let (end_col, end_col_abs) = split(end_col);
735 let (end_row, end_row_abs) = split(end_row);
736
737 if let Some((book_token, sheet_name)) = external_sheet {
738 Ok(ReferenceType::External(ExternalReference {
739 raw: reference.to_string(),
740 book: ExternalBookRef::Token(book_token.to_string()),
741 sheet: sheet_name.to_string(),
742 kind: ExternalRefKind::Range {
743 start_row,
744 start_col,
745 end_row,
746 end_col,
747 start_row_abs,
748 start_col_abs,
749 end_row_abs,
750 end_col_abs,
751 },
752 }))
753 } else {
754 Ok(ReferenceType::Range {
755 sheet,
756 start_row,
757 start_col,
758 end_row,
759 end_col,
760 start_row_abs,
761 start_col_abs,
762 end_row_abs,
763 end_col_abs,
764 })
765 }
766 } else {
767 match Self::parse_cell_reference(&ref_part) {
769 Ok((col, row, col_abs, row_abs)) => {
770 if let Some((book_token, sheet_name)) = external_sheet {
771 Ok(ReferenceType::External(ExternalReference {
772 raw: reference.to_string(),
773 book: ExternalBookRef::Token(book_token.to_string()),
774 sheet: sheet_name.to_string(),
775 kind: ExternalRefKind::Cell {
776 row,
777 col,
778 row_abs,
779 col_abs,
780 },
781 }))
782 } else {
783 Ok(ReferenceType::Cell {
784 sheet,
785 row,
786 col,
787 row_abs,
788 col_abs,
789 })
790 }
791 }
792 Err(_) => {
793 Ok(ReferenceType::NamedRange(reference.to_string()))
795 }
796 }
797 }
798 }
799
800 fn parse_cell_reference(reference: &str) -> Result<(u32, u32, bool, bool), ParsingError> {
802 parse_a1_1based(reference)
803 .map(|(row, col, row_abs, col_abs)| (col, row, col_abs, row_abs))
804 .map_err(|_| {
805 ParsingError::InvalidReference(format!("Invalid cell reference: {reference}"))
806 })
807 }
808
809 pub(crate) fn column_to_number(column: &str) -> Result<u32, ParsingError> {
811 col_index_from_letters_1based(column)
812 .map_err(|_| ParsingError::InvalidReference(format!("Invalid column: {column}")))
813 }
814
815 pub(crate) fn number_to_column(num: u32) -> String {
817 if num == 0 {
818 return String::new();
819 }
820 if num > 0 && num <= 702 {
822 return COLUMN_LOOKUP[(num - 1) as usize].clone();
823 }
824
825 col_letters_from_1based(num).unwrap_or_default()
826 }
827
828 fn format_col(col: u32, abs: bool) -> String {
829 if abs {
830 format!("${}", Self::number_to_column(col))
831 } else {
832 Self::number_to_column(col)
833 }
834 }
835
836 fn format_row(row: u32, abs: bool) -> String {
837 if abs {
838 format!("${row}")
839 } else {
840 row.to_string()
841 }
842 }
843}
844
845impl Display for ReferenceType {
846 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
847 write!(
848 f,
849 "{}",
850 match self {
851 ReferenceType::Cell {
852 sheet,
853 row,
854 col,
855 row_abs,
856 col_abs,
857 } => {
858 let col_str = Self::format_col(*col, *col_abs);
859 let row_str = Self::format_row(*row, *row_abs);
860
861 if let Some(sheet_name) = sheet {
862 if sheet_name_needs_quoting(sheet_name) {
863 let escaped_name = sheet_name.replace('\'', "''");
865 format!("'{escaped_name}'!{col_str}{row_str}")
866 } else {
867 format!("{sheet_name}!{col_str}{row_str}")
868 }
869 } else {
870 format!("{col_str}{row_str}")
871 }
872 }
873 ReferenceType::Range {
874 sheet,
875 start_row,
876 start_col,
877 end_row,
878 end_col,
879 start_row_abs,
880 start_col_abs,
881 end_row_abs,
882 end_col_abs,
883 } => {
884 let start_ref = match (start_col, start_row) {
886 (Some(col), Some(row)) => format!(
887 "{}{}",
888 Self::format_col(*col, *start_col_abs),
889 Self::format_row(*row, *start_row_abs)
890 ),
891 (Some(col), None) => Self::format_col(*col, *start_col_abs),
892 (None, Some(row)) => Self::format_row(*row, *start_row_abs),
893 (None, None) => "".to_string(), };
895
896 let end_ref = match (end_col, end_row) {
898 (Some(col), Some(row)) => format!(
899 "{}{}",
900 Self::format_col(*col, *end_col_abs),
901 Self::format_row(*row, *end_row_abs)
902 ),
903 (Some(col), None) => Self::format_col(*col, *end_col_abs),
904 (None, Some(row)) => Self::format_row(*row, *end_row_abs),
905 (None, None) => "".to_string(), };
907
908 let range_part = format!("{start_ref}:{end_ref}");
909
910 if let Some(sheet_name) = sheet {
911 if sheet_name_needs_quoting(sheet_name) {
912 let escaped_name = sheet_name.replace('\'', "''");
914 format!("'{escaped_name}'!{range_part}")
915 } else {
916 format!("{sheet_name}!{range_part}")
917 }
918 } else {
919 range_part
920 }
921 }
922 ReferenceType::External(ext) => ext.raw.clone(),
923 ReferenceType::Table(table_ref) => {
924 if let Some(specifier) = &table_ref.specifier {
925 match specifier {
928 TableSpecifier::Column(column) => {
929 format!("{}[{}]", table_ref.name, column.trim())
930 }
931 TableSpecifier::ColumnRange(start, end) => {
932 format!("{}[{}:{}]", table_ref.name, start.trim(), end.trim())
933 }
934 _ => {
935 format!("{}[{}]", table_ref.name, specifier)
937 }
938 }
939 } else {
940 table_ref.name.clone()
941 }
942 }
943 ReferenceType::NamedRange(name) => name.clone(),
944 }
945 )
946 }
947}
948
949impl TryFrom<&str> for ReferenceType {
950 type Error = ParsingError;
951
952 fn try_from(value: &str) -> Result<Self, Self::Error> {
953 ReferenceType::from_string(value)
954 }
955}
956
957impl FromStr for ReferenceType {
958 type Err = ParsingError;
959
960 fn from_str(s: &str) -> Result<Self, Self::Err> {
961 ReferenceType::from_string(s)
962 }
963}
964
965impl ReferenceType {
966 pub fn normalise(&self) -> String {
968 format!("{self}")
969 }
970
971 fn extract_sheet_name(reference: &str) -> (Option<String>, String) {
973 let bytes = reference.as_bytes();
974 let mut i = 0;
975
976 if i < bytes.len() && bytes[i] == b'\'' {
980 i += 1;
981 let start = i;
982
983 while i < bytes.len() {
984 if bytes[i] == b'\'' {
985 if i + 1 < bytes.len() && bytes[i + 1] == b'\'' {
987 i += 2;
988 continue;
989 }
990
991 if i + 1 < bytes.len() && bytes[i + 1] == b'!' {
993 let raw = &reference[start..i];
994 let sheet = raw.replace("''", "'");
995 let ref_part = String::from(&reference[i + 2..]);
996 return (Some(sheet), ref_part);
997 }
998 }
999
1000 i += 1;
1001 }
1002 }
1003
1004 i = 0;
1006 while i < bytes.len() {
1007 if bytes[i] == b'!' && i > 0 {
1008 let sheet = String::from(&reference[0..i]);
1009 let ref_part = String::from(&reference[i + 1..]);
1010 return (Some(sheet), ref_part);
1011 }
1012 i += 1;
1013 }
1014
1015 (None, reference.to_string())
1016 }
1017
1018 fn parse_table_reference(reference: &str) -> Result<Self, ParsingError> {
1020 if let Some(bracket_pos) = reference.find('[') {
1022 let table_name = reference[..bracket_pos].trim();
1023 if table_name.is_empty() {
1024 return Err(ParsingError::InvalidReference(reference.to_string()));
1025 }
1026
1027 let specifier_str = &reference[bracket_pos..];
1028 let specifier = Self::parse_table_specifier(specifier_str)?;
1029
1030 Ok(ReferenceType::Table(TableReference {
1031 name: table_name.to_string(),
1032 specifier,
1033 }))
1034 } else {
1035 Err(ParsingError::InvalidReference(reference.to_string()))
1036 }
1037 }
1038
1039 fn parse_bracketed_structured_reference(reference: &str) -> Result<Self, ParsingError> {
1040 debug_assert!(reference.starts_with('[') && reference.ends_with(']'));
1041 let inner = reference[1..reference.len().saturating_sub(1)].trim();
1042 if inner.is_empty() {
1043 return Err(ParsingError::InvalidReference(reference.to_string()));
1044 }
1045
1046 if let Some(rest) = inner.strip_prefix('@') {
1048 let mut col = rest.trim();
1049 if col.starts_with('[') && col.ends_with(']') && col.len() >= 2 {
1050 col = col[1..col.len() - 1].trim();
1051 }
1052 if col.is_empty() {
1053 return Err(ParsingError::InvalidReference(format!(
1054 "This-row structured reference missing column: {reference}"
1055 )));
1056 }
1057
1058 let spec = TableSpecifier::Combination(vec![
1059 Box::new(TableSpecifier::SpecialItem(SpecialItem::ThisRow)),
1060 Box::new(TableSpecifier::Column(col.to_string())),
1061 ]);
1062 return Ok(ReferenceType::Table(TableReference {
1063 name: String::new(),
1064 specifier: Some(spec),
1065 }));
1066 }
1067
1068 Ok(ReferenceType::Table(TableReference {
1070 name: inner.to_string(),
1071 specifier: Some(TableSpecifier::SpecialItem(SpecialItem::Data)),
1072 }))
1073 }
1074
1075 fn parse_table_specifier(specifier_str: &str) -> Result<Option<TableSpecifier>, ParsingError> {
1077 if specifier_str.is_empty() || !specifier_str.starts_with('[') {
1078 return Ok(None);
1079 }
1080
1081 let mut depth = 0;
1083 let mut end_pos = 0;
1084
1085 for (i, c) in specifier_str.char_indices() {
1086 if c == '[' {
1087 depth += 1;
1088 } else if c == ']' {
1089 depth -= 1;
1090 if depth == 0 {
1091 end_pos = i;
1092 break;
1093 }
1094 }
1095 }
1096
1097 if depth != 0 || end_pos == 0 {
1098 return Err(ParsingError::InvalidReference(format!(
1099 "Unbalanced brackets in table specifier: {specifier_str}"
1100 )));
1101 }
1102
1103 let content = &specifier_str[1..end_pos];
1105
1106 if content.is_empty() {
1108 return Ok(Some(TableSpecifier::All));
1110 }
1111
1112 if content.starts_with("#") {
1114 return Self::parse_special_item(content);
1115 }
1116
1117 if !content.contains('[') && !content.contains('#') {
1119 if let Some(colon_pos) = content.find(':') {
1121 let start = content[..colon_pos].trim();
1122 let end = content[colon_pos + 1..].trim();
1123 return Ok(Some(TableSpecifier::ColumnRange(
1124 start.to_string(),
1125 end.to_string(),
1126 )));
1127 } else {
1128 return Ok(Some(TableSpecifier::Column(content.trim().to_string())));
1130 }
1131 }
1132
1133 if content.contains('[') {
1135 return Self::parse_complex_table_specifier(content);
1136 }
1137
1138 Ok(Some(TableSpecifier::Column(content.trim().to_string())))
1140 }
1141
1142 fn parse_openformula_reference(reference: &str) -> Result<Self, ParsingError> {
1143 if reference.starts_with('[') && reference.ends_with(']') {
1144 let inner = &reference[1..reference.len() - 1];
1145 if inner.is_empty() {
1146 return Err(ParsingError::InvalidReference(
1147 "Empty OpenFormula reference".to_string(),
1148 ));
1149 }
1150
1151 let mut parts = inner.splitn(2, ':');
1152 let start_part_str = parts.next().unwrap();
1153 let end_part_str = parts.next();
1154
1155 let start_part = Self::parse_openformula_part(start_part_str)?;
1156 let end_part = if let Some(part) = end_part_str {
1157 Some(Self::parse_openformula_part(part)?)
1158 } else {
1159 None
1160 };
1161
1162 let sheet = match (&start_part.sheet, &end_part) {
1163 (Some(sheet), Some(end)) => {
1164 if let Some(end_sheet) = &end.sheet {
1165 if end_sheet != sheet {
1166 return Err(ParsingError::InvalidReference(format!(
1167 "Mismatched sheets in reference: {sheet} vs {end_sheet}"
1168 )));
1169 }
1170 }
1171 Some(sheet.clone())
1172 }
1173 (Some(sheet), None) => Some(sheet.clone()),
1174 (None, Some(end)) => end.sheet.clone(),
1175 (None, None) => None,
1176 };
1177
1178 let mut excel_like = String::new();
1179 if let Some(sheet_name) = sheet {
1180 if sheet_name_needs_quoting(&sheet_name) {
1181 let escaped = sheet_name.replace('\'', "''");
1182 excel_like.push('\'');
1183 excel_like.push_str(&escaped);
1184 excel_like.push('\'');
1185 } else {
1186 excel_like.push_str(&sheet_name);
1187 }
1188 excel_like.push('!');
1189 }
1190
1191 excel_like.push_str(&start_part.coord);
1192 if let Some(end) = end_part {
1193 excel_like.push(':');
1194 excel_like.push_str(&end.coord);
1195 }
1196
1197 return Self::parse_excel_reference(&excel_like);
1198 }
1199
1200 Err(ParsingError::InvalidReference(format!(
1201 "Unsupported OpenFormula reference: {reference}"
1202 )))
1203 }
1204
1205 fn parse_openformula_part(part: &str) -> Result<OpenFormulaRefPart, ParsingError> {
1206 let trimmed = part.trim();
1207 if trimmed.is_empty() {
1208 return Err(ParsingError::InvalidReference(
1209 "Empty component in OpenFormula reference".to_string(),
1210 ));
1211 }
1212
1213 if trimmed == "." {
1214 return Err(ParsingError::InvalidReference(
1215 "Incomplete OpenFormula reference component".to_string(),
1216 ));
1217 }
1218
1219 if trimmed.starts_with('[') {
1220 return Err(ParsingError::InvalidReference(format!(
1222 "Unexpected '[' in OpenFormula reference component: {trimmed}"
1223 )));
1224 }
1225
1226 let (sheet, coord_slice) = if let Some(stripped) = trimmed.strip_prefix('.') {
1227 (None, stripped.trim())
1228 } else if let Some(dot_idx) = Self::find_openformula_sheet_separator(trimmed) {
1229 let sheet_part = trimmed[..dot_idx].trim();
1230 let coord_part = trimmed[dot_idx + 1..].trim();
1231 if coord_part.is_empty() {
1232 return Err(ParsingError::InvalidReference(format!(
1233 "Missing coordinate in OpenFormula reference component: {trimmed}"
1234 )));
1235 }
1236 let sheet_name = Self::normalise_openformula_sheet(sheet_part)?;
1237 (Some(sheet_name), coord_part)
1238 } else {
1239 (None, trimmed)
1240 };
1241
1242 let coord = coord_slice.trim_start_matches('.').trim().to_string();
1243
1244 if coord.is_empty() {
1245 return Err(ParsingError::InvalidReference(format!(
1246 "Missing coordinate in OpenFormula reference component: {trimmed}"
1247 )));
1248 }
1249
1250 Ok(OpenFormulaRefPart { sheet, coord })
1251 }
1252
1253 fn normalise_openformula_sheet(sheet: &str) -> Result<String, ParsingError> {
1254 let without_abs = sheet.trim().trim_start_matches('$');
1255
1256 if without_abs.starts_with('\'') {
1257 if without_abs.len() < 2 || !without_abs.ends_with('\'') {
1258 return Err(ParsingError::InvalidReference(format!(
1259 "Unterminated sheet name in OpenFormula reference: {sheet}"
1260 )));
1261 }
1262 let inner = &without_abs[1..without_abs.len() - 1];
1263 Ok(inner.replace("''", "'"))
1264 } else {
1265 Ok(without_abs.to_string())
1266 }
1267 }
1268
1269 fn find_openformula_sheet_separator(part: &str) -> Option<usize> {
1270 let bytes = part.as_bytes();
1271 let mut i = 0;
1272 let mut in_quotes = false;
1273
1274 while i < bytes.len() {
1275 match bytes[i] {
1276 b'\'' => {
1277 if i + 1 < bytes.len() && bytes[i + 1] == b'\'' {
1278 i += 2;
1279 continue;
1280 }
1281 in_quotes = !in_quotes;
1282 i += 1;
1283 }
1284 b'.' if !in_quotes => return Some(i),
1285 _ => i += 1,
1286 }
1287 }
1288
1289 None
1290 }
1291
1292 fn parse_special_item(content: &str) -> Result<Option<TableSpecifier>, ParsingError> {
1294 match content {
1295 "#All" => Ok(Some(TableSpecifier::SpecialItem(SpecialItem::All))),
1296 "#Headers" => Ok(Some(TableSpecifier::SpecialItem(SpecialItem::Headers))),
1297 "#Data" => Ok(Some(TableSpecifier::SpecialItem(SpecialItem::Data))),
1298 "#Totals" => Ok(Some(TableSpecifier::SpecialItem(SpecialItem::Totals))),
1299 "@" => Ok(Some(TableSpecifier::Row(TableRowSpecifier::Current))),
1300 _ => Err(ParsingError::InvalidReference(format!(
1301 "Unknown special item: {content}"
1302 ))),
1303 }
1304 }
1305
1306 fn parse_complex_table_specifier(
1308 content: &str,
1309 ) -> Result<Option<TableSpecifier>, ParsingError> {
1310 if content.contains("[#Headers]")
1315 || content.contains("[#All]")
1316 || content.contains("[#Data]")
1317 || content.contains("[#Totals]")
1318 || content.contains("[@]")
1319 {
1320 let mut specifiers = Vec::new();
1323
1324 if content.contains("[#Headers]") {
1326 specifiers.push(Box::new(TableSpecifier::SpecialItem(SpecialItem::Headers)));
1327 }
1328 if content.contains("[#Data]") {
1329 specifiers.push(Box::new(TableSpecifier::SpecialItem(SpecialItem::Data)));
1330 }
1331 if content.contains("[#Totals]") {
1332 specifiers.push(Box::new(TableSpecifier::SpecialItem(SpecialItem::Totals)));
1333 }
1334 if content.contains("[#All]") {
1335 specifiers.push(Box::new(TableSpecifier::SpecialItem(SpecialItem::All)));
1336 }
1337
1338 if !specifiers.is_empty() {
1339 return Ok(Some(TableSpecifier::Combination(specifiers)));
1340 }
1341 }
1342
1343 Ok(Some(TableSpecifier::Column(content.trim().to_string())))
1345 }
1346
1347 pub fn to_excel_string(&self) -> String {
1349 match self {
1350 ReferenceType::Cell {
1351 sheet,
1352 row,
1353 col,
1354 row_abs,
1355 col_abs,
1356 } => {
1357 let col_str = Self::format_col(*col, *col_abs);
1358 let row_str = Self::format_row(*row, *row_abs);
1359 if let Some(s) = sheet {
1360 if sheet_name_needs_quoting(s) {
1361 let escaped_name = s.replace('\'', "''");
1362 format!("'{}'!{}{}", escaped_name, col_str, row_str)
1363 } else {
1364 format!("{}!{}{}", s, col_str, row_str)
1365 }
1366 } else {
1367 format!("{}{}", col_str, row_str)
1368 }
1369 }
1370 ReferenceType::Range {
1371 sheet,
1372 start_row,
1373 start_col,
1374 end_row,
1375 end_col,
1376 start_row_abs,
1377 start_col_abs,
1378 end_row_abs,
1379 end_col_abs,
1380 } => {
1381 let start_ref = match (start_col, start_row) {
1383 (Some(col), Some(row)) => format!(
1384 "{}{}",
1385 Self::format_col(*col, *start_col_abs),
1386 Self::format_row(*row, *start_row_abs)
1387 ),
1388 (Some(col), None) => Self::format_col(*col, *start_col_abs),
1389 (None, Some(row)) => Self::format_row(*row, *start_row_abs),
1390 (None, None) => "".to_string(), };
1392
1393 let end_ref = match (end_col, end_row) {
1395 (Some(col), Some(row)) => format!(
1396 "{}{}",
1397 Self::format_col(*col, *end_col_abs),
1398 Self::format_row(*row, *end_row_abs)
1399 ),
1400 (Some(col), None) => Self::format_col(*col, *end_col_abs),
1401 (None, Some(row)) => Self::format_row(*row, *end_row_abs),
1402 (None, None) => "".to_string(), };
1404
1405 let range_part = format!("{start_ref}:{end_ref}");
1406
1407 if let Some(s) = sheet {
1408 if sheet_name_needs_quoting(s) {
1409 let escaped_name = s.replace('\'', "''");
1410 format!("'{escaped_name}'!{range_part}")
1411 } else {
1412 format!("{s}!{range_part}")
1413 }
1414 } else {
1415 range_part
1416 }
1417 }
1418 ReferenceType::External(ext) => ext.raw.clone(),
1419 ReferenceType::Table(table_ref) => {
1420 if let Some(specifier) = &table_ref.specifier {
1421 format!("{}[{}]", table_ref.name, specifier)
1422 } else {
1423 table_ref.name.clone()
1424 }
1425 }
1426 ReferenceType::NamedRange(name) => name.clone(),
1427 }
1428 }
1429}
1430
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
/// The kind of an AST node together with its payload.
#[derive(Debug, Clone, PartialEq, Hash)]
pub enum ASTNodeType {
    /// A constant value (number, text, boolean, error, ...).
    Literal(LiteralValue),
    /// A reference to cells, ranges, tables, names or external data.
    Reference {
        /// The reference text exactly as it appeared in the source token.
        original: String,
        /// The parsed form of `original`.
        reference: ReferenceType,
    },
    /// A prefix or postfix operator applied to a single operand.
    UnaryOp {
        /// Operator text taken from the operator token.
        op: String,
        /// The operand expression.
        expr: Box<ASTNode>,
    },
    /// An infix operator applied to two operands.
    BinaryOp {
        /// Operator text taken from the operator token.
        op: String,
        /// Left-hand operand.
        left: Box<ASTNode>,
        /// Right-hand operand.
        right: Box<ASTNode>,
    },
    /// A function call.
    Function {
        /// Function name as produced by the parser from the function token.
        name: String,
        /// Arguments in call order.
        args: Vec<ASTNode>,
    },
    /// An array literal stored as rows of items.
    Array(Vec<Vec<ASTNode>>),
}
1455
1456impl Display for ASTNodeType {
1457 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1458 match self {
1459 ASTNodeType::Literal(value) => write!(f, "Literal({value})"),
1460 ASTNodeType::Reference { reference, .. } => write!(f, "Reference({reference:?})"),
1461 ASTNodeType::UnaryOp { op, expr } => write!(f, "UnaryOp({op}, {expr})"),
1462 ASTNodeType::BinaryOp { op, left, right } => {
1463 write!(f, "BinaryOp({op}, {left}, {right})")
1464 }
1465 ASTNodeType::Function { name, args } => write!(f, "Function({name}, {args:?})"),
1466 ASTNodeType::Array(rows) => write!(f, "Array({rows:?})"),
1467 }
1468 }
1469}
1470
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
/// A node of the formula abstract syntax tree.
#[derive(Debug, Clone, PartialEq)]
pub struct ASTNode {
    /// What this node is and the data it carries.
    pub node_type: ASTNodeType,
    /// The token this node was built from, when there is one (the parser
    /// passes `None` for array nodes and for synthesised empty arguments).
    pub source_token: Option<Token>,
    /// Whether this node or any node below it was flagged as volatile while
    /// parsing; the parser propagates the flag upwards from children.
    pub contains_volatile: bool,
}
1483
1484impl ASTNode {
1485 pub fn new(node_type: ASTNodeType, source_token: Option<Token>) -> Self {
1486 ASTNode {
1487 node_type,
1488 source_token,
1489 contains_volatile: false,
1490 }
1491 }
1492
1493 pub fn new_with_volatile(
1495 node_type: ASTNodeType,
1496 source_token: Option<Token>,
1497 contains_volatile: bool,
1498 ) -> Self {
1499 ASTNode {
1500 node_type,
1501 source_token,
1502 contains_volatile,
1503 }
1504 }
1505
    /// Returns the stored volatility flag of this node.
    pub fn contains_volatile(&self) -> bool {
        self.contains_volatile
    }
1510
    /// Returns the structural hash of this subtree; alias for
    /// [`ASTNode::calculate_hash`].
    pub fn fingerprint(&self) -> u64 {
        self.calculate_hash()
    }
1514
1515 pub fn calculate_hash(&self) -> u64 {
1517 let mut hasher = FormulaHasher::new();
1518 self.hash_node(&mut hasher);
1519 hasher.finish()
1520 }
1521
1522 fn hash_node(&self, hasher: &mut FormulaHasher) {
1523 match &self.node_type {
1524 ASTNodeType::Literal(value) => {
1525 hasher.write(&[1]); value.hash(hasher);
1527 }
1528 ASTNodeType::Reference { reference, .. } => {
1529 hasher.write(&[2]); reference.hash(hasher);
1531 }
1532 ASTNodeType::UnaryOp { op, expr } => {
1533 hasher.write(&[3]); hasher.write(op.as_bytes());
1535 expr.hash_node(hasher);
1536 }
1537 ASTNodeType::BinaryOp { op, left, right } => {
1538 hasher.write(&[4]); hasher.write(op.as_bytes());
1540 left.hash_node(hasher);
1541 right.hash_node(hasher);
1542 }
1543 ASTNodeType::Function { name, args } => {
1544 hasher.write(&[5]); let name_lower = name.to_lowercase();
1547 hasher.write(name_lower.as_bytes());
1548 hasher.write_usize(args.len());
1549 for arg in args {
1550 arg.hash_node(hasher);
1551 }
1552 }
1553 ASTNodeType::Array(rows) => {
1554 hasher.write(&[6]); hasher.write_usize(rows.len());
1556 for row in rows {
1557 hasher.write_usize(row.len());
1558 for item in row {
1559 item.hash_node(hasher);
1560 }
1561 }
1562 }
1563 }
1564 }
1565
1566 pub fn get_dependencies(&self) -> Vec<&ReferenceType> {
1567 let mut dependencies = Vec::new();
1568 self.collect_dependencies(&mut dependencies);
1569 dependencies
1570 }
1571
1572 pub fn get_dependency_strings(&self) -> Vec<String> {
1573 self.get_dependencies()
1574 .into_iter()
1575 .map(|dep| format!("{dep}"))
1576 .collect()
1577 }
1578
1579 fn collect_dependencies<'a>(&'a self, dependencies: &mut Vec<&'a ReferenceType>) {
1580 match &self.node_type {
1581 ASTNodeType::Reference { reference, .. } => {
1582 dependencies.push(reference);
1583 }
1584 ASTNodeType::UnaryOp { expr, .. } => {
1585 expr.collect_dependencies(dependencies);
1586 }
1587 ASTNodeType::BinaryOp { left, right, .. } => {
1588 left.collect_dependencies(dependencies);
1589 right.collect_dependencies(dependencies);
1590 }
1591 ASTNodeType::Function { args, .. } => {
1592 for arg in args {
1593 arg.collect_dependencies(dependencies);
1594 }
1595 }
1596 ASTNodeType::Array(rows) => {
1597 for row in rows {
1598 for item in row {
1599 item.collect_dependencies(dependencies);
1600 }
1601 }
1602 }
1603 _ => {}
1604 }
1605 }
1606
1607 pub fn refs(&self) -> RefIter<'_> {
1610 RefIter {
1611 stack: smallvec::smallvec![self],
1612 }
1613 }
1614
1615 pub fn visit_refs<V: FnMut(RefView<'_>)>(&self, mut visitor: V) {
1617 let mut stack: Vec<&ASTNode> = Vec::with_capacity(8);
1618 stack.push(self);
1619 while let Some(node) = stack.pop() {
1620 match &node.node_type {
1621 ASTNodeType::Reference { reference, .. } => visitor(RefView::from(reference)),
1622 ASTNodeType::UnaryOp { expr, .. } => stack.push(expr),
1623 ASTNodeType::BinaryOp { left, right, .. } => {
1624 stack.push(right);
1626 stack.push(left);
1627 }
1628 ASTNodeType::Function { args, .. } => {
1629 for a in args.iter().rev() {
1630 stack.push(a);
1631 }
1632 }
1633 ASTNodeType::Array(rows) => {
1634 for r in rows.iter().rev() {
1635 for item in r.iter().rev() {
1636 stack.push(item);
1637 }
1638 }
1639 }
1640 ASTNodeType::Literal(_) => {}
1641 }
1642 }
1643 }
1644
1645 pub fn collect_references(&self, policy: &CollectPolicy) -> SmallVec<[ReferenceType; 4]> {
1647 let mut out: SmallVec<[ReferenceType; 4]> = SmallVec::new();
1648 self.visit_refs(|rv| match rv {
1649 RefView::Cell {
1650 sheet,
1651 row,
1652 col,
1653 row_abs,
1654 col_abs,
1655 } => out.push(ReferenceType::Cell {
1656 sheet: sheet.map(|s| s.to_string()),
1657 row,
1658 col,
1659 row_abs,
1660 col_abs,
1661 }),
1662 RefView::Range {
1663 sheet,
1664 start_row,
1665 start_col,
1666 end_row,
1667 end_col,
1668 start_row_abs,
1669 start_col_abs,
1670 end_row_abs,
1671 end_col_abs,
1672 } => {
1673 if policy.expand_small_ranges {
1675 if let (Some(sr), Some(sc), Some(er), Some(ec)) =
1676 (start_row, start_col, end_row, end_col)
1677 {
1678 let rows = er.saturating_sub(sr) + 1;
1679 let cols = ec.saturating_sub(sc) + 1;
1680 let area = rows.saturating_mul(cols);
1681 if area as usize <= policy.range_expansion_limit {
1682 let row_abs = start_row_abs && end_row_abs;
1683 let col_abs = start_col_abs && end_col_abs;
1684 for r in sr..=er {
1685 for c in sc..=ec {
1686 out.push(ReferenceType::Cell {
1687 sheet: sheet.map(|s| s.to_string()),
1688 row: r,
1689 col: c,
1690 row_abs,
1691 col_abs,
1692 });
1693 }
1694 }
1695 return; }
1697 }
1698 }
1699 out.push(ReferenceType::Range {
1700 sheet: sheet.map(|s| s.to_string()),
1701 start_row,
1702 start_col,
1703 end_row,
1704 end_col,
1705 start_row_abs,
1706 start_col_abs,
1707 end_row_abs,
1708 end_col_abs,
1709 });
1710 }
1711 RefView::External {
1712 raw,
1713 book,
1714 sheet,
1715 kind,
1716 } => out.push(ReferenceType::External(ExternalReference {
1717 raw: raw.to_string(),
1718 book: ExternalBookRef::Token(book.to_string()),
1719 sheet: sheet.to_string(),
1720 kind,
1721 })),
1722 RefView::Table { name, specifier } => out.push(ReferenceType::Table(TableReference {
1723 name: name.to_string(),
1724 specifier: specifier.cloned(),
1725 })),
1726 RefView::NamedRange { name } => {
1727 if policy.include_names {
1728 out.push(ReferenceType::NamedRange(name.to_string()));
1729 }
1730 }
1731 });
1732 out
1733 }
1734 pub fn update_sheet_references(&mut self, target_name: Option<&str>, new_name: &str) {
1740 match &mut self.node_type {
1741 ASTNodeType::Reference {
1742 reference: ReferenceType::Cell { sheet, .. } | ReferenceType::Range { sheet, .. },
1743 ..
1744 } => {
1745 if let Some(current_sheet) = sheet
1746 && (target_name.is_none() || target_name == Some(current_sheet.as_str()))
1747 {
1748 *sheet = Some(new_name.to_string());
1749 }
1750 }
1751 ASTNodeType::UnaryOp { expr, .. } => {
1752 expr.update_sheet_references(target_name, new_name);
1753 }
1754 ASTNodeType::BinaryOp { left, right, .. } => {
1755 left.update_sheet_references(target_name, new_name);
1756 right.update_sheet_references(target_name, new_name);
1757 }
1758 ASTNodeType::Function { args, .. } => {
1759 for arg in args {
1760 arg.update_sheet_references(target_name, new_name);
1761 }
1762 }
1763 ASTNodeType::Array(rows) => {
1764 for row in rows {
1765 for cell in row {
1766 cell.update_sheet_references(target_name, new_name);
1767 }
1768 }
1769 }
1770 _ => {}
1771 }
1772 }
1773}
1774
/// A cheap, copyable, borrowed view of a `ReferenceType`.
///
/// Scalars are copied and strings are borrowed from the underlying reference
/// (see the `From<&ReferenceType>` implementation).
#[derive(Clone, Copy, Debug)]
pub enum RefView<'a> {
    /// A single cell.
    Cell {
        /// Explicit sheet name, if any.
        sheet: Option<&'a str>,
        row: u32,
        col: u32,
        /// Whether the row part is absolute.
        row_abs: bool,
        /// Whether the column part is absolute.
        col_abs: bool,
    },
    /// A range; any of the four bounds may be absent.
    Range {
        /// Explicit sheet name, if any.
        sheet: Option<&'a str>,
        start_row: Option<u32>,
        start_col: Option<u32>,
        end_row: Option<u32>,
        end_col: Option<u32>,
        start_row_abs: bool,
        start_col_abs: bool,
        end_row_abs: bool,
        end_col_abs: bool,
    },
    /// A reference into another workbook.
    External {
        /// The raw reference text.
        raw: &'a str,
        /// The workbook token.
        book: &'a str,
        /// The sheet name.
        sheet: &'a str,
        kind: ExternalRefKind,
    },
    /// A structured table reference.
    Table {
        name: &'a str,
        specifier: Option<&'a TableSpecifier>,
    },
    /// A named range.
    NamedRange {
        name: &'a str,
    },
}
1810
1811impl<'a> From<&'a ReferenceType> for RefView<'a> {
1812 fn from(r: &'a ReferenceType) -> Self {
1813 match r {
1814 ReferenceType::Cell {
1815 sheet,
1816 row,
1817 col,
1818 row_abs,
1819 col_abs,
1820 } => RefView::Cell {
1821 sheet: sheet.as_deref(),
1822 row: *row,
1823 col: *col,
1824 row_abs: *row_abs,
1825 col_abs: *col_abs,
1826 },
1827 ReferenceType::Range {
1828 sheet,
1829 start_row,
1830 start_col,
1831 end_row,
1832 end_col,
1833 start_row_abs,
1834 start_col_abs,
1835 end_row_abs,
1836 end_col_abs,
1837 } => RefView::Range {
1838 sheet: sheet.as_deref(),
1839 start_row: *start_row,
1840 start_col: *start_col,
1841 end_row: *end_row,
1842 end_col: *end_col,
1843 start_row_abs: *start_row_abs,
1844 start_col_abs: *start_col_abs,
1845 end_row_abs: *end_row_abs,
1846 end_col_abs: *end_col_abs,
1847 },
1848 ReferenceType::External(ext) => RefView::External {
1849 raw: ext.raw.as_str(),
1850 book: ext.book.token(),
1851 sheet: ext.sheet.as_str(),
1852 kind: ext.kind,
1853 },
1854 ReferenceType::Table(tr) => RefView::Table {
1855 name: tr.name.as_str(),
1856 specifier: tr.specifier.as_ref(),
1857 },
1858 ReferenceType::NamedRange(name) => RefView::NamedRange { name },
1859 }
1860 }
1861}
1862
/// Iterator over the references of an AST, created by `ASTNode::refs`.
pub struct RefIter<'a> {
    /// Nodes still to be visited; the next node to process is on top.
    stack: smallvec::SmallVec<[&'a ASTNode; 8]>,
}
1867
1868impl<'a> Iterator for RefIter<'a> {
1869 type Item = RefView<'a>;
1870 fn next(&mut self) -> Option<Self::Item> {
1871 while let Some(node) = self.stack.pop() {
1872 match &node.node_type {
1873 ASTNodeType::Reference { reference, .. } => return Some(RefView::from(reference)),
1874 ASTNodeType::UnaryOp { expr, .. } => self.stack.push(expr),
1875 ASTNodeType::BinaryOp { left, right, .. } => {
1876 self.stack.push(right);
1877 self.stack.push(left);
1878 }
1879 ASTNodeType::Function { args, .. } => {
1880 for a in args.iter().rev() {
1881 self.stack.push(a);
1882 }
1883 }
1884 ASTNodeType::Array(rows) => {
1885 for r in rows.iter().rev() {
1886 for item in r.iter().rev() {
1887 self.stack.push(item);
1888 }
1889 }
1890 }
1891 ASTNodeType::Literal(_) => {}
1892 }
1893 }
1894 None
1895 }
1896}
1897
/// Options controlling `ASTNode::collect_references`.
#[derive(Debug, Clone)]
pub struct CollectPolicy {
    /// When true, sufficiently small fully-bounded ranges are replaced by
    /// their individual cells.
    pub expand_small_ranges: bool,
    /// Largest number of cells a range may cover and still be expanded.
    pub range_expansion_limit: usize,
    /// When false, named ranges are left out of the result.
    pub include_names: bool,
}
1905
1906impl Default for CollectPolicy {
1907 fn default() -> Self {
1908 Self {
1909 expand_small_ranges: false,
1910 range_expansion_limit: 0,
1911 include_names: true,
1912 }
1913 }
1914}
1915
1916impl Display for ASTNode {
1917 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1918 write!(f, "{}", self.node_type)
1919 }
1920}
1921
1922impl std::hash::Hash for ASTNode {
1923 fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
1924 let hash = self.calculate_hash();
1925 state.write_u64(hash);
1926 }
1927}
1928
/// Parser that turns a token list into an `ASTNode` tree.
pub struct Parser {
    /// The tokens being parsed.
    tokens: Arc<[Token]>,
    /// Index into `tokens` of the next token to consume.
    position: usize,
    /// Optional callback, given a function name, that reports whether the
    /// function is volatile.
    volatility_classifier: Option<VolatilityClassifierBox>,
    /// Dialect passed on when parsing reference operands.
    dialect: FormulaDialect,
}
1937
1938impl TryFrom<&str> for Parser {
1939 type Error = TokenizerError;
1940
1941 fn try_from(formula: &str) -> Result<Self, Self::Error> {
1942 let tokens = Tokenizer::new(formula)?.items;
1943 Ok(Self::new(tokens, false))
1944 }
1945}
1946
1947impl TryFrom<String> for Parser {
1948 type Error = TokenizerError;
1949
1950 fn try_from(formula: String) -> Result<Self, Self::Error> {
1951 Self::try_from(formula.as_str())
1952 }
1953}
1954
1955impl Parser {
    /// Creates a parser over `tokens` using the Excel dialect.
    ///
    /// Whitespace tokens are dropped unless `include_whitespace` is true.
    pub fn new(tokens: Vec<Token>, include_whitespace: bool) -> Self {
        Self::new_with_dialect(tokens, include_whitespace, FormulaDialect::Excel)
    }
1959
1960 pub fn new_with_dialect(
1961 mut tokens: Vec<Token>,
1962 include_whitespace: bool,
1963 dialect: FormulaDialect,
1964 ) -> Self {
1965 if !include_whitespace {
1966 tokens.retain(|t| t.token_type != TokenType::Whitespace);
1967 }
1968
1969 Parser {
1970 tokens: Arc::from(tokens.into_boxed_slice()),
1971 position: 0,
1972 volatility_classifier: None,
1973 dialect,
1974 }
1975 }
1976
1977 pub fn try_from_formula(formula: &str) -> Result<Self, TokenizerError> {
1978 let tokens = Tokenizer::new(formula)?.items;
1979 Ok(Self::new(tokens, false))
1980 }
1981
    /// Installs `f` as the volatility classifier and returns the parser.
    ///
    /// While parsing, `f` is called with each function's name; a `true`
    /// result marks that function node as volatile. Replaces any classifier
    /// set earlier.
    pub fn with_volatility_classifier<F>(mut self, f: F) -> Self
    where
        F: Fn(&str) -> bool + Send + Sync + 'static,
    {
        self.volatility_classifier = Some(Box::new(f));
        self
    }
1991
    /// Shorthand for [`Parser::new`] followed by
    /// [`Parser::with_volatility_classifier`].
    pub fn new_with_classifier<F>(tokens: Vec<Token>, include_whitespace: bool, f: F) -> Self
    where
        F: Fn(&str) -> bool + Send + Sync + 'static,
    {
        Self::new(tokens, include_whitespace).with_volatility_classifier(f)
    }
1999
    /// Shorthand for [`Parser::new_with_dialect`] followed by
    /// [`Parser::with_volatility_classifier`].
    pub fn new_with_classifier_and_dialect<F>(
        tokens: Vec<Token>,
        include_whitespace: bool,
        dialect: FormulaDialect,
        f: F,
    ) -> Self
    where
        F: Fn(&str) -> bool + Send + Sync + 'static,
    {
        Self::new_with_dialect(tokens, include_whitespace, dialect).with_volatility_classifier(f)
    }
2011
2012 fn skip_whitespace(&mut self) {
2013 while self.position < self.tokens.len()
2014 && self.tokens[self.position].token_type == TokenType::Whitespace
2015 {
2016 self.position += 1;
2017 }
2018 }
2019
2020 pub fn parse(&mut self) -> Result<ASTNode, ParserError> {
2022 if self.tokens.is_empty() {
2023 return Err(ParserError {
2024 message: "No tokens to parse".to_string(),
2025 position: None,
2026 });
2027 }
2028
2029 self.skip_whitespace();
2030 if self.position >= self.tokens.len() {
2031 return Err(ParserError {
2032 message: "No tokens to parse".to_string(),
2033 position: None,
2034 });
2035 }
2036
2037 if self.tokens[self.position].token_type == TokenType::Literal {
2039 let token = self.tokens[self.position].clone();
2040 self.position += 1;
2041 self.skip_whitespace();
2042 if self.position < self.tokens.len() {
2043 return Err(ParserError {
2044 message: format!(
2045 "Unexpected token at position {}: {:?}",
2046 self.position, self.tokens[self.position]
2047 ),
2048 position: Some(self.position),
2049 });
2050 }
2051 return Ok(ASTNode::new(
2052 ASTNodeType::Literal(LiteralValue::Text(token.value.clone())),
2053 Some(token),
2054 ));
2055 }
2056
2057 let ast = self.parse_expression()?;
2058 self.skip_whitespace();
2059 if self.position < self.tokens.len() {
2060 return Err(ParserError {
2061 message: format!(
2062 "Unexpected token at position {}: {:?}",
2063 self.position, self.tokens[self.position]
2064 ),
2065 position: Some(self.position),
2066 });
2067 }
2068 Ok(ast)
2069 }
2070
    /// Parses a full expression, i.e. with a minimum precedence of 0.
    fn parse_expression(&mut self) -> Result<ASTNode, ParserError> {
        self.parse_bp(0)
    }
2074
2075 fn parse_bp(&mut self, min_precedence: u8) -> Result<ASTNode, ParserError> {
2078 let mut left = self.parse_prefix()?;
2079
2080 loop {
2081 self.skip_whitespace();
2082 if self.position >= self.tokens.len() {
2083 break;
2084 }
2085
2086 if self.tokens[self.position].token_type == TokenType::OpPostfix {
2088 let (precedence, _) = self.tokens[self.position]
2089 .get_precedence()
2090 .unwrap_or((0, Associativity::Left));
2091 if precedence < min_precedence {
2092 break;
2093 }
2094
2095 let op_token = self.tokens[self.position].clone();
2096 self.position += 1;
2097 let contains_volatile = left.contains_volatile;
2098 left = ASTNode::new_with_volatile(
2099 ASTNodeType::UnaryOp {
2100 op: op_token.value.clone(),
2101 expr: Box::new(left),
2102 },
2103 Some(op_token),
2104 contains_volatile,
2105 );
2106 continue;
2107 }
2108
2109 let token = &self.tokens[self.position];
2110 if token.token_type != TokenType::OpInfix {
2111 break;
2112 }
2113
2114 let (precedence, associativity) =
2115 token.get_precedence().unwrap_or((0, Associativity::Left));
2116 if precedence < min_precedence {
2117 break;
2118 }
2119
2120 let op_token = self.tokens[self.position].clone();
2121 self.position += 1;
2122
2123 let next_min_precedence = if associativity == Associativity::Left {
2124 precedence + 1
2125 } else {
2126 precedence
2127 };
2128
2129 let right = self.parse_bp(next_min_precedence)?;
2130 let contains_volatile = left.contains_volatile || right.contains_volatile;
2131 left = ASTNode::new_with_volatile(
2132 ASTNodeType::BinaryOp {
2133 op: op_token.value.clone(),
2134 left: Box::new(left),
2135 right: Box::new(right),
2136 },
2137 Some(op_token),
2138 contains_volatile,
2139 );
2140 }
2141
2142 Ok(left)
2143 }
2144
2145 fn parse_prefix(&mut self) -> Result<ASTNode, ParserError> {
2146 self.skip_whitespace();
2147 if self.position < self.tokens.len()
2148 && self.tokens[self.position].token_type == TokenType::OpPrefix
2149 {
2150 let op_token = self.tokens[self.position].clone();
2151 self.position += 1;
2152
2153 let (precedence, _) = op_token
2156 .get_precedence()
2157 .unwrap_or((0, Associativity::Right));
2158
2159 let expr = self.parse_bp(precedence)?;
2160 let contains_volatile = expr.contains_volatile;
2161 return Ok(ASTNode::new_with_volatile(
2162 ASTNodeType::UnaryOp {
2163 op: op_token.value.clone(),
2164 expr: Box::new(expr),
2165 },
2166 Some(op_token),
2167 contains_volatile,
2168 ));
2169 }
2170
2171 self.parse_primary()
2172 }
2173
2174 fn parse_primary(&mut self) -> Result<ASTNode, ParserError> {
2175 self.skip_whitespace();
2176 if self.position >= self.tokens.len() {
2177 return Err(ParserError {
2178 message: "Unexpected end of tokens".to_string(),
2179 position: Some(self.position),
2180 });
2181 }
2182
2183 let token = &self.tokens[self.position];
2184 match token.token_type {
2185 TokenType::Operand => {
2186 let operand_token = self.tokens[self.position].clone();
2187 self.position += 1;
2188 self.parse_operand(operand_token)
2189 }
2190 TokenType::Func => {
2191 let func_token = self.tokens[self.position].clone();
2192 self.position += 1;
2193 self.parse_function(func_token)
2194 }
2195 TokenType::Paren if token.subtype == TokenSubType::Open => {
2196 self.position += 1;
2197 let expr = self.parse_expression()?;
2198 if self.position >= self.tokens.len()
2199 || self.tokens[self.position].token_type != TokenType::Paren
2200 || self.tokens[self.position].subtype != TokenSubType::Close
2201 {
2202 return Err(ParserError {
2203 message: "Expected closing parenthesis".to_string(),
2204 position: Some(self.position),
2205 });
2206 }
2207 self.position += 1;
2208 Ok(expr)
2209 }
2210 TokenType::Array if token.subtype == TokenSubType::Open => {
2211 self.position += 1;
2212 self.parse_array()
2213 }
2214 _ => Err(ParserError {
2215 message: format!("Unexpected token: {token:?}"),
2216 position: Some(self.position),
2217 }),
2218 }
2219 }
2220
2221 fn parse_operand(&mut self, token: Token) -> Result<ASTNode, ParserError> {
2222 match token.subtype {
2223 TokenSubType::Number => {
2224 let value = token.value.parse::<f64>().map_err(|_| ParserError {
2225 message: format!("Invalid number: {}", token.value),
2226 position: Some(self.position),
2227 })?;
2228 Ok(ASTNode::new(
2229 ASTNodeType::Literal(LiteralValue::Number(value)),
2230 Some(token),
2231 ))
2232 }
2233 TokenSubType::Text => {
2234 let mut text = token.value.clone();
2236 if text.starts_with('"') && text.ends_with('"') && text.len() >= 2 {
2237 text = text[1..text.len() - 1].to_string();
2238 text = text.replace("\"\"", "\"");
2240 }
2241 Ok(ASTNode::new(
2242 ASTNodeType::Literal(LiteralValue::Text(text)),
2243 Some(token),
2244 ))
2245 }
2246 TokenSubType::Logical => {
2247 let value = token.value.to_uppercase() == "TRUE";
2248 Ok(ASTNode::new(
2249 ASTNodeType::Literal(LiteralValue::Boolean(value)),
2250 Some(token),
2251 ))
2252 }
2253 TokenSubType::Error => {
2254 let error = ExcelError::from_error_string(&token.value);
2255 Ok(ASTNode::new(
2256 ASTNodeType::Literal(LiteralValue::Error(error)),
2257 Some(token),
2258 ))
2259 }
2260 TokenSubType::Range => {
2261 let reference = ReferenceType::from_string_with_dialect(&token.value, self.dialect)
2262 .map_err(|e| ParserError {
2263 message: format!("Invalid reference '{}': {}", token.value, e),
2264 position: Some(self.position),
2265 })?;
2266 Ok(ASTNode::new(
2267 ASTNodeType::Reference {
2268 original: token.value.clone(),
2269 reference,
2270 },
2271 Some(token),
2272 ))
2273 }
2274 _ => Err(ParserError {
2275 message: format!("Unexpected operand subtype: {:?}", token.subtype),
2276 position: Some(self.position),
2277 }),
2278 }
2279 }
2280
2281 fn parse_function(&mut self, func_token: Token) -> Result<ASTNode, ParserError> {
2282 let name = func_token.value[..func_token.value.len() - 1].to_string();
2283 let args = self.parse_function_arguments()?;
2284 let this_is_volatile = self
2286 .volatility_classifier
2287 .as_ref()
2288 .map(|f| f(name.as_str()))
2289 .unwrap_or(false);
2290 let args_volatile = args.iter().any(|a| a.contains_volatile);
2291
2292 Ok(ASTNode::new_with_volatile(
2293 ASTNodeType::Function { name, args },
2294 Some(func_token),
2295 this_is_volatile || args_volatile,
2296 ))
2297 }
2298
2299 fn parse_function_arguments(&mut self) -> Result<Vec<ASTNode>, ParserError> {
2301 let mut args = Vec::new();
2302
2303 if self.position < self.tokens.len()
2305 && self.tokens[self.position].token_type == TokenType::Func
2306 && self.tokens[self.position].subtype == TokenSubType::Close
2307 {
2308 self.position += 1;
2309 return Ok(args);
2310 }
2311
2312 if self.position < self.tokens.len()
2315 && self.tokens[self.position].token_type == TokenType::Sep
2316 && self.tokens[self.position].subtype == TokenSubType::Arg
2317 {
2318 args.push(ASTNode::new(
2320 ASTNodeType::Literal(LiteralValue::Text("".to_string())),
2321 None,
2322 ));
2323 self.position += 1;
2324 } else {
2325 args.push(self.parse_expression()?);
2327 }
2328
2329 while self.position < self.tokens.len() {
2331 let token = &self.tokens[self.position];
2332
2333 if token.token_type == TokenType::Sep && token.subtype == TokenSubType::Arg {
2334 self.position += 1;
2335 if self.position < self.tokens.len() {
2337 let next_token = &self.tokens[self.position];
2338 if next_token.token_type == TokenType::Sep
2339 && next_token.subtype == TokenSubType::Arg
2340 {
2341 args.push(ASTNode::new(
2343 ASTNodeType::Literal(LiteralValue::Text("".to_string())),
2344 None,
2345 ));
2346 } else if next_token.token_type == TokenType::Func
2347 && next_token.subtype == TokenSubType::Close
2348 {
2349 args.push(ASTNode::new(
2351 ASTNodeType::Literal(LiteralValue::Text("".to_string())),
2352 None,
2353 ));
2354 self.position += 1;
2355 break;
2356 } else {
2357 args.push(self.parse_expression()?);
2358 }
2359 } else {
2360 args.push(ASTNode::new(
2362 ASTNodeType::Literal(LiteralValue::Text("".to_string())),
2363 None,
2364 ));
2365 }
2366 } else if token.token_type == TokenType::Func && token.subtype == TokenSubType::Close {
2367 self.position += 1;
2368 break;
2369 } else {
2370 return Err(ParserError {
2371 message: format!("Expected ',' or ')' in function arguments, got {token:?}"),
2372 position: Some(self.position),
2373 });
2374 }
2375 }
2376
2377 Ok(args)
2378 }
2379
2380 fn parse_array(&mut self) -> Result<ASTNode, ParserError> {
2381 let mut rows = Vec::new();
2382 let mut current_row = Vec::new();
2383
2384 if self.position < self.tokens.len()
2386 && self.tokens[self.position].token_type == TokenType::Array
2387 && self.tokens[self.position].subtype == TokenSubType::Close
2388 {
2389 self.position += 1;
2390 return Ok(ASTNode::new(ASTNodeType::Array(rows), None));
2391 }
2392
2393 current_row.push(self.parse_expression()?);
2395
2396 while self.position < self.tokens.len() {
2397 let token = &self.tokens[self.position];
2398
2399 if token.token_type == TokenType::Sep {
2400 if token.subtype == TokenSubType::Arg {
2401 self.position += 1;
2403 current_row.push(self.parse_expression()?);
2404 } else if token.subtype == TokenSubType::Row {
2405 self.position += 1;
2407 rows.push(current_row);
2408 current_row = vec![self.parse_expression()?];
2409 }
2410 } else if token.token_type == TokenType::Array && token.subtype == TokenSubType::Close {
2411 self.position += 1;
2412 rows.push(current_row);
2413 break;
2414 } else {
2415 return Err(ParserError {
2416 message: format!("Unexpected token in array: {token:?}"),
2417 position: Some(self.position),
2418 });
2419 }
2420 }
2421
2422 let contains_volatile = rows
2424 .iter()
2425 .flat_map(|r| r.iter())
2426 .any(|n| n.contains_volatile);
2427 Ok(ASTNode::new_with_volatile(
2428 ASTNodeType::Array(rows),
2429 None,
2430 contains_volatile,
2431 ))
2432 }
2433}
2434
2435impl From<TokenizerError> for ParserError {
2436 fn from(err: TokenizerError) -> Self {
2437 ParserError {
2438 message: err.message,
2439 position: Some(err.pos),
2440 }
2441 }
2442}
2443
/// Parser that works on token spans into a source string instead of owned
/// tokens.
struct SpanParser<'a> {
    /// The formula text that the spans index into.
    source: &'a str,
    /// The token spans being parsed.
    tokens: &'a [crate::tokenizer::TokenSpan],
    /// Index into `tokens` of the next span to consume.
    position: usize,
    /// Optional callback, given a function name, that reports whether the
    /// function is volatile.
    volatility_classifier: Option<VolatilityClassifierBox>,
    /// Formula dialect in effect for this parser.
    dialect: FormulaDialect,
}
2451
2452impl<'a> SpanParser<'a> {
2453 fn new(
2454 source: &'a str,
2455 tokens: &'a [crate::tokenizer::TokenSpan],
2456 dialect: FormulaDialect,
2457 ) -> Self {
2458 SpanParser {
2459 source,
2460 tokens,
2461 position: 0,
2462 volatility_classifier: None,
2463 dialect,
2464 }
2465 }
2466
    /// Installs `f` as the volatility classifier and returns the parser,
    /// replacing any classifier set earlier.
    fn with_volatility_classifier<F>(mut self, f: F) -> Self
    where
        F: Fn(&str) -> bool + Send + Sync + 'static,
    {
        self.volatility_classifier = Some(Box::new(f));
        self
    }
2474
2475 fn skip_whitespace(&mut self) {
2476 while self.position < self.tokens.len()
2477 && self.tokens[self.position].token_type == TokenType::Whitespace
2478 {
2479 self.position += 1;
2480 }
2481 }
2482
    /// Returns the slice of the source text covered by `span`
    /// (`span.start..span.end`).
    ///
    /// Slicing panics if the span is out of bounds or not on character
    /// boundaries of `source`.
    fn span_value(&self, span: &crate::tokenizer::TokenSpan) -> &str {
        &self.source[span.start..span.end]
    }
2486
2487 fn span_to_token(&self, span: &crate::tokenizer::TokenSpan) -> Token {
2488 Token::new_with_span(
2489 self.span_value(span).to_string(),
2490 span.token_type,
2491 span.subtype,
2492 span.start,
2493 span.end,
2494 )
2495 }
2496
2497 fn span_precedence(&self, span: &crate::tokenizer::TokenSpan) -> Option<(u8, Associativity)> {
2498 if !matches!(
2499 span.token_type,
2500 TokenType::OpPrefix | TokenType::OpInfix | TokenType::OpPostfix
2501 ) {
2502 return None;
2503 }
2504
2505 let op = if span.token_type == TokenType::OpPrefix {
2506 "u"
2507 } else {
2508 self.span_value(span)
2509 };
2510
2511 match op {
2512 ":" | " " | "," => Some((8, Associativity::Left)),
2513 "%" => Some((7, Associativity::Left)),
2514 "u" => Some((6, Associativity::Right)),
2515 "^" => Some((5, Associativity::Right)),
2516 "*" | "/" => Some((4, Associativity::Left)),
2517 "+" | "-" => Some((3, Associativity::Left)),
2518 "&" => Some((2, Associativity::Left)),
2519 "=" | "<" | ">" | "<=" | ">=" | "<>" => Some((1, Associativity::Left)),
2520 _ => None,
2521 }
2522 }
2523
2524 fn parse(&mut self) -> Result<ASTNode, ParserError> {
2525 if self.tokens.is_empty() {
2526 return Err(ParserError {
2527 message: "No tokens to parse".to_string(),
2528 position: None,
2529 });
2530 }
2531
2532 self.skip_whitespace();
2533 if self.position >= self.tokens.len() {
2534 return Err(ParserError {
2535 message: "No tokens to parse".to_string(),
2536 position: None,
2537 });
2538 }
2539
2540 if self.tokens[self.position].token_type == TokenType::Literal {
2541 let span = self.tokens[self.position];
2542 self.position += 1;
2543 self.skip_whitespace();
2544 if self.position < self.tokens.len() {
2545 return Err(ParserError {
2546 message: format!(
2547 "Unexpected token at position {}: {:?}",
2548 self.position, self.tokens[self.position]
2549 ),
2550 position: Some(self.position),
2551 });
2552 }
2553
2554 let token = self.span_to_token(&span);
2555 return Ok(ASTNode::new(
2556 ASTNodeType::Literal(LiteralValue::Text(token.value.clone())),
2557 Some(token),
2558 ));
2559 }
2560
2561 let ast = self.parse_expression()?;
2562 self.skip_whitespace();
2563 if self.position < self.tokens.len() {
2564 return Err(ParserError {
2565 message: format!(
2566 "Unexpected token at position {}: {:?}",
2567 self.position, self.tokens[self.position]
2568 ),
2569 position: Some(self.position),
2570 });
2571 }
2572 Ok(ast)
2573 }
2574
2575 fn parse_expression(&mut self) -> Result<ASTNode, ParserError> {
2576 self.parse_bp(0)
2577 }
2578
2579 fn parse_bp(&mut self, min_precedence: u8) -> Result<ASTNode, ParserError> {
2580 let mut left = self.parse_prefix()?;
2581
2582 loop {
2583 self.skip_whitespace();
2584 if self.position >= self.tokens.len() {
2585 break;
2586 }
2587
2588 if self.tokens[self.position].token_type == TokenType::OpPostfix {
2589 let (precedence, _) = self
2590 .span_precedence(&self.tokens[self.position])
2591 .unwrap_or((0, Associativity::Left));
2592 if precedence < min_precedence {
2593 break;
2594 }
2595
2596 let op_span = self.tokens[self.position];
2597 self.position += 1;
2598 let op_token = self.span_to_token(&op_span);
2599 let contains_volatile = left.contains_volatile;
2600 left = ASTNode::new_with_volatile(
2601 ASTNodeType::UnaryOp {
2602 op: op_token.value.clone(),
2603 expr: Box::new(left),
2604 },
2605 Some(op_token),
2606 contains_volatile,
2607 );
2608 continue;
2609 }
2610
2611 let token = &self.tokens[self.position];
2612 if token.token_type != TokenType::OpInfix {
2613 break;
2614 }
2615
2616 let (precedence, associativity) = self
2617 .span_precedence(token)
2618 .unwrap_or((0, Associativity::Left));
2619 if precedence < min_precedence {
2620 break;
2621 }
2622
2623 let op_span = self.tokens[self.position];
2624 self.position += 1;
2625
2626 let next_min_precedence = if associativity == Associativity::Left {
2627 precedence + 1
2628 } else {
2629 precedence
2630 };
2631
2632 let right = self.parse_bp(next_min_precedence)?;
2633 let op_token = self.span_to_token(&op_span);
2634 let contains_volatile = left.contains_volatile || right.contains_volatile;
2635 left = ASTNode::new_with_volatile(
2636 ASTNodeType::BinaryOp {
2637 op: op_token.value.clone(),
2638 left: Box::new(left),
2639 right: Box::new(right),
2640 },
2641 Some(op_token),
2642 contains_volatile,
2643 );
2644 }
2645
2646 Ok(left)
2647 }
2648
2649 fn parse_prefix(&mut self) -> Result<ASTNode, ParserError> {
2650 self.skip_whitespace();
2651 if self.position < self.tokens.len()
2652 && self.tokens[self.position].token_type == TokenType::OpPrefix
2653 {
2654 let op_span = self.tokens[self.position];
2655 self.position += 1;
2656
2657 let (precedence, _) = self
2658 .span_precedence(&op_span)
2659 .unwrap_or((0, Associativity::Right));
2660
2661 let expr = self.parse_bp(precedence)?;
2662 let op_token = self.span_to_token(&op_span);
2663 let contains_volatile = expr.contains_volatile;
2664 return Ok(ASTNode::new_with_volatile(
2665 ASTNodeType::UnaryOp {
2666 op: op_token.value.clone(),
2667 expr: Box::new(expr),
2668 },
2669 Some(op_token),
2670 contains_volatile,
2671 ));
2672 }
2673
2674 self.parse_primary()
2675 }
2676
2677 fn parse_primary(&mut self) -> Result<ASTNode, ParserError> {
2678 self.skip_whitespace();
2679 if self.position >= self.tokens.len() {
2680 return Err(ParserError {
2681 message: "Unexpected end of tokens".to_string(),
2682 position: Some(self.position),
2683 });
2684 }
2685
2686 let token = &self.tokens[self.position];
2687 match token.token_type {
2688 TokenType::Operand => {
2689 let span = self.tokens[self.position];
2690 self.position += 1;
2691 self.parse_operand(span)
2692 }
2693 TokenType::Func => {
2694 let span = self.tokens[self.position];
2695 self.position += 1;
2696 self.parse_function(span)
2697 }
2698 TokenType::Paren if token.subtype == TokenSubType::Open => {
2699 self.position += 1;
2700 let expr = self.parse_expression()?;
2701 self.skip_whitespace();
2702 if self.position >= self.tokens.len()
2703 || self.tokens[self.position].token_type != TokenType::Paren
2704 || self.tokens[self.position].subtype != TokenSubType::Close
2705 {
2706 return Err(ParserError {
2707 message: "Expected closing parenthesis".to_string(),
2708 position: Some(self.position),
2709 });
2710 }
2711 self.position += 1;
2712 Ok(expr)
2713 }
2714 TokenType::Array if token.subtype == TokenSubType::Open => {
2715 self.position += 1;
2716 self.parse_array()
2717 }
2718 _ => Err(ParserError {
2719 message: format!("Unexpected token: {token:?}"),
2720 position: Some(self.position),
2721 }),
2722 }
2723 }
2724
2725 fn parse_operand(&mut self, span: crate::tokenizer::TokenSpan) -> Result<ASTNode, ParserError> {
2726 let value = self.span_value(&span);
2727 let token = self.span_to_token(&span);
2728
2729 match span.subtype {
2730 TokenSubType::Number => {
2731 let value = value.parse::<f64>().map_err(|_| ParserError {
2732 message: format!("Invalid number: {value}"),
2733 position: Some(self.position),
2734 })?;
2735 Ok(ASTNode::new(
2736 ASTNodeType::Literal(LiteralValue::Number(value)),
2737 Some(token),
2738 ))
2739 }
2740 TokenSubType::Text => {
2741 let mut text = value.to_string();
2742 if text.starts_with('"') && text.ends_with('"') && text.len() >= 2 {
2743 text = text[1..text.len() - 1].to_string();
2744 text = text.replace("\"\"", "\"");
2745 }
2746 Ok(ASTNode::new(
2747 ASTNodeType::Literal(LiteralValue::Text(text)),
2748 Some(token),
2749 ))
2750 }
2751 TokenSubType::Logical => {
2752 let v = value.to_uppercase() == "TRUE";
2753 Ok(ASTNode::new(
2754 ASTNodeType::Literal(LiteralValue::Boolean(v)),
2755 Some(token),
2756 ))
2757 }
2758 TokenSubType::Error => {
2759 let error = ExcelError::from_error_string(value);
2760 Ok(ASTNode::new(
2761 ASTNodeType::Literal(LiteralValue::Error(error)),
2762 Some(token),
2763 ))
2764 }
2765 TokenSubType::Range => {
2766 let reference = ReferenceType::from_string_with_dialect(value, self.dialect)
2767 .map_err(|e| ParserError {
2768 message: format!("Invalid reference '{value}': {e}"),
2769 position: Some(self.position),
2770 })?;
2771 Ok(ASTNode::new(
2772 ASTNodeType::Reference {
2773 original: value.to_string(),
2774 reference,
2775 },
2776 Some(token),
2777 ))
2778 }
2779 _ => Err(ParserError {
2780 message: format!("Unexpected operand subtype: {:?}", span.subtype),
2781 position: Some(self.position),
2782 }),
2783 }
2784 }
2785
2786 fn parse_function(
2787 &mut self,
2788 func_span: crate::tokenizer::TokenSpan,
2789 ) -> Result<ASTNode, ParserError> {
2790 let func_value = self.span_value(&func_span);
2791 if func_value.is_empty() {
2792 return Err(ParserError {
2793 message: "Invalid function token".to_string(),
2794 position: Some(self.position),
2795 });
2796 }
2797 let name = func_value[..func_value.len() - 1].to_string();
2798 let args = self.parse_function_arguments()?;
2799
2800 let this_is_volatile = self
2801 .volatility_classifier
2802 .as_ref()
2803 .map(|f| f(name.as_str()))
2804 .unwrap_or(false);
2805 let args_volatile = args.iter().any(|a| a.contains_volatile);
2806
2807 let func_token = self.span_to_token(&func_span);
2808 Ok(ASTNode::new_with_volatile(
2809 ASTNodeType::Function { name, args },
2810 Some(func_token),
2811 this_is_volatile || args_volatile,
2812 ))
2813 }
2814
2815 fn parse_function_arguments(&mut self) -> Result<Vec<ASTNode>, ParserError> {
2816 let mut args = Vec::new();
2817
2818 self.skip_whitespace();
2819 if self.position < self.tokens.len()
2820 && self.tokens[self.position].token_type == TokenType::Func
2821 && self.tokens[self.position].subtype == TokenSubType::Close
2822 {
2823 self.position += 1;
2824 return Ok(args);
2825 }
2826
2827 self.skip_whitespace();
2828 if self.position < self.tokens.len()
2829 && self.tokens[self.position].token_type == TokenType::Sep
2830 && self.tokens[self.position].subtype == TokenSubType::Arg
2831 {
2832 args.push(ASTNode::new(
2833 ASTNodeType::Literal(LiteralValue::Text("".to_string())),
2834 None,
2835 ));
2836 self.position += 1;
2837 } else {
2838 args.push(self.parse_expression()?);
2839 }
2840
2841 while self.position < self.tokens.len() {
2842 self.skip_whitespace();
2843 if self.position >= self.tokens.len() {
2844 break;
2845 }
2846
2847 let token = &self.tokens[self.position];
2848 if token.token_type == TokenType::Sep && token.subtype == TokenSubType::Arg {
2849 self.position += 1;
2850 self.skip_whitespace();
2851 if self.position < self.tokens.len() {
2852 let next_token = &self.tokens[self.position];
2853 if next_token.token_type == TokenType::Sep
2854 && next_token.subtype == TokenSubType::Arg
2855 {
2856 args.push(ASTNode::new(
2857 ASTNodeType::Literal(LiteralValue::Text("".to_string())),
2858 None,
2859 ));
2860 } else if next_token.token_type == TokenType::Func
2861 && next_token.subtype == TokenSubType::Close
2862 {
2863 args.push(ASTNode::new(
2864 ASTNodeType::Literal(LiteralValue::Text("".to_string())),
2865 None,
2866 ));
2867 self.position += 1;
2868 break;
2869 } else {
2870 args.push(self.parse_expression()?);
2871 }
2872 } else {
2873 args.push(ASTNode::new(
2874 ASTNodeType::Literal(LiteralValue::Text("".to_string())),
2875 None,
2876 ));
2877 }
2878 } else if token.token_type == TokenType::Func && token.subtype == TokenSubType::Close {
2879 self.position += 1;
2880 break;
2881 } else {
2882 return Err(ParserError {
2883 message: format!("Expected ',' or ')' in function arguments, got {token:?}"),
2884 position: Some(self.position),
2885 });
2886 }
2887 }
2888
2889 Ok(args)
2890 }
2891
2892 fn parse_array(&mut self) -> Result<ASTNode, ParserError> {
2893 let mut rows = Vec::new();
2894 let mut current_row = Vec::new();
2895
2896 self.skip_whitespace();
2897 if self.position < self.tokens.len()
2898 && self.tokens[self.position].token_type == TokenType::Array
2899 && self.tokens[self.position].subtype == TokenSubType::Close
2900 {
2901 self.position += 1;
2902 return Ok(ASTNode::new(ASTNodeType::Array(rows), None));
2903 }
2904
2905 current_row.push(self.parse_expression()?);
2906
2907 while self.position < self.tokens.len() {
2908 self.skip_whitespace();
2909 if self.position >= self.tokens.len() {
2910 break;
2911 }
2912 let token = &self.tokens[self.position];
2913
2914 if token.token_type == TokenType::Sep {
2915 if token.subtype == TokenSubType::Arg {
2916 self.position += 1;
2917 current_row.push(self.parse_expression()?);
2918 } else if token.subtype == TokenSubType::Row {
2919 self.position += 1;
2920 rows.push(current_row);
2921 current_row = vec![self.parse_expression()?];
2922 }
2923 } else if token.token_type == TokenType::Array && token.subtype == TokenSubType::Close {
2924 self.position += 1;
2925 rows.push(current_row);
2926 break;
2927 } else {
2928 return Err(ParserError {
2929 message: format!("Unexpected token in array: {token:?}"),
2930 position: Some(self.position),
2931 });
2932 }
2933 }
2934
2935 let contains_volatile = rows
2936 .iter()
2937 .flat_map(|r| r.iter())
2938 .any(|n| n.contains_volatile);
2939
2940 Ok(ASTNode::new_with_volatile(
2941 ASTNodeType::Array(rows),
2942 None,
2943 contains_volatile,
2944 ))
2945 }
2946}
2947
2948pub fn normalise_reference(reference: &str) -> Result<String, ParsingError> {
2950 let ref_type = ReferenceType::from_string(reference)?;
2951 Ok(ref_type.to_string())
2952}
2953
2954pub fn parse<T: AsRef<str>>(formula: T) -> Result<ASTNode, ParserError> {
2955 parse_with_dialect(formula, FormulaDialect::Excel)
2956}
2957
2958pub fn parse_with_dialect<T: AsRef<str>>(
2959 formula: T,
2960 dialect: FormulaDialect,
2961) -> Result<ASTNode, ParserError> {
2962 let spans = crate::tokenizer::tokenize_spans_with_dialect(formula.as_ref(), dialect)?;
2963 let mut parser = SpanParser::new(formula.as_ref(), &spans, dialect);
2964 parser.parse()
2965}
2966
2967pub fn parse_with_volatility_classifier<T, F>(
2970 formula: T,
2971 classifier: F,
2972) -> Result<ASTNode, ParserError>
2973where
2974 T: AsRef<str>,
2975 F: Fn(&str) -> bool + Send + Sync + 'static,
2976{
2977 parse_with_dialect_and_volatility_classifier(formula, FormulaDialect::Excel, classifier)
2978}
2979
2980pub fn parse_with_dialect_and_volatility_classifier<T, F>(
2981 formula: T,
2982 dialect: FormulaDialect,
2983 classifier: F,
2984) -> Result<ASTNode, ParserError>
2985where
2986 T: AsRef<str>,
2987 F: Fn(&str) -> bool + Send + Sync + 'static,
2988{
2989 let spans = crate::tokenizer::tokenize_spans_with_dialect(formula.as_ref(), dialect)?;
2990 let mut parser =
2991 SpanParser::new(formula.as_ref(), &spans, dialect).with_volatility_classifier(classifier);
2992 parser.parse()
2993}
2994
2995pub struct BatchParser {
3000 include_whitespace: bool,
3001 volatility_classifier: Option<VolatilityClassifierArc>,
3002 token_cache: std::collections::HashMap<String, Arc<[crate::tokenizer::TokenSpan]>>, dialect: FormulaDialect,
3004}
3005
3006impl BatchParser {
3007 pub fn builder() -> BatchParserBuilder {
3008 BatchParserBuilder::default()
3009 }
3010
3011 pub fn parse(&mut self, formula: &str) -> Result<ASTNode, ParserError> {
3013 let spans = if let Some(tokens) = self.token_cache.get(formula) {
3014 Arc::clone(tokens)
3015 } else {
3016 let mut spans = crate::tokenizer::tokenize_spans_with_dialect(formula, self.dialect)?;
3017 if !self.include_whitespace {
3018 spans.retain(|t| t.token_type != TokenType::Whitespace);
3019 }
3020
3021 let spans: Arc<[crate::tokenizer::TokenSpan]> = Arc::from(spans.into_boxed_slice());
3022 self.token_cache
3023 .insert(formula.to_string(), Arc::clone(&spans));
3024 spans
3025 };
3026
3027 let mut parser = SpanParser::new(formula, spans.as_ref(), self.dialect);
3028 if let Some(classifier) = self.volatility_classifier.clone() {
3029 parser = parser.with_volatility_classifier(move |name| classifier(name));
3030 }
3031 parser.parse()
3032 }
3033}
3034
3035#[derive(Default)]
3036pub struct BatchParserBuilder {
3037 include_whitespace: bool,
3038 volatility_classifier: Option<VolatilityClassifierArc>,
3039 dialect: FormulaDialect,
3040}
3041
3042impl BatchParserBuilder {
3043 pub fn include_whitespace(mut self, include: bool) -> Self {
3044 self.include_whitespace = include;
3045 self
3046 }
3047
3048 pub fn with_volatility_classifier<F>(mut self, f: F) -> Self
3049 where
3050 F: Fn(&str) -> bool + Send + Sync + 'static,
3051 {
3052 self.volatility_classifier = Some(Arc::new(f));
3053 self
3054 }
3055
3056 pub fn dialect(mut self, dialect: FormulaDialect) -> Self {
3057 self.dialect = dialect;
3058 self
3059 }
3060
3061 pub fn build(self) -> BatchParser {
3062 BatchParser {
3063 include_whitespace: self.include_whitespace,
3064 volatility_classifier: self.volatility_classifier,
3065 token_cache: std::collections::HashMap::new(),
3066 dialect: self.dialect,
3067 }
3068 }
3069}