1use crate::tokenizer::{Associativity, Token, TokenSubType, TokenType, Tokenizer, TokenizerError};
2use crate::types::{FormulaDialect, ParsingError};
3use crate::{ExcelError, LiteralValue};
4
5#[cfg(feature = "serde")]
6use serde::{Deserialize, Serialize};
7
8use crate::hasher::FormulaHasher;
9use formualizer_common::coord::{
10 col_index_from_letters_1based, col_letters_from_1based, parse_a1_1based,
11};
12use formualizer_common::{
13 AxisBound, RelativeCoord, SheetCellRef, SheetLocator, SheetRangeRef, SheetRef,
14};
15use once_cell::sync::Lazy;
16use smallvec::SmallVec;
17use std::error::Error;
18use std::fmt::{self, Display};
19use std::hash::{Hash, Hasher};
20use std::str::FromStr;
21use std::sync::Arc;
22
/// Thread-safe (`Send + Sync + 'static`) predicate taking a `&str` and returning `bool`.
/// NOTE(review): presumably the argument is a function name and the result says
/// whether that function is volatile — confirm at the use sites.
type VolatilityFn = dyn Fn(&str) -> bool + Send + Sync + 'static;
/// Boxed form of [`VolatilityFn`].
type VolatilityClassifierBox = Box<VolatilityFn>;
/// Reference-counted form of [`VolatilityFn`].
type VolatilityClassifierArc = Arc<VolatilityFn>;
26
/// Error produced by the parser: a message plus an optional position.
#[derive(Debug)]
pub struct ParserError {
    /// Human-readable description of what went wrong.
    pub message: String,
    /// Position associated with the failure, when one is known.
    pub position: Option<usize>,
}

impl Display for ParserError {
    /// Writes `ParserError at position N: <message>` when a position is
    /// present, otherwise `ParserError: <message>`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self.position {
            Some(pos) => write!(f, "ParserError at position {}: {}", pos, self.message),
            None => write!(f, "ParserError: {}", self.message),
        }
    }
}

impl Error for ParserError {}
45
46static COLUMN_LOOKUP: Lazy<Vec<String>> = Lazy::new(|| {
48 let mut cols = Vec::with_capacity(702);
49 for c in b'A'..=b'Z' {
51 cols.push(String::from(c as char));
52 }
53 for c1 in b'A'..=b'Z' {
55 for c2 in b'A'..=b'Z' {
56 cols.push(format!("{}{}", c1 as char, c2 as char));
57 }
58 }
59 cols
60});
61
/// The specifier of a structured table reference (the part written inside
/// `Table[...]`).
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, PartialEq, Hash)]
pub enum TableSpecifier {
    /// Displayed as `#All`.
    All,
    /// Displayed as `#Data`.
    Data,
    /// Displayed as `#Headers`.
    Headers,
    /// Displayed as `#Totals`.
    Totals,
    /// A row selector; see [`TableRowSpecifier`].
    Row(TableRowSpecifier),
    /// A single column, identified by name.
    Column(String),
    /// A span of columns, displayed as `start:end`.
    ColumnRange(String, String),
    /// A special item such as `#Headers`; see [`SpecialItem`].
    SpecialItem(SpecialItem),
    /// Several specifiers together; displayed as comma-separated `[...]` parts.
    Combination(Vec<Box<TableSpecifier>>),
}
85
/// Row selector used inside a table specifier.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, PartialEq, Hash)]
pub enum TableRowSpecifier {
    /// Displayed as `@`.
    Current,
    /// Displayed as `#All`.
    All,
    /// Displayed as `#Data`.
    Data,
    /// Displayed as `#Headers`.
    Headers,
    /// Displayed as `#Totals`.
    Totals,
    /// A row index, displayed as the bare number.
    Index(u32),
}
103
/// Special items that can appear in a structured table reference.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, PartialEq, Hash)]
pub enum SpecialItem {
    /// Displayed as `#Headers`.
    Headers,
    /// Displayed as `#Data`.
    Data,
    /// Displayed as `#Totals`.
    Totals,
    /// Displayed as `#All`.
    All,
    /// Displayed as `@`.
    ThisRow,
}
119
/// A structured reference to a table, e.g. `Table1[Column]`.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, PartialEq, Hash)]
pub struct TableReference {
    /// Table name (the text before the first `[`).
    pub name: String,
    /// Specifier inside the brackets; when `None`, only the table name is displayed.
    pub specifier: Option<TableSpecifier>,
}
129
/// Identifies the workbook part of an external reference.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, PartialEq, Hash)]
pub enum ExternalBookRef {
    /// The book token exactly as it appeared in the reference text.
    Token(String),
}

impl ExternalBookRef {
    /// Borrows the stored book token.
    pub fn token(&self) -> &str {
        // Single-variant enum, so the pattern is irrefutable.
        let ExternalBookRef::Token(text) = self;
        text.as_str()
    }
}
143
/// Coordinates of an external reference: either one cell or a range.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ExternalRefKind {
    /// A single cell; the `*_abs` flags record `$` anchoring per axis.
    Cell {
        row: u32,
        col: u32,
        row_abs: bool,
        col_abs: bool,
    },
    /// A range; each bound is optional and has its own anchoring flag.
    Range {
        start_row: Option<u32>,
        start_col: Option<u32>,
        end_row: Option<u32>,
        end_col: Option<u32>,
        start_row_abs: bool,
        start_col_abs: bool,
        end_row_abs: bool,
        end_col_abs: bool,
    },
}

impl ExternalRefKind {
    /// Builds a cell with both anchoring flags cleared.
    pub fn cell(row: u32, col: u32) -> Self {
        Self::cell_with_abs(row, col, false, false)
    }

    /// Builds a cell with explicit anchoring flags.
    pub fn cell_with_abs(row: u32, col: u32, row_abs: bool, col_abs: bool) -> Self {
        ExternalRefKind::Cell { row, col, row_abs, col_abs }
    }

    /// Builds a range with all four anchoring flags cleared.
    pub fn range(
        start_row: Option<u32>,
        start_col: Option<u32>,
        end_row: Option<u32>,
        end_col: Option<u32>,
    ) -> Self {
        Self::range_with_abs(
            start_row, start_col, end_row, end_col, false, false, false, false,
        )
    }

    /// Builds a range with explicit anchoring flags for every bound.
    pub fn range_with_abs(
        start_row: Option<u32>,
        start_col: Option<u32>,
        end_row: Option<u32>,
        end_col: Option<u32>,
        start_row_abs: bool,
        start_col_abs: bool,
        end_row_abs: bool,
        end_col_abs: bool,
    ) -> Self {
        ExternalRefKind::Range {
            start_row,
            start_col,
            end_row,
            end_col,
            start_row_abs,
            start_col_abs,
            end_row_abs,
            end_col_abs,
        }
    }
}
224
/// A reference whose sheet qualifier carries a `[...]` book token.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, PartialEq, Hash)]
pub struct ExternalReference {
    /// The reference text as it was parsed; this is what gets displayed.
    pub raw: String,
    /// The book token (everything up to and including the closing `]`).
    pub book: ExternalBookRef,
    /// The sheet name that follows the book token.
    pub sheet: String,
    /// Cell or range coordinates.
    pub kind: ExternalRefKind,
}
233
/// A parsed reference: cell, range, external reference, table reference or
/// named range.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, PartialEq, Hash)]
pub enum ReferenceType {
    /// A single cell. `row` and `col` are 1-based; the `*_abs` flags record
    /// `$` anchoring.
    Cell {
        sheet: Option<String>,
        row: u32,
        col: u32,
        row_abs: bool,
        col_abs: bool,
    },
    /// A range. A `None` bound means that axis was not written for that
    /// endpoint (e.g. column-only or row-only endpoints).
    Range {
        sheet: Option<String>,
        start_row: Option<u32>,
        start_col: Option<u32>,
        end_row: Option<u32>,
        end_col: Option<u32>,
        start_row_abs: bool,
        start_col_abs: bool,
        end_row_abs: bool,
        end_col_abs: bool,
    },
    /// A reference qualified with a `[...]` book token.
    External(ExternalReference),
    /// A structured table reference.
    Table(TableReference),
    /// Any text that did not parse as one of the above.
    NamedRange(String),
}
260
261impl Display for TableSpecifier {
262 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
263 match self {
264 TableSpecifier::All => write!(f, "#All"),
265 TableSpecifier::Data => write!(f, "#Data"),
266 TableSpecifier::Headers => write!(f, "#Headers"),
267 TableSpecifier::Totals => write!(f, "#Totals"),
268 TableSpecifier::Row(row) => write!(f, "{row}"),
269 TableSpecifier::Column(column) => write!(f, "{column}"),
270 TableSpecifier::ColumnRange(start, end) => write!(f, "{start}:{end}"),
271 TableSpecifier::SpecialItem(item) => write!(f, "{item}"),
272 TableSpecifier::Combination(specs) => {
273 let parts: Vec<String> = specs.iter().map(|s| format!("[{s}]")).collect();
276 write!(f, "{}", parts.join(","))
277 }
278 }
279 }
280}
281
282impl Display for TableRowSpecifier {
283 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
284 match self {
285 TableRowSpecifier::Current => write!(f, "@"),
286 TableRowSpecifier::All => write!(f, "#All"),
287 TableRowSpecifier::Data => write!(f, "#Data"),
288 TableRowSpecifier::Headers => write!(f, "#Headers"),
289 TableRowSpecifier::Totals => write!(f, "#Totals"),
290 TableRowSpecifier::Index(idx) => write!(f, "{idx}"),
291 }
292 }
293}
294
295impl Display for SpecialItem {
296 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
297 match self {
298 SpecialItem::Headers => write!(f, "#Headers"),
299 SpecialItem::Data => write!(f, "#Data"),
300 SpecialItem::Totals => write!(f, "#Totals"),
301 SpecialItem::All => write!(f, "#All"),
302 SpecialItem::ThisRow => write!(f, "@"),
303 }
304 }
305}
306
/// Reports whether a sheet name must be wrapped in single quotes when written
/// in front of `!`.
///
/// Quoting is required when the name starts with an ASCII digit, contains a
/// space or one of the listed ASCII punctuation bytes, or equals (ignoring
/// case) one of the reserved words. An empty name returns `false`.
fn sheet_name_needs_quoting(name: &str) -> bool {
    // ASCII bytes that force quoting wherever they appear in the name.
    const SPECIAL: &[u8] = b" !\"#$%&'()*+,-./:;<=>?@[\\]^`{|}~";
    const RESERVED: [&str; 9] = [
        "TRUE", "FALSE", "NULL", "REF", "DIV", "NAME", "NUM", "VALUE", "N/A",
    ];

    let Some(first) = name.as_bytes().first() else {
        return false;
    };
    if first.is_ascii_digit() {
        return true;
    }
    if name.bytes().any(|byte| SPECIAL.contains(&byte)) {
        return true;
    }

    let upper = name.to_uppercase();
    RESERVED.contains(&upper.as_str())
}
338
/// One side of an OpenFormula reference (the text before or after `:`),
/// split into its optional sheet and its coordinate text.
#[derive(Debug, Clone)]
struct OpenFormulaRefPart {
    /// Sheet name with surrounding quotes and leading `$` removed, if one was given.
    sheet: Option<String>,
    /// Coordinate text with leading dots and surrounding whitespace removed.
    coord: String,
}
344
345impl ReferenceType {
346 pub fn cell(sheet: Option<String>, row: u32, col: u32) -> Self {
348 Self::Cell {
349 sheet,
350 row,
351 col,
352 row_abs: false,
353 col_abs: false,
354 }
355 }
356
357 pub fn cell_with_abs(
359 sheet: Option<String>,
360 row: u32,
361 col: u32,
362 row_abs: bool,
363 col_abs: bool,
364 ) -> Self {
365 Self::Cell {
366 sheet,
367 row,
368 col,
369 row_abs,
370 col_abs,
371 }
372 }
373
374 pub fn range(
376 sheet: Option<String>,
377 start_row: Option<u32>,
378 start_col: Option<u32>,
379 end_row: Option<u32>,
380 end_col: Option<u32>,
381 ) -> Self {
382 Self::Range {
383 sheet,
384 start_row,
385 start_col,
386 end_row,
387 end_col,
388 start_row_abs: false,
389 start_col_abs: false,
390 end_row_abs: false,
391 end_col_abs: false,
392 }
393 }
394
395 pub fn range_with_abs(
397 sheet: Option<String>,
398 start_row: Option<u32>,
399 start_col: Option<u32>,
400 end_row: Option<u32>,
401 end_col: Option<u32>,
402 start_row_abs: bool,
403 start_col_abs: bool,
404 end_row_abs: bool,
405 end_col_abs: bool,
406 ) -> Self {
407 Self::Range {
408 sheet,
409 start_row,
410 start_col,
411 end_row,
412 end_col,
413 start_row_abs,
414 start_col_abs,
415 end_row_abs,
416 end_col_abs,
417 }
418 }
419
420 pub fn from_string(reference: &str) -> Result<Self, ParsingError> {
422 Self::parse_excel_reference(reference)
423 }
424
425 pub fn from_string_with_dialect(
427 reference: &str,
428 dialect: FormulaDialect,
429 ) -> Result<Self, ParsingError> {
430 match dialect {
431 FormulaDialect::Excel => Self::parse_excel_reference(reference),
432 FormulaDialect::OpenFormula => Self::parse_openformula_reference(reference)
433 .or_else(|_| Self::parse_excel_reference(reference)),
434 }
435 }
436
437 pub fn parse_sheet_ref(reference: &str) -> Result<SheetRef<'static>, ParsingError> {
441 Self::parse_sheet_ref_with_dialect(reference, FormulaDialect::Excel)
442 }
443
444 pub fn parse_sheet_ref_with_dialect(
446 reference: &str,
447 dialect: FormulaDialect,
448 ) -> Result<SheetRef<'static>, ParsingError> {
449 match dialect {
450 FormulaDialect::Excel => Self::parse_excel_sheet_ref(reference),
451 FormulaDialect::OpenFormula => Self::parse_openformula_sheet_ref(reference)
452 .or_else(|_| Self::parse_excel_sheet_ref(reference)),
453 }
454 }
455
456 pub fn to_sheet_ref_lossy(&self) -> Option<SheetRef<'_>> {
459 match self {
460 ReferenceType::Cell {
461 sheet,
462 row,
463 col,
464 row_abs,
465 col_abs,
466 } => {
467 let row0 = row.checked_sub(1)?;
468 let col0 = col.checked_sub(1)?;
469 let sheet_loc = match sheet.as_deref() {
470 Some(name) => SheetLocator::from_name(name),
471 None => SheetLocator::Current,
472 };
473 let coord = RelativeCoord::new(row0, col0, *row_abs, *col_abs);
474 Some(SheetRef::Cell(SheetCellRef::new(sheet_loc, coord)))
475 }
476 ReferenceType::Range {
477 sheet,
478 start_row,
479 start_col,
480 end_row,
481 end_col,
482 start_row_abs,
483 start_col_abs,
484 end_row_abs,
485 end_col_abs,
486 } => {
487 let sheet_loc = match sheet.as_deref() {
488 Some(name) => SheetLocator::from_name(name),
489 None => SheetLocator::Current,
490 };
491 let sr = start_row
492 .and_then(|v| v.checked_sub(1).map(|i| AxisBound::new(i, *start_row_abs)));
493 if start_row.is_some() && sr.is_none() {
494 return None;
495 }
496 let sc = start_col
497 .and_then(|v| v.checked_sub(1).map(|i| AxisBound::new(i, *start_col_abs)));
498 if start_col.is_some() && sc.is_none() {
499 return None;
500 }
501 let er =
502 end_row.and_then(|v| v.checked_sub(1).map(|i| AxisBound::new(i, *end_row_abs)));
503 if end_row.is_some() && er.is_none() {
504 return None;
505 }
506 let ec =
507 end_col.and_then(|v| v.checked_sub(1).map(|i| AxisBound::new(i, *end_col_abs)));
508 if end_col.is_some() && ec.is_none() {
509 return None;
510 }
511 let range = SheetRangeRef::from_parts(sheet_loc, sr, sc, er, ec).ok()?;
512 Some(SheetRef::Range(range))
513 }
514 _ => None,
515 }
516 }
517
518 fn parse_excel_sheet_ref(reference: &str) -> Result<SheetRef<'static>, ParsingError> {
519 let (sheet, ref_part) = Self::extract_sheet_name(reference);
520
521 if ref_part.contains('[') {
522 return Err(ParsingError::InvalidReference(
523 "Table references are not supported for SheetRef".to_string(),
524 ));
525 }
526
527 let sheet_loc: SheetLocator<'static> = match sheet {
528 Some(name) => SheetLocator::from_name(name),
529 None => SheetLocator::Current,
530 };
531
532 if ref_part.contains(':') {
533 let mut parts = ref_part.splitn(2, ':');
534 let start = parts.next().unwrap();
535 let end = parts.next().ok_or_else(|| {
536 ParsingError::InvalidReference(format!("Invalid range: {ref_part}"))
537 })?;
538
539 let (start_col, start_row) = Self::parse_range_part_with_abs(start)?;
540 let (end_col, end_row) = Self::parse_range_part_with_abs(end)?;
541
542 let start_col = Self::axis_bound_from_1based(start_col)?;
543 let start_row = Self::axis_bound_from_1based(start_row)?;
544 let end_col = Self::axis_bound_from_1based(end_col)?;
545 let end_row = Self::axis_bound_from_1based(end_row)?;
546
547 let range =
548 SheetRangeRef::from_parts(sheet_loc, start_row, start_col, end_row, end_col)
549 .map_err(|err| ParsingError::InvalidReference(err.to_string()))?;
550 Ok(SheetRef::Range(range))
551 } else {
552 let (row, col, row_abs, col_abs) = parse_a1_1based(&ref_part)
553 .map_err(|err| ParsingError::InvalidReference(err.to_string()))?;
554 let coord = RelativeCoord::new(row - 1, col - 1, row_abs, col_abs);
555 Ok(SheetRef::Cell(SheetCellRef::new(sheet_loc, coord)))
556 }
557 }
558
559 fn parse_openformula_sheet_ref(reference: &str) -> Result<SheetRef<'static>, ParsingError> {
560 Self::parse_excel_sheet_ref(reference)
561 }
562
563 fn axis_bound_from_1based(
564 bound: Option<(u32, bool)>,
565 ) -> Result<Option<AxisBound>, ParsingError> {
566 match bound {
567 Some((index, abs)) => AxisBound::from_excel_1based(index, abs)
568 .map(Some)
569 .map_err(|err| ParsingError::InvalidReference(err.to_string())),
570 None => Ok(None),
571 }
572 }
573
574 fn parse_range_part_with_abs(
575 part: &str,
576 ) -> Result<(Option<(u32, bool)>, Option<(u32, bool)>), ParsingError> {
577 if let Ok((row, col, row_abs, col_abs)) = parse_a1_1based(part) {
578 return Ok((Some((col, col_abs)), Some((row, row_abs))));
579 }
580
581 let bytes = part.as_bytes();
582 let len = bytes.len();
583 let mut i = 0usize;
584
585 let mut col_abs = false;
586 let mut row_abs = false;
587
588 if i < len && bytes[i] == b'$' {
589 col_abs = true;
590 i += 1;
591 }
592
593 let col_start = i;
594 while i < len && bytes[i].is_ascii_alphabetic() {
595 i += 1;
596 }
597
598 if i > col_start {
599 let col_str = &part[col_start..i];
600 let col1 = Self::column_to_number(col_str)?;
601
602 if i == len {
603 return Ok((Some((col1, col_abs)), None));
604 }
605
606 if i < len && bytes[i] == b'$' {
607 row_abs = true;
608 i += 1;
609 }
610
611 if i >= len {
612 return Err(ParsingError::InvalidReference(format!(
613 "Invalid range part: {part}"
614 )));
615 }
616
617 let row_start = i;
618 while i < len && bytes[i].is_ascii_digit() {
619 i += 1;
620 }
621
622 if row_start == i || i != len {
623 return Err(ParsingError::InvalidReference(format!(
624 "Invalid range part: {part}"
625 )));
626 }
627
628 let row_str = &part[row_start..i];
629 let row1 = row_str
630 .parse::<u32>()
631 .map_err(|_| ParsingError::InvalidReference(format!("Invalid row: {row_str}")))?;
632 if row1 == 0 {
633 return Err(ParsingError::InvalidReference(format!(
634 "Invalid range part: {part}"
635 )));
636 }
637
638 return Ok((Some((col1, col_abs)), Some((row1, row_abs))));
639 }
640
641 i = 0;
642 if i < len && bytes[i] == b'$' {
643 row_abs = true;
644 i += 1;
645 }
646
647 let row_start = i;
648 while i < len && bytes[i].is_ascii_digit() {
649 i += 1;
650 }
651
652 if row_start == i || i != len {
653 return Err(ParsingError::InvalidReference(format!(
654 "Invalid range part: {part}"
655 )));
656 }
657
658 let row_str = &part[row_start..i];
659 let row1 = row_str
660 .parse::<u32>()
661 .map_err(|_| ParsingError::InvalidReference(format!("Invalid row: {row_str}")))?;
662 if row1 == 0 {
663 return Err(ParsingError::InvalidReference(format!(
664 "Invalid range part: {part}"
665 )));
666 }
667
668 Ok((None, Some((row1, row_abs))))
669 }
670
671 fn parse_excel_reference(reference: &str) -> Result<Self, ParsingError> {
672 let (sheet, ref_part) = Self::extract_sheet_name(reference);
674
675 if ref_part.contains('[') {
678 return Self::parse_table_reference(&ref_part);
679 }
680
681 let external_sheet = sheet.as_deref().and_then(|s| {
682 let lb = s.rfind('[')?;
686 let rb_rel = s[lb..].find(']')?;
687 let rb = lb + rb_rel;
688 if lb >= rb {
689 return None;
690 }
691
692 let token = &s[..=rb];
693 let sheet_name = &s[rb + 1..];
694 if sheet_name.is_empty() {
695 None
696 } else {
697 Some((token, sheet_name))
698 }
699 });
700
701 if ref_part.contains(':') {
702 let mut parts = ref_part.splitn(2, ':');
704 let start = parts.next().unwrap();
705 let end = parts.next().ok_or_else(|| {
706 ParsingError::InvalidReference(format!("Invalid range: {ref_part}"))
707 })?;
708 let (start_col, start_row) = Self::parse_range_part_with_abs(start)?;
709 let (end_col, end_row) = Self::parse_range_part_with_abs(end)?;
710
711 let split = |bound: Option<(u32, bool)>| match bound {
712 Some((index, abs)) => (Some(index), abs),
713 None => (None, false),
714 };
715 let (start_col, start_col_abs) = split(start_col);
716 let (start_row, start_row_abs) = split(start_row);
717 let (end_col, end_col_abs) = split(end_col);
718 let (end_row, end_row_abs) = split(end_row);
719
720 if let Some((book_token, sheet_name)) = external_sheet {
721 Ok(ReferenceType::External(ExternalReference {
722 raw: reference.to_string(),
723 book: ExternalBookRef::Token(book_token.to_string()),
724 sheet: sheet_name.to_string(),
725 kind: ExternalRefKind::Range {
726 start_row,
727 start_col,
728 end_row,
729 end_col,
730 start_row_abs,
731 start_col_abs,
732 end_row_abs,
733 end_col_abs,
734 },
735 }))
736 } else {
737 Ok(ReferenceType::Range {
738 sheet,
739 start_row,
740 start_col,
741 end_row,
742 end_col,
743 start_row_abs,
744 start_col_abs,
745 end_row_abs,
746 end_col_abs,
747 })
748 }
749 } else {
750 match Self::parse_cell_reference(&ref_part) {
752 Ok((col, row, col_abs, row_abs)) => {
753 if let Some((book_token, sheet_name)) = external_sheet {
754 Ok(ReferenceType::External(ExternalReference {
755 raw: reference.to_string(),
756 book: ExternalBookRef::Token(book_token.to_string()),
757 sheet: sheet_name.to_string(),
758 kind: ExternalRefKind::Cell {
759 row,
760 col,
761 row_abs,
762 col_abs,
763 },
764 }))
765 } else {
766 Ok(ReferenceType::Cell {
767 sheet,
768 row,
769 col,
770 row_abs,
771 col_abs,
772 })
773 }
774 }
775 Err(_) => {
776 Ok(ReferenceType::NamedRange(reference.to_string()))
778 }
779 }
780 }
781 }
782
783 fn parse_cell_reference(reference: &str) -> Result<(u32, u32, bool, bool), ParsingError> {
785 parse_a1_1based(reference)
786 .map(|(row, col, row_abs, col_abs)| (col, row, col_abs, row_abs))
787 .map_err(|_| {
788 ParsingError::InvalidReference(format!("Invalid cell reference: {reference}"))
789 })
790 }
791
792 pub(crate) fn column_to_number(column: &str) -> Result<u32, ParsingError> {
794 col_index_from_letters_1based(column)
795 .map_err(|_| ParsingError::InvalidReference(format!("Invalid column: {column}")))
796 }
797
798 pub(crate) fn number_to_column(num: u32) -> String {
800 if num == 0 {
801 return String::new();
802 }
803 if num > 0 && num <= 702 {
805 return COLUMN_LOOKUP[(num - 1) as usize].clone();
806 }
807
808 col_letters_from_1based(num).unwrap_or_default()
809 }
810
811 fn format_col(col: u32, abs: bool) -> String {
812 if abs {
813 format!("${}", Self::number_to_column(col))
814 } else {
815 Self::number_to_column(col)
816 }
817 }
818
819 fn format_row(row: u32, abs: bool) -> String {
820 if abs {
821 format!("${row}")
822 } else {
823 row.to_string()
824 }
825 }
826}
827
828impl Display for ReferenceType {
829 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
830 write!(
831 f,
832 "{}",
833 match self {
834 ReferenceType::Cell {
835 sheet,
836 row,
837 col,
838 row_abs,
839 col_abs,
840 } => {
841 let col_str = Self::format_col(*col, *col_abs);
842 let row_str = Self::format_row(*row, *row_abs);
843
844 if let Some(sheet_name) = sheet {
845 if sheet_name_needs_quoting(sheet_name) {
846 let escaped_name = sheet_name.replace('\'', "''");
848 format!("'{escaped_name}'!{col_str}{row_str}")
849 } else {
850 format!("{sheet_name}!{col_str}{row_str}")
851 }
852 } else {
853 format!("{col_str}{row_str}")
854 }
855 }
856 ReferenceType::Range {
857 sheet,
858 start_row,
859 start_col,
860 end_row,
861 end_col,
862 start_row_abs,
863 start_col_abs,
864 end_row_abs,
865 end_col_abs,
866 } => {
867 let start_ref = match (start_col, start_row) {
869 (Some(col), Some(row)) => format!(
870 "{}{}",
871 Self::format_col(*col, *start_col_abs),
872 Self::format_row(*row, *start_row_abs)
873 ),
874 (Some(col), None) => Self::format_col(*col, *start_col_abs),
875 (None, Some(row)) => Self::format_row(*row, *start_row_abs),
876 (None, None) => "".to_string(), };
878
879 let end_ref = match (end_col, end_row) {
881 (Some(col), Some(row)) => format!(
882 "{}{}",
883 Self::format_col(*col, *end_col_abs),
884 Self::format_row(*row, *end_row_abs)
885 ),
886 (Some(col), None) => Self::format_col(*col, *end_col_abs),
887 (None, Some(row)) => Self::format_row(*row, *end_row_abs),
888 (None, None) => "".to_string(), };
890
891 let range_part = format!("{start_ref}:{end_ref}");
892
893 if let Some(sheet_name) = sheet {
894 if sheet_name_needs_quoting(sheet_name) {
895 let escaped_name = sheet_name.replace('\'', "''");
897 format!("'{escaped_name}'!{range_part}")
898 } else {
899 format!("{sheet_name}!{range_part}")
900 }
901 } else {
902 range_part
903 }
904 }
905 ReferenceType::External(ext) => ext.raw.clone(),
906 ReferenceType::Table(table_ref) => {
907 if let Some(specifier) = &table_ref.specifier {
908 match specifier {
911 TableSpecifier::Column(column) => {
912 format!("{}[{}]", table_ref.name, column.trim())
913 }
914 TableSpecifier::ColumnRange(start, end) => {
915 format!("{}[{}:{}]", table_ref.name, start.trim(), end.trim())
916 }
917 _ => {
918 format!("{}[{}]", table_ref.name, specifier)
920 }
921 }
922 } else {
923 table_ref.name.clone()
924 }
925 }
926 ReferenceType::NamedRange(name) => name.clone(),
927 }
928 )
929 }
930}
931
932impl TryFrom<&str> for ReferenceType {
933 type Error = ParsingError;
934
935 fn try_from(value: &str) -> Result<Self, Self::Error> {
936 ReferenceType::from_string(value)
937 }
938}
939
940impl FromStr for ReferenceType {
941 type Err = ParsingError;
942
943 fn from_str(s: &str) -> Result<Self, Self::Err> {
944 ReferenceType::from_string(s)
945 }
946}
947
948impl ReferenceType {
949 pub fn normalise(&self) -> String {
951 format!("{self}")
952 }
953
954 fn extract_sheet_name(reference: &str) -> (Option<String>, String) {
956 let bytes = reference.as_bytes();
957 let mut i = 0;
958
959 if i < bytes.len() && bytes[i] == b'\'' {
963 i += 1;
964 let start = i;
965
966 while i < bytes.len() {
967 if bytes[i] == b'\'' {
968 if i + 1 < bytes.len() && bytes[i + 1] == b'\'' {
970 i += 2;
971 continue;
972 }
973
974 if i + 1 < bytes.len() && bytes[i + 1] == b'!' {
976 let raw = &reference[start..i];
977 let sheet = raw.replace("''", "'");
978 let ref_part = String::from(&reference[i + 2..]);
979 return (Some(sheet), ref_part);
980 }
981 }
982
983 i += 1;
984 }
985 }
986
987 i = 0;
989 while i < bytes.len() {
990 if bytes[i] == b'!' && i > 0 {
991 let sheet = String::from(&reference[0..i]);
992 let ref_part = String::from(&reference[i + 1..]);
993 return (Some(sheet), ref_part);
994 }
995 i += 1;
996 }
997
998 (None, reference.to_string())
999 }
1000
1001 fn parse_table_reference(reference: &str) -> Result<Self, ParsingError> {
1003 if let Some(bracket_pos) = reference.find('[') {
1005 let table_name = reference[..bracket_pos].trim();
1006 if table_name.is_empty() {
1007 return Err(ParsingError::InvalidReference(reference.to_string()));
1008 }
1009
1010 let specifier_str = &reference[bracket_pos..];
1011 let specifier = Self::parse_table_specifier(specifier_str)?;
1012
1013 Ok(ReferenceType::Table(TableReference {
1014 name: table_name.to_string(),
1015 specifier,
1016 }))
1017 } else {
1018 Err(ParsingError::InvalidReference(reference.to_string()))
1019 }
1020 }
1021
1022 fn parse_table_specifier(specifier_str: &str) -> Result<Option<TableSpecifier>, ParsingError> {
1024 if specifier_str.is_empty() || !specifier_str.starts_with('[') {
1025 return Ok(None);
1026 }
1027
1028 let mut depth = 0;
1030 let mut end_pos = 0;
1031
1032 for (i, c) in specifier_str.chars().enumerate() {
1033 if c == '[' {
1034 depth += 1;
1035 } else if c == ']' {
1036 depth -= 1;
1037 if depth == 0 {
1038 end_pos = i;
1039 break;
1040 }
1041 }
1042 }
1043
1044 if depth != 0 || end_pos == 0 {
1045 return Err(ParsingError::InvalidReference(format!(
1046 "Unbalanced brackets in table specifier: {specifier_str}"
1047 )));
1048 }
1049
1050 let content = &specifier_str[1..end_pos];
1052
1053 if content.is_empty() {
1055 return Ok(Some(TableSpecifier::All));
1057 }
1058
1059 if content.starts_with("#") {
1061 return Self::parse_special_item(content);
1062 }
1063
1064 if !content.contains('[') && !content.contains('#') {
1066 if let Some(colon_pos) = content.find(':') {
1068 let start = content[..colon_pos].trim();
1069 let end = content[colon_pos + 1..].trim();
1070 return Ok(Some(TableSpecifier::ColumnRange(
1071 start.to_string(),
1072 end.to_string(),
1073 )));
1074 } else {
1075 return Ok(Some(TableSpecifier::Column(content.trim().to_string())));
1077 }
1078 }
1079
1080 if content.contains('[') {
1082 return Self::parse_complex_table_specifier(content);
1083 }
1084
1085 Ok(Some(TableSpecifier::Column(content.trim().to_string())))
1087 }
1088
1089 fn parse_openformula_reference(reference: &str) -> Result<Self, ParsingError> {
1090 if reference.starts_with('[') && reference.ends_with(']') {
1091 let inner = &reference[1..reference.len() - 1];
1092 if inner.is_empty() {
1093 return Err(ParsingError::InvalidReference(
1094 "Empty OpenFormula reference".to_string(),
1095 ));
1096 }
1097
1098 let mut parts = inner.splitn(2, ':');
1099 let start_part_str = parts.next().unwrap();
1100 let end_part_str = parts.next();
1101
1102 let start_part = Self::parse_openformula_part(start_part_str)?;
1103 let end_part = if let Some(part) = end_part_str {
1104 Some(Self::parse_openformula_part(part)?)
1105 } else {
1106 None
1107 };
1108
1109 let sheet = match (&start_part.sheet, &end_part) {
1110 (Some(sheet), Some(end)) => {
1111 if let Some(end_sheet) = &end.sheet
1112 && end_sheet != sheet
1113 {
1114 return Err(ParsingError::InvalidReference(format!(
1115 "Mismatched sheets in reference: {sheet} vs {end_sheet}"
1116 )));
1117 }
1118 Some(sheet.clone())
1119 }
1120 (Some(sheet), None) => Some(sheet.clone()),
1121 (None, Some(end)) => end.sheet.clone(),
1122 (None, None) => None,
1123 };
1124
1125 let mut excel_like = String::new();
1126 if let Some(sheet_name) = sheet {
1127 if sheet_name_needs_quoting(&sheet_name) {
1128 let escaped = sheet_name.replace('\'', "''");
1129 excel_like.push('\'');
1130 excel_like.push_str(&escaped);
1131 excel_like.push('\'');
1132 } else {
1133 excel_like.push_str(&sheet_name);
1134 }
1135 excel_like.push('!');
1136 }
1137
1138 excel_like.push_str(&start_part.coord);
1139 if let Some(end) = end_part {
1140 excel_like.push(':');
1141 excel_like.push_str(&end.coord);
1142 }
1143
1144 return Self::parse_excel_reference(&excel_like);
1145 }
1146
1147 Err(ParsingError::InvalidReference(format!(
1148 "Unsupported OpenFormula reference: {reference}"
1149 )))
1150 }
1151
1152 fn parse_openformula_part(part: &str) -> Result<OpenFormulaRefPart, ParsingError> {
1153 let trimmed = part.trim();
1154 if trimmed.is_empty() {
1155 return Err(ParsingError::InvalidReference(
1156 "Empty component in OpenFormula reference".to_string(),
1157 ));
1158 }
1159
1160 if trimmed == "." {
1161 return Err(ParsingError::InvalidReference(
1162 "Incomplete OpenFormula reference component".to_string(),
1163 ));
1164 }
1165
1166 if trimmed.starts_with('[') {
1167 return Err(ParsingError::InvalidReference(format!(
1169 "Unexpected '[' in OpenFormula reference component: {trimmed}"
1170 )));
1171 }
1172
1173 let (sheet, coord_slice) = if let Some(stripped) = trimmed.strip_prefix('.') {
1174 (None, stripped.trim())
1175 } else if let Some(dot_idx) = Self::find_openformula_sheet_separator(trimmed) {
1176 let sheet_part = trimmed[..dot_idx].trim();
1177 let coord_part = trimmed[dot_idx + 1..].trim();
1178 if coord_part.is_empty() {
1179 return Err(ParsingError::InvalidReference(format!(
1180 "Missing coordinate in OpenFormula reference component: {trimmed}"
1181 )));
1182 }
1183 let sheet_name = Self::normalise_openformula_sheet(sheet_part)?;
1184 (Some(sheet_name), coord_part)
1185 } else {
1186 (None, trimmed)
1187 };
1188
1189 let coord = coord_slice.trim_start_matches('.').trim().to_string();
1190
1191 if coord.is_empty() {
1192 return Err(ParsingError::InvalidReference(format!(
1193 "Missing coordinate in OpenFormula reference component: {trimmed}"
1194 )));
1195 }
1196
1197 Ok(OpenFormulaRefPart { sheet, coord })
1198 }
1199
1200 fn normalise_openformula_sheet(sheet: &str) -> Result<String, ParsingError> {
1201 let without_abs = sheet.trim().trim_start_matches('$');
1202
1203 if without_abs.starts_with('\'') {
1204 if without_abs.len() < 2 || !without_abs.ends_with('\'') {
1205 return Err(ParsingError::InvalidReference(format!(
1206 "Unterminated sheet name in OpenFormula reference: {sheet}"
1207 )));
1208 }
1209 let inner = &without_abs[1..without_abs.len() - 1];
1210 Ok(inner.replace("''", "'"))
1211 } else {
1212 Ok(without_abs.to_string())
1213 }
1214 }
1215
1216 fn find_openformula_sheet_separator(part: &str) -> Option<usize> {
1217 let bytes = part.as_bytes();
1218 let mut i = 0;
1219 let mut in_quotes = false;
1220
1221 while i < bytes.len() {
1222 match bytes[i] {
1223 b'\'' => {
1224 if i + 1 < bytes.len() && bytes[i + 1] == b'\'' {
1225 i += 2;
1226 continue;
1227 }
1228 in_quotes = !in_quotes;
1229 i += 1;
1230 }
1231 b'.' if !in_quotes => return Some(i),
1232 _ => i += 1,
1233 }
1234 }
1235
1236 None
1237 }
1238
1239 fn parse_special_item(content: &str) -> Result<Option<TableSpecifier>, ParsingError> {
1241 match content {
1242 "#All" => Ok(Some(TableSpecifier::SpecialItem(SpecialItem::All))),
1243 "#Headers" => Ok(Some(TableSpecifier::SpecialItem(SpecialItem::Headers))),
1244 "#Data" => Ok(Some(TableSpecifier::SpecialItem(SpecialItem::Data))),
1245 "#Totals" => Ok(Some(TableSpecifier::SpecialItem(SpecialItem::Totals))),
1246 "@" => Ok(Some(TableSpecifier::Row(TableRowSpecifier::Current))),
1247 _ => Err(ParsingError::InvalidReference(format!(
1248 "Unknown special item: {content}"
1249 ))),
1250 }
1251 }
1252
1253 fn parse_complex_table_specifier(
1255 content: &str,
1256 ) -> Result<Option<TableSpecifier>, ParsingError> {
1257 if content.contains("[#Headers]")
1262 || content.contains("[#All]")
1263 || content.contains("[#Data]")
1264 || content.contains("[#Totals]")
1265 || content.contains("[@]")
1266 {
1267 let mut specifiers = Vec::new();
1270
1271 if content.contains("[#Headers]") {
1273 specifiers.push(Box::new(TableSpecifier::SpecialItem(SpecialItem::Headers)));
1274 }
1275 if content.contains("[#Data]") {
1276 specifiers.push(Box::new(TableSpecifier::SpecialItem(SpecialItem::Data)));
1277 }
1278 if content.contains("[#Totals]") {
1279 specifiers.push(Box::new(TableSpecifier::SpecialItem(SpecialItem::Totals)));
1280 }
1281 if content.contains("[#All]") {
1282 specifiers.push(Box::new(TableSpecifier::SpecialItem(SpecialItem::All)));
1283 }
1284
1285 if !specifiers.is_empty() {
1286 return Ok(Some(TableSpecifier::Combination(specifiers)));
1287 }
1288 }
1289
1290 Ok(Some(TableSpecifier::Column(content.trim().to_string())))
1292 }
1293
1294 pub fn to_excel_string(&self) -> String {
1296 match self {
1297 ReferenceType::Cell {
1298 sheet,
1299 row,
1300 col,
1301 row_abs,
1302 col_abs,
1303 } => {
1304 let col_str = Self::format_col(*col, *col_abs);
1305 let row_str = Self::format_row(*row, *row_abs);
1306 if let Some(s) = sheet {
1307 if sheet_name_needs_quoting(s) {
1308 let escaped_name = s.replace('\'', "''");
1309 format!("'{}'!{}{}", escaped_name, col_str, row_str)
1310 } else {
1311 format!("{}!{}{}", s, col_str, row_str)
1312 }
1313 } else {
1314 format!("{}{}", col_str, row_str)
1315 }
1316 }
1317 ReferenceType::Range {
1318 sheet,
1319 start_row,
1320 start_col,
1321 end_row,
1322 end_col,
1323 start_row_abs,
1324 start_col_abs,
1325 end_row_abs,
1326 end_col_abs,
1327 } => {
1328 let start_ref = match (start_col, start_row) {
1330 (Some(col), Some(row)) => format!(
1331 "{}{}",
1332 Self::format_col(*col, *start_col_abs),
1333 Self::format_row(*row, *start_row_abs)
1334 ),
1335 (Some(col), None) => Self::format_col(*col, *start_col_abs),
1336 (None, Some(row)) => Self::format_row(*row, *start_row_abs),
1337 (None, None) => "".to_string(), };
1339
1340 let end_ref = match (end_col, end_row) {
1342 (Some(col), Some(row)) => format!(
1343 "{}{}",
1344 Self::format_col(*col, *end_col_abs),
1345 Self::format_row(*row, *end_row_abs)
1346 ),
1347 (Some(col), None) => Self::format_col(*col, *end_col_abs),
1348 (None, Some(row)) => Self::format_row(*row, *end_row_abs),
1349 (None, None) => "".to_string(), };
1351
1352 let range_part = format!("{start_ref}:{end_ref}");
1353
1354 if let Some(s) = sheet {
1355 if sheet_name_needs_quoting(s) {
1356 let escaped_name = s.replace('\'', "''");
1357 format!("'{escaped_name}'!{range_part}")
1358 } else {
1359 format!("{s}!{range_part}")
1360 }
1361 } else {
1362 range_part
1363 }
1364 }
1365 ReferenceType::External(ext) => ext.raw.clone(),
1366 ReferenceType::Table(table_ref) => {
1367 if let Some(specifier) = &table_ref.specifier {
1368 format!("{}[{}]", table_ref.name, specifier)
1369 } else {
1370 table_ref.name.clone()
1371 }
1372 }
1373 ReferenceType::NamedRange(name) => name.clone(),
1374 }
1375 }
1376}
1377
1378#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
1380#[derive(Debug, Clone, PartialEq, Hash)]
1381pub enum ASTNodeType {
1382 Literal(LiteralValue),
1383 Reference {
1384 original: String, reference: ReferenceType, },
1387 UnaryOp {
1388 op: String,
1389 expr: Box<ASTNode>,
1390 },
1391 BinaryOp {
1392 op: String,
1393 left: Box<ASTNode>,
1394 right: Box<ASTNode>,
1395 },
1396 Function {
1397 name: String,
1398 args: Vec<ASTNode>, },
1400 Array(Vec<Vec<ASTNode>>), }
1402
1403impl Display for ASTNodeType {
1404 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1405 match self {
1406 ASTNodeType::Literal(value) => write!(f, "Literal({value})"),
1407 ASTNodeType::Reference { reference, .. } => write!(f, "Reference({reference:?})"),
1408 ASTNodeType::UnaryOp { op, expr } => write!(f, "UnaryOp({op}, {expr})"),
1409 ASTNodeType::BinaryOp { op, left, right } => {
1410 write!(f, "BinaryOp({op}, {left}, {right})")
1411 }
1412 ASTNodeType::Function { name, args } => write!(f, "Function({name}, {args:?})"),
1413 ASTNodeType::Array(rows) => write!(f, "Array({rows:?})"),
1414 }
1415 }
1416}
1417
1418#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
1420#[derive(Debug, Clone, PartialEq)]
1421pub struct ASTNode {
1422 pub node_type: ASTNodeType,
1423 pub source_token: Option<Token>,
1424 pub contains_volatile: bool,
1429}
1430
1431impl ASTNode {
1432 pub fn new(node_type: ASTNodeType, source_token: Option<Token>) -> Self {
1433 ASTNode {
1434 node_type,
1435 source_token,
1436 contains_volatile: false,
1437 }
1438 }
1439
1440 pub fn new_with_volatile(
1442 node_type: ASTNodeType,
1443 source_token: Option<Token>,
1444 contains_volatile: bool,
1445 ) -> Self {
1446 ASTNode {
1447 node_type,
1448 source_token,
1449 contains_volatile,
1450 }
1451 }
1452
1453 pub fn contains_volatile(&self) -> bool {
1455 self.contains_volatile
1456 }
1457
1458 pub fn fingerprint(&self) -> u64 {
1459 self.calculate_hash()
1460 }
1461
1462 pub fn calculate_hash(&self) -> u64 {
1464 let mut hasher = FormulaHasher::new();
1465 self.hash_node(&mut hasher);
1466 hasher.finish()
1467 }
1468
1469 fn hash_node(&self, hasher: &mut FormulaHasher) {
1470 match &self.node_type {
1471 ASTNodeType::Literal(value) => {
1472 hasher.write(&[1]); value.hash(hasher);
1474 }
1475 ASTNodeType::Reference { reference, .. } => {
1476 hasher.write(&[2]); reference.hash(hasher);
1478 }
1479 ASTNodeType::UnaryOp { op, expr } => {
1480 hasher.write(&[3]); hasher.write(op.as_bytes());
1482 expr.hash_node(hasher);
1483 }
1484 ASTNodeType::BinaryOp { op, left, right } => {
1485 hasher.write(&[4]); hasher.write(op.as_bytes());
1487 left.hash_node(hasher);
1488 right.hash_node(hasher);
1489 }
1490 ASTNodeType::Function { name, args } => {
1491 hasher.write(&[5]); let name_lower = name.to_lowercase();
1494 hasher.write(name_lower.as_bytes());
1495 hasher.write_usize(args.len());
1496 for arg in args {
1497 arg.hash_node(hasher);
1498 }
1499 }
1500 ASTNodeType::Array(rows) => {
1501 hasher.write(&[6]); hasher.write_usize(rows.len());
1503 for row in rows {
1504 hasher.write_usize(row.len());
1505 for item in row {
1506 item.hash_node(hasher);
1507 }
1508 }
1509 }
1510 }
1511 }
1512
1513 pub fn get_dependencies(&self) -> Vec<&ReferenceType> {
1514 let mut dependencies = Vec::new();
1515 self.collect_dependencies(&mut dependencies);
1516 dependencies
1517 }
1518
1519 pub fn get_dependency_strings(&self) -> Vec<String> {
1520 self.get_dependencies()
1521 .into_iter()
1522 .map(|dep| format!("{dep}"))
1523 .collect()
1524 }
1525
1526 fn collect_dependencies<'a>(&'a self, dependencies: &mut Vec<&'a ReferenceType>) {
1527 match &self.node_type {
1528 ASTNodeType::Reference { reference, .. } => {
1529 dependencies.push(reference);
1530 }
1531 ASTNodeType::UnaryOp { expr, .. } => {
1532 expr.collect_dependencies(dependencies);
1533 }
1534 ASTNodeType::BinaryOp { left, right, .. } => {
1535 left.collect_dependencies(dependencies);
1536 right.collect_dependencies(dependencies);
1537 }
1538 ASTNodeType::Function { args, .. } => {
1539 for arg in args {
1540 arg.collect_dependencies(dependencies);
1541 }
1542 }
1543 ASTNodeType::Array(rows) => {
1544 for row in rows {
1545 for item in row {
1546 item.collect_dependencies(dependencies);
1547 }
1548 }
1549 }
1550 _ => {}
1551 }
1552 }
1553
1554 pub fn refs(&self) -> RefIter<'_> {
1557 RefIter {
1558 stack: smallvec::smallvec![self],
1559 }
1560 }
1561
1562 pub fn visit_refs<V: FnMut(RefView<'_>)>(&self, mut visitor: V) {
1564 let mut stack: Vec<&ASTNode> = Vec::with_capacity(8);
1565 stack.push(self);
1566 while let Some(node) = stack.pop() {
1567 match &node.node_type {
1568 ASTNodeType::Reference { reference, .. } => visitor(RefView::from(reference)),
1569 ASTNodeType::UnaryOp { expr, .. } => stack.push(expr),
1570 ASTNodeType::BinaryOp { left, right, .. } => {
1571 stack.push(right);
1573 stack.push(left);
1574 }
1575 ASTNodeType::Function { args, .. } => {
1576 for a in args.iter().rev() {
1577 stack.push(a);
1578 }
1579 }
1580 ASTNodeType::Array(rows) => {
1581 for r in rows.iter().rev() {
1582 for item in r.iter().rev() {
1583 stack.push(item);
1584 }
1585 }
1586 }
1587 ASTNodeType::Literal(_) => {}
1588 }
1589 }
1590 }
1591
1592 pub fn collect_references(&self, policy: &CollectPolicy) -> SmallVec<[ReferenceType; 4]> {
1594 let mut out: SmallVec<[ReferenceType; 4]> = SmallVec::new();
1595 self.visit_refs(|rv| match rv {
1596 RefView::Cell {
1597 sheet,
1598 row,
1599 col,
1600 row_abs,
1601 col_abs,
1602 } => out.push(ReferenceType::Cell {
1603 sheet: sheet.map(|s| s.to_string()),
1604 row,
1605 col,
1606 row_abs,
1607 col_abs,
1608 }),
1609 RefView::Range {
1610 sheet,
1611 start_row,
1612 start_col,
1613 end_row,
1614 end_col,
1615 start_row_abs,
1616 start_col_abs,
1617 end_row_abs,
1618 end_col_abs,
1619 } => {
1620 if policy.expand_small_ranges
1622 && let (Some(sr), Some(sc), Some(er), Some(ec)) =
1623 (start_row, start_col, end_row, end_col)
1624 {
1625 let rows = er.saturating_sub(sr) + 1;
1626 let cols = ec.saturating_sub(sc) + 1;
1627 let area = rows.saturating_mul(cols);
1628 if area as usize <= policy.range_expansion_limit {
1629 let row_abs = start_row_abs && end_row_abs;
1630 let col_abs = start_col_abs && end_col_abs;
1631 for r in sr..=er {
1632 for c in sc..=ec {
1633 out.push(ReferenceType::Cell {
1634 sheet: sheet.map(|s| s.to_string()),
1635 row: r,
1636 col: c,
1637 row_abs,
1638 col_abs,
1639 });
1640 }
1641 }
1642 return; }
1644 }
1645 out.push(ReferenceType::Range {
1646 sheet: sheet.map(|s| s.to_string()),
1647 start_row,
1648 start_col,
1649 end_row,
1650 end_col,
1651 start_row_abs,
1652 start_col_abs,
1653 end_row_abs,
1654 end_col_abs,
1655 });
1656 }
1657 RefView::External {
1658 raw,
1659 book,
1660 sheet,
1661 kind,
1662 } => out.push(ReferenceType::External(ExternalReference {
1663 raw: raw.to_string(),
1664 book: ExternalBookRef::Token(book.to_string()),
1665 sheet: sheet.to_string(),
1666 kind,
1667 })),
1668 RefView::Table { name, specifier } => out.push(ReferenceType::Table(TableReference {
1669 name: name.to_string(),
1670 specifier: specifier.cloned(),
1671 })),
1672 RefView::NamedRange { name } => {
1673 if policy.include_names {
1674 out.push(ReferenceType::NamedRange(name.to_string()));
1675 }
1676 }
1677 });
1678 out
1679 }
1680}
1681
1682#[derive(Clone, Copy, Debug)]
1684pub enum RefView<'a> {
1685 Cell {
1686 sheet: Option<&'a str>,
1687 row: u32,
1688 col: u32,
1689 row_abs: bool,
1690 col_abs: bool,
1691 },
1692 Range {
1693 sheet: Option<&'a str>,
1694 start_row: Option<u32>,
1695 start_col: Option<u32>,
1696 end_row: Option<u32>,
1697 end_col: Option<u32>,
1698 start_row_abs: bool,
1699 start_col_abs: bool,
1700 end_row_abs: bool,
1701 end_col_abs: bool,
1702 },
1703 External {
1704 raw: &'a str,
1705 book: &'a str,
1706 sheet: &'a str,
1707 kind: ExternalRefKind,
1708 },
1709 Table {
1710 name: &'a str,
1711 specifier: Option<&'a TableSpecifier>,
1712 },
1713 NamedRange {
1714 name: &'a str,
1715 },
1716}
1717
1718impl<'a> From<&'a ReferenceType> for RefView<'a> {
1719 fn from(r: &'a ReferenceType) -> Self {
1720 match r {
1721 ReferenceType::Cell {
1722 sheet,
1723 row,
1724 col,
1725 row_abs,
1726 col_abs,
1727 } => RefView::Cell {
1728 sheet: sheet.as_deref(),
1729 row: *row,
1730 col: *col,
1731 row_abs: *row_abs,
1732 col_abs: *col_abs,
1733 },
1734 ReferenceType::Range {
1735 sheet,
1736 start_row,
1737 start_col,
1738 end_row,
1739 end_col,
1740 start_row_abs,
1741 start_col_abs,
1742 end_row_abs,
1743 end_col_abs,
1744 } => RefView::Range {
1745 sheet: sheet.as_deref(),
1746 start_row: *start_row,
1747 start_col: *start_col,
1748 end_row: *end_row,
1749 end_col: *end_col,
1750 start_row_abs: *start_row_abs,
1751 start_col_abs: *start_col_abs,
1752 end_row_abs: *end_row_abs,
1753 end_col_abs: *end_col_abs,
1754 },
1755 ReferenceType::External(ext) => RefView::External {
1756 raw: ext.raw.as_str(),
1757 book: ext.book.token(),
1758 sheet: ext.sheet.as_str(),
1759 kind: ext.kind,
1760 },
1761 ReferenceType::Table(tr) => RefView::Table {
1762 name: tr.name.as_str(),
1763 specifier: tr.specifier.as_ref(),
1764 },
1765 ReferenceType::NamedRange(name) => RefView::NamedRange { name },
1766 }
1767 }
1768}
1769
1770pub struct RefIter<'a> {
1772 stack: smallvec::SmallVec<[&'a ASTNode; 8]>,
1773}
1774
1775impl<'a> Iterator for RefIter<'a> {
1776 type Item = RefView<'a>;
1777 fn next(&mut self) -> Option<Self::Item> {
1778 while let Some(node) = self.stack.pop() {
1779 match &node.node_type {
1780 ASTNodeType::Reference { reference, .. } => return Some(RefView::from(reference)),
1781 ASTNodeType::UnaryOp { expr, .. } => self.stack.push(expr),
1782 ASTNodeType::BinaryOp { left, right, .. } => {
1783 self.stack.push(right);
1784 self.stack.push(left);
1785 }
1786 ASTNodeType::Function { args, .. } => {
1787 for a in args.iter().rev() {
1788 self.stack.push(a);
1789 }
1790 }
1791 ASTNodeType::Array(rows) => {
1792 for r in rows.iter().rev() {
1793 for item in r.iter().rev() {
1794 self.stack.push(item);
1795 }
1796 }
1797 }
1798 ASTNodeType::Literal(_) => {}
1799 }
1800 }
1801 None
1802 }
1803}
1804
/// Options that shape how references are gathered from an AST.
#[derive(Debug, Clone)]
pub struct CollectPolicy {
    /// Replace small, fully bounded ranges with their individual cells.
    pub expand_small_ranges: bool,
    /// Largest cell count a range may have and still be expanded.
    pub range_expansion_limit: usize,
    /// Whether named ranges are included in the result.
    pub include_names: bool,
}
1812
1813impl Default for CollectPolicy {
1814 fn default() -> Self {
1815 Self {
1816 expand_small_ranges: false,
1817 range_expansion_limit: 0,
1818 include_names: true,
1819 }
1820 }
1821}
1822
1823impl Display for ASTNode {
1824 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1825 write!(f, "{}", self.node_type)
1826 }
1827}
1828
1829impl std::hash::Hash for ASTNode {
1830 fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
1831 let hash = self.calculate_hash();
1832 state.write_u64(hash);
1833 }
1834}
1835
1836pub struct Parser {
1838 tokens: Arc<[Token]>,
1839 position: usize,
1840 volatility_classifier: Option<VolatilityClassifierBox>,
1842 dialect: FormulaDialect,
1843}
1844
1845impl TryFrom<&str> for Parser {
1846 type Error = TokenizerError;
1847
1848 fn try_from(formula: &str) -> Result<Self, Self::Error> {
1849 let tokens = Tokenizer::new(formula)?.items;
1850 Ok(Self::new(tokens, false))
1851 }
1852}
1853
1854impl TryFrom<String> for Parser {
1855 type Error = TokenizerError;
1856
1857 fn try_from(formula: String) -> Result<Self, Self::Error> {
1858 Self::try_from(formula.as_str())
1859 }
1860}
1861
1862impl Parser {
1863 pub fn new(tokens: Vec<Token>, include_whitespace: bool) -> Self {
1864 Self::new_with_dialect(tokens, include_whitespace, FormulaDialect::Excel)
1865 }
1866
1867 pub fn new_with_dialect(
1868 mut tokens: Vec<Token>,
1869 include_whitespace: bool,
1870 dialect: FormulaDialect,
1871 ) -> Self {
1872 if !include_whitespace {
1873 tokens.retain(|t| t.token_type != TokenType::Whitespace);
1874 }
1875
1876 Parser {
1877 tokens: Arc::from(tokens.into_boxed_slice()),
1878 position: 0,
1879 volatility_classifier: None,
1880 dialect,
1881 }
1882 }
1883
1884 pub fn try_from_formula(formula: &str) -> Result<Self, TokenizerError> {
1885 let tokens = Tokenizer::new(formula)?.items;
1886 Ok(Self::new(tokens, false))
1887 }
1888
1889 pub fn with_volatility_classifier<F>(mut self, f: F) -> Self
1892 where
1893 F: Fn(&str) -> bool + Send + Sync + 'static,
1894 {
1895 self.volatility_classifier = Some(Box::new(f));
1896 self
1897 }
1898
1899 pub fn new_with_classifier<F>(tokens: Vec<Token>, include_whitespace: bool, f: F) -> Self
1901 where
1902 F: Fn(&str) -> bool + Send + Sync + 'static,
1903 {
1904 Self::new(tokens, include_whitespace).with_volatility_classifier(f)
1905 }
1906
1907 pub fn new_with_classifier_and_dialect<F>(
1908 tokens: Vec<Token>,
1909 include_whitespace: bool,
1910 dialect: FormulaDialect,
1911 f: F,
1912 ) -> Self
1913 where
1914 F: Fn(&str) -> bool + Send + Sync + 'static,
1915 {
1916 Self::new_with_dialect(tokens, include_whitespace, dialect).with_volatility_classifier(f)
1917 }
1918
1919 fn skip_whitespace(&mut self) {
1920 while self.position < self.tokens.len()
1921 && self.tokens[self.position].token_type == TokenType::Whitespace
1922 {
1923 self.position += 1;
1924 }
1925 }
1926
1927 pub fn parse(&mut self) -> Result<ASTNode, ParserError> {
1929 if self.tokens.is_empty() {
1930 return Err(ParserError {
1931 message: "No tokens to parse".to_string(),
1932 position: None,
1933 });
1934 }
1935
1936 self.skip_whitespace();
1937 if self.position >= self.tokens.len() {
1938 return Err(ParserError {
1939 message: "No tokens to parse".to_string(),
1940 position: None,
1941 });
1942 }
1943
1944 if self.tokens[self.position].token_type == TokenType::Literal {
1946 let token = self.tokens[self.position].clone();
1947 self.position += 1;
1948 self.skip_whitespace();
1949 if self.position < self.tokens.len() {
1950 return Err(ParserError {
1951 message: format!(
1952 "Unexpected token at position {}: {:?}",
1953 self.position, self.tokens[self.position]
1954 ),
1955 position: Some(self.position),
1956 });
1957 }
1958 return Ok(ASTNode::new(
1959 ASTNodeType::Literal(LiteralValue::Text(token.value.clone())),
1960 Some(token),
1961 ));
1962 }
1963
1964 let ast = self.parse_expression()?;
1965 self.skip_whitespace();
1966 if self.position < self.tokens.len() {
1967 return Err(ParserError {
1968 message: format!(
1969 "Unexpected token at position {}: {:?}",
1970 self.position, self.tokens[self.position]
1971 ),
1972 position: Some(self.position),
1973 });
1974 }
1975 Ok(ast)
1976 }
1977
1978 fn parse_expression(&mut self) -> Result<ASTNode, ParserError> {
1979 self.parse_bp(0)
1980 }
1981
1982 fn parse_bp(&mut self, min_precedence: u8) -> Result<ASTNode, ParserError> {
1985 let mut left = self.parse_prefix()?;
1986
1987 loop {
1988 self.skip_whitespace();
1989 if self.position >= self.tokens.len() {
1990 break;
1991 }
1992
1993 if self.tokens[self.position].token_type == TokenType::OpPostfix {
1995 let (precedence, _) = self.tokens[self.position]
1996 .get_precedence()
1997 .unwrap_or((0, Associativity::Left));
1998 if precedence < min_precedence {
1999 break;
2000 }
2001
2002 let op_token = self.tokens[self.position].clone();
2003 self.position += 1;
2004 let contains_volatile = left.contains_volatile;
2005 left = ASTNode::new_with_volatile(
2006 ASTNodeType::UnaryOp {
2007 op: op_token.value.clone(),
2008 expr: Box::new(left),
2009 },
2010 Some(op_token),
2011 contains_volatile,
2012 );
2013 continue;
2014 }
2015
2016 let token = &self.tokens[self.position];
2017 if token.token_type != TokenType::OpInfix {
2018 break;
2019 }
2020
2021 let (precedence, associativity) =
2022 token.get_precedence().unwrap_or((0, Associativity::Left));
2023 if precedence < min_precedence {
2024 break;
2025 }
2026
2027 let op_token = self.tokens[self.position].clone();
2028 self.position += 1;
2029
2030 let next_min_precedence = if associativity == Associativity::Left {
2031 precedence + 1
2032 } else {
2033 precedence
2034 };
2035
2036 let right = self.parse_bp(next_min_precedence)?;
2037 let contains_volatile = left.contains_volatile || right.contains_volatile;
2038 left = ASTNode::new_with_volatile(
2039 ASTNodeType::BinaryOp {
2040 op: op_token.value.clone(),
2041 left: Box::new(left),
2042 right: Box::new(right),
2043 },
2044 Some(op_token),
2045 contains_volatile,
2046 );
2047 }
2048
2049 Ok(left)
2050 }
2051
2052 fn parse_prefix(&mut self) -> Result<ASTNode, ParserError> {
2053 self.skip_whitespace();
2054 if self.position < self.tokens.len()
2055 && self.tokens[self.position].token_type == TokenType::OpPrefix
2056 {
2057 let op_token = self.tokens[self.position].clone();
2058 self.position += 1;
2059
2060 let (precedence, _) = op_token
2063 .get_precedence()
2064 .unwrap_or((0, Associativity::Right));
2065
2066 let expr = self.parse_bp(precedence)?;
2067 let contains_volatile = expr.contains_volatile;
2068 return Ok(ASTNode::new_with_volatile(
2069 ASTNodeType::UnaryOp {
2070 op: op_token.value.clone(),
2071 expr: Box::new(expr),
2072 },
2073 Some(op_token),
2074 contains_volatile,
2075 ));
2076 }
2077
2078 self.parse_primary()
2079 }
2080
2081 fn parse_primary(&mut self) -> Result<ASTNode, ParserError> {
2082 self.skip_whitespace();
2083 if self.position >= self.tokens.len() {
2084 return Err(ParserError {
2085 message: "Unexpected end of tokens".to_string(),
2086 position: Some(self.position),
2087 });
2088 }
2089
2090 let token = &self.tokens[self.position];
2091 match token.token_type {
2092 TokenType::Operand => {
2093 let operand_token = self.tokens[self.position].clone();
2094 self.position += 1;
2095 self.parse_operand(operand_token)
2096 }
2097 TokenType::Func => {
2098 let func_token = self.tokens[self.position].clone();
2099 self.position += 1;
2100 self.parse_function(func_token)
2101 }
2102 TokenType::Paren if token.subtype == TokenSubType::Open => {
2103 self.position += 1;
2104 let expr = self.parse_expression()?;
2105 if self.position >= self.tokens.len()
2106 || self.tokens[self.position].token_type != TokenType::Paren
2107 || self.tokens[self.position].subtype != TokenSubType::Close
2108 {
2109 return Err(ParserError {
2110 message: "Expected closing parenthesis".to_string(),
2111 position: Some(self.position),
2112 });
2113 }
2114 self.position += 1;
2115 Ok(expr)
2116 }
2117 TokenType::Array if token.subtype == TokenSubType::Open => {
2118 self.position += 1;
2119 self.parse_array()
2120 }
2121 _ => Err(ParserError {
2122 message: format!("Unexpected token: {token:?}"),
2123 position: Some(self.position),
2124 }),
2125 }
2126 }
2127
2128 fn parse_operand(&mut self, token: Token) -> Result<ASTNode, ParserError> {
2129 match token.subtype {
2130 TokenSubType::Number => {
2131 let value = token.value.parse::<f64>().map_err(|_| ParserError {
2132 message: format!("Invalid number: {}", token.value),
2133 position: Some(self.position),
2134 })?;
2135 Ok(ASTNode::new(
2136 ASTNodeType::Literal(LiteralValue::Number(value)),
2137 Some(token),
2138 ))
2139 }
2140 TokenSubType::Text => {
2141 let mut text = token.value.clone();
2143 if text.starts_with('"') && text.ends_with('"') && text.len() >= 2 {
2144 text = text[1..text.len() - 1].to_string();
2145 text = text.replace("\"\"", "\"");
2147 }
2148 Ok(ASTNode::new(
2149 ASTNodeType::Literal(LiteralValue::Text(text)),
2150 Some(token),
2151 ))
2152 }
2153 TokenSubType::Logical => {
2154 let value = token.value.to_uppercase() == "TRUE";
2155 Ok(ASTNode::new(
2156 ASTNodeType::Literal(LiteralValue::Boolean(value)),
2157 Some(token),
2158 ))
2159 }
2160 TokenSubType::Error => {
2161 let error = ExcelError::from_error_string(&token.value);
2162 Ok(ASTNode::new(
2163 ASTNodeType::Literal(LiteralValue::Error(error)),
2164 Some(token),
2165 ))
2166 }
2167 TokenSubType::Range => {
2168 let reference = ReferenceType::from_string_with_dialect(&token.value, self.dialect)
2169 .map_err(|e| ParserError {
2170 message: format!("Invalid reference '{}': {}", token.value, e),
2171 position: Some(self.position),
2172 })?;
2173 Ok(ASTNode::new(
2174 ASTNodeType::Reference {
2175 original: token.value.clone(),
2176 reference,
2177 },
2178 Some(token),
2179 ))
2180 }
2181 _ => Err(ParserError {
2182 message: format!("Unexpected operand subtype: {:?}", token.subtype),
2183 position: Some(self.position),
2184 }),
2185 }
2186 }
2187
2188 fn parse_function(&mut self, func_token: Token) -> Result<ASTNode, ParserError> {
2189 let name = func_token.value[..func_token.value.len() - 1].to_string();
2190 let args = self.parse_function_arguments()?;
2191 let this_is_volatile = self
2193 .volatility_classifier
2194 .as_ref()
2195 .map(|f| f(name.as_str()))
2196 .unwrap_or(false);
2197 let args_volatile = args.iter().any(|a| a.contains_volatile);
2198
2199 Ok(ASTNode::new_with_volatile(
2200 ASTNodeType::Function { name, args },
2201 Some(func_token),
2202 this_is_volatile || args_volatile,
2203 ))
2204 }
2205
2206 fn parse_function_arguments(&mut self) -> Result<Vec<ASTNode>, ParserError> {
2208 let mut args = Vec::new();
2209
2210 if self.position < self.tokens.len()
2212 && self.tokens[self.position].token_type == TokenType::Func
2213 && self.tokens[self.position].subtype == TokenSubType::Close
2214 {
2215 self.position += 1;
2216 return Ok(args);
2217 }
2218
2219 if self.position < self.tokens.len()
2222 && self.tokens[self.position].token_type == TokenType::Sep
2223 && self.tokens[self.position].subtype == TokenSubType::Arg
2224 {
2225 args.push(ASTNode::new(
2227 ASTNodeType::Literal(LiteralValue::Text("".to_string())),
2228 None,
2229 ));
2230 self.position += 1;
2231 } else {
2232 args.push(self.parse_expression()?);
2234 }
2235
2236 while self.position < self.tokens.len() {
2238 let token = &self.tokens[self.position];
2239
2240 if token.token_type == TokenType::Sep && token.subtype == TokenSubType::Arg {
2241 self.position += 1;
2242 if self.position < self.tokens.len() {
2244 let next_token = &self.tokens[self.position];
2245 if next_token.token_type == TokenType::Sep
2246 && next_token.subtype == TokenSubType::Arg
2247 {
2248 args.push(ASTNode::new(
2250 ASTNodeType::Literal(LiteralValue::Text("".to_string())),
2251 None,
2252 ));
2253 } else if next_token.token_type == TokenType::Func
2254 && next_token.subtype == TokenSubType::Close
2255 {
2256 args.push(ASTNode::new(
2258 ASTNodeType::Literal(LiteralValue::Text("".to_string())),
2259 None,
2260 ));
2261 self.position += 1;
2262 break;
2263 } else {
2264 args.push(self.parse_expression()?);
2265 }
2266 } else {
2267 args.push(ASTNode::new(
2269 ASTNodeType::Literal(LiteralValue::Text("".to_string())),
2270 None,
2271 ));
2272 }
2273 } else if token.token_type == TokenType::Func && token.subtype == TokenSubType::Close {
2274 self.position += 1;
2275 break;
2276 } else {
2277 return Err(ParserError {
2278 message: format!("Expected ',' or ')' in function arguments, got {token:?}"),
2279 position: Some(self.position),
2280 });
2281 }
2282 }
2283
2284 Ok(args)
2285 }
2286
2287 fn parse_array(&mut self) -> Result<ASTNode, ParserError> {
2288 let mut rows = Vec::new();
2289 let mut current_row = Vec::new();
2290
2291 if self.position < self.tokens.len()
2293 && self.tokens[self.position].token_type == TokenType::Array
2294 && self.tokens[self.position].subtype == TokenSubType::Close
2295 {
2296 self.position += 1;
2297 return Ok(ASTNode::new(ASTNodeType::Array(rows), None));
2298 }
2299
2300 current_row.push(self.parse_expression()?);
2302
2303 while self.position < self.tokens.len() {
2304 let token = &self.tokens[self.position];
2305
2306 if token.token_type == TokenType::Sep {
2307 if token.subtype == TokenSubType::Arg {
2308 self.position += 1;
2310 current_row.push(self.parse_expression()?);
2311 } else if token.subtype == TokenSubType::Row {
2312 self.position += 1;
2314 rows.push(current_row);
2315 current_row = vec![self.parse_expression()?];
2316 }
2317 } else if token.token_type == TokenType::Array && token.subtype == TokenSubType::Close {
2318 self.position += 1;
2319 rows.push(current_row);
2320 break;
2321 } else {
2322 return Err(ParserError {
2323 message: format!("Unexpected token in array: {token:?}"),
2324 position: Some(self.position),
2325 });
2326 }
2327 }
2328
2329 let contains_volatile = rows
2331 .iter()
2332 .flat_map(|r| r.iter())
2333 .any(|n| n.contains_volatile);
2334 Ok(ASTNode::new_with_volatile(
2335 ASTNodeType::Array(rows),
2336 None,
2337 contains_volatile,
2338 ))
2339 }
2340}
2341
2342impl From<TokenizerError> for ParserError {
2343 fn from(err: TokenizerError) -> Self {
2344 ParserError {
2345 message: err.message,
2346 position: Some(err.pos),
2347 }
2348 }
2349}
2350
2351struct SpanParser<'a> {
2352 source: &'a str,
2353 tokens: &'a [crate::tokenizer::TokenSpan],
2354 position: usize,
2355 volatility_classifier: Option<VolatilityClassifierBox>,
2356 dialect: FormulaDialect,
2357}
2358
2359impl<'a> SpanParser<'a> {
2360 fn new(
2361 source: &'a str,
2362 tokens: &'a [crate::tokenizer::TokenSpan],
2363 dialect: FormulaDialect,
2364 ) -> Self {
2365 SpanParser {
2366 source,
2367 tokens,
2368 position: 0,
2369 volatility_classifier: None,
2370 dialect,
2371 }
2372 }
2373
2374 fn with_volatility_classifier<F>(mut self, f: F) -> Self
2375 where
2376 F: Fn(&str) -> bool + Send + Sync + 'static,
2377 {
2378 self.volatility_classifier = Some(Box::new(f));
2379 self
2380 }
2381
2382 fn skip_whitespace(&mut self) {
2383 while self.position < self.tokens.len()
2384 && self.tokens[self.position].token_type == TokenType::Whitespace
2385 {
2386 self.position += 1;
2387 }
2388 }
2389
2390 fn span_value(&self, span: &crate::tokenizer::TokenSpan) -> &str {
2391 &self.source[span.start..span.end]
2392 }
2393
2394 fn span_to_token(&self, span: &crate::tokenizer::TokenSpan) -> Token {
2395 Token::new_with_span(
2396 self.span_value(span).to_string(),
2397 span.token_type,
2398 span.subtype,
2399 span.start,
2400 span.end,
2401 )
2402 }
2403
2404 fn span_precedence(&self, span: &crate::tokenizer::TokenSpan) -> Option<(u8, Associativity)> {
2405 if !matches!(
2406 span.token_type,
2407 TokenType::OpPrefix | TokenType::OpInfix | TokenType::OpPostfix
2408 ) {
2409 return None;
2410 }
2411
2412 let op = if span.token_type == TokenType::OpPrefix {
2413 "u"
2414 } else {
2415 self.span_value(span)
2416 };
2417
2418 match op {
2419 ":" | " " | "," => Some((8, Associativity::Left)),
2420 "%" => Some((7, Associativity::Left)),
2421 "^" => Some((6, Associativity::Right)),
2422 "u" => Some((5, Associativity::Right)),
2423 "*" | "/" => Some((4, Associativity::Left)),
2424 "+" | "-" => Some((3, Associativity::Left)),
2425 "&" => Some((2, Associativity::Left)),
2426 "=" | "<" | ">" | "<=" | ">=" | "<>" => Some((1, Associativity::Left)),
2427 _ => None,
2428 }
2429 }
2430
2431 fn parse(&mut self) -> Result<ASTNode, ParserError> {
2432 if self.tokens.is_empty() {
2433 return Err(ParserError {
2434 message: "No tokens to parse".to_string(),
2435 position: None,
2436 });
2437 }
2438
2439 self.skip_whitespace();
2440 if self.position >= self.tokens.len() {
2441 return Err(ParserError {
2442 message: "No tokens to parse".to_string(),
2443 position: None,
2444 });
2445 }
2446
2447 if self.tokens[self.position].token_type == TokenType::Literal {
2448 let span = self.tokens[self.position];
2449 self.position += 1;
2450 self.skip_whitespace();
2451 if self.position < self.tokens.len() {
2452 return Err(ParserError {
2453 message: format!(
2454 "Unexpected token at position {}: {:?}",
2455 self.position, self.tokens[self.position]
2456 ),
2457 position: Some(self.position),
2458 });
2459 }
2460
2461 let token = self.span_to_token(&span);
2462 return Ok(ASTNode::new(
2463 ASTNodeType::Literal(LiteralValue::Text(token.value.clone())),
2464 Some(token),
2465 ));
2466 }
2467
2468 let ast = self.parse_expression()?;
2469 self.skip_whitespace();
2470 if self.position < self.tokens.len() {
2471 return Err(ParserError {
2472 message: format!(
2473 "Unexpected token at position {}: {:?}",
2474 self.position, self.tokens[self.position]
2475 ),
2476 position: Some(self.position),
2477 });
2478 }
2479 Ok(ast)
2480 }
2481
2482 fn parse_expression(&mut self) -> Result<ASTNode, ParserError> {
2483 self.parse_bp(0)
2484 }
2485
2486 fn parse_bp(&mut self, min_precedence: u8) -> Result<ASTNode, ParserError> {
2487 let mut left = self.parse_prefix()?;
2488
2489 loop {
2490 self.skip_whitespace();
2491 if self.position >= self.tokens.len() {
2492 break;
2493 }
2494
2495 if self.tokens[self.position].token_type == TokenType::OpPostfix {
2496 let (precedence, _) = self
2497 .span_precedence(&self.tokens[self.position])
2498 .unwrap_or((0, Associativity::Left));
2499 if precedence < min_precedence {
2500 break;
2501 }
2502
2503 let op_span = self.tokens[self.position];
2504 self.position += 1;
2505 let op_token = self.span_to_token(&op_span);
2506 let contains_volatile = left.contains_volatile;
2507 left = ASTNode::new_with_volatile(
2508 ASTNodeType::UnaryOp {
2509 op: op_token.value.clone(),
2510 expr: Box::new(left),
2511 },
2512 Some(op_token),
2513 contains_volatile,
2514 );
2515 continue;
2516 }
2517
2518 let token = &self.tokens[self.position];
2519 if token.token_type != TokenType::OpInfix {
2520 break;
2521 }
2522
2523 let (precedence, associativity) = self
2524 .span_precedence(token)
2525 .unwrap_or((0, Associativity::Left));
2526 if precedence < min_precedence {
2527 break;
2528 }
2529
2530 let op_span = self.tokens[self.position];
2531 self.position += 1;
2532
2533 let next_min_precedence = if associativity == Associativity::Left {
2534 precedence + 1
2535 } else {
2536 precedence
2537 };
2538
2539 let right = self.parse_bp(next_min_precedence)?;
2540 let op_token = self.span_to_token(&op_span);
2541 let contains_volatile = left.contains_volatile || right.contains_volatile;
2542 left = ASTNode::new_with_volatile(
2543 ASTNodeType::BinaryOp {
2544 op: op_token.value.clone(),
2545 left: Box::new(left),
2546 right: Box::new(right),
2547 },
2548 Some(op_token),
2549 contains_volatile,
2550 );
2551 }
2552
2553 Ok(left)
2554 }
2555
2556 fn parse_prefix(&mut self) -> Result<ASTNode, ParserError> {
2557 self.skip_whitespace();
2558 if self.position < self.tokens.len()
2559 && self.tokens[self.position].token_type == TokenType::OpPrefix
2560 {
2561 let op_span = self.tokens[self.position];
2562 self.position += 1;
2563
2564 let (precedence, _) = self
2565 .span_precedence(&op_span)
2566 .unwrap_or((0, Associativity::Right));
2567
2568 let expr = self.parse_bp(precedence)?;
2569 let op_token = self.span_to_token(&op_span);
2570 let contains_volatile = expr.contains_volatile;
2571 return Ok(ASTNode::new_with_volatile(
2572 ASTNodeType::UnaryOp {
2573 op: op_token.value.clone(),
2574 expr: Box::new(expr),
2575 },
2576 Some(op_token),
2577 contains_volatile,
2578 ));
2579 }
2580
2581 self.parse_primary()
2582 }
2583
2584 fn parse_primary(&mut self) -> Result<ASTNode, ParserError> {
2585 self.skip_whitespace();
2586 if self.position >= self.tokens.len() {
2587 return Err(ParserError {
2588 message: "Unexpected end of tokens".to_string(),
2589 position: Some(self.position),
2590 });
2591 }
2592
2593 let token = &self.tokens[self.position];
2594 match token.token_type {
2595 TokenType::Operand => {
2596 let span = self.tokens[self.position];
2597 self.position += 1;
2598 self.parse_operand(span)
2599 }
2600 TokenType::Func => {
2601 let span = self.tokens[self.position];
2602 self.position += 1;
2603 self.parse_function(span)
2604 }
2605 TokenType::Paren if token.subtype == TokenSubType::Open => {
2606 self.position += 1;
2607 let expr = self.parse_expression()?;
2608 self.skip_whitespace();
2609 if self.position >= self.tokens.len()
2610 || self.tokens[self.position].token_type != TokenType::Paren
2611 || self.tokens[self.position].subtype != TokenSubType::Close
2612 {
2613 return Err(ParserError {
2614 message: "Expected closing parenthesis".to_string(),
2615 position: Some(self.position),
2616 });
2617 }
2618 self.position += 1;
2619 Ok(expr)
2620 }
2621 TokenType::Array if token.subtype == TokenSubType::Open => {
2622 self.position += 1;
2623 self.parse_array()
2624 }
2625 _ => Err(ParserError {
2626 message: format!("Unexpected token: {token:?}"),
2627 position: Some(self.position),
2628 }),
2629 }
2630 }
2631
2632 fn parse_operand(&mut self, span: crate::tokenizer::TokenSpan) -> Result<ASTNode, ParserError> {
2633 let value = self.span_value(&span);
2634 let token = self.span_to_token(&span);
2635
2636 match span.subtype {
2637 TokenSubType::Number => {
2638 let value = value.parse::<f64>().map_err(|_| ParserError {
2639 message: format!("Invalid number: {value}"),
2640 position: Some(self.position),
2641 })?;
2642 Ok(ASTNode::new(
2643 ASTNodeType::Literal(LiteralValue::Number(value)),
2644 Some(token),
2645 ))
2646 }
2647 TokenSubType::Text => {
2648 let mut text = value.to_string();
2649 if text.starts_with('"') && text.ends_with('"') && text.len() >= 2 {
2650 text = text[1..text.len() - 1].to_string();
2651 text = text.replace("\"\"", "\"");
2652 }
2653 Ok(ASTNode::new(
2654 ASTNodeType::Literal(LiteralValue::Text(text)),
2655 Some(token),
2656 ))
2657 }
2658 TokenSubType::Logical => {
2659 let v = value.to_uppercase() == "TRUE";
2660 Ok(ASTNode::new(
2661 ASTNodeType::Literal(LiteralValue::Boolean(v)),
2662 Some(token),
2663 ))
2664 }
2665 TokenSubType::Error => {
2666 let error = ExcelError::from_error_string(value);
2667 Ok(ASTNode::new(
2668 ASTNodeType::Literal(LiteralValue::Error(error)),
2669 Some(token),
2670 ))
2671 }
2672 TokenSubType::Range => {
2673 let reference = ReferenceType::from_string_with_dialect(value, self.dialect)
2674 .map_err(|e| ParserError {
2675 message: format!("Invalid reference '{value}': {e}"),
2676 position: Some(self.position),
2677 })?;
2678 Ok(ASTNode::new(
2679 ASTNodeType::Reference {
2680 original: value.to_string(),
2681 reference,
2682 },
2683 Some(token),
2684 ))
2685 }
2686 _ => Err(ParserError {
2687 message: format!("Unexpected operand subtype: {:?}", span.subtype),
2688 position: Some(self.position),
2689 }),
2690 }
2691 }
2692
2693 fn parse_function(
2694 &mut self,
2695 func_span: crate::tokenizer::TokenSpan,
2696 ) -> Result<ASTNode, ParserError> {
2697 let func_value = self.span_value(&func_span);
2698 if func_value.is_empty() {
2699 return Err(ParserError {
2700 message: "Invalid function token".to_string(),
2701 position: Some(self.position),
2702 });
2703 }
2704 let name = func_value[..func_value.len() - 1].to_string();
2705 let args = self.parse_function_arguments()?;
2706
2707 let this_is_volatile = self
2708 .volatility_classifier
2709 .as_ref()
2710 .map(|f| f(name.as_str()))
2711 .unwrap_or(false);
2712 let args_volatile = args.iter().any(|a| a.contains_volatile);
2713
2714 let func_token = self.span_to_token(&func_span);
2715 Ok(ASTNode::new_with_volatile(
2716 ASTNodeType::Function { name, args },
2717 Some(func_token),
2718 this_is_volatile || args_volatile,
2719 ))
2720 }
2721
2722 fn parse_function_arguments(&mut self) -> Result<Vec<ASTNode>, ParserError> {
2723 let mut args = Vec::new();
2724
2725 self.skip_whitespace();
2726 if self.position < self.tokens.len()
2727 && self.tokens[self.position].token_type == TokenType::Func
2728 && self.tokens[self.position].subtype == TokenSubType::Close
2729 {
2730 self.position += 1;
2731 return Ok(args);
2732 }
2733
2734 self.skip_whitespace();
2735 if self.position < self.tokens.len()
2736 && self.tokens[self.position].token_type == TokenType::Sep
2737 && self.tokens[self.position].subtype == TokenSubType::Arg
2738 {
2739 args.push(ASTNode::new(
2740 ASTNodeType::Literal(LiteralValue::Text("".to_string())),
2741 None,
2742 ));
2743 self.position += 1;
2744 } else {
2745 args.push(self.parse_expression()?);
2746 }
2747
2748 while self.position < self.tokens.len() {
2749 self.skip_whitespace();
2750 if self.position >= self.tokens.len() {
2751 break;
2752 }
2753
2754 let token = &self.tokens[self.position];
2755 if token.token_type == TokenType::Sep && token.subtype == TokenSubType::Arg {
2756 self.position += 1;
2757 self.skip_whitespace();
2758 if self.position < self.tokens.len() {
2759 let next_token = &self.tokens[self.position];
2760 if next_token.token_type == TokenType::Sep
2761 && next_token.subtype == TokenSubType::Arg
2762 {
2763 args.push(ASTNode::new(
2764 ASTNodeType::Literal(LiteralValue::Text("".to_string())),
2765 None,
2766 ));
2767 } else if next_token.token_type == TokenType::Func
2768 && next_token.subtype == TokenSubType::Close
2769 {
2770 args.push(ASTNode::new(
2771 ASTNodeType::Literal(LiteralValue::Text("".to_string())),
2772 None,
2773 ));
2774 self.position += 1;
2775 break;
2776 } else {
2777 args.push(self.parse_expression()?);
2778 }
2779 } else {
2780 args.push(ASTNode::new(
2781 ASTNodeType::Literal(LiteralValue::Text("".to_string())),
2782 None,
2783 ));
2784 }
2785 } else if token.token_type == TokenType::Func && token.subtype == TokenSubType::Close {
2786 self.position += 1;
2787 break;
2788 } else {
2789 return Err(ParserError {
2790 message: format!("Expected ',' or ')' in function arguments, got {token:?}"),
2791 position: Some(self.position),
2792 });
2793 }
2794 }
2795
2796 Ok(args)
2797 }
2798
2799 fn parse_array(&mut self) -> Result<ASTNode, ParserError> {
2800 let mut rows = Vec::new();
2801 let mut current_row = Vec::new();
2802
2803 self.skip_whitespace();
2804 if self.position < self.tokens.len()
2805 && self.tokens[self.position].token_type == TokenType::Array
2806 && self.tokens[self.position].subtype == TokenSubType::Close
2807 {
2808 self.position += 1;
2809 return Ok(ASTNode::new(ASTNodeType::Array(rows), None));
2810 }
2811
2812 current_row.push(self.parse_expression()?);
2813
2814 while self.position < self.tokens.len() {
2815 self.skip_whitespace();
2816 if self.position >= self.tokens.len() {
2817 break;
2818 }
2819 let token = &self.tokens[self.position];
2820
2821 if token.token_type == TokenType::Sep {
2822 if token.subtype == TokenSubType::Arg {
2823 self.position += 1;
2824 current_row.push(self.parse_expression()?);
2825 } else if token.subtype == TokenSubType::Row {
2826 self.position += 1;
2827 rows.push(current_row);
2828 current_row = vec![self.parse_expression()?];
2829 }
2830 } else if token.token_type == TokenType::Array && token.subtype == TokenSubType::Close {
2831 self.position += 1;
2832 rows.push(current_row);
2833 break;
2834 } else {
2835 return Err(ParserError {
2836 message: format!("Unexpected token in array: {token:?}"),
2837 position: Some(self.position),
2838 });
2839 }
2840 }
2841
2842 let contains_volatile = rows
2843 .iter()
2844 .flat_map(|r| r.iter())
2845 .any(|n| n.contains_volatile);
2846
2847 Ok(ASTNode::new_with_volatile(
2848 ASTNodeType::Array(rows),
2849 None,
2850 contains_volatile,
2851 ))
2852 }
2853}
2854
2855pub fn normalise_reference(reference: &str) -> Result<String, ParsingError> {
2857 let ref_type = ReferenceType::from_string(reference)?;
2858 Ok(ref_type.to_string())
2859}
2860
2861pub fn parse<T: AsRef<str>>(formula: T) -> Result<ASTNode, ParserError> {
2862 parse_with_dialect(formula, FormulaDialect::Excel)
2863}
2864
2865pub fn parse_with_dialect<T: AsRef<str>>(
2866 formula: T,
2867 dialect: FormulaDialect,
2868) -> Result<ASTNode, ParserError> {
2869 let spans = crate::tokenizer::tokenize_spans_with_dialect(formula.as_ref(), dialect)?;
2870 let mut parser = SpanParser::new(formula.as_ref(), &spans, dialect);
2871 parser.parse()
2872}
2873
2874pub fn parse_with_volatility_classifier<T, F>(
2877 formula: T,
2878 classifier: F,
2879) -> Result<ASTNode, ParserError>
2880where
2881 T: AsRef<str>,
2882 F: Fn(&str) -> bool + Send + Sync + 'static,
2883{
2884 parse_with_dialect_and_volatility_classifier(formula, FormulaDialect::Excel, classifier)
2885}
2886
2887pub fn parse_with_dialect_and_volatility_classifier<T, F>(
2888 formula: T,
2889 dialect: FormulaDialect,
2890 classifier: F,
2891) -> Result<ASTNode, ParserError>
2892where
2893 T: AsRef<str>,
2894 F: Fn(&str) -> bool + Send + Sync + 'static,
2895{
2896 let spans = crate::tokenizer::tokenize_spans_with_dialect(formula.as_ref(), dialect)?;
2897 let mut parser =
2898 SpanParser::new(formula.as_ref(), &spans, dialect).with_volatility_classifier(classifier);
2899 parser.parse()
2900}
2901
/// Parser for many formulas that share one configuration.
///
/// Token spans are cached per formula text, so parsing the same formula
/// string again skips tokenization. Instances are created through
/// `BatchParser::builder()`.
pub struct BatchParser {
    /// When `false`, whitespace tokens are removed from a formula's spans
    /// before they are cached and parsed.
    include_whitespace: bool,
    /// Optional classifier that decides which function names are volatile;
    /// shared with every parser created by `parse`.
    volatility_classifier: Option<VolatilityClassifierArc>,
    /// Token spans of previously parsed formulas, keyed by the exact formula
    /// text.
    token_cache: std::collections::HashMap<String, Arc<[crate::tokenizer::TokenSpan]>>,
    /// Dialect used for both tokenizing and parsing.
    dialect: FormulaDialect,
}
2912
2913impl BatchParser {
2914 pub fn builder() -> BatchParserBuilder {
2915 BatchParserBuilder::default()
2916 }
2917
2918 pub fn parse(&mut self, formula: &str) -> Result<ASTNode, ParserError> {
2920 let spans = if let Some(tokens) = self.token_cache.get(formula) {
2921 Arc::clone(tokens)
2922 } else {
2923 let mut spans = crate::tokenizer::tokenize_spans_with_dialect(formula, self.dialect)?;
2924 if !self.include_whitespace {
2925 spans.retain(|t| t.token_type != TokenType::Whitespace);
2926 }
2927
2928 let spans: Arc<[crate::tokenizer::TokenSpan]> = Arc::from(spans.into_boxed_slice());
2929 self.token_cache
2930 .insert(formula.to_string(), Arc::clone(&spans));
2931 spans
2932 };
2933
2934 let mut parser = SpanParser::new(formula, spans.as_ref(), self.dialect);
2935 if let Some(classifier) = self.volatility_classifier.clone() {
2936 parser = parser.with_volatility_classifier(move |name| classifier(name));
2937 }
2938 parser.parse()
2939 }
2940}
2941
/// Builder for [`BatchParser`].
///
/// The derived `Default` starts with `include_whitespace` set to `false`, no
/// volatility classifier, and the default `FormulaDialect`.
#[derive(Default)]
pub struct BatchParserBuilder {
    /// Whether whitespace tokens are kept in the spans handed to the parser.
    include_whitespace: bool,
    /// Optional classifier that decides which function names are volatile.
    volatility_classifier: Option<VolatilityClassifierArc>,
    /// Dialect the built parser tokenizes and parses with.
    dialect: FormulaDialect,
}
2948
2949impl BatchParserBuilder {
2950 pub fn include_whitespace(mut self, include: bool) -> Self {
2951 self.include_whitespace = include;
2952 self
2953 }
2954
2955 pub fn with_volatility_classifier<F>(mut self, f: F) -> Self
2956 where
2957 F: Fn(&str) -> bool + Send + Sync + 'static,
2958 {
2959 self.volatility_classifier = Some(Arc::new(f));
2960 self
2961 }
2962
2963 pub fn dialect(mut self, dialect: FormulaDialect) -> Self {
2964 self.dialect = dialect;
2965 self
2966 }
2967
2968 pub fn build(self) -> BatchParser {
2969 BatchParser {
2970 include_whitespace: self.include_whitespace,
2971 volatility_classifier: self.volatility_classifier,
2972 token_cache: std::collections::HashMap::new(),
2973 dialect: self.dialect,
2974 }
2975 }
2976}