1use crate::tokenizer::{Associativity, Token, TokenSubType, TokenType, Tokenizer, TokenizerError};
2use crate::types::{FormulaDialect, ParsingError};
3use crate::{ExcelError, LiteralValue};
4
5use crate::hasher::FormulaHasher;
6use formualizer_common::coord::{
7 col_index_from_letters_1based, col_letters_from_1based, parse_a1_1based,
8};
9use formualizer_common::{
10 AxisBound, RelativeCoord, SheetCellRef, SheetLocator, SheetRangeRef, SheetRef,
11};
12use once_cell::sync::Lazy;
13use smallvec::SmallVec;
14use std::error::Error;
15use std::fmt::{self, Display};
16use std::hash::{Hash, Hasher};
17use std::str::FromStr;
18use std::sync::Arc;
19
20type VolatilityFn = dyn Fn(&str) -> bool + Send + Sync + 'static;
21type VolatilityClassifierBox = Box<VolatilityFn>;
22type VolatilityClassifierArc = Arc<VolatilityFn>;
23
24#[derive(Debug)]
26pub struct ParserError {
27 pub message: String,
28 pub position: Option<usize>,
29}
30
31impl Display for ParserError {
32 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
33 if let Some(pos) = self.position {
34 write!(f, "ParserError at position {}: {}", pos, self.message)
35 } else {
36 write!(f, "ParserError: {}", self.message)
37 }
38 }
39}
40
41impl Error for ParserError {}
42
43static COLUMN_LOOKUP: Lazy<Vec<String>> = Lazy::new(|| {
45 let mut cols = Vec::with_capacity(702);
46 for c in b'A'..=b'Z' {
48 cols.push(String::from(c as char));
49 }
50 for c1 in b'A'..=b'Z' {
52 for c2 in b'A'..=b'Z' {
53 cols.push(format!("{}{}", c1 as char, c2 as char));
54 }
55 }
56 cols
57});
58
59#[derive(Debug, Clone, PartialEq, Hash)]
61pub enum TableSpecifier {
62 All,
64 Data,
66 Headers,
68 Totals,
70 Row(TableRowSpecifier),
72 Column(String),
74 ColumnRange(String, String),
76 SpecialItem(SpecialItem),
78 Combination(Vec<Box<TableSpecifier>>),
80}
81
82#[derive(Debug, Clone, PartialEq, Hash)]
84pub enum TableRowSpecifier {
85 Current,
87 All,
89 Data,
91 Headers,
93 Totals,
95 Index(u32),
97}
98
99#[derive(Debug, Clone, PartialEq, Hash)]
101pub enum SpecialItem {
102 Headers,
104 Data,
106 Totals,
108 All,
110 ThisRow,
112}
113
114#[derive(Debug, Clone, PartialEq, Hash)]
116pub struct TableReference {
117 pub name: String,
119 pub specifier: Option<TableSpecifier>,
121}
122
123#[derive(Debug, Clone, PartialEq, Hash)]
125pub enum ReferenceType {
126 Cell {
127 sheet: Option<String>,
128 row: u32,
129 col: u32,
130 },
131 Range {
132 sheet: Option<String>,
133 start_row: Option<u32>,
134 start_col: Option<u32>,
135 end_row: Option<u32>,
136 end_col: Option<u32>,
137 },
138 Table(TableReference),
139 NamedRange(String),
140}
141
142impl Display for TableSpecifier {
143 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
144 match self {
145 TableSpecifier::All => write!(f, "#All"),
146 TableSpecifier::Data => write!(f, "#Data"),
147 TableSpecifier::Headers => write!(f, "#Headers"),
148 TableSpecifier::Totals => write!(f, "#Totals"),
149 TableSpecifier::Row(row) => write!(f, "{row}"),
150 TableSpecifier::Column(column) => write!(f, "{column}"),
151 TableSpecifier::ColumnRange(start, end) => write!(f, "{start}:{end}"),
152 TableSpecifier::SpecialItem(item) => write!(f, "{item}"),
153 TableSpecifier::Combination(specs) => {
154 let parts: Vec<String> = specs.iter().map(|s| format!("[{s}]")).collect();
157 write!(f, "{}", parts.join(","))
158 }
159 }
160 }
161}
162
163impl Display for TableRowSpecifier {
164 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
165 match self {
166 TableRowSpecifier::Current => write!(f, "@"),
167 TableRowSpecifier::All => write!(f, "#All"),
168 TableRowSpecifier::Data => write!(f, "#Data"),
169 TableRowSpecifier::Headers => write!(f, "#Headers"),
170 TableRowSpecifier::Totals => write!(f, "#Totals"),
171 TableRowSpecifier::Index(idx) => write!(f, "{idx}"),
172 }
173 }
174}
175
176impl Display for SpecialItem {
177 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
178 match self {
179 SpecialItem::Headers => write!(f, "#Headers"),
180 SpecialItem::Data => write!(f, "#Data"),
181 SpecialItem::Totals => write!(f, "#Totals"),
182 SpecialItem::All => write!(f, "#All"),
183 SpecialItem::ThisRow => write!(f, "@"),
184 }
185 }
186}
187
188fn sheet_name_needs_quoting(name: &str) -> bool {
190 if name.is_empty() {
191 return false;
192 }
193
194 let bytes = name.as_bytes();
195
196 if bytes[0].is_ascii_digit() {
198 return true;
199 }
200
201 for &byte in bytes {
204 match byte {
205 b' ' | b'!' | b'"' | b'#' | b'$' | b'%' | b'&' | b'\'' | b'(' | b')' | b'*' | b'+'
206 | b',' | b'-' | b'.' | b'/' | b':' | b';' | b'<' | b'=' | b'>' | b'?' | b'@' | b'['
207 | b'\\' | b']' | b'^' | b'`' | b'{' | b'|' | b'}' | b'~' => return true,
208 _ => {}
209 }
210 }
211
212 let upper = name.to_uppercase();
214 matches!(
215 upper.as_str(),
216 "TRUE" | "FALSE" | "NULL" | "REF" | "DIV" | "NAME" | "NUM" | "VALUE" | "N/A"
217 )
218}
219
220#[derive(Debug, Clone)]
221struct OpenFormulaRefPart {
222 sheet: Option<String>,
223 coord: String,
224}
225
226impl ReferenceType {
227 pub fn from_string(reference: &str) -> Result<Self, ParsingError> {
229 Self::parse_excel_reference(reference)
230 }
231
232 pub fn from_string_with_dialect(
234 reference: &str,
235 dialect: FormulaDialect,
236 ) -> Result<Self, ParsingError> {
237 match dialect {
238 FormulaDialect::Excel => Self::parse_excel_reference(reference),
239 FormulaDialect::OpenFormula => Self::parse_openformula_reference(reference)
240 .or_else(|_| Self::parse_excel_reference(reference)),
241 }
242 }
243
244 pub fn parse_sheet_ref(reference: &str) -> Result<SheetRef<'static>, ParsingError> {
248 Self::parse_sheet_ref_with_dialect(reference, FormulaDialect::Excel)
249 }
250
251 pub fn parse_sheet_ref_with_dialect(
253 reference: &str,
254 dialect: FormulaDialect,
255 ) -> Result<SheetRef<'static>, ParsingError> {
256 match dialect {
257 FormulaDialect::Excel => Self::parse_excel_sheet_ref(reference),
258 FormulaDialect::OpenFormula => Self::parse_openformula_sheet_ref(reference)
259 .or_else(|_| Self::parse_excel_sheet_ref(reference)),
260 }
261 }
262
263 pub fn to_sheet_ref_lossy(&self) -> Option<SheetRef<'_>> {
266 fn bound_from_1based(v: Option<u32>) -> Option<AxisBound> {
267 v.and_then(|x| x.checked_sub(1).map(|i| AxisBound::new(i, false)))
268 }
269
270 match self {
271 ReferenceType::Cell { sheet, row, col } => {
272 let row0 = row.checked_sub(1)?;
273 let col0 = col.checked_sub(1)?;
274 let sheet_loc = match sheet.as_deref() {
275 Some(name) => SheetLocator::from_name(name),
276 None => SheetLocator::Current,
277 };
278 let coord = RelativeCoord::new(row0, col0, false, false);
279 Some(SheetRef::Cell(SheetCellRef::new(sheet_loc, coord)))
280 }
281 ReferenceType::Range {
282 sheet,
283 start_row,
284 start_col,
285 end_row,
286 end_col,
287 } => {
288 let sheet_loc = match sheet.as_deref() {
289 Some(name) => SheetLocator::from_name(name),
290 None => SheetLocator::Current,
291 };
292 let sr = bound_from_1based(*start_row);
293 if start_row.is_some() && sr.is_none() {
294 return None;
295 }
296 let sc = bound_from_1based(*start_col);
297 if start_col.is_some() && sc.is_none() {
298 return None;
299 }
300 let er = bound_from_1based(*end_row);
301 if end_row.is_some() && er.is_none() {
302 return None;
303 }
304 let ec = bound_from_1based(*end_col);
305 if end_col.is_some() && ec.is_none() {
306 return None;
307 }
308 let range = SheetRangeRef::from_parts(sheet_loc, sr, sc, er, ec).ok()?;
309 Some(SheetRef::Range(range))
310 }
311 _ => None,
312 }
313 }
314
315 fn parse_excel_sheet_ref(reference: &str) -> Result<SheetRef<'static>, ParsingError> {
316 if reference.contains('[') {
317 return Err(ParsingError::InvalidReference(
318 "Table references are not supported for SheetRef".to_string(),
319 ));
320 }
321
322 let (sheet, ref_part) = Self::extract_sheet_name(reference);
323 let sheet_loc: SheetLocator<'static> = match sheet {
324 Some(name) => SheetLocator::from_name(name),
325 None => SheetLocator::Current,
326 };
327
328 if ref_part.contains(':') {
329 let mut parts = ref_part.splitn(2, ':');
330 let start = parts.next().unwrap();
331 let end = parts.next().ok_or_else(|| {
332 ParsingError::InvalidReference(format!("Invalid range: {ref_part}"))
333 })?;
334
335 let (start_col, start_row) = Self::parse_range_part_with_abs(start)?;
336 let (end_col, end_row) = Self::parse_range_part_with_abs(end)?;
337
338 let range =
339 SheetRangeRef::from_parts(sheet_loc, start_row, start_col, end_row, end_col)
340 .map_err(|err| ParsingError::InvalidReference(err.to_string()))?;
341 Ok(SheetRef::Range(range))
342 } else {
343 let (row, col, row_abs, col_abs) = parse_a1_1based(&ref_part)
344 .map_err(|err| ParsingError::InvalidReference(err.to_string()))?;
345 let coord = RelativeCoord::new(row - 1, col - 1, row_abs, col_abs);
346 Ok(SheetRef::Cell(SheetCellRef::new(sheet_loc, coord)))
347 }
348 }
349
350 fn parse_openformula_sheet_ref(reference: &str) -> Result<SheetRef<'static>, ParsingError> {
351 Self::parse_excel_sheet_ref(reference)
352 }
353
354 fn parse_range_part_with_abs(
355 part: &str,
356 ) -> Result<(Option<AxisBound>, Option<AxisBound>), ParsingError> {
357 if let Ok((row, col, row_abs, col_abs)) = parse_a1_1based(part) {
358 let row_b = AxisBound::new(row - 1, row_abs);
359 let col_b = AxisBound::new(col - 1, col_abs);
360 return Ok((Some(col_b), Some(row_b)));
361 }
362
363 let bytes = part.as_bytes();
364 let len = bytes.len();
365 let mut i = 0usize;
366
367 let mut col_abs = false;
368 let mut row_abs = false;
369
370 if i < len && bytes[i] == b'$' {
371 col_abs = true;
372 i += 1;
373 }
374
375 let col_start = i;
376 while i < len && bytes[i].is_ascii_alphabetic() {
377 i += 1;
378 }
379
380 if i > col_start {
381 let col_str = &part[col_start..i];
382 let col1 = Self::column_to_number(col_str)?;
383
384 if i == len {
385 let col_b = AxisBound::new(col1 - 1, col_abs);
386 return Ok((Some(col_b), None));
387 }
388
389 if i < len && bytes[i] == b'$' {
390 row_abs = true;
391 i += 1;
392 }
393
394 if i >= len {
395 return Err(ParsingError::InvalidReference(format!(
396 "Invalid range part: {part}"
397 )));
398 }
399
400 let row_start = i;
401 while i < len && bytes[i].is_ascii_digit() {
402 i += 1;
403 }
404
405 if row_start == i || i != len {
406 return Err(ParsingError::InvalidReference(format!(
407 "Invalid range part: {part}"
408 )));
409 }
410
411 let row_str = &part[row_start..i];
412 let row1 = row_str
413 .parse::<u32>()
414 .map_err(|_| ParsingError::InvalidReference(format!("Invalid row: {row_str}")))?;
415 if row1 == 0 {
416 return Err(ParsingError::InvalidReference(format!(
417 "Invalid range part: {part}"
418 )));
419 }
420
421 let col_b = AxisBound::new(col1 - 1, col_abs);
422 let row_b = AxisBound::new(row1 - 1, row_abs);
423 return Ok((Some(col_b), Some(row_b)));
424 }
425
426 i = 0;
427 if i < len && bytes[i] == b'$' {
428 row_abs = true;
429 i += 1;
430 }
431
432 let row_start = i;
433 while i < len && bytes[i].is_ascii_digit() {
434 i += 1;
435 }
436
437 if row_start == i || i != len {
438 return Err(ParsingError::InvalidReference(format!(
439 "Invalid range part: {part}"
440 )));
441 }
442
443 let row_str = &part[row_start..i];
444 let row1 = row_str
445 .parse::<u32>()
446 .map_err(|_| ParsingError::InvalidReference(format!("Invalid row: {row_str}")))?;
447 if row1 == 0 {
448 return Err(ParsingError::InvalidReference(format!(
449 "Invalid range part: {part}"
450 )));
451 }
452
453 Ok((None, Some(AxisBound::new(row1 - 1, row_abs))))
454 }
455
456 fn parse_excel_reference(reference: &str) -> Result<Self, ParsingError> {
457 if reference.contains('[') {
459 return Self::parse_table_reference(reference);
460 }
461
462 let (sheet, ref_part) = Self::extract_sheet_name(reference);
464
465 if ref_part.contains(':') {
466 Self::parse_range_reference(&ref_part, sheet)
468 } else {
469 match Self::parse_cell_reference(&ref_part) {
471 Ok((col, row)) => Ok(ReferenceType::Cell { sheet, row, col }),
472 Err(_) => {
473 Ok(ReferenceType::NamedRange(reference.to_string()))
475 }
476 }
477 }
478 }
479
480 fn parse_range_reference(reference: &str, sheet: Option<String>) -> Result<Self, ParsingError> {
482 let mut parts = reference.splitn(2, ':');
483 let start = parts.next().unwrap();
484 let end = parts
485 .next()
486 .ok_or_else(|| ParsingError::InvalidReference(format!("Invalid range: {reference}")))?;
487
488 let (start_col, start_row) = Self::parse_range_part(start)?;
489 let (end_col, end_row) = Self::parse_range_part(end)?;
490
491 Ok(ReferenceType::Range {
492 sheet,
493 start_row,
494 start_col,
495 end_row,
496 end_col,
497 })
498 }
499
500 fn parse_range_part(part: &str) -> Result<(Option<u32>, Option<u32>), ParsingError> {
503 if let Ok((col, row)) = Self::parse_cell_reference(part) {
505 return Ok((Some(col), Some(row)));
506 }
507
508 let bytes = part.as_bytes();
510 let mut i = 0;
511
512 if i < bytes.len() && bytes[i] == b'$' {
514 i += 1;
515 }
516
517 let col_start = i;
519 while i < bytes.len() && bytes[i].is_ascii_alphabetic() {
520 i += 1;
521 }
522
523 if i > col_start {
524 let col_str = &part[col_start..i];
526 let col = Self::column_to_number(col_str)?;
527
528 if i < bytes.len() && bytes[i] == b'$' {
530 i += 1;
531 }
532
533 let row_start = i;
535 while i < bytes.len() && bytes[i].is_ascii_digit() {
536 i += 1;
537 }
538
539 if i > row_start && i == bytes.len() {
540 let row_str = &part[row_start..i];
542 let row = row_str.parse::<u32>().map_err(|_| {
543 ParsingError::InvalidReference(format!("Invalid row: {row_str}"))
544 })?;
545 return Ok((Some(col), Some(row)));
546 } else if i == col_start + col_str.len()
547 || (i == col_start + col_str.len() + 1 && bytes[col_start + col_str.len()] == b'$')
548 {
549 return Ok((Some(col), None));
551 }
552 } else {
553 i = 0;
555 if i < bytes.len() && bytes[i] == b'$' {
556 i += 1;
557 }
558
559 let row_start = i;
560 while i < bytes.len() && bytes[i].is_ascii_digit() {
561 i += 1;
562 }
563
564 if i > row_start && i == bytes.len() {
565 let row_str = &part[row_start..i];
566 let row = row_str.parse::<u32>().map_err(|_| {
567 ParsingError::InvalidReference(format!("Invalid row: {row_str}"))
568 })?;
569 return Ok((None, Some(row)));
570 }
571 }
572
573 Err(ParsingError::InvalidReference(format!(
574 "Invalid range part: {part}"
575 )))
576 }
577
578 fn parse_cell_reference(reference: &str) -> Result<(u32, u32), ParsingError> {
580 parse_a1_1based(reference)
581 .map(|(row, col, _, _)| (col, row))
582 .map_err(|_| {
583 ParsingError::InvalidReference(format!("Invalid cell reference: {reference}"))
584 })
585 }
586
587 pub(crate) fn column_to_number(column: &str) -> Result<u32, ParsingError> {
589 col_index_from_letters_1based(column)
590 .map_err(|_| ParsingError::InvalidReference(format!("Invalid column: {column}")))
591 }
592
593 pub(crate) fn number_to_column(num: u32) -> String {
595 if num == 0 {
596 return String::new();
597 }
598 if num > 0 && num <= 702 {
600 return COLUMN_LOOKUP[(num - 1) as usize].clone();
601 }
602
603 col_letters_from_1based(num).unwrap_or_default()
604 }
605}
606
607impl Display for ReferenceType {
608 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
609 write!(
610 f,
611 "{}",
612 match self {
613 ReferenceType::Cell { sheet, row, col } => {
614 let col_str = Self::number_to_column(*col);
615 let row_str = row.to_string();
616
617 if let Some(sheet_name) = sheet {
618 if sheet_name_needs_quoting(sheet_name) {
619 let escaped_name = sheet_name.replace('\'', "''");
621 format!("'{escaped_name}'!{col_str}{row_str}")
622 } else {
623 format!("{sheet_name}!{col_str}{row_str}")
624 }
625 } else {
626 format!("{col_str}{row_str}")
627 }
628 }
629 ReferenceType::Range {
630 sheet,
631 start_row,
632 start_col,
633 end_row,
634 end_col,
635 } => {
636 let start_ref = match (start_col, start_row) {
638 (Some(col), Some(row)) => {
639 format!("{}{}", Self::number_to_column(*col), row)
640 }
641 (Some(col), None) => Self::number_to_column(*col),
642 (None, Some(row)) => row.to_string(),
643 (None, None) => "".to_string(), };
645
646 let end_ref = match (end_col, end_row) {
648 (Some(col), Some(row)) => {
649 format!("{}{}", Self::number_to_column(*col), row)
650 }
651 (Some(col), None) => Self::number_to_column(*col),
652 (None, Some(row)) => row.to_string(),
653 (None, None) => "".to_string(), };
655
656 let range_part = format!("{start_ref}:{end_ref}");
657
658 if let Some(sheet_name) = sheet {
659 if sheet_name_needs_quoting(sheet_name) {
660 let escaped_name = sheet_name.replace('\'', "''");
662 format!("'{escaped_name}'!{range_part}")
663 } else {
664 format!("{sheet_name}!{range_part}")
665 }
666 } else {
667 range_part
668 }
669 }
670 ReferenceType::Table(table_ref) => {
671 if let Some(specifier) = &table_ref.specifier {
672 match specifier {
675 TableSpecifier::Column(column) => {
676 format!("{}[{}]", table_ref.name, column.trim())
677 }
678 TableSpecifier::ColumnRange(start, end) => {
679 format!("{}[{}:{}]", table_ref.name, start.trim(), end.trim())
680 }
681 _ => {
682 format!("{}[{}]", table_ref.name, specifier)
684 }
685 }
686 } else {
687 table_ref.name.clone()
688 }
689 }
690 ReferenceType::NamedRange(name) => name.clone(),
691 }
692 )
693 }
694}
695
696impl TryFrom<&str> for ReferenceType {
697 type Error = ParsingError;
698
699 fn try_from(value: &str) -> Result<Self, Self::Error> {
700 ReferenceType::from_string(value)
701 }
702}
703
704impl FromStr for ReferenceType {
705 type Err = ParsingError;
706
707 fn from_str(s: &str) -> Result<Self, Self::Err> {
708 ReferenceType::from_string(s)
709 }
710}
711
712impl ReferenceType {
713 pub fn normalise(&self) -> String {
715 format!("{self}")
716 }
717
718 fn extract_sheet_name(reference: &str) -> (Option<String>, String) {
720 let bytes = reference.as_bytes();
721 let mut i = 0;
722
723 if i < bytes.len() && bytes[i] == b'\'' {
727 i += 1;
728 let start = i;
729
730 while i < bytes.len() {
731 if bytes[i] == b'\'' {
732 if i + 1 < bytes.len() && bytes[i + 1] == b'\'' {
734 i += 2;
735 continue;
736 }
737
738 if i + 1 < bytes.len() && bytes[i + 1] == b'!' {
740 let raw = &reference[start..i];
741 let sheet = raw.replace("''", "'");
742 let ref_part = String::from(&reference[i + 2..]);
743 return (Some(sheet), ref_part);
744 }
745 }
746
747 i += 1;
748 }
749 }
750
751 i = 0;
753 while i < bytes.len() {
754 if bytes[i] == b'!' && i > 0 {
755 let sheet = String::from(&reference[0..i]);
756 let ref_part = String::from(&reference[i + 1..]);
757 return (Some(sheet), ref_part);
758 }
759 i += 1;
760 }
761
762 (None, reference.to_string())
763 }
764
765 fn parse_table_reference(reference: &str) -> Result<Self, ParsingError> {
767 if let Some(bracket_pos) = reference.find('[') {
769 let table_name = reference[..bracket_pos].trim();
770 if table_name.is_empty() {
771 return Err(ParsingError::InvalidReference(reference.to_string()));
772 }
773
774 let specifier_str = &reference[bracket_pos..];
775 let specifier = Self::parse_table_specifier(specifier_str)?;
776
777 Ok(ReferenceType::Table(TableReference {
778 name: table_name.to_string(),
779 specifier,
780 }))
781 } else {
782 Err(ParsingError::InvalidReference(reference.to_string()))
783 }
784 }
785
786 fn parse_table_specifier(specifier_str: &str) -> Result<Option<TableSpecifier>, ParsingError> {
788 if specifier_str.is_empty() || !specifier_str.starts_with('[') {
789 return Ok(None);
790 }
791
792 let mut depth = 0;
794 let mut end_pos = 0;
795
796 for (i, c) in specifier_str.chars().enumerate() {
797 if c == '[' {
798 depth += 1;
799 } else if c == ']' {
800 depth -= 1;
801 if depth == 0 {
802 end_pos = i;
803 break;
804 }
805 }
806 }
807
808 if depth != 0 || end_pos == 0 {
809 return Err(ParsingError::InvalidReference(format!(
810 "Unbalanced brackets in table specifier: {specifier_str}"
811 )));
812 }
813
814 let content = &specifier_str[1..end_pos];
816
817 if content.is_empty() {
819 return Ok(Some(TableSpecifier::All));
821 }
822
823 if content.starts_with("#") {
825 return Self::parse_special_item(content);
826 }
827
828 if !content.contains('[') && !content.contains('#') {
830 if let Some(colon_pos) = content.find(':') {
832 let start = content[..colon_pos].trim();
833 let end = content[colon_pos + 1..].trim();
834 return Ok(Some(TableSpecifier::ColumnRange(
835 start.to_string(),
836 end.to_string(),
837 )));
838 } else {
839 return Ok(Some(TableSpecifier::Column(content.trim().to_string())));
841 }
842 }
843
844 if content.contains('[') {
846 return Self::parse_complex_table_specifier(content);
847 }
848
849 Ok(Some(TableSpecifier::Column(content.trim().to_string())))
851 }
852
853 fn parse_openformula_reference(reference: &str) -> Result<Self, ParsingError> {
854 if reference.starts_with('[') && reference.ends_with(']') {
855 let inner = &reference[1..reference.len() - 1];
856 if inner.is_empty() {
857 return Err(ParsingError::InvalidReference(
858 "Empty OpenFormula reference".to_string(),
859 ));
860 }
861
862 let mut parts = inner.splitn(2, ':');
863 let start_part_str = parts.next().unwrap();
864 let end_part_str = parts.next();
865
866 let start_part = Self::parse_openformula_part(start_part_str)?;
867 let end_part = if let Some(part) = end_part_str {
868 Some(Self::parse_openformula_part(part)?)
869 } else {
870 None
871 };
872
873 let sheet = match (&start_part.sheet, &end_part) {
874 (Some(sheet), Some(end)) => {
875 if let Some(end_sheet) = &end.sheet
876 && end_sheet != sheet
877 {
878 return Err(ParsingError::InvalidReference(format!(
879 "Mismatched sheets in reference: {sheet} vs {end_sheet}"
880 )));
881 }
882 Some(sheet.clone())
883 }
884 (Some(sheet), None) => Some(sheet.clone()),
885 (None, Some(end)) => end.sheet.clone(),
886 (None, None) => None,
887 };
888
889 let mut excel_like = String::new();
890 if let Some(sheet_name) = sheet {
891 if sheet_name_needs_quoting(&sheet_name) {
892 let escaped = sheet_name.replace('\'', "''");
893 excel_like.push('\'');
894 excel_like.push_str(&escaped);
895 excel_like.push('\'');
896 } else {
897 excel_like.push_str(&sheet_name);
898 }
899 excel_like.push('!');
900 }
901
902 excel_like.push_str(&start_part.coord);
903 if let Some(end) = end_part {
904 excel_like.push(':');
905 excel_like.push_str(&end.coord);
906 }
907
908 return Self::parse_excel_reference(&excel_like);
909 }
910
911 Err(ParsingError::InvalidReference(format!(
912 "Unsupported OpenFormula reference: {reference}"
913 )))
914 }
915
916 fn parse_openformula_part(part: &str) -> Result<OpenFormulaRefPart, ParsingError> {
917 let trimmed = part.trim();
918 if trimmed.is_empty() {
919 return Err(ParsingError::InvalidReference(
920 "Empty component in OpenFormula reference".to_string(),
921 ));
922 }
923
924 if trimmed == "." {
925 return Err(ParsingError::InvalidReference(
926 "Incomplete OpenFormula reference component".to_string(),
927 ));
928 }
929
930 if trimmed.starts_with('[') {
931 return Err(ParsingError::InvalidReference(format!(
933 "Unexpected '[' in OpenFormula reference component: {trimmed}"
934 )));
935 }
936
937 let (sheet, coord_slice) = if let Some(stripped) = trimmed.strip_prefix('.') {
938 (None, stripped.trim())
939 } else if let Some(dot_idx) = Self::find_openformula_sheet_separator(trimmed) {
940 let sheet_part = trimmed[..dot_idx].trim();
941 let coord_part = trimmed[dot_idx + 1..].trim();
942 if coord_part.is_empty() {
943 return Err(ParsingError::InvalidReference(format!(
944 "Missing coordinate in OpenFormula reference component: {trimmed}"
945 )));
946 }
947 let sheet_name = Self::normalise_openformula_sheet(sheet_part)?;
948 (Some(sheet_name), coord_part)
949 } else {
950 (None, trimmed)
951 };
952
953 let coord = coord_slice.trim_start_matches('.').trim().to_string();
954
955 if coord.is_empty() {
956 return Err(ParsingError::InvalidReference(format!(
957 "Missing coordinate in OpenFormula reference component: {trimmed}"
958 )));
959 }
960
961 Ok(OpenFormulaRefPart { sheet, coord })
962 }
963
964 fn normalise_openformula_sheet(sheet: &str) -> Result<String, ParsingError> {
965 let without_abs = sheet.trim().trim_start_matches('$');
966
967 if without_abs.starts_with('\'') {
968 if without_abs.len() < 2 || !without_abs.ends_with('\'') {
969 return Err(ParsingError::InvalidReference(format!(
970 "Unterminated sheet name in OpenFormula reference: {sheet}"
971 )));
972 }
973 let inner = &without_abs[1..without_abs.len() - 1];
974 Ok(inner.replace("''", "'"))
975 } else {
976 Ok(without_abs.to_string())
977 }
978 }
979
980 fn find_openformula_sheet_separator(part: &str) -> Option<usize> {
981 let bytes = part.as_bytes();
982 let mut i = 0;
983 let mut in_quotes = false;
984
985 while i < bytes.len() {
986 match bytes[i] {
987 b'\'' => {
988 if i + 1 < bytes.len() && bytes[i + 1] == b'\'' {
989 i += 2;
990 continue;
991 }
992 in_quotes = !in_quotes;
993 i += 1;
994 }
995 b'.' if !in_quotes => return Some(i),
996 _ => i += 1,
997 }
998 }
999
1000 None
1001 }
1002
1003 fn parse_special_item(content: &str) -> Result<Option<TableSpecifier>, ParsingError> {
1005 match content {
1006 "#All" => Ok(Some(TableSpecifier::SpecialItem(SpecialItem::All))),
1007 "#Headers" => Ok(Some(TableSpecifier::SpecialItem(SpecialItem::Headers))),
1008 "#Data" => Ok(Some(TableSpecifier::SpecialItem(SpecialItem::Data))),
1009 "#Totals" => Ok(Some(TableSpecifier::SpecialItem(SpecialItem::Totals))),
1010 "@" => Ok(Some(TableSpecifier::Row(TableRowSpecifier::Current))),
1011 _ => Err(ParsingError::InvalidReference(format!(
1012 "Unknown special item: {content}"
1013 ))),
1014 }
1015 }
1016
1017 fn parse_complex_table_specifier(
1019 content: &str,
1020 ) -> Result<Option<TableSpecifier>, ParsingError> {
1021 if content.contains("[#Headers]")
1026 || content.contains("[#All]")
1027 || content.contains("[#Data]")
1028 || content.contains("[#Totals]")
1029 || content.contains("[@]")
1030 {
1031 let mut specifiers = Vec::new();
1034
1035 if content.contains("[#Headers]") {
1037 specifiers.push(Box::new(TableSpecifier::SpecialItem(SpecialItem::Headers)));
1038 }
1039 if content.contains("[#Data]") {
1040 specifiers.push(Box::new(TableSpecifier::SpecialItem(SpecialItem::Data)));
1041 }
1042 if content.contains("[#Totals]") {
1043 specifiers.push(Box::new(TableSpecifier::SpecialItem(SpecialItem::Totals)));
1044 }
1045 if content.contains("[#All]") {
1046 specifiers.push(Box::new(TableSpecifier::SpecialItem(SpecialItem::All)));
1047 }
1048
1049 if !specifiers.is_empty() {
1050 return Ok(Some(TableSpecifier::Combination(specifiers)));
1051 }
1052 }
1053
1054 Ok(Some(TableSpecifier::Column(content.trim().to_string())))
1056 }
1057
1058 pub fn to_excel_string(&self) -> String {
1060 match self {
1061 ReferenceType::Cell { sheet, row, col } => {
1062 if let Some(s) = sheet {
1063 if sheet_name_needs_quoting(s) {
1064 let escaped_name = s.replace('\'', "''");
1065 format!("'{}'!{}{}", escaped_name, Self::number_to_column(*col), row)
1066 } else {
1067 format!("{}!{}{}", s, Self::number_to_column(*col), row)
1068 }
1069 } else {
1070 format!("{}{}", Self::number_to_column(*col), row)
1071 }
1072 }
1073 ReferenceType::Range {
1074 sheet,
1075 start_row,
1076 start_col,
1077 end_row,
1078 end_col,
1079 } => {
1080 let start_ref = match (start_col, start_row) {
1082 (Some(col), Some(row)) => format!("{}{}", Self::number_to_column(*col), row),
1083 (Some(col), None) => Self::number_to_column(*col),
1084 (None, Some(row)) => row.to_string(),
1085 (None, None) => "".to_string(), };
1087
1088 let end_ref = match (end_col, end_row) {
1090 (Some(col), Some(row)) => format!("{}{}", Self::number_to_column(*col), row),
1091 (Some(col), None) => Self::number_to_column(*col),
1092 (None, Some(row)) => row.to_string(),
1093 (None, None) => "".to_string(), };
1095
1096 let range_part = format!("{start_ref}:{end_ref}");
1097
1098 if let Some(s) = sheet {
1099 if sheet_name_needs_quoting(s) {
1100 let escaped_name = s.replace('\'', "''");
1101 format!("'{escaped_name}'!{range_part}")
1102 } else {
1103 format!("{s}!{range_part}")
1104 }
1105 } else {
1106 range_part
1107 }
1108 }
1109 ReferenceType::Table(table_ref) => {
1110 if let Some(specifier) = &table_ref.specifier {
1111 format!("{}[{}]", table_ref.name, specifier)
1112 } else {
1113 table_ref.name.clone()
1114 }
1115 }
1116 ReferenceType::NamedRange(name) => name.clone(),
1117 }
1118 }
1119}
1120
1121#[derive(Debug, Clone, PartialEq, Hash)]
1123pub enum ASTNodeType {
1124 Literal(LiteralValue),
1125 Reference {
1126 original: String, reference: ReferenceType, },
1129 UnaryOp {
1130 op: String,
1131 expr: Box<ASTNode>,
1132 },
1133 BinaryOp {
1134 op: String,
1135 left: Box<ASTNode>,
1136 right: Box<ASTNode>,
1137 },
1138 Function {
1139 name: String,
1140 args: Vec<ASTNode>, },
1142 Array(Vec<Vec<ASTNode>>), }
1144
1145impl Display for ASTNodeType {
1146 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1147 match self {
1148 ASTNodeType::Literal(value) => write!(f, "Literal({value})"),
1149 ASTNodeType::Reference { reference, .. } => write!(f, "Reference({reference:?})"),
1150 ASTNodeType::UnaryOp { op, expr } => write!(f, "UnaryOp({op}, {expr})"),
1151 ASTNodeType::BinaryOp { op, left, right } => {
1152 write!(f, "BinaryOp({op}, {left}, {right})")
1153 }
1154 ASTNodeType::Function { name, args } => write!(f, "Function({name}, {args:?})"),
1155 ASTNodeType::Array(rows) => write!(f, "Array({rows:?})"),
1156 }
1157 }
1158}
1159
1160#[derive(Debug, Clone, PartialEq)]
1162pub struct ASTNode {
1163 pub node_type: ASTNodeType,
1164 pub source_token: Option<Token>,
1165 pub contains_volatile: bool,
1170}
1171
1172impl ASTNode {
1173 pub fn new(node_type: ASTNodeType, source_token: Option<Token>) -> Self {
1174 ASTNode {
1175 node_type,
1176 source_token,
1177 contains_volatile: false,
1178 }
1179 }
1180
1181 pub fn new_with_volatile(
1183 node_type: ASTNodeType,
1184 source_token: Option<Token>,
1185 contains_volatile: bool,
1186 ) -> Self {
1187 ASTNode {
1188 node_type,
1189 source_token,
1190 contains_volatile,
1191 }
1192 }
1193
1194 pub fn contains_volatile(&self) -> bool {
1196 self.contains_volatile
1197 }
1198
1199 pub fn fingerprint(&self) -> u64 {
1200 self.calculate_hash()
1201 }
1202
1203 pub fn calculate_hash(&self) -> u64 {
1205 let mut hasher = FormulaHasher::new();
1206 self.hash_node(&mut hasher);
1207 hasher.finish()
1208 }
1209
1210 fn hash_node(&self, hasher: &mut FormulaHasher) {
1211 match &self.node_type {
1212 ASTNodeType::Literal(value) => {
1213 hasher.write(&[1]); value.hash(hasher);
1215 }
1216 ASTNodeType::Reference { reference, .. } => {
1217 hasher.write(&[2]); reference.hash(hasher);
1219 }
1220 ASTNodeType::UnaryOp { op, expr } => {
1221 hasher.write(&[3]); hasher.write(op.as_bytes());
1223 expr.hash_node(hasher);
1224 }
1225 ASTNodeType::BinaryOp { op, left, right } => {
1226 hasher.write(&[4]); hasher.write(op.as_bytes());
1228 left.hash_node(hasher);
1229 right.hash_node(hasher);
1230 }
1231 ASTNodeType::Function { name, args } => {
1232 hasher.write(&[5]); let name_lower = name.to_lowercase();
1235 hasher.write(name_lower.as_bytes());
1236 hasher.write_usize(args.len());
1237 for arg in args {
1238 arg.hash_node(hasher);
1239 }
1240 }
1241 ASTNodeType::Array(rows) => {
1242 hasher.write(&[6]); hasher.write_usize(rows.len());
1244 for row in rows {
1245 hasher.write_usize(row.len());
1246 for item in row {
1247 item.hash_node(hasher);
1248 }
1249 }
1250 }
1251 }
1252 }
1253
1254 pub fn get_dependencies(&self) -> Vec<&ReferenceType> {
1255 let mut dependencies = Vec::new();
1256 self.collect_dependencies(&mut dependencies);
1257 dependencies
1258 }
1259
1260 pub fn get_dependency_strings(&self) -> Vec<String> {
1261 self.get_dependencies()
1262 .into_iter()
1263 .map(|dep| format!("{dep}"))
1264 .collect()
1265 }
1266
1267 fn collect_dependencies<'a>(&'a self, dependencies: &mut Vec<&'a ReferenceType>) {
1268 match &self.node_type {
1269 ASTNodeType::Reference { reference, .. } => {
1270 dependencies.push(reference);
1271 }
1272 ASTNodeType::UnaryOp { expr, .. } => {
1273 expr.collect_dependencies(dependencies);
1274 }
1275 ASTNodeType::BinaryOp { left, right, .. } => {
1276 left.collect_dependencies(dependencies);
1277 right.collect_dependencies(dependencies);
1278 }
1279 ASTNodeType::Function { args, .. } => {
1280 for arg in args {
1281 arg.collect_dependencies(dependencies);
1282 }
1283 }
1284 ASTNodeType::Array(rows) => {
1285 for row in rows {
1286 for item in row {
1287 item.collect_dependencies(dependencies);
1288 }
1289 }
1290 }
1291 _ => {}
1292 }
1293 }
1294
1295 pub fn refs(&self) -> RefIter<'_> {
1298 RefIter {
1299 stack: smallvec::smallvec![self],
1300 }
1301 }
1302
1303 pub fn visit_refs<V: FnMut(RefView<'_>)>(&self, mut visitor: V) {
1305 let mut stack: Vec<&ASTNode> = Vec::with_capacity(8);
1306 stack.push(self);
1307 while let Some(node) = stack.pop() {
1308 match &node.node_type {
1309 ASTNodeType::Reference { reference, .. } => visitor(RefView::from(reference)),
1310 ASTNodeType::UnaryOp { expr, .. } => stack.push(expr),
1311 ASTNodeType::BinaryOp { left, right, .. } => {
1312 stack.push(right);
1314 stack.push(left);
1315 }
1316 ASTNodeType::Function { args, .. } => {
1317 for a in args.iter().rev() {
1318 stack.push(a);
1319 }
1320 }
1321 ASTNodeType::Array(rows) => {
1322 for r in rows.iter().rev() {
1323 for item in r.iter().rev() {
1324 stack.push(item);
1325 }
1326 }
1327 }
1328 ASTNodeType::Literal(_) => {}
1329 }
1330 }
1331 }
1332
1333 pub fn collect_references(&self, policy: &CollectPolicy) -> SmallVec<[ReferenceType; 4]> {
1335 let mut out: SmallVec<[ReferenceType; 4]> = SmallVec::new();
1336 self.visit_refs(|rv| match rv {
1337 RefView::Cell { sheet, row, col } => out.push(ReferenceType::Cell {
1338 sheet: sheet.map(|s| s.to_string()),
1339 row,
1340 col,
1341 }),
1342 RefView::Range {
1343 sheet,
1344 start_row,
1345 start_col,
1346 end_row,
1347 end_col,
1348 } => {
1349 if policy.expand_small_ranges
1351 && let (Some(sr), Some(sc), Some(er), Some(ec)) =
1352 (start_row, start_col, end_row, end_col)
1353 {
1354 let rows = er.saturating_sub(sr) + 1;
1355 let cols = ec.saturating_sub(sc) + 1;
1356 let area = rows.saturating_mul(cols);
1357 if area as usize <= policy.range_expansion_limit {
1358 for r in sr..=er {
1359 for c in sc..=ec {
1360 out.push(ReferenceType::Cell {
1361 sheet: sheet.map(|s| s.to_string()),
1362 row: r,
1363 col: c,
1364 });
1365 }
1366 }
1367 return; }
1369 }
1370 out.push(ReferenceType::Range {
1371 sheet: sheet.map(|s| s.to_string()),
1372 start_row,
1373 start_col,
1374 end_row,
1375 end_col,
1376 });
1377 }
1378 RefView::Table { name, specifier } => out.push(ReferenceType::Table(TableReference {
1379 name: name.to_string(),
1380 specifier: specifier.cloned(),
1381 })),
1382 RefView::NamedRange { name } => {
1383 if policy.include_names {
1384 out.push(ReferenceType::NamedRange(name.to_string()));
1385 }
1386 }
1387 });
1388 out
1389 }
1390}
1391
1392#[derive(Clone, Copy, Debug)]
1394pub enum RefView<'a> {
1395 Cell {
1396 sheet: Option<&'a str>,
1397 row: u32,
1398 col: u32,
1399 },
1400 Range {
1401 sheet: Option<&'a str>,
1402 start_row: Option<u32>,
1403 start_col: Option<u32>,
1404 end_row: Option<u32>,
1405 end_col: Option<u32>,
1406 },
1407 Table {
1408 name: &'a str,
1409 specifier: Option<&'a TableSpecifier>,
1410 },
1411 NamedRange {
1412 name: &'a str,
1413 },
1414}
1415
1416impl<'a> From<&'a ReferenceType> for RefView<'a> {
1417 fn from(r: &'a ReferenceType) -> Self {
1418 match r {
1419 ReferenceType::Cell { sheet, row, col } => RefView::Cell {
1420 sheet: sheet.as_deref(),
1421 row: *row,
1422 col: *col,
1423 },
1424 ReferenceType::Range {
1425 sheet,
1426 start_row,
1427 start_col,
1428 end_row,
1429 end_col,
1430 } => RefView::Range {
1431 sheet: sheet.as_deref(),
1432 start_row: *start_row,
1433 start_col: *start_col,
1434 end_row: *end_row,
1435 end_col: *end_col,
1436 },
1437 ReferenceType::Table(tr) => RefView::Table {
1438 name: tr.name.as_str(),
1439 specifier: tr.specifier.as_ref(),
1440 },
1441 ReferenceType::NamedRange(name) => RefView::NamedRange { name },
1442 }
1443 }
1444}
1445
1446pub struct RefIter<'a> {
1448 stack: smallvec::SmallVec<[&'a ASTNode; 8]>,
1449}
1450
1451impl<'a> Iterator for RefIter<'a> {
1452 type Item = RefView<'a>;
1453 fn next(&mut self) -> Option<Self::Item> {
1454 while let Some(node) = self.stack.pop() {
1455 match &node.node_type {
1456 ASTNodeType::Reference { reference, .. } => return Some(RefView::from(reference)),
1457 ASTNodeType::UnaryOp { expr, .. } => self.stack.push(expr),
1458 ASTNodeType::BinaryOp { left, right, .. } => {
1459 self.stack.push(right);
1460 self.stack.push(left);
1461 }
1462 ASTNodeType::Function { args, .. } => {
1463 for a in args.iter().rev() {
1464 self.stack.push(a);
1465 }
1466 }
1467 ASTNodeType::Array(rows) => {
1468 for r in rows.iter().rev() {
1469 for item in r.iter().rev() {
1470 self.stack.push(item);
1471 }
1472 }
1473 }
1474 ASTNodeType::Literal(_) => {}
1475 }
1476 }
1477 None
1478 }
1479}
1480
1481#[derive(Debug, Clone)]
1483pub struct CollectPolicy {
1484 pub expand_small_ranges: bool,
1485 pub range_expansion_limit: usize,
1486 pub include_names: bool,
1487}
1488
1489impl Default for CollectPolicy {
1490 fn default() -> Self {
1491 Self {
1492 expand_small_ranges: false,
1493 range_expansion_limit: 0,
1494 include_names: true,
1495 }
1496 }
1497}
1498
1499impl Display for ASTNode {
1500 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1501 write!(f, "{}", self.node_type)
1502 }
1503}
1504
1505impl std::hash::Hash for ASTNode {
1506 fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
1507 let hash = self.calculate_hash();
1508 state.write_u64(hash);
1509 }
1510}
1511
1512pub struct Parser {
1514 tokens: Arc<[Token]>,
1515 position: usize,
1516 volatility_classifier: Option<VolatilityClassifierBox>,
1518 dialect: FormulaDialect,
1519}
1520
1521impl<T> From<T> for Parser
1522where
1523 T: AsRef<str>,
1524{
1525 fn from(formula: T) -> Self {
1526 let tokens = Tokenizer::new(formula.as_ref()).unwrap().items;
1527 Self::new(tokens, false)
1528 }
1529}
1530
1531impl Parser {
1532 pub fn new(tokens: Vec<Token>, include_whitespace: bool) -> Self {
1533 Self::new_with_dialect(tokens, include_whitespace, FormulaDialect::Excel)
1534 }
1535
1536 pub fn new_with_dialect(
1537 mut tokens: Vec<Token>,
1538 include_whitespace: bool,
1539 dialect: FormulaDialect,
1540 ) -> Self {
1541 if !include_whitespace {
1542 tokens.retain(|t| t.token_type != TokenType::Whitespace);
1543 }
1544
1545 Parser {
1546 tokens: Arc::from(tokens.into_boxed_slice()),
1547 position: 0,
1548 volatility_classifier: None,
1549 dialect,
1550 }
1551 }
1552
1553 pub fn with_volatility_classifier<F>(mut self, f: F) -> Self
1556 where
1557 F: Fn(&str) -> bool + Send + Sync + 'static,
1558 {
1559 self.volatility_classifier = Some(Box::new(f));
1560 self
1561 }
1562
1563 pub fn new_with_classifier<F>(tokens: Vec<Token>, include_whitespace: bool, f: F) -> Self
1565 where
1566 F: Fn(&str) -> bool + Send + Sync + 'static,
1567 {
1568 Self::new(tokens, include_whitespace).with_volatility_classifier(f)
1569 }
1570
1571 pub fn new_with_classifier_and_dialect<F>(
1572 tokens: Vec<Token>,
1573 include_whitespace: bool,
1574 dialect: FormulaDialect,
1575 f: F,
1576 ) -> Self
1577 where
1578 F: Fn(&str) -> bool + Send + Sync + 'static,
1579 {
1580 Self::new_with_dialect(tokens, include_whitespace, dialect).with_volatility_classifier(f)
1581 }
1582
1583 fn skip_whitespace(&mut self) {
1584 while self.position < self.tokens.len()
1585 && self.tokens[self.position].token_type == TokenType::Whitespace
1586 {
1587 self.position += 1;
1588 }
1589 }
1590
1591 pub fn parse(&mut self) -> Result<ASTNode, ParserError> {
1593 if self.tokens.is_empty() {
1594 return Err(ParserError {
1595 message: "No tokens to parse".to_string(),
1596 position: None,
1597 });
1598 }
1599
1600 self.skip_whitespace();
1601 if self.position >= self.tokens.len() {
1602 return Err(ParserError {
1603 message: "No tokens to parse".to_string(),
1604 position: None,
1605 });
1606 }
1607
1608 if self.tokens[self.position].token_type == TokenType::Literal {
1610 let token = self.tokens[self.position].clone();
1611 self.position += 1;
1612 self.skip_whitespace();
1613 if self.position < self.tokens.len() {
1614 return Err(ParserError {
1615 message: format!(
1616 "Unexpected token at position {}: {:?}",
1617 self.position, self.tokens[self.position]
1618 ),
1619 position: Some(self.position),
1620 });
1621 }
1622 return Ok(ASTNode::new(
1623 ASTNodeType::Literal(LiteralValue::Text(token.value.clone())),
1624 Some(token),
1625 ));
1626 }
1627
1628 let ast = self.parse_expression()?;
1629 self.skip_whitespace();
1630 if self.position < self.tokens.len() {
1631 return Err(ParserError {
1632 message: format!(
1633 "Unexpected token at position {}: {:?}",
1634 self.position, self.tokens[self.position]
1635 ),
1636 position: Some(self.position),
1637 });
1638 }
1639 Ok(ast)
1640 }
1641
1642 fn parse_expression(&mut self) -> Result<ASTNode, ParserError> {
1643 self.parse_bp(0)
1644 }
1645
1646 fn parse_bp(&mut self, min_precedence: u8) -> Result<ASTNode, ParserError> {
1649 let mut left = self.parse_prefix()?;
1650
1651 loop {
1652 self.skip_whitespace();
1653 if self.position >= self.tokens.len() {
1654 break;
1655 }
1656
1657 if self.tokens[self.position].token_type == TokenType::OpPostfix {
1659 let (precedence, _) = self.tokens[self.position]
1660 .get_precedence()
1661 .unwrap_or((0, Associativity::Left));
1662 if precedence < min_precedence {
1663 break;
1664 }
1665
1666 let op_token = self.tokens[self.position].clone();
1667 self.position += 1;
1668 let contains_volatile = left.contains_volatile;
1669 left = ASTNode::new_with_volatile(
1670 ASTNodeType::UnaryOp {
1671 op: op_token.value.clone(),
1672 expr: Box::new(left),
1673 },
1674 Some(op_token),
1675 contains_volatile,
1676 );
1677 continue;
1678 }
1679
1680 let token = &self.tokens[self.position];
1681 if token.token_type != TokenType::OpInfix {
1682 break;
1683 }
1684
1685 let (precedence, associativity) =
1686 token.get_precedence().unwrap_or((0, Associativity::Left));
1687 if precedence < min_precedence {
1688 break;
1689 }
1690
1691 let op_token = self.tokens[self.position].clone();
1692 self.position += 1;
1693
1694 let next_min_precedence = if associativity == Associativity::Left {
1695 precedence + 1
1696 } else {
1697 precedence
1698 };
1699
1700 let right = self.parse_bp(next_min_precedence)?;
1701 let contains_volatile = left.contains_volatile || right.contains_volatile;
1702 left = ASTNode::new_with_volatile(
1703 ASTNodeType::BinaryOp {
1704 op: op_token.value.clone(),
1705 left: Box::new(left),
1706 right: Box::new(right),
1707 },
1708 Some(op_token),
1709 contains_volatile,
1710 );
1711 }
1712
1713 Ok(left)
1714 }
1715
1716 fn parse_prefix(&mut self) -> Result<ASTNode, ParserError> {
1717 self.skip_whitespace();
1718 if self.position < self.tokens.len()
1719 && self.tokens[self.position].token_type == TokenType::OpPrefix
1720 {
1721 let op_token = self.tokens[self.position].clone();
1722 self.position += 1;
1723
1724 let (precedence, _) = op_token
1727 .get_precedence()
1728 .unwrap_or((0, Associativity::Right));
1729
1730 let expr = self.parse_bp(precedence)?;
1731 let contains_volatile = expr.contains_volatile;
1732 return Ok(ASTNode::new_with_volatile(
1733 ASTNodeType::UnaryOp {
1734 op: op_token.value.clone(),
1735 expr: Box::new(expr),
1736 },
1737 Some(op_token),
1738 contains_volatile,
1739 ));
1740 }
1741
1742 self.parse_primary()
1743 }
1744
1745 fn parse_primary(&mut self) -> Result<ASTNode, ParserError> {
1746 self.skip_whitespace();
1747 if self.position >= self.tokens.len() {
1748 return Err(ParserError {
1749 message: "Unexpected end of tokens".to_string(),
1750 position: Some(self.position),
1751 });
1752 }
1753
1754 let token = &self.tokens[self.position];
1755 match token.token_type {
1756 TokenType::Operand => {
1757 let operand_token = self.tokens[self.position].clone();
1758 self.position += 1;
1759 self.parse_operand(operand_token)
1760 }
1761 TokenType::Func => {
1762 let func_token = self.tokens[self.position].clone();
1763 self.position += 1;
1764 self.parse_function(func_token)
1765 }
1766 TokenType::Paren if token.subtype == TokenSubType::Open => {
1767 self.position += 1;
1768 let expr = self.parse_expression()?;
1769 if self.position >= self.tokens.len()
1770 || self.tokens[self.position].token_type != TokenType::Paren
1771 || self.tokens[self.position].subtype != TokenSubType::Close
1772 {
1773 return Err(ParserError {
1774 message: "Expected closing parenthesis".to_string(),
1775 position: Some(self.position),
1776 });
1777 }
1778 self.position += 1;
1779 Ok(expr)
1780 }
1781 TokenType::Array if token.subtype == TokenSubType::Open => {
1782 self.position += 1;
1783 self.parse_array()
1784 }
1785 _ => Err(ParserError {
1786 message: format!("Unexpected token: {token:?}"),
1787 position: Some(self.position),
1788 }),
1789 }
1790 }
1791
1792 fn parse_operand(&mut self, token: Token) -> Result<ASTNode, ParserError> {
1793 match token.subtype {
1794 TokenSubType::Number => {
1795 let value = token.value.parse::<f64>().map_err(|_| ParserError {
1796 message: format!("Invalid number: {}", token.value),
1797 position: Some(self.position),
1798 })?;
1799 Ok(ASTNode::new(
1800 ASTNodeType::Literal(LiteralValue::Number(value)),
1801 Some(token),
1802 ))
1803 }
1804 TokenSubType::Text => {
1805 let mut text = token.value.clone();
1807 if text.starts_with('"') && text.ends_with('"') && text.len() >= 2 {
1808 text = text[1..text.len() - 1].to_string();
1809 text = text.replace("\"\"", "\"");
1811 }
1812 Ok(ASTNode::new(
1813 ASTNodeType::Literal(LiteralValue::Text(text)),
1814 Some(token),
1815 ))
1816 }
1817 TokenSubType::Logical => {
1818 let value = token.value.to_uppercase() == "TRUE";
1819 Ok(ASTNode::new(
1820 ASTNodeType::Literal(LiteralValue::Boolean(value)),
1821 Some(token),
1822 ))
1823 }
1824 TokenSubType::Error => {
1825 let error = ExcelError::from_error_string(&token.value);
1826 Ok(ASTNode::new(
1827 ASTNodeType::Literal(LiteralValue::Error(error)),
1828 Some(token),
1829 ))
1830 }
1831 TokenSubType::Range => {
1832 let reference = ReferenceType::from_string_with_dialect(&token.value, self.dialect)
1833 .map_err(|e| ParserError {
1834 message: format!("Invalid reference '{}': {}", token.value, e),
1835 position: Some(self.position),
1836 })?;
1837 Ok(ASTNode::new(
1838 ASTNodeType::Reference {
1839 original: token.value.clone(),
1840 reference,
1841 },
1842 Some(token),
1843 ))
1844 }
1845 _ => Err(ParserError {
1846 message: format!("Unexpected operand subtype: {:?}", token.subtype),
1847 position: Some(self.position),
1848 }),
1849 }
1850 }
1851
1852 fn parse_function(&mut self, func_token: Token) -> Result<ASTNode, ParserError> {
1853 let name = func_token.value[..func_token.value.len() - 1].to_string();
1854 let args = self.parse_function_arguments()?;
1855 let this_is_volatile = self
1857 .volatility_classifier
1858 .as_ref()
1859 .map(|f| f(name.as_str()))
1860 .unwrap_or(false);
1861 let args_volatile = args.iter().any(|a| a.contains_volatile);
1862
1863 Ok(ASTNode::new_with_volatile(
1864 ASTNodeType::Function { name, args },
1865 Some(func_token),
1866 this_is_volatile || args_volatile,
1867 ))
1868 }
1869
1870 fn parse_function_arguments(&mut self) -> Result<Vec<ASTNode>, ParserError> {
1872 let mut args = Vec::new();
1873
1874 if self.position < self.tokens.len()
1876 && self.tokens[self.position].token_type == TokenType::Func
1877 && self.tokens[self.position].subtype == TokenSubType::Close
1878 {
1879 self.position += 1;
1880 return Ok(args);
1881 }
1882
1883 if self.position < self.tokens.len()
1886 && self.tokens[self.position].token_type == TokenType::Sep
1887 && self.tokens[self.position].subtype == TokenSubType::Arg
1888 {
1889 args.push(ASTNode::new(
1891 ASTNodeType::Literal(LiteralValue::Text("".to_string())),
1892 None,
1893 ));
1894 self.position += 1;
1895 } else {
1896 args.push(self.parse_expression()?);
1898 }
1899
1900 while self.position < self.tokens.len() {
1902 let token = &self.tokens[self.position];
1903
1904 if token.token_type == TokenType::Sep && token.subtype == TokenSubType::Arg {
1905 self.position += 1;
1906 if self.position < self.tokens.len() {
1908 let next_token = &self.tokens[self.position];
1909 if next_token.token_type == TokenType::Sep
1910 && next_token.subtype == TokenSubType::Arg
1911 {
1912 args.push(ASTNode::new(
1914 ASTNodeType::Literal(LiteralValue::Text("".to_string())),
1915 None,
1916 ));
1917 } else if next_token.token_type == TokenType::Func
1918 && next_token.subtype == TokenSubType::Close
1919 {
1920 args.push(ASTNode::new(
1922 ASTNodeType::Literal(LiteralValue::Text("".to_string())),
1923 None,
1924 ));
1925 self.position += 1;
1926 break;
1927 } else {
1928 args.push(self.parse_expression()?);
1929 }
1930 } else {
1931 args.push(ASTNode::new(
1933 ASTNodeType::Literal(LiteralValue::Text("".to_string())),
1934 None,
1935 ));
1936 }
1937 } else if token.token_type == TokenType::Func && token.subtype == TokenSubType::Close {
1938 self.position += 1;
1939 break;
1940 } else {
1941 return Err(ParserError {
1942 message: format!("Expected ',' or ')' in function arguments, got {token:?}"),
1943 position: Some(self.position),
1944 });
1945 }
1946 }
1947
1948 Ok(args)
1949 }
1950
1951 fn parse_array(&mut self) -> Result<ASTNode, ParserError> {
1952 let mut rows = Vec::new();
1953 let mut current_row = Vec::new();
1954
1955 if self.position < self.tokens.len()
1957 && self.tokens[self.position].token_type == TokenType::Array
1958 && self.tokens[self.position].subtype == TokenSubType::Close
1959 {
1960 self.position += 1;
1961 return Ok(ASTNode::new(ASTNodeType::Array(rows), None));
1962 }
1963
1964 current_row.push(self.parse_expression()?);
1966
1967 while self.position < self.tokens.len() {
1968 let token = &self.tokens[self.position];
1969
1970 if token.token_type == TokenType::Sep {
1971 if token.subtype == TokenSubType::Arg {
1972 self.position += 1;
1974 current_row.push(self.parse_expression()?);
1975 } else if token.subtype == TokenSubType::Row {
1976 self.position += 1;
1978 rows.push(current_row);
1979 current_row = vec![self.parse_expression()?];
1980 }
1981 } else if token.token_type == TokenType::Array && token.subtype == TokenSubType::Close {
1982 self.position += 1;
1983 rows.push(current_row);
1984 break;
1985 } else {
1986 return Err(ParserError {
1987 message: format!("Unexpected token in array: {token:?}"),
1988 position: Some(self.position),
1989 });
1990 }
1991 }
1992
1993 let contains_volatile = rows
1995 .iter()
1996 .flat_map(|r| r.iter())
1997 .any(|n| n.contains_volatile);
1998 Ok(ASTNode::new_with_volatile(
1999 ASTNodeType::Array(rows),
2000 None,
2001 contains_volatile,
2002 ))
2003 }
2004}
2005
2006impl From<TokenizerError> for ParserError {
2007 fn from(err: TokenizerError) -> Self {
2008 ParserError {
2009 message: err.message,
2010 position: Some(err.pos),
2011 }
2012 }
2013}
2014
2015struct SpanParser<'a> {
2016 source: &'a str,
2017 tokens: &'a [crate::tokenizer::TokenSpan],
2018 position: usize,
2019 volatility_classifier: Option<VolatilityClassifierBox>,
2020 dialect: FormulaDialect,
2021}
2022
2023impl<'a> SpanParser<'a> {
2024 fn new(
2025 source: &'a str,
2026 tokens: &'a [crate::tokenizer::TokenSpan],
2027 dialect: FormulaDialect,
2028 ) -> Self {
2029 SpanParser {
2030 source,
2031 tokens,
2032 position: 0,
2033 volatility_classifier: None,
2034 dialect,
2035 }
2036 }
2037
2038 fn with_volatility_classifier<F>(mut self, f: F) -> Self
2039 where
2040 F: Fn(&str) -> bool + Send + Sync + 'static,
2041 {
2042 self.volatility_classifier = Some(Box::new(f));
2043 self
2044 }
2045
2046 fn skip_whitespace(&mut self) {
2047 while self.position < self.tokens.len()
2048 && self.tokens[self.position].token_type == TokenType::Whitespace
2049 {
2050 self.position += 1;
2051 }
2052 }
2053
2054 fn span_value(&self, span: &crate::tokenizer::TokenSpan) -> &str {
2055 &self.source[span.start..span.end]
2056 }
2057
2058 fn span_to_token(&self, span: &crate::tokenizer::TokenSpan) -> Token {
2059 Token::new_with_span(
2060 self.span_value(span).to_string(),
2061 span.token_type,
2062 span.subtype,
2063 span.start,
2064 span.end,
2065 )
2066 }
2067
2068 fn span_precedence(&self, span: &crate::tokenizer::TokenSpan) -> Option<(u8, Associativity)> {
2069 if !matches!(
2070 span.token_type,
2071 TokenType::OpPrefix | TokenType::OpInfix | TokenType::OpPostfix
2072 ) {
2073 return None;
2074 }
2075
2076 let op = if span.token_type == TokenType::OpPrefix {
2077 "u"
2078 } else {
2079 self.span_value(span)
2080 };
2081
2082 match op {
2083 ":" | " " | "," => Some((8, Associativity::Left)),
2084 "%" => Some((7, Associativity::Left)),
2085 "^" => Some((6, Associativity::Right)),
2086 "u" => Some((5, Associativity::Right)),
2087 "*" | "/" => Some((4, Associativity::Left)),
2088 "+" | "-" => Some((3, Associativity::Left)),
2089 "&" => Some((2, Associativity::Left)),
2090 "=" | "<" | ">" | "<=" | ">=" | "<>" => Some((1, Associativity::Left)),
2091 _ => None,
2092 }
2093 }
2094
2095 fn parse(&mut self) -> Result<ASTNode, ParserError> {
2096 if self.tokens.is_empty() {
2097 return Err(ParserError {
2098 message: "No tokens to parse".to_string(),
2099 position: None,
2100 });
2101 }
2102
2103 self.skip_whitespace();
2104 if self.position >= self.tokens.len() {
2105 return Err(ParserError {
2106 message: "No tokens to parse".to_string(),
2107 position: None,
2108 });
2109 }
2110
2111 if self.tokens[self.position].token_type == TokenType::Literal {
2112 let span = self.tokens[self.position];
2113 self.position += 1;
2114 self.skip_whitespace();
2115 if self.position < self.tokens.len() {
2116 return Err(ParserError {
2117 message: format!(
2118 "Unexpected token at position {}: {:?}",
2119 self.position, self.tokens[self.position]
2120 ),
2121 position: Some(self.position),
2122 });
2123 }
2124
2125 let token = self.span_to_token(&span);
2126 return Ok(ASTNode::new(
2127 ASTNodeType::Literal(LiteralValue::Text(token.value.clone())),
2128 Some(token),
2129 ));
2130 }
2131
2132 let ast = self.parse_expression()?;
2133 self.skip_whitespace();
2134 if self.position < self.tokens.len() {
2135 return Err(ParserError {
2136 message: format!(
2137 "Unexpected token at position {}: {:?}",
2138 self.position, self.tokens[self.position]
2139 ),
2140 position: Some(self.position),
2141 });
2142 }
2143 Ok(ast)
2144 }
2145
2146 fn parse_expression(&mut self) -> Result<ASTNode, ParserError> {
2147 self.parse_bp(0)
2148 }
2149
2150 fn parse_bp(&mut self, min_precedence: u8) -> Result<ASTNode, ParserError> {
2151 let mut left = self.parse_prefix()?;
2152
2153 loop {
2154 self.skip_whitespace();
2155 if self.position >= self.tokens.len() {
2156 break;
2157 }
2158
2159 if self.tokens[self.position].token_type == TokenType::OpPostfix {
2160 let (precedence, _) = self
2161 .span_precedence(&self.tokens[self.position])
2162 .unwrap_or((0, Associativity::Left));
2163 if precedence < min_precedence {
2164 break;
2165 }
2166
2167 let op_span = self.tokens[self.position];
2168 self.position += 1;
2169 let op_token = self.span_to_token(&op_span);
2170 let contains_volatile = left.contains_volatile;
2171 left = ASTNode::new_with_volatile(
2172 ASTNodeType::UnaryOp {
2173 op: op_token.value.clone(),
2174 expr: Box::new(left),
2175 },
2176 Some(op_token),
2177 contains_volatile,
2178 );
2179 continue;
2180 }
2181
2182 let token = &self.tokens[self.position];
2183 if token.token_type != TokenType::OpInfix {
2184 break;
2185 }
2186
2187 let (precedence, associativity) = self
2188 .span_precedence(token)
2189 .unwrap_or((0, Associativity::Left));
2190 if precedence < min_precedence {
2191 break;
2192 }
2193
2194 let op_span = self.tokens[self.position];
2195 self.position += 1;
2196
2197 let next_min_precedence = if associativity == Associativity::Left {
2198 precedence + 1
2199 } else {
2200 precedence
2201 };
2202
2203 let right = self.parse_bp(next_min_precedence)?;
2204 let op_token = self.span_to_token(&op_span);
2205 let contains_volatile = left.contains_volatile || right.contains_volatile;
2206 left = ASTNode::new_with_volatile(
2207 ASTNodeType::BinaryOp {
2208 op: op_token.value.clone(),
2209 left: Box::new(left),
2210 right: Box::new(right),
2211 },
2212 Some(op_token),
2213 contains_volatile,
2214 );
2215 }
2216
2217 Ok(left)
2218 }
2219
2220 fn parse_prefix(&mut self) -> Result<ASTNode, ParserError> {
2221 self.skip_whitespace();
2222 if self.position < self.tokens.len()
2223 && self.tokens[self.position].token_type == TokenType::OpPrefix
2224 {
2225 let op_span = self.tokens[self.position];
2226 self.position += 1;
2227
2228 let (precedence, _) = self
2229 .span_precedence(&op_span)
2230 .unwrap_or((0, Associativity::Right));
2231
2232 let expr = self.parse_bp(precedence)?;
2233 let op_token = self.span_to_token(&op_span);
2234 let contains_volatile = expr.contains_volatile;
2235 return Ok(ASTNode::new_with_volatile(
2236 ASTNodeType::UnaryOp {
2237 op: op_token.value.clone(),
2238 expr: Box::new(expr),
2239 },
2240 Some(op_token),
2241 contains_volatile,
2242 ));
2243 }
2244
2245 self.parse_primary()
2246 }
2247
2248 fn parse_primary(&mut self) -> Result<ASTNode, ParserError> {
2249 self.skip_whitespace();
2250 if self.position >= self.tokens.len() {
2251 return Err(ParserError {
2252 message: "Unexpected end of tokens".to_string(),
2253 position: Some(self.position),
2254 });
2255 }
2256
2257 let token = &self.tokens[self.position];
2258 match token.token_type {
2259 TokenType::Operand => {
2260 let span = self.tokens[self.position];
2261 self.position += 1;
2262 self.parse_operand(span)
2263 }
2264 TokenType::Func => {
2265 let span = self.tokens[self.position];
2266 self.position += 1;
2267 self.parse_function(span)
2268 }
2269 TokenType::Paren if token.subtype == TokenSubType::Open => {
2270 self.position += 1;
2271 let expr = self.parse_expression()?;
2272 self.skip_whitespace();
2273 if self.position >= self.tokens.len()
2274 || self.tokens[self.position].token_type != TokenType::Paren
2275 || self.tokens[self.position].subtype != TokenSubType::Close
2276 {
2277 return Err(ParserError {
2278 message: "Expected closing parenthesis".to_string(),
2279 position: Some(self.position),
2280 });
2281 }
2282 self.position += 1;
2283 Ok(expr)
2284 }
2285 TokenType::Array if token.subtype == TokenSubType::Open => {
2286 self.position += 1;
2287 self.parse_array()
2288 }
2289 _ => Err(ParserError {
2290 message: format!("Unexpected token: {token:?}"),
2291 position: Some(self.position),
2292 }),
2293 }
2294 }
2295
2296 fn parse_operand(&mut self, span: crate::tokenizer::TokenSpan) -> Result<ASTNode, ParserError> {
2297 let value = self.span_value(&span);
2298 let token = self.span_to_token(&span);
2299
2300 match span.subtype {
2301 TokenSubType::Number => {
2302 let value = value.parse::<f64>().map_err(|_| ParserError {
2303 message: format!("Invalid number: {value}"),
2304 position: Some(self.position),
2305 })?;
2306 Ok(ASTNode::new(
2307 ASTNodeType::Literal(LiteralValue::Number(value)),
2308 Some(token),
2309 ))
2310 }
2311 TokenSubType::Text => {
2312 let mut text = value.to_string();
2313 if text.starts_with('"') && text.ends_with('"') && text.len() >= 2 {
2314 text = text[1..text.len() - 1].to_string();
2315 text = text.replace("\"\"", "\"");
2316 }
2317 Ok(ASTNode::new(
2318 ASTNodeType::Literal(LiteralValue::Text(text)),
2319 Some(token),
2320 ))
2321 }
2322 TokenSubType::Logical => {
2323 let v = value.to_uppercase() == "TRUE";
2324 Ok(ASTNode::new(
2325 ASTNodeType::Literal(LiteralValue::Boolean(v)),
2326 Some(token),
2327 ))
2328 }
2329 TokenSubType::Error => {
2330 let error = ExcelError::from_error_string(value);
2331 Ok(ASTNode::new(
2332 ASTNodeType::Literal(LiteralValue::Error(error)),
2333 Some(token),
2334 ))
2335 }
2336 TokenSubType::Range => {
2337 let reference = ReferenceType::from_string_with_dialect(value, self.dialect)
2338 .map_err(|e| ParserError {
2339 message: format!("Invalid reference '{value}': {e}"),
2340 position: Some(self.position),
2341 })?;
2342 Ok(ASTNode::new(
2343 ASTNodeType::Reference {
2344 original: value.to_string(),
2345 reference,
2346 },
2347 Some(token),
2348 ))
2349 }
2350 _ => Err(ParserError {
2351 message: format!("Unexpected operand subtype: {:?}", span.subtype),
2352 position: Some(self.position),
2353 }),
2354 }
2355 }
2356
2357 fn parse_function(
2358 &mut self,
2359 func_span: crate::tokenizer::TokenSpan,
2360 ) -> Result<ASTNode, ParserError> {
2361 let func_value = self.span_value(&func_span);
2362 if func_value.is_empty() {
2363 return Err(ParserError {
2364 message: "Invalid function token".to_string(),
2365 position: Some(self.position),
2366 });
2367 }
2368 let name = func_value[..func_value.len() - 1].to_string();
2369 let args = self.parse_function_arguments()?;
2370
2371 let this_is_volatile = self
2372 .volatility_classifier
2373 .as_ref()
2374 .map(|f| f(name.as_str()))
2375 .unwrap_or(false);
2376 let args_volatile = args.iter().any(|a| a.contains_volatile);
2377
2378 let func_token = self.span_to_token(&func_span);
2379 Ok(ASTNode::new_with_volatile(
2380 ASTNodeType::Function { name, args },
2381 Some(func_token),
2382 this_is_volatile || args_volatile,
2383 ))
2384 }
2385
2386 fn parse_function_arguments(&mut self) -> Result<Vec<ASTNode>, ParserError> {
2387 let mut args = Vec::new();
2388
2389 self.skip_whitespace();
2390 if self.position < self.tokens.len()
2391 && self.tokens[self.position].token_type == TokenType::Func
2392 && self.tokens[self.position].subtype == TokenSubType::Close
2393 {
2394 self.position += 1;
2395 return Ok(args);
2396 }
2397
2398 self.skip_whitespace();
2399 if self.position < self.tokens.len()
2400 && self.tokens[self.position].token_type == TokenType::Sep
2401 && self.tokens[self.position].subtype == TokenSubType::Arg
2402 {
2403 args.push(ASTNode::new(
2404 ASTNodeType::Literal(LiteralValue::Text("".to_string())),
2405 None,
2406 ));
2407 self.position += 1;
2408 } else {
2409 args.push(self.parse_expression()?);
2410 }
2411
2412 while self.position < self.tokens.len() {
2413 self.skip_whitespace();
2414 if self.position >= self.tokens.len() {
2415 break;
2416 }
2417
2418 let token = &self.tokens[self.position];
2419 if token.token_type == TokenType::Sep && token.subtype == TokenSubType::Arg {
2420 self.position += 1;
2421 self.skip_whitespace();
2422 if self.position < self.tokens.len() {
2423 let next_token = &self.tokens[self.position];
2424 if next_token.token_type == TokenType::Sep
2425 && next_token.subtype == TokenSubType::Arg
2426 {
2427 args.push(ASTNode::new(
2428 ASTNodeType::Literal(LiteralValue::Text("".to_string())),
2429 None,
2430 ));
2431 } else if next_token.token_type == TokenType::Func
2432 && next_token.subtype == TokenSubType::Close
2433 {
2434 args.push(ASTNode::new(
2435 ASTNodeType::Literal(LiteralValue::Text("".to_string())),
2436 None,
2437 ));
2438 self.position += 1;
2439 break;
2440 } else {
2441 args.push(self.parse_expression()?);
2442 }
2443 } else {
2444 args.push(ASTNode::new(
2445 ASTNodeType::Literal(LiteralValue::Text("".to_string())),
2446 None,
2447 ));
2448 }
2449 } else if token.token_type == TokenType::Func && token.subtype == TokenSubType::Close {
2450 self.position += 1;
2451 break;
2452 } else {
2453 return Err(ParserError {
2454 message: format!("Expected ',' or ')' in function arguments, got {token:?}"),
2455 position: Some(self.position),
2456 });
2457 }
2458 }
2459
2460 Ok(args)
2461 }
2462
2463 fn parse_array(&mut self) -> Result<ASTNode, ParserError> {
2464 let mut rows = Vec::new();
2465 let mut current_row = Vec::new();
2466
2467 self.skip_whitespace();
2468 if self.position < self.tokens.len()
2469 && self.tokens[self.position].token_type == TokenType::Array
2470 && self.tokens[self.position].subtype == TokenSubType::Close
2471 {
2472 self.position += 1;
2473 return Ok(ASTNode::new(ASTNodeType::Array(rows), None));
2474 }
2475
2476 current_row.push(self.parse_expression()?);
2477
2478 while self.position < self.tokens.len() {
2479 self.skip_whitespace();
2480 if self.position >= self.tokens.len() {
2481 break;
2482 }
2483 let token = &self.tokens[self.position];
2484
2485 if token.token_type == TokenType::Sep {
2486 if token.subtype == TokenSubType::Arg {
2487 self.position += 1;
2488 current_row.push(self.parse_expression()?);
2489 } else if token.subtype == TokenSubType::Row {
2490 self.position += 1;
2491 rows.push(current_row);
2492 current_row = vec![self.parse_expression()?];
2493 }
2494 } else if token.token_type == TokenType::Array && token.subtype == TokenSubType::Close {
2495 self.position += 1;
2496 rows.push(current_row);
2497 break;
2498 } else {
2499 return Err(ParserError {
2500 message: format!("Unexpected token in array: {token:?}"),
2501 position: Some(self.position),
2502 });
2503 }
2504 }
2505
2506 let contains_volatile = rows
2507 .iter()
2508 .flat_map(|r| r.iter())
2509 .any(|n| n.contains_volatile);
2510
2511 Ok(ASTNode::new_with_volatile(
2512 ASTNodeType::Array(rows),
2513 None,
2514 contains_volatile,
2515 ))
2516 }
2517}
2518
2519pub fn normalise_reference(reference: &str) -> Result<String, ParsingError> {
2521 let ref_type = ReferenceType::from_string(reference)?;
2522 Ok(ref_type.to_string())
2523}
2524
2525pub fn parse<T: AsRef<str>>(formula: T) -> Result<ASTNode, ParserError> {
2526 parse_with_dialect(formula, FormulaDialect::Excel)
2527}
2528
2529pub fn parse_with_dialect<T: AsRef<str>>(
2530 formula: T,
2531 dialect: FormulaDialect,
2532) -> Result<ASTNode, ParserError> {
2533 let spans = crate::tokenizer::tokenize_spans_with_dialect(formula.as_ref(), dialect)?;
2534 let mut parser = SpanParser::new(formula.as_ref(), &spans, dialect);
2535 parser.parse()
2536}
2537
2538pub fn parse_with_volatility_classifier<T, F>(
2541 formula: T,
2542 classifier: F,
2543) -> Result<ASTNode, ParserError>
2544where
2545 T: AsRef<str>,
2546 F: Fn(&str) -> bool + Send + Sync + 'static,
2547{
2548 parse_with_dialect_and_volatility_classifier(formula, FormulaDialect::Excel, classifier)
2549}
2550
2551pub fn parse_with_dialect_and_volatility_classifier<T, F>(
2552 formula: T,
2553 dialect: FormulaDialect,
2554 classifier: F,
2555) -> Result<ASTNode, ParserError>
2556where
2557 T: AsRef<str>,
2558 F: Fn(&str) -> bool + Send + Sync + 'static,
2559{
2560 let spans = crate::tokenizer::tokenize_spans_with_dialect(formula.as_ref(), dialect)?;
2561 let mut parser =
2562 SpanParser::new(formula.as_ref(), &spans, dialect).with_volatility_classifier(classifier);
2563 parser.parse()
2564}
2565
2566pub struct BatchParser {
2571 include_whitespace: bool,
2572 volatility_classifier: Option<VolatilityClassifierArc>,
2573 token_cache: std::collections::HashMap<String, Arc<[crate::tokenizer::TokenSpan]>>, dialect: FormulaDialect,
2575}
2576
2577impl BatchParser {
2578 pub fn builder() -> BatchParserBuilder {
2579 BatchParserBuilder::default()
2580 }
2581
2582 pub fn parse(&mut self, formula: &str) -> Result<ASTNode, ParserError> {
2584 let spans = if let Some(tokens) = self.token_cache.get(formula) {
2585 Arc::clone(tokens)
2586 } else {
2587 let mut spans = crate::tokenizer::tokenize_spans_with_dialect(formula, self.dialect)?;
2588 if !self.include_whitespace {
2589 spans.retain(|t| t.token_type != TokenType::Whitespace);
2590 }
2591
2592 let spans: Arc<[crate::tokenizer::TokenSpan]> = Arc::from(spans.into_boxed_slice());
2593 self.token_cache
2594 .insert(formula.to_string(), Arc::clone(&spans));
2595 spans
2596 };
2597
2598 let mut parser = SpanParser::new(formula, spans.as_ref(), self.dialect);
2599 if let Some(classifier) = self.volatility_classifier.clone() {
2600 parser = parser.with_volatility_classifier(move |name| classifier(name));
2601 }
2602 parser.parse()
2603 }
2604}
2605
2606#[derive(Default)]
2607pub struct BatchParserBuilder {
2608 include_whitespace: bool,
2609 volatility_classifier: Option<VolatilityClassifierArc>,
2610 dialect: FormulaDialect,
2611}
2612
2613impl BatchParserBuilder {
2614 pub fn include_whitespace(mut self, include: bool) -> Self {
2615 self.include_whitespace = include;
2616 self
2617 }
2618
2619 pub fn with_volatility_classifier<F>(mut self, f: F) -> Self
2620 where
2621 F: Fn(&str) -> bool + Send + Sync + 'static,
2622 {
2623 self.volatility_classifier = Some(Arc::new(f));
2624 self
2625 }
2626
2627 pub fn dialect(mut self, dialect: FormulaDialect) -> Self {
2628 self.dialect = dialect;
2629 self
2630 }
2631
2632 pub fn build(self) -> BatchParser {
2633 BatchParser {
2634 include_whitespace: self.include_whitespace,
2635 volatility_classifier: self.volatility_classifier,
2636 token_cache: std::collections::HashMap::new(),
2637 dialect: self.dialect,
2638 }
2639 }
2640}