1use crate::tokenizer::{Associativity, Token, TokenSubType, TokenType, Tokenizer, TokenizerError};
2use crate::types::{FormulaDialect, ParsingError};
3use crate::{ExcelError, LiteralValue};
4
5use crate::hasher::FormulaHasher;
6use once_cell::sync::Lazy;
7use smallvec::SmallVec;
8use std::error::Error;
9use std::fmt::{self, Display};
10use std::hash::{Hash, Hasher};
11use std::sync::Arc;
12
13type VolatilityFn = dyn Fn(&str) -> bool + Send + Sync + 'static;
14type VolatilityClassifierBox = Box<VolatilityFn>;
15type VolatilityClassifierArc = Arc<VolatilityFn>;
16
17#[derive(Debug)]
19pub struct ParserError {
20 pub message: String,
21 pub position: Option<usize>,
22}
23
24impl Display for ParserError {
25 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
26 if let Some(pos) = self.position {
27 write!(f, "ParserError at position {}: {}", pos, self.message)
28 } else {
29 write!(f, "ParserError: {}", self.message)
30 }
31 }
32}
33
34impl Error for ParserError {}
35
36static COLUMN_LOOKUP: Lazy<Vec<String>> = Lazy::new(|| {
38 let mut cols = Vec::with_capacity(702);
39 for c in b'A'..=b'Z' {
41 cols.push(String::from(c as char));
42 }
43 for c1 in b'A'..=b'Z' {
45 for c2 in b'A'..=b'Z' {
46 cols.push(format!("{}{}", c1 as char, c2 as char));
47 }
48 }
49 cols
50});
51
52#[derive(Debug, Clone, PartialEq, Hash)]
54pub enum TableSpecifier {
55 All,
57 Data,
59 Headers,
61 Totals,
63 Row(TableRowSpecifier),
65 Column(String),
67 ColumnRange(String, String),
69 SpecialItem(SpecialItem),
71 Combination(Vec<Box<TableSpecifier>>),
73}
74
75#[derive(Debug, Clone, PartialEq, Hash)]
77pub enum TableRowSpecifier {
78 Current,
80 All,
82 Data,
84 Headers,
86 Totals,
88 Index(u32),
90}
91
92#[derive(Debug, Clone, PartialEq, Hash)]
94pub enum SpecialItem {
95 Headers,
97 Data,
99 Totals,
101 All,
103 ThisRow,
105}
106
107#[derive(Debug, Clone, PartialEq, Hash)]
109pub struct TableReference {
110 pub name: String,
112 pub specifier: Option<TableSpecifier>,
114}
115
116#[derive(Debug, Clone, PartialEq, Hash)]
118pub enum ReferenceType {
119 Cell {
120 sheet: Option<String>,
121 row: u32,
122 col: u32,
123 },
124 Range {
125 sheet: Option<String>,
126 start_row: Option<u32>,
127 start_col: Option<u32>,
128 end_row: Option<u32>,
129 end_col: Option<u32>,
130 },
131 Table(TableReference),
132 NamedRange(String),
133}
134
135impl Display for TableSpecifier {
136 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
137 match self {
138 TableSpecifier::All => write!(f, "#All"),
139 TableSpecifier::Data => write!(f, "#Data"),
140 TableSpecifier::Headers => write!(f, "#Headers"),
141 TableSpecifier::Totals => write!(f, "#Totals"),
142 TableSpecifier::Row(row) => write!(f, "{row}"),
143 TableSpecifier::Column(column) => write!(f, "{column}"),
144 TableSpecifier::ColumnRange(start, end) => write!(f, "{start}:{end}"),
145 TableSpecifier::SpecialItem(item) => write!(f, "{item}"),
146 TableSpecifier::Combination(specs) => {
147 let parts: Vec<String> = specs.iter().map(|s| format!("[{s}]")).collect();
150 write!(f, "{}", parts.join(","))
151 }
152 }
153 }
154}
155
156impl Display for TableRowSpecifier {
157 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
158 match self {
159 TableRowSpecifier::Current => write!(f, "@"),
160 TableRowSpecifier::All => write!(f, "#All"),
161 TableRowSpecifier::Data => write!(f, "#Data"),
162 TableRowSpecifier::Headers => write!(f, "#Headers"),
163 TableRowSpecifier::Totals => write!(f, "#Totals"),
164 TableRowSpecifier::Index(idx) => write!(f, "{idx}"),
165 }
166 }
167}
168
169impl Display for SpecialItem {
170 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
171 match self {
172 SpecialItem::Headers => write!(f, "#Headers"),
173 SpecialItem::Data => write!(f, "#Data"),
174 SpecialItem::Totals => write!(f, "#Totals"),
175 SpecialItem::All => write!(f, "#All"),
176 SpecialItem::ThisRow => write!(f, "@"),
177 }
178 }
179}
180
181fn sheet_name_needs_quoting(name: &str) -> bool {
183 if name.is_empty() {
184 return false;
185 }
186
187 let bytes = name.as_bytes();
188
189 if bytes[0].is_ascii_digit() {
191 return true;
192 }
193
194 for &byte in bytes {
197 match byte {
198 b' ' | b'!' | b'"' | b'#' | b'$' | b'%' | b'&' | b'\'' | b'(' | b')' | b'*' | b'+'
199 | b',' | b'-' | b'.' | b'/' | b':' | b';' | b'<' | b'=' | b'>' | b'?' | b'@' | b'['
200 | b'\\' | b']' | b'^' | b'`' | b'{' | b'|' | b'}' | b'~' => return true,
201 _ => {}
202 }
203 }
204
205 let upper = name.to_uppercase();
207 matches!(
208 upper.as_str(),
209 "TRUE" | "FALSE" | "NULL" | "REF" | "DIV" | "NAME" | "NUM" | "VALUE" | "N/A"
210 )
211}
212
213#[derive(Debug, Clone)]
214struct OpenFormulaRefPart {
215 sheet: Option<String>,
216 coord: String,
217}
218
219impl ReferenceType {
220 pub fn from_string(reference: &str) -> Result<Self, ParsingError> {
222 Self::parse_excel_reference(reference)
223 }
224
225 pub fn from_string_with_dialect(
227 reference: &str,
228 dialect: FormulaDialect,
229 ) -> Result<Self, ParsingError> {
230 match dialect {
231 FormulaDialect::Excel => Self::parse_excel_reference(reference),
232 FormulaDialect::OpenFormula => Self::parse_openformula_reference(reference)
233 .or_else(|_| Self::parse_excel_reference(reference)),
234 }
235 }
236
237 fn parse_excel_reference(reference: &str) -> Result<Self, ParsingError> {
238 if reference.contains('[') {
240 return Self::parse_table_reference(reference);
241 }
242
243 let (sheet, ref_part) = Self::extract_sheet_name(reference);
245
246 if ref_part.contains(':') {
247 Self::parse_range_reference(&ref_part, sheet)
249 } else {
250 match Self::parse_cell_reference(&ref_part) {
252 Ok((col, row)) => Ok(ReferenceType::Cell { sheet, row, col }),
253 Err(_) => {
254 Ok(ReferenceType::NamedRange(reference.to_string()))
256 }
257 }
258 }
259 }
260
261 fn parse_range_reference(reference: &str, sheet: Option<String>) -> Result<Self, ParsingError> {
263 let mut parts = reference.splitn(2, ':');
264 let start = parts.next().unwrap();
265 let end = parts
266 .next()
267 .ok_or_else(|| ParsingError::InvalidReference(format!("Invalid range: {reference}")))?;
268
269 let (start_col, start_row) = Self::parse_range_part(start)?;
270 let (end_col, end_row) = Self::parse_range_part(end)?;
271
272 Ok(ReferenceType::Range {
273 sheet,
274 start_row,
275 start_col,
276 end_row,
277 end_col,
278 })
279 }
280
281 fn parse_range_part(part: &str) -> Result<(Option<u32>, Option<u32>), ParsingError> {
284 if let Ok((col, row)) = Self::parse_cell_reference(part) {
286 return Ok((Some(col), Some(row)));
287 }
288
289 let bytes = part.as_bytes();
291 let mut i = 0;
292
293 if i < bytes.len() && bytes[i] == b'$' {
295 i += 1;
296 }
297
298 let col_start = i;
300 while i < bytes.len() && bytes[i].is_ascii_alphabetic() {
301 i += 1;
302 }
303
304 if i > col_start {
305 let col_str = &part[col_start..i];
307 let col = Self::column_to_number(col_str)?;
308
309 if i < bytes.len() && bytes[i] == b'$' {
311 i += 1;
312 }
313
314 let row_start = i;
316 while i < bytes.len() && bytes[i].is_ascii_digit() {
317 i += 1;
318 }
319
320 if i > row_start && i == bytes.len() {
321 let row_str = &part[row_start..i];
323 let row = row_str.parse::<u32>().map_err(|_| {
324 ParsingError::InvalidReference(format!("Invalid row: {row_str}"))
325 })?;
326 return Ok((Some(col), Some(row)));
327 } else if i == col_start + col_str.len()
328 || (i == col_start + col_str.len() + 1 && bytes[col_start + col_str.len()] == b'$')
329 {
330 return Ok((Some(col), None));
332 }
333 } else {
334 i = 0;
336 if i < bytes.len() && bytes[i] == b'$' {
337 i += 1;
338 }
339
340 let row_start = i;
341 while i < bytes.len() && bytes[i].is_ascii_digit() {
342 i += 1;
343 }
344
345 if i > row_start && i == bytes.len() {
346 let row_str = &part[row_start..i];
347 let row = row_str.parse::<u32>().map_err(|_| {
348 ParsingError::InvalidReference(format!("Invalid row: {row_str}"))
349 })?;
350 return Ok((None, Some(row)));
351 }
352 }
353
354 Err(ParsingError::InvalidReference(format!(
355 "Invalid range part: {part}"
356 )))
357 }
358
359 fn parse_cell_reference(reference: &str) -> Result<(u32, u32), ParsingError> {
361 let bytes = reference.as_bytes();
362 let mut i = 0;
363
364 if i < bytes.len() && bytes[i] == b'$' {
366 i += 1;
367 }
368
369 let col_start = i;
371 while i < bytes.len() && bytes[i].is_ascii_alphabetic() {
372 i += 1;
373 }
374
375 if i == col_start {
376 return Err(ParsingError::InvalidReference(format!(
377 "Invalid cell reference: {reference}"
378 )));
379 }
380
381 let col_str = &reference[col_start..i];
382 let col = Self::column_to_number(col_str)?;
383
384 if i < bytes.len() && bytes[i] == b'$' {
386 i += 1;
387 }
388
389 let row_start = i;
391 while i < bytes.len() && bytes[i].is_ascii_digit() {
392 i += 1;
393 }
394
395 if i == row_start || i != bytes.len() {
396 return Err(ParsingError::InvalidReference(format!(
397 "Invalid cell reference: {reference}"
398 )));
399 }
400
401 let row_str = &reference[row_start..i];
402 let row = row_str
403 .parse::<u32>()
404 .map_err(|_| ParsingError::InvalidReference(format!("Invalid row: {row_str}")))?;
405
406 Ok((col, row))
407 }
408
409 pub(crate) fn column_to_number(column: &str) -> Result<u32, ParsingError> {
411 let bytes = column.as_bytes();
412
413 if bytes.is_empty() || bytes.len() > 3 {
416 return Err(ParsingError::InvalidReference(format!(
417 "Invalid column: {column}"
418 )));
419 }
420
421 let mut result = 0u32;
422
423 for &b in bytes {
424 if !b.is_ascii_alphabetic() {
425 return Err(ParsingError::InvalidReference(format!(
426 "Invalid column: {column}"
427 )));
428 }
429 result = result
431 .checked_mul(26)
432 .and_then(|r| r.checked_add((b.to_ascii_uppercase() - b'A' + 1) as u32))
433 .ok_or_else(|| {
434 ParsingError::InvalidReference(format!("Invalid column: {column}"))
435 })?;
436 }
437
438 Ok(result)
439 }
440
441 pub(crate) fn number_to_column(mut num: u32) -> String {
443 if num > 0 && num <= 702 {
445 return COLUMN_LOOKUP[(num - 1) as usize].clone();
446 }
447
448 let mut result = String::with_capacity(3);
450 while num > 0 {
451 num -= 1;
452 result.insert(0, ((num % 26) as u8 + b'A') as char);
453 num /= 26;
454 }
455 result
456 }
457}
458
459impl Display for ReferenceType {
460 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
461 write!(
462 f,
463 "{}",
464 match self {
465 ReferenceType::Cell { sheet, row, col } => {
466 let col_str = Self::number_to_column(*col);
467 let row_str = row.to_string();
468
469 if let Some(sheet_name) = sheet {
470 if sheet_name_needs_quoting(sheet_name) {
471 let escaped_name = sheet_name.replace('\'', "''");
473 format!("'{escaped_name}'!{col_str}{row_str}")
474 } else {
475 format!("{sheet_name}!{col_str}{row_str}")
476 }
477 } else {
478 format!("{col_str}{row_str}")
479 }
480 }
481 ReferenceType::Range {
482 sheet,
483 start_row,
484 start_col,
485 end_row,
486 end_col,
487 } => {
488 let start_ref = match (start_col, start_row) {
490 (Some(col), Some(row)) => {
491 format!("{}{}", Self::number_to_column(*col), row)
492 }
493 (Some(col), None) => Self::number_to_column(*col),
494 (None, Some(row)) => row.to_string(),
495 (None, None) => "".to_string(), };
497
498 let end_ref = match (end_col, end_row) {
500 (Some(col), Some(row)) => {
501 format!("{}{}", Self::number_to_column(*col), row)
502 }
503 (Some(col), None) => Self::number_to_column(*col),
504 (None, Some(row)) => row.to_string(),
505 (None, None) => "".to_string(), };
507
508 let range_part = format!("{start_ref}:{end_ref}");
509
510 if let Some(sheet_name) = sheet {
511 if sheet_name_needs_quoting(sheet_name) {
512 let escaped_name = sheet_name.replace('\'', "''");
514 format!("'{escaped_name}'!{range_part}")
515 } else {
516 format!("{sheet_name}!{range_part}")
517 }
518 } else {
519 range_part
520 }
521 }
522 ReferenceType::Table(table_ref) => {
523 if let Some(specifier) = &table_ref.specifier {
524 match specifier {
527 TableSpecifier::Column(column) => {
528 format!("{}[{}]", table_ref.name, column.trim())
529 }
530 TableSpecifier::ColumnRange(start, end) => {
531 format!("{}[{}:{}]", table_ref.name, start.trim(), end.trim())
532 }
533 _ => {
534 format!("{}[{}]", table_ref.name, specifier)
536 }
537 }
538 } else {
539 table_ref.name.clone()
540 }
541 }
542 ReferenceType::NamedRange(name) => name.clone(),
543 }
544 )
545 }
546}
547
548impl ReferenceType {
549 pub fn normalise(&self) -> String {
551 format!("{self}")
552 }
553
554 fn extract_sheet_name(reference: &str) -> (Option<String>, String) {
556 let bytes = reference.as_bytes();
557 let mut i = 0;
558
559 if i < bytes.len() && bytes[i] == b'\'' {
561 i += 1;
562 let start = i;
563
564 while i < bytes.len() {
566 if bytes[i] == b'\'' {
567 if i + 1 < bytes.len() && bytes[i + 1] == b'!' {
569 let sheet = String::from(&reference[start..i]);
570 let ref_part = String::from(&reference[i + 2..]);
571 return (Some(sheet), ref_part);
572 }
573 }
574 i += 1;
575 }
576 }
577
578 i = 0;
580 while i < bytes.len() {
581 if bytes[i] == b'!' && i > 0 {
582 let sheet = String::from(&reference[0..i]);
583 let ref_part = String::from(&reference[i + 1..]);
584 return (Some(sheet), ref_part);
585 }
586 i += 1;
587 }
588
589 (None, reference.to_string())
590 }
591
592 fn parse_table_reference(reference: &str) -> Result<Self, ParsingError> {
594 if let Some(bracket_pos) = reference.find('[') {
596 let table_name = reference[..bracket_pos].trim();
597 if table_name.is_empty() {
598 return Err(ParsingError::InvalidReference(reference.to_string()));
599 }
600
601 let specifier_str = &reference[bracket_pos..];
602 let specifier = Self::parse_table_specifier(specifier_str)?;
603
604 Ok(ReferenceType::Table(TableReference {
605 name: table_name.to_string(),
606 specifier,
607 }))
608 } else {
609 Err(ParsingError::InvalidReference(reference.to_string()))
610 }
611 }
612
613 fn parse_table_specifier(specifier_str: &str) -> Result<Option<TableSpecifier>, ParsingError> {
615 if specifier_str.is_empty() || !specifier_str.starts_with('[') {
616 return Ok(None);
617 }
618
619 let mut depth = 0;
621 let mut end_pos = 0;
622
623 for (i, c) in specifier_str.chars().enumerate() {
624 if c == '[' {
625 depth += 1;
626 } else if c == ']' {
627 depth -= 1;
628 if depth == 0 {
629 end_pos = i;
630 break;
631 }
632 }
633 }
634
635 if depth != 0 || end_pos == 0 {
636 return Err(ParsingError::InvalidReference(format!(
637 "Unbalanced brackets in table specifier: {specifier_str}"
638 )));
639 }
640
641 let content = &specifier_str[1..end_pos];
643
644 if content.is_empty() {
646 return Ok(Some(TableSpecifier::All));
648 }
649
650 if content.starts_with("#") {
652 return Self::parse_special_item(content);
653 }
654
655 if !content.contains('[') && !content.contains('#') {
657 if let Some(colon_pos) = content.find(':') {
659 let start = content[..colon_pos].trim();
660 let end = content[colon_pos + 1..].trim();
661 return Ok(Some(TableSpecifier::ColumnRange(
662 start.to_string(),
663 end.to_string(),
664 )));
665 } else {
666 return Ok(Some(TableSpecifier::Column(content.trim().to_string())));
668 }
669 }
670
671 if content.contains('[') {
673 return Self::parse_complex_table_specifier(content);
674 }
675
676 Ok(Some(TableSpecifier::Column(content.trim().to_string())))
678 }
679
680 fn parse_openformula_reference(reference: &str) -> Result<Self, ParsingError> {
681 if reference.starts_with('[') && reference.ends_with(']') {
682 let inner = &reference[1..reference.len() - 1];
683 if inner.is_empty() {
684 return Err(ParsingError::InvalidReference(
685 "Empty OpenFormula reference".to_string(),
686 ));
687 }
688
689 let mut parts = inner.splitn(2, ':');
690 let start_part_str = parts.next().unwrap();
691 let end_part_str = parts.next();
692
693 let start_part = Self::parse_openformula_part(start_part_str)?;
694 let end_part = if let Some(part) = end_part_str {
695 Some(Self::parse_openformula_part(part)?)
696 } else {
697 None
698 };
699
700 let sheet = match (&start_part.sheet, &end_part) {
701 (Some(sheet), Some(end)) => {
702 if let Some(end_sheet) = &end.sheet {
703 if end_sheet != sheet {
704 return Err(ParsingError::InvalidReference(format!(
705 "Mismatched sheets in reference: {sheet} vs {end_sheet}"
706 )));
707 }
708 }
709 Some(sheet.clone())
710 }
711 (Some(sheet), None) => Some(sheet.clone()),
712 (None, Some(end)) => end.sheet.clone(),
713 (None, None) => None,
714 };
715
716 let mut excel_like = String::new();
717 if let Some(sheet_name) = sheet {
718 if sheet_name_needs_quoting(&sheet_name) {
719 let escaped = sheet_name.replace('\'', "''");
720 excel_like.push('\'');
721 excel_like.push_str(&escaped);
722 excel_like.push('\'');
723 } else {
724 excel_like.push_str(&sheet_name);
725 }
726 excel_like.push('!');
727 }
728
729 excel_like.push_str(&start_part.coord);
730 if let Some(end) = end_part {
731 excel_like.push(':');
732 excel_like.push_str(&end.coord);
733 }
734
735 return Self::parse_excel_reference(&excel_like);
736 }
737
738 Err(ParsingError::InvalidReference(format!(
739 "Unsupported OpenFormula reference: {reference}"
740 )))
741 }
742
743 fn parse_openformula_part(part: &str) -> Result<OpenFormulaRefPart, ParsingError> {
744 let trimmed = part.trim();
745 if trimmed.is_empty() {
746 return Err(ParsingError::InvalidReference(
747 "Empty component in OpenFormula reference".to_string(),
748 ));
749 }
750
751 if trimmed == "." {
752 return Err(ParsingError::InvalidReference(
753 "Incomplete OpenFormula reference component".to_string(),
754 ));
755 }
756
757 if trimmed.starts_with('[') {
758 return Err(ParsingError::InvalidReference(format!(
760 "Unexpected '[' in OpenFormula reference component: {trimmed}"
761 )));
762 }
763
764 let (sheet, coord_slice) = if let Some(stripped) = trimmed.strip_prefix('.') {
765 (None, stripped.trim())
766 } else if let Some(dot_idx) = Self::find_openformula_sheet_separator(trimmed) {
767 let sheet_part = trimmed[..dot_idx].trim();
768 let coord_part = trimmed[dot_idx + 1..].trim();
769 if coord_part.is_empty() {
770 return Err(ParsingError::InvalidReference(format!(
771 "Missing coordinate in OpenFormula reference component: {trimmed}"
772 )));
773 }
774 let sheet_name = Self::normalise_openformula_sheet(sheet_part)?;
775 (Some(sheet_name), coord_part)
776 } else {
777 (None, trimmed)
778 };
779
780 let coord = coord_slice.trim_start_matches('.').trim().to_string();
781
782 if coord.is_empty() {
783 return Err(ParsingError::InvalidReference(format!(
784 "Missing coordinate in OpenFormula reference component: {trimmed}"
785 )));
786 }
787
788 Ok(OpenFormulaRefPart { sheet, coord })
789 }
790
791 fn normalise_openformula_sheet(sheet: &str) -> Result<String, ParsingError> {
792 let without_abs = sheet.trim().trim_start_matches('$');
793
794 if without_abs.starts_with('\'') {
795 if without_abs.len() < 2 || !without_abs.ends_with('\'') {
796 return Err(ParsingError::InvalidReference(format!(
797 "Unterminated sheet name in OpenFormula reference: {sheet}"
798 )));
799 }
800 let inner = &without_abs[1..without_abs.len() - 1];
801 Ok(inner.replace("''", "'"))
802 } else {
803 Ok(without_abs.to_string())
804 }
805 }
806
807 fn find_openformula_sheet_separator(part: &str) -> Option<usize> {
808 let bytes = part.as_bytes();
809 let mut i = 0;
810 let mut in_quotes = false;
811
812 while i < bytes.len() {
813 match bytes[i] {
814 b'\'' => {
815 if i + 1 < bytes.len() && bytes[i + 1] == b'\'' {
816 i += 2;
817 continue;
818 }
819 in_quotes = !in_quotes;
820 i += 1;
821 }
822 b'.' if !in_quotes => return Some(i),
823 _ => i += 1,
824 }
825 }
826
827 None
828 }
829
830 fn parse_special_item(content: &str) -> Result<Option<TableSpecifier>, ParsingError> {
832 match content {
833 "#All" => Ok(Some(TableSpecifier::SpecialItem(SpecialItem::All))),
834 "#Headers" => Ok(Some(TableSpecifier::SpecialItem(SpecialItem::Headers))),
835 "#Data" => Ok(Some(TableSpecifier::SpecialItem(SpecialItem::Data))),
836 "#Totals" => Ok(Some(TableSpecifier::SpecialItem(SpecialItem::Totals))),
837 "@" => Ok(Some(TableSpecifier::Row(TableRowSpecifier::Current))),
838 _ => Err(ParsingError::InvalidReference(format!(
839 "Unknown special item: {content}"
840 ))),
841 }
842 }
843
844 fn parse_complex_table_specifier(
846 content: &str,
847 ) -> Result<Option<TableSpecifier>, ParsingError> {
848 if content.contains("[#Headers]")
853 || content.contains("[#All]")
854 || content.contains("[#Data]")
855 || content.contains("[#Totals]")
856 || content.contains("[@]")
857 {
858 let mut specifiers = Vec::new();
861
862 if content.contains("[#Headers]") {
864 specifiers.push(Box::new(TableSpecifier::SpecialItem(SpecialItem::Headers)));
865 }
866 if content.contains("[#Data]") {
867 specifiers.push(Box::new(TableSpecifier::SpecialItem(SpecialItem::Data)));
868 }
869 if content.contains("[#Totals]") {
870 specifiers.push(Box::new(TableSpecifier::SpecialItem(SpecialItem::Totals)));
871 }
872 if content.contains("[#All]") {
873 specifiers.push(Box::new(TableSpecifier::SpecialItem(SpecialItem::All)));
874 }
875
876 if !specifiers.is_empty() {
877 return Ok(Some(TableSpecifier::Combination(specifiers)));
878 }
879 }
880
881 Ok(Some(TableSpecifier::Column(content.trim().to_string())))
883 }
884
885 pub fn to_excel_string(&self) -> String {
887 match self {
888 ReferenceType::Cell { sheet, row, col } => {
889 if let Some(s) = sheet {
890 if sheet_name_needs_quoting(s) {
891 let escaped_name = s.replace('\'', "''");
892 format!("'{}'!{}{}", escaped_name, Self::number_to_column(*col), row)
893 } else {
894 format!("{}!{}{}", s, Self::number_to_column(*col), row)
895 }
896 } else {
897 format!("{}{}", Self::number_to_column(*col), row)
898 }
899 }
900 ReferenceType::Range {
901 sheet,
902 start_row,
903 start_col,
904 end_row,
905 end_col,
906 } => {
907 let start_ref = match (start_col, start_row) {
909 (Some(col), Some(row)) => format!("{}{}", Self::number_to_column(*col), row),
910 (Some(col), None) => Self::number_to_column(*col),
911 (None, Some(row)) => row.to_string(),
912 (None, None) => "".to_string(), };
914
915 let end_ref = match (end_col, end_row) {
917 (Some(col), Some(row)) => format!("{}{}", Self::number_to_column(*col), row),
918 (Some(col), None) => Self::number_to_column(*col),
919 (None, Some(row)) => row.to_string(),
920 (None, None) => "".to_string(), };
922
923 let range_part = format!("{start_ref}:{end_ref}");
924
925 if let Some(s) = sheet {
926 if sheet_name_needs_quoting(s) {
927 let escaped_name = s.replace('\'', "''");
928 format!("'{escaped_name}'!{range_part}")
929 } else {
930 format!("{s}!{range_part}")
931 }
932 } else {
933 range_part
934 }
935 }
936 ReferenceType::Table(table_ref) => {
937 if let Some(specifier) = &table_ref.specifier {
938 format!("{}[{}]", table_ref.name, specifier)
939 } else {
940 table_ref.name.clone()
941 }
942 }
943 ReferenceType::NamedRange(name) => name.clone(),
944 }
945 }
946}
947
948#[derive(Debug, Clone, PartialEq, Hash)]
950pub enum ASTNodeType {
951 Literal(LiteralValue),
952 Reference {
953 original: String, reference: ReferenceType, },
956 UnaryOp {
957 op: String,
958 expr: Box<ASTNode>,
959 },
960 BinaryOp {
961 op: String,
962 left: Box<ASTNode>,
963 right: Box<ASTNode>,
964 },
965 Function {
966 name: String,
967 args: Vec<ASTNode>, },
969 Array(Vec<Vec<ASTNode>>), }
971
972impl Display for ASTNodeType {
973 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
974 match self {
975 ASTNodeType::Literal(value) => write!(f, "Literal({value})"),
976 ASTNodeType::Reference { reference, .. } => write!(f, "Reference({reference:?})"),
977 ASTNodeType::UnaryOp { op, expr } => write!(f, "UnaryOp({op}, {expr})"),
978 ASTNodeType::BinaryOp { op, left, right } => {
979 write!(f, "BinaryOp({op}, {left}, {right})")
980 }
981 ASTNodeType::Function { name, args } => write!(f, "Function({name}, {args:?})"),
982 ASTNodeType::Array(rows) => write!(f, "Array({rows:?})"),
983 }
984 }
985}
986
987#[derive(Debug, Clone, PartialEq)]
989pub struct ASTNode {
990 pub node_type: ASTNodeType,
991 pub source_token: Option<Token>,
992 pub contains_volatile: bool,
997}
998
999impl ASTNode {
1000 pub fn new(node_type: ASTNodeType, source_token: Option<Token>) -> Self {
1001 ASTNode {
1002 node_type,
1003 source_token,
1004 contains_volatile: false,
1005 }
1006 }
1007
1008 pub fn new_with_volatile(
1010 node_type: ASTNodeType,
1011 source_token: Option<Token>,
1012 contains_volatile: bool,
1013 ) -> Self {
1014 ASTNode {
1015 node_type,
1016 source_token,
1017 contains_volatile,
1018 }
1019 }
1020
1021 pub fn contains_volatile(&self) -> bool {
1023 self.contains_volatile
1024 }
1025
1026 pub fn fingerprint(&self) -> u64 {
1027 self.calculate_hash()
1028 }
1029
1030 pub fn calculate_hash(&self) -> u64 {
1032 let mut hasher = FormulaHasher::new();
1033 self.hash_node(&mut hasher);
1034 hasher.finish()
1035 }
1036
1037 fn hash_node(&self, hasher: &mut FormulaHasher) {
1038 match &self.node_type {
1039 ASTNodeType::Literal(value) => {
1040 hasher.write(&[1]); value.hash(hasher);
1042 }
1043 ASTNodeType::Reference { reference, .. } => {
1044 hasher.write(&[2]); reference.hash(hasher);
1046 }
1047 ASTNodeType::UnaryOp { op, expr } => {
1048 hasher.write(&[3]); hasher.write(op.as_bytes());
1050 expr.hash_node(hasher);
1051 }
1052 ASTNodeType::BinaryOp { op, left, right } => {
1053 hasher.write(&[4]); hasher.write(op.as_bytes());
1055 left.hash_node(hasher);
1056 right.hash_node(hasher);
1057 }
1058 ASTNodeType::Function { name, args } => {
1059 hasher.write(&[5]); let name_lower = name.to_lowercase();
1062 hasher.write(name_lower.as_bytes());
1063 hasher.write_usize(args.len());
1064 for arg in args {
1065 arg.hash_node(hasher);
1066 }
1067 }
1068 ASTNodeType::Array(rows) => {
1069 hasher.write(&[6]); hasher.write_usize(rows.len());
1071 for row in rows {
1072 hasher.write_usize(row.len());
1073 for item in row {
1074 item.hash_node(hasher);
1075 }
1076 }
1077 }
1078 }
1079 }
1080
1081 pub fn get_dependencies(&self) -> Vec<&ReferenceType> {
1082 let mut dependencies = Vec::new();
1083 self.collect_dependencies(&mut dependencies);
1084 dependencies
1085 }
1086
1087 pub fn get_dependency_strings(&self) -> Vec<String> {
1088 self.get_dependencies()
1089 .into_iter()
1090 .map(|dep| format!("{dep}"))
1091 .collect()
1092 }
1093
1094 fn collect_dependencies<'a>(&'a self, dependencies: &mut Vec<&'a ReferenceType>) {
1095 match &self.node_type {
1096 ASTNodeType::Reference { reference, .. } => {
1097 dependencies.push(reference);
1098 }
1099 ASTNodeType::UnaryOp { expr, .. } => {
1100 expr.collect_dependencies(dependencies);
1101 }
1102 ASTNodeType::BinaryOp { left, right, .. } => {
1103 left.collect_dependencies(dependencies);
1104 right.collect_dependencies(dependencies);
1105 }
1106 ASTNodeType::Function { args, .. } => {
1107 for arg in args {
1108 arg.collect_dependencies(dependencies);
1109 }
1110 }
1111 ASTNodeType::Array(rows) => {
1112 for row in rows {
1113 for item in row {
1114 item.collect_dependencies(dependencies);
1115 }
1116 }
1117 }
1118 _ => {}
1119 }
1120 }
1121
1122 pub fn refs(&self) -> RefIter<'_> {
1125 RefIter {
1126 stack: smallvec::smallvec![self],
1127 }
1128 }
1129
1130 pub fn visit_refs<V: FnMut(RefView<'_>)>(&self, mut visitor: V) {
1132 let mut stack: Vec<&ASTNode> = Vec::with_capacity(8);
1133 stack.push(self);
1134 while let Some(node) = stack.pop() {
1135 match &node.node_type {
1136 ASTNodeType::Reference { reference, .. } => visitor(RefView::from(reference)),
1137 ASTNodeType::UnaryOp { expr, .. } => stack.push(expr),
1138 ASTNodeType::BinaryOp { left, right, .. } => {
1139 stack.push(right);
1141 stack.push(left);
1142 }
1143 ASTNodeType::Function { args, .. } => {
1144 for a in args.iter().rev() {
1145 stack.push(a);
1146 }
1147 }
1148 ASTNodeType::Array(rows) => {
1149 for r in rows.iter().rev() {
1150 for item in r.iter().rev() {
1151 stack.push(item);
1152 }
1153 }
1154 }
1155 ASTNodeType::Literal(_) => {}
1156 }
1157 }
1158 }
1159
1160 pub fn collect_references(&self, policy: &CollectPolicy) -> SmallVec<[ReferenceType; 4]> {
1162 let mut out: SmallVec<[ReferenceType; 4]> = SmallVec::new();
1163 self.visit_refs(|rv| match rv {
1164 RefView::Cell { sheet, row, col } => out.push(ReferenceType::Cell {
1165 sheet: sheet.map(|s| s.to_string()),
1166 row,
1167 col,
1168 }),
1169 RefView::Range {
1170 sheet,
1171 start_row,
1172 start_col,
1173 end_row,
1174 end_col,
1175 } => {
1176 if policy.expand_small_ranges {
1178 if let (Some(sr), Some(sc), Some(er), Some(ec)) =
1179 (start_row, start_col, end_row, end_col)
1180 {
1181 let rows = er.saturating_sub(sr) + 1;
1182 let cols = ec.saturating_sub(sc) + 1;
1183 let area = rows.saturating_mul(cols);
1184 if area as usize <= policy.range_expansion_limit {
1185 for r in sr..=er {
1186 for c in sc..=ec {
1187 out.push(ReferenceType::Cell {
1188 sheet: sheet.map(|s| s.to_string()),
1189 row: r,
1190 col: c,
1191 });
1192 }
1193 }
1194 return; }
1196 }
1197 }
1198 out.push(ReferenceType::Range {
1199 sheet: sheet.map(|s| s.to_string()),
1200 start_row,
1201 start_col,
1202 end_row,
1203 end_col,
1204 });
1205 }
1206 RefView::Table { name, specifier } => out.push(ReferenceType::Table(TableReference {
1207 name: name.to_string(),
1208 specifier: specifier.cloned(),
1209 })),
1210 RefView::NamedRange { name } => {
1211 if policy.include_names {
1212 out.push(ReferenceType::NamedRange(name.to_string()));
1213 }
1214 }
1215 });
1216 out
1217 }
1218}
1219
1220#[derive(Clone, Copy, Debug)]
1222pub enum RefView<'a> {
1223 Cell {
1224 sheet: Option<&'a str>,
1225 row: u32,
1226 col: u32,
1227 },
1228 Range {
1229 sheet: Option<&'a str>,
1230 start_row: Option<u32>,
1231 start_col: Option<u32>,
1232 end_row: Option<u32>,
1233 end_col: Option<u32>,
1234 },
1235 Table {
1236 name: &'a str,
1237 specifier: Option<&'a TableSpecifier>,
1238 },
1239 NamedRange {
1240 name: &'a str,
1241 },
1242}
1243
1244impl<'a> From<&'a ReferenceType> for RefView<'a> {
1245 fn from(r: &'a ReferenceType) -> Self {
1246 match r {
1247 ReferenceType::Cell { sheet, row, col } => RefView::Cell {
1248 sheet: sheet.as_deref(),
1249 row: *row,
1250 col: *col,
1251 },
1252 ReferenceType::Range {
1253 sheet,
1254 start_row,
1255 start_col,
1256 end_row,
1257 end_col,
1258 } => RefView::Range {
1259 sheet: sheet.as_deref(),
1260 start_row: *start_row,
1261 start_col: *start_col,
1262 end_row: *end_row,
1263 end_col: *end_col,
1264 },
1265 ReferenceType::Table(tr) => RefView::Table {
1266 name: tr.name.as_str(),
1267 specifier: tr.specifier.as_ref(),
1268 },
1269 ReferenceType::NamedRange(name) => RefView::NamedRange { name },
1270 }
1271 }
1272}
1273
1274pub struct RefIter<'a> {
1276 stack: smallvec::SmallVec<[&'a ASTNode; 8]>,
1277}
1278
1279impl<'a> Iterator for RefIter<'a> {
1280 type Item = RefView<'a>;
1281 fn next(&mut self) -> Option<Self::Item> {
1282 while let Some(node) = self.stack.pop() {
1283 match &node.node_type {
1284 ASTNodeType::Reference { reference, .. } => return Some(RefView::from(reference)),
1285 ASTNodeType::UnaryOp { expr, .. } => self.stack.push(expr),
1286 ASTNodeType::BinaryOp { left, right, .. } => {
1287 self.stack.push(right);
1288 self.stack.push(left);
1289 }
1290 ASTNodeType::Function { args, .. } => {
1291 for a in args.iter().rev() {
1292 self.stack.push(a);
1293 }
1294 }
1295 ASTNodeType::Array(rows) => {
1296 for r in rows.iter().rev() {
1297 for item in r.iter().rev() {
1298 self.stack.push(item);
1299 }
1300 }
1301 }
1302 ASTNodeType::Literal(_) => {}
1303 }
1304 }
1305 None
1306 }
1307}
1308
1309#[derive(Debug, Clone)]
1311pub struct CollectPolicy {
1312 pub expand_small_ranges: bool,
1313 pub range_expansion_limit: usize,
1314 pub include_names: bool,
1315}
1316
1317impl Default for CollectPolicy {
1318 fn default() -> Self {
1319 Self {
1320 expand_small_ranges: false,
1321 range_expansion_limit: 0,
1322 include_names: true,
1323 }
1324 }
1325}
1326
1327impl Display for ASTNode {
1328 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1329 write!(f, "{}", self.node_type)
1330 }
1331}
1332
1333impl std::hash::Hash for ASTNode {
1334 fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
1335 let hash = self.calculate_hash();
1336 state.write_u64(hash);
1337 }
1338}
1339
1340pub struct Parser {
1342 tokens: Vec<Token>,
1343 position: usize,
1344 volatility_classifier: Option<VolatilityClassifierBox>,
1346 dialect: FormulaDialect,
1347}
1348
1349impl<T> From<T> for Parser
1350where
1351 T: AsRef<str>,
1352{
1353 fn from(formula: T) -> Self {
1354 let tokens = Tokenizer::new(formula.as_ref()).unwrap().items;
1355 Self::new(tokens, false)
1356 }
1357}
1358
1359impl Parser {
1360 pub fn new(tokens: Vec<Token>, include_whitespace: bool) -> Self {
1361 Self::new_with_dialect(tokens, include_whitespace, FormulaDialect::Excel)
1362 }
1363
1364 pub fn new_with_dialect(
1365 tokens: Vec<Token>,
1366 include_whitespace: bool,
1367 dialect: FormulaDialect,
1368 ) -> Self {
1369 let filtered_tokens = if include_whitespace {
1370 tokens
1371 } else {
1372 tokens
1373 .into_iter()
1374 .filter(|t| t.token_type != TokenType::Whitespace)
1375 .collect()
1376 };
1377 Parser {
1378 tokens: filtered_tokens,
1379 position: 0,
1380 volatility_classifier: None,
1381 dialect,
1382 }
1383 }
1384
1385 pub fn with_volatility_classifier<F>(mut self, f: F) -> Self
1388 where
1389 F: Fn(&str) -> bool + Send + Sync + 'static,
1390 {
1391 self.volatility_classifier = Some(Box::new(f));
1392 self
1393 }
1394
1395 pub fn new_with_classifier<F>(tokens: Vec<Token>, include_whitespace: bool, f: F) -> Self
1397 where
1398 F: Fn(&str) -> bool + Send + Sync + 'static,
1399 {
1400 Self::new(tokens, include_whitespace).with_volatility_classifier(f)
1401 }
1402
1403 pub fn new_with_classifier_and_dialect<F>(
1404 tokens: Vec<Token>,
1405 include_whitespace: bool,
1406 dialect: FormulaDialect,
1407 f: F,
1408 ) -> Self
1409 where
1410 F: Fn(&str) -> bool + Send + Sync + 'static,
1411 {
1412 Self::new_with_dialect(tokens, include_whitespace, dialect).with_volatility_classifier(f)
1413 }
1414
1415 pub fn parse(&mut self) -> Result<ASTNode, ParserError> {
1417 if self.tokens.is_empty() {
1418 return Err(ParserError {
1419 message: "No tokens to parse".to_string(),
1420 position: None,
1421 });
1422 }
1423
1424 if self.tokens[0].token_type == TokenType::Literal {
1426 let token = self.tokens[0].clone();
1427 return Ok(ASTNode::new(
1428 ASTNodeType::Literal(LiteralValue::Text(token.value.clone())),
1429 Some(token),
1430 ));
1431 }
1432
1433 let ast = self.parse_expression()?;
1434 if self.position < self.tokens.len() {
1435 return Err(ParserError {
1436 message: format!(
1437 "Unexpected token at position {}: {:?}",
1438 self.position, self.tokens[self.position]
1439 ),
1440 position: Some(self.position),
1441 });
1442 }
1443 Ok(ast)
1444 }
1445
1446 fn parse_expression(&mut self) -> Result<ASTNode, ParserError> {
1447 self.parse_binary_op(0)
1448 }
1449
1450 fn parse_binary_op(&mut self, min_precedence: u8) -> Result<ASTNode, ParserError> {
1451 let mut left = self.parse_unary_op()?;
1452
1453 while self.position < self.tokens.len() {
1454 let token = &self.tokens[self.position];
1455 if token.token_type != TokenType::OpInfix {
1456 break;
1457 }
1458
1459 let (precedence, associativity) =
1460 token.get_precedence().unwrap_or((0, Associativity::Left));
1461 if precedence < min_precedence {
1462 break;
1463 }
1464
1465 let op_token = self.tokens[self.position].clone();
1466 self.position += 1;
1467
1468 let next_min_precedence = if associativity == Associativity::Left {
1469 precedence + 1
1470 } else {
1471 precedence
1472 };
1473
1474 let right = self.parse_binary_op(next_min_precedence)?;
1475 let contains_volatile = left.contains_volatile || right.contains_volatile;
1476 left = ASTNode::new_with_volatile(
1477 ASTNodeType::BinaryOp {
1478 op: op_token.value.clone(),
1479 left: Box::new(left),
1480 right: Box::new(right),
1481 },
1482 Some(op_token),
1483 contains_volatile,
1484 );
1485 }
1486
1487 Ok(left)
1488 }
1489
1490 fn parse_unary_op(&mut self) -> Result<ASTNode, ParserError> {
1491 if self.position < self.tokens.len()
1492 && self.tokens[self.position].token_type == TokenType::OpPrefix
1493 {
1494 let op_token = self.tokens[self.position].clone();
1495 self.position += 1;
1496 let expr = self.parse_unary_op()?;
1497 let contains_volatile = expr.contains_volatile;
1498 return Ok(ASTNode::new_with_volatile(
1499 ASTNodeType::UnaryOp {
1500 op: op_token.value.clone(),
1501 expr: Box::new(expr),
1502 },
1503 Some(op_token),
1504 contains_volatile,
1505 ));
1506 }
1507 self.parse_postfix_op()
1508 }
1509
1510 fn parse_postfix_op(&mut self) -> Result<ASTNode, ParserError> {
1511 let mut expr = self.parse_primary()?;
1512
1513 while self.position < self.tokens.len()
1514 && self.tokens[self.position].token_type == TokenType::OpPostfix
1515 {
1516 let op_token = self.tokens[self.position].clone();
1517 self.position += 1;
1518 let contains_volatile = expr.contains_volatile;
1519 expr = ASTNode::new_with_volatile(
1520 ASTNodeType::UnaryOp {
1521 op: op_token.value.clone(),
1522 expr: Box::new(expr),
1523 },
1524 Some(op_token),
1525 contains_volatile,
1526 );
1527 }
1528
1529 Ok(expr)
1530 }
1531
1532 fn parse_primary(&mut self) -> Result<ASTNode, ParserError> {
1533 if self.position >= self.tokens.len() {
1534 return Err(ParserError {
1535 message: "Unexpected end of tokens".to_string(),
1536 position: Some(self.position),
1537 });
1538 }
1539
1540 let token = &self.tokens[self.position];
1541 match token.token_type {
1542 TokenType::Operand => {
1543 let operand_token = self.tokens[self.position].clone();
1544 self.position += 1;
1545 self.parse_operand(operand_token)
1546 }
1547 TokenType::Func => {
1548 let func_token = self.tokens[self.position].clone();
1549 self.position += 1;
1550 self.parse_function(func_token)
1551 }
1552 TokenType::Paren if token.subtype == TokenSubType::Open => {
1553 self.position += 1;
1554 let expr = self.parse_expression()?;
1555 if self.position >= self.tokens.len()
1556 || self.tokens[self.position].token_type != TokenType::Paren
1557 || self.tokens[self.position].subtype != TokenSubType::Close
1558 {
1559 return Err(ParserError {
1560 message: "Expected closing parenthesis".to_string(),
1561 position: Some(self.position),
1562 });
1563 }
1564 self.position += 1;
1565 Ok(expr)
1566 }
1567 TokenType::Array if token.subtype == TokenSubType::Open => {
1568 self.position += 1;
1569 self.parse_array()
1570 }
1571 _ => Err(ParserError {
1572 message: format!("Unexpected token: {token:?}"),
1573 position: Some(self.position),
1574 }),
1575 }
1576 }
1577
1578 fn parse_operand(&mut self, token: Token) -> Result<ASTNode, ParserError> {
1579 match token.subtype {
1580 TokenSubType::Number => {
1581 let value = token.value.parse::<f64>().map_err(|_| ParserError {
1582 message: format!("Invalid number: {}", token.value),
1583 position: Some(self.position),
1584 })?;
1585 Ok(ASTNode::new(
1586 ASTNodeType::Literal(LiteralValue::Number(value)),
1587 Some(token),
1588 ))
1589 }
1590 TokenSubType::Text => {
1591 let mut text = token.value.clone();
1593 if text.starts_with('"') && text.ends_with('"') && text.len() >= 2 {
1594 text = text[1..text.len() - 1].to_string();
1595 text = text.replace("\"\"", "\"");
1597 }
1598 Ok(ASTNode::new(
1599 ASTNodeType::Literal(LiteralValue::Text(text)),
1600 Some(token),
1601 ))
1602 }
1603 TokenSubType::Logical => {
1604 let value = token.value.to_uppercase() == "TRUE";
1605 Ok(ASTNode::new(
1606 ASTNodeType::Literal(LiteralValue::Boolean(value)),
1607 Some(token),
1608 ))
1609 }
1610 TokenSubType::Error => {
1611 let error = ExcelError::from_error_string(&token.value);
1612 Ok(ASTNode::new(
1613 ASTNodeType::Literal(LiteralValue::Error(error)),
1614 Some(token),
1615 ))
1616 }
1617 TokenSubType::Range => {
1618 let reference = ReferenceType::from_string_with_dialect(&token.value, self.dialect)
1619 .map_err(|e| ParserError {
1620 message: format!("Invalid reference '{}': {}", token.value, e),
1621 position: Some(self.position),
1622 })?;
1623 Ok(ASTNode::new(
1624 ASTNodeType::Reference {
1625 original: token.value.clone(),
1626 reference,
1627 },
1628 Some(token),
1629 ))
1630 }
1631 _ => Err(ParserError {
1632 message: format!("Unexpected operand subtype: {:?}", token.subtype),
1633 position: Some(self.position),
1634 }),
1635 }
1636 }
1637
1638 fn parse_function(&mut self, func_token: Token) -> Result<ASTNode, ParserError> {
1639 let name = func_token.value[..func_token.value.len() - 1].to_string();
1640 let args = self.parse_function_arguments()?;
1641 let this_is_volatile = self
1643 .volatility_classifier
1644 .as_ref()
1645 .map(|f| f(name.as_str()))
1646 .unwrap_or(false);
1647 let args_volatile = args.iter().any(|a| a.contains_volatile);
1648
1649 Ok(ASTNode::new_with_volatile(
1650 ASTNodeType::Function { name, args },
1651 Some(func_token),
1652 this_is_volatile || args_volatile,
1653 ))
1654 }
1655
1656 fn parse_function_arguments(&mut self) -> Result<Vec<ASTNode>, ParserError> {
1658 let mut args = Vec::new();
1659
1660 if self.position < self.tokens.len()
1662 && self.tokens[self.position].token_type == TokenType::Func
1663 && self.tokens[self.position].subtype == TokenSubType::Close
1664 {
1665 self.position += 1;
1666 return Ok(args);
1667 }
1668
1669 if self.position < self.tokens.len()
1672 && self.tokens[self.position].token_type == TokenType::Sep
1673 && self.tokens[self.position].subtype == TokenSubType::Arg
1674 {
1675 args.push(ASTNode::new(
1677 ASTNodeType::Literal(LiteralValue::Text("".to_string())),
1678 None,
1679 ));
1680 self.position += 1;
1681 } else {
1682 args.push(self.parse_expression()?);
1684 }
1685
1686 while self.position < self.tokens.len() {
1688 let token = &self.tokens[self.position];
1689
1690 if token.token_type == TokenType::Sep && token.subtype == TokenSubType::Arg {
1691 self.position += 1;
1692 if self.position < self.tokens.len() {
1694 let next_token = &self.tokens[self.position];
1695 if next_token.token_type == TokenType::Sep
1696 && next_token.subtype == TokenSubType::Arg
1697 {
1698 args.push(ASTNode::new(
1700 ASTNodeType::Literal(LiteralValue::Text("".to_string())),
1701 None,
1702 ));
1703 } else if next_token.token_type == TokenType::Func
1704 && next_token.subtype == TokenSubType::Close
1705 {
1706 args.push(ASTNode::new(
1708 ASTNodeType::Literal(LiteralValue::Text("".to_string())),
1709 None,
1710 ));
1711 self.position += 1;
1712 break;
1713 } else {
1714 args.push(self.parse_expression()?);
1715 }
1716 } else {
1717 args.push(ASTNode::new(
1719 ASTNodeType::Literal(LiteralValue::Text("".to_string())),
1720 None,
1721 ));
1722 }
1723 } else if token.token_type == TokenType::Func && token.subtype == TokenSubType::Close {
1724 self.position += 1;
1725 break;
1726 } else {
1727 return Err(ParserError {
1728 message: format!("Expected ',' or ')' in function arguments, got {token:?}"),
1729 position: Some(self.position),
1730 });
1731 }
1732 }
1733
1734 Ok(args)
1735 }
1736
1737 fn parse_array(&mut self) -> Result<ASTNode, ParserError> {
1738 let mut rows = Vec::new();
1739 let mut current_row = Vec::new();
1740
1741 if self.position < self.tokens.len()
1743 && self.tokens[self.position].token_type == TokenType::Array
1744 && self.tokens[self.position].subtype == TokenSubType::Close
1745 {
1746 self.position += 1;
1747 return Ok(ASTNode::new(ASTNodeType::Array(rows), None));
1748 }
1749
1750 current_row.push(self.parse_expression()?);
1752
1753 while self.position < self.tokens.len() {
1754 let token = &self.tokens[self.position];
1755
1756 if token.token_type == TokenType::Sep {
1757 if token.subtype == TokenSubType::Arg {
1758 self.position += 1;
1760 current_row.push(self.parse_expression()?);
1761 } else if token.subtype == TokenSubType::Row {
1762 self.position += 1;
1764 rows.push(current_row);
1765 current_row = vec![self.parse_expression()?];
1766 }
1767 } else if token.token_type == TokenType::Array && token.subtype == TokenSubType::Close {
1768 self.position += 1;
1769 rows.push(current_row);
1770 break;
1771 } else {
1772 return Err(ParserError {
1773 message: format!("Unexpected token in array: {token:?}"),
1774 position: Some(self.position),
1775 });
1776 }
1777 }
1778
1779 let contains_volatile = rows
1781 .iter()
1782 .flat_map(|r| r.iter())
1783 .any(|n| n.contains_volatile);
1784 Ok(ASTNode::new_with_volatile(
1785 ASTNodeType::Array(rows),
1786 None,
1787 contains_volatile,
1788 ))
1789 }
1790}
1791
1792impl From<TokenizerError> for ParserError {
1793 fn from(err: TokenizerError) -> Self {
1794 ParserError {
1795 message: err.message,
1796 position: Some(err.pos),
1797 }
1798 }
1799}
1800
1801pub fn normalise_reference(reference: &str) -> Result<String, ParsingError> {
1803 let ref_type = ReferenceType::from_string(reference)?;
1804 Ok(ref_type.to_string())
1805}
1806
1807pub fn parse<T: AsRef<str>>(formula: T) -> Result<ASTNode, ParserError> {
1808 parse_with_dialect(formula, FormulaDialect::Excel)
1809}
1810
1811pub fn parse_with_dialect<T: AsRef<str>>(
1812 formula: T,
1813 dialect: FormulaDialect,
1814) -> Result<ASTNode, ParserError> {
1815 let tokenizer = Tokenizer::new_with_dialect(formula.as_ref(), dialect)?;
1816 let mut parser = Parser::new_with_dialect(tokenizer.items, false, dialect);
1817 parser.parse()
1818}
1819
1820pub fn parse_with_volatility_classifier<T, F>(
1823 formula: T,
1824 classifier: F,
1825) -> Result<ASTNode, ParserError>
1826where
1827 T: AsRef<str>,
1828 F: Fn(&str) -> bool + Send + Sync + 'static,
1829{
1830 parse_with_dialect_and_volatility_classifier(formula, FormulaDialect::Excel, classifier)
1831}
1832
1833pub fn parse_with_dialect_and_volatility_classifier<T, F>(
1834 formula: T,
1835 dialect: FormulaDialect,
1836 classifier: F,
1837) -> Result<ASTNode, ParserError>
1838where
1839 T: AsRef<str>,
1840 F: Fn(&str) -> bool + Send + Sync + 'static,
1841{
1842 let tokenizer = Tokenizer::new_with_dialect(formula.as_ref(), dialect)?;
1843 let mut parser =
1844 Parser::new_with_classifier_and_dialect(tokenizer.items, false, dialect, classifier);
1845 parser.parse()
1846}
1847
1848pub struct BatchParser {
1853 include_whitespace: bool,
1854 volatility_classifier: Option<VolatilityClassifierArc>,
1855 token_cache: std::collections::HashMap<String, Vec<Token>>, dialect: FormulaDialect,
1857}
1858
1859impl BatchParser {
1860 pub fn builder() -> BatchParserBuilder {
1861 BatchParserBuilder::default()
1862 }
1863
1864 pub fn parse(&mut self, formula: &str) -> Result<ASTNode, ParserError> {
1866 let filtered = if let Some(tokens) = self.token_cache.get(formula) {
1868 tokens.clone()
1869 } else {
1870 let mut tokens = Tokenizer::new_with_dialect(formula, self.dialect)?.items;
1871 if !self.include_whitespace {
1872 tokens.retain(|t| t.token_type != TokenType::Whitespace);
1873 }
1874 self.token_cache.insert(formula.to_string(), tokens.clone());
1875 tokens
1876 };
1877
1878 let mut parser = Parser::new_with_dialect(filtered, true, self.dialect); if let Some(classifier) = self.volatility_classifier.clone() {
1880 parser = parser.with_volatility_classifier(move |name| classifier(name));
1881 }
1882 parser.parse()
1883 }
1884}
1885
1886#[derive(Default)]
1887pub struct BatchParserBuilder {
1888 include_whitespace: bool,
1889 volatility_classifier: Option<VolatilityClassifierArc>,
1890 dialect: FormulaDialect,
1891}
1892
1893impl BatchParserBuilder {
1894 pub fn include_whitespace(mut self, include: bool) -> Self {
1895 self.include_whitespace = include;
1896 self
1897 }
1898
1899 pub fn with_volatility_classifier<F>(mut self, f: F) -> Self
1900 where
1901 F: Fn(&str) -> bool + Send + Sync + 'static,
1902 {
1903 self.volatility_classifier = Some(Arc::new(f));
1904 self
1905 }
1906
1907 pub fn dialect(mut self, dialect: FormulaDialect) -> Self {
1908 self.dialect = dialect;
1909 self
1910 }
1911
1912 pub fn build(self) -> BatchParser {
1913 BatchParser {
1914 include_whitespace: self.include_whitespace,
1915 volatility_classifier: self.volatility_classifier,
1916 token_cache: std::collections::HashMap::new(),
1917 dialect: self.dialect,
1918 }
1919 }
1920}