use formualizer_parse::parse;
use formualizer_parse::parser::{ASTNode, ASTNodeType, Parser, ParserError};
use formualizer_parse::tokenizer::Tokenizer;
/// Parses `formula` through the explicit two-step pipeline: `Tokenizer::new`
/// followed by `Parser::new(..).parse()`.
///
/// `include_whitespace` is forwarded unchanged as the second argument of
/// `Parser::new`.
///
/// # Errors
///
/// A tokenizer failure is converted into a `ParserError` whose `message` is the
/// tokenizer error's `to_string()` and whose `position` is `Some(pos)` of that
/// error. Errors from `Parser::parse` are returned as-is.
fn classic(formula: &str, include_whitespace: bool) -> Result<ASTNode, ParserError> {
    let tokens = match Tokenizer::new(formula) {
        Ok(tokenizer) => tokenizer.items,
        Err(tokenizer_err) => {
            return Err(ParserError {
                message: tokenizer_err.to_string(),
                position: Some(tokenizer_err.pos),
            });
        }
    };
    let mut parser = Parser::new(tokens, include_whitespace);
    parser.parse()
}
/// Parses `formula` by delegating to the crate-level `formualizer_parse::parse`
/// function. The tests in this file label this path "span" and compare its
/// output against `classic`.
fn span(formula: &str) -> Result<ASTNode, ParserError> {
parse(formula)
}
/// Structural equality between two AST nodes, comparing only `node_type`.
///
/// Two nodes are considered equal when their `node_type` values compare equal
/// directly, or when both are the same composite variant (`UnaryOp`,
/// `BinaryOp`, `Function`, `Array`) whose operator/name matches and whose
/// children are recursively `ast_eq`. Any other combination is unequal.
///
/// NOTE(review): the recursive fallback presumably exists because child
/// `ASTNode` equality may take fields other than `node_type` into account —
/// confirm against the `ASTNode` definition.
fn ast_eq(a: &ASTNode, b: &ASTNode) -> bool {
    match (&a.node_type, &b.node_type) {
        // Fast path: the node types already compare equal as a whole.
        (lhs, rhs) if lhs == rhs => true,
        (
            ASTNodeType::UnaryOp {
                op: lhs_op,
                expr: lhs_expr,
            },
            ASTNodeType::UnaryOp {
                op: rhs_op,
                expr: rhs_expr,
            },
        ) => lhs_op == rhs_op && ast_eq(lhs_expr, rhs_expr),
        (
            ASTNodeType::BinaryOp {
                op: lhs_op,
                left: lhs_left,
                right: lhs_right,
            },
            ASTNodeType::BinaryOp {
                op: rhs_op,
                left: rhs_left,
                right: rhs_right,
            },
        ) => lhs_op == rhs_op && ast_eq(lhs_left, rhs_left) && ast_eq(lhs_right, rhs_right),
        (
            ASTNodeType::Function {
                name: lhs_name,
                args: lhs_args,
            },
            ASTNodeType::Function {
                name: rhs_name,
                args: rhs_args,
            },
        ) => {
            if lhs_name != rhs_name || lhs_args.len() != rhs_args.len() {
                return false;
            }
            lhs_args
                .iter()
                .zip(rhs_args.iter())
                .all(|(lhs_arg, rhs_arg)| ast_eq(lhs_arg, rhs_arg))
        }
        (ASTNodeType::Array(lhs_rows), ASTNodeType::Array(rhs_rows)) => {
            if lhs_rows.len() != rhs_rows.len() {
                return false;
            }
            // Rows must agree in length and element-wise, in order.
            for (lhs_row, rhs_row) in lhs_rows.iter().zip(rhs_rows.iter()) {
                if lhs_row.len() != rhs_row.len() {
                    return false;
                }
                let cells_match = lhs_row
                    .iter()
                    .zip(rhs_row.iter())
                    .all(|(lhs_cell, rhs_cell)| ast_eq(lhs_cell, rhs_cell));
                if !cells_match {
                    return false;
                }
            }
            true
        }
        _ => false,
    }
}
/// Panics unless `a` and `b` are equal according to `ast_eq`.
///
/// The panic message names the offending `formula` and pretty-prints both
/// `node_type` values, tagged with `label_a` and `label_b` respectively.
fn assert_ast_eq(formula: &str, a: &ASTNode, b: &ASTNode, label_a: &str, label_b: &str) {
    assert!(
        ast_eq(a, b),
        "AST divergence for `{formula}`:\n {label_a}: {:#?}\n {label_b}: {:#?}",
        a.node_type,
        b.node_type
    );
}
/// Formulas fed to both parser paths by the differential tests in this file.
///
/// Entry order is significant only for the order of reported failures. The
/// section comments below are descriptive groupings of the literal strings.
const AGREEING_CORPUS: &[&str] = &[
    // Numeric, string and boolean literals.
    "=1",
    "=1.5",
    "=-1",
    "=+1",
    "=1e3",
    "=\"hello\"",
    "=\"\"",
    "=TRUE",
    "=FALSE",
    // Error literals.
    "=#REF!",
    "=#VALUE!",
    "=#DIV/0!",
    "=#NAME?",
    "=#NULL!",
    "=#NUM!",
    "=#N/A",
    "=#GETTING_DATA",
    // Lower-case and sheet-prefixed error forms.
    "=#ref!",
    "=source!#ref!",
    // Cell, range, sheet-qualified, table and named references.
    "=A1",
    "=$A$1",
    "=A1:B2",
    "=$A$1:$B$2",
    "=Sheet1!A1",
    "='Some Sheet'!A1:B2",
    "=Table1[Col]",
    "=NamedRange",
    "=A:A",
    "=1:1",
    // Arithmetic, comparison, concatenation, percent and prefix operators.
    "=1+2",
    "=1-2",
    "=2*3",
    "=6/3",
    "=2^10",
    "=1+2*3",
    "=(1+2)*3",
    "=A1+B1",
    "=A1*-B1",
    "=A1&B1",
    "=A1=B1",
    "=A1<>B1",
    "=A1<=B1",
    "=A1>=B1",
    "=A1<B1",
    "=A1>B1",
    "=50%",
    "=A1%",
    "=-A1",
    "=--A1",
    "=- -A1",
    "=- -1",
    // Function calls, including nested calls and a call with no arguments.
    "=SUM(A1,B1)",
    "=SUM(A1:A10)",
    "=SUM(A1, B1, C1)",
    "=IF(A1>0,\"yes\",\"no\")",
    "=IF(A1>0,B1,IF(C1<0,D1,E1))",
    "=AVERAGE(A1:A10)",
    "=COUNTIF(A1:A10,\">0\")",
    "=VLOOKUP(A1,B1:C10,2,FALSE)",
    "=IFS(A1=1,\"a\",A1=2,\"b\",TRUE,\"c\")",
    "=LET(x,1,y,2,x+y)",
    "=LAMBDA(x,x+1)(2)",
    "=SUM()",
    // Whitespace around operators, arguments and parentheses.
    "= 1 + 2",
    "= SUM(A1,B1) ",
    "=SUM( A1 , B1 )",
    "=( A1 + B1 )",
    "= ( A1 + B1 ) ",
    "=SUM(A1 )",
    "=( A1 )",
    "= A1 + B1",
    // Array literals.
    "={1,2,3}",
    "={1,2;3,4}",
    "={\"a\",\"b\";\"c\",\"d\"}",
    "=SUM({1,2,3})",
    // Compound expressions mixing functions, ranges and sheet references.
    "=SUM(A1:A10)*COUNT(B1:B10)+IF(C1,1,0)",
    "=A1+SUM(B1:B5)",
    "=Sheet1!A1+Sheet2!B2",
    "='My Sheet'!A1+'Other Sheet'!$B$2",
    "=INDEX(A:A,MATCH(B1,C:C,0))",
    // Trailing `#` and a range whose end has no row number.
    "=A1#",
    "=A1:A",
    // Precedence and chained-prefix cases ("=- -A1" is listed a second time here).
    "=-A1^2",
    "=-(A1^2)",
    "=- -A1",
    "=---1",
    "=A1+B1>=C1*D1",
    "=\"a\"&\"b\"&\"c\"",
];
/// Differential test: for every corpus formula, `classic(.., false)` and `span`
/// must either both fail or both succeed with `ast_eq` ASTs.
///
/// All disagreements are collected and reported together in one assertion.
#[test]
fn classic_and_span_agree_on_corpus_no_whitespace_tokens() {
    let failures: Vec<String> = AGREEING_CORPUS
        .iter()
        .copied()
        .filter_map(|formula| {
            let classic_result = classic(formula, false);
            let span_result = span(formula);
            match (&classic_result, &span_result) {
                (Ok(a), Ok(b)) if ast_eq(a, b) => None,
                (Ok(a), Ok(b)) => Some(format!(
                    "{formula}: AST mismatch\n classic: {:?}\n span: {:?}",
                    a.node_type, b.node_type
                )),
                // Both paths rejecting the formula is not recorded as a failure.
                (Err(_), Err(_)) => None,
                (Ok(_), Err(e)) => Some(format!("{formula}: classic OK but span Err: {e}")),
                (Err(e), Ok(_)) => Some(format!("{formula}: span OK but classic Err: {e}")),
            }
        })
        .collect();
    assert!(
        failures.is_empty(),
        "differential failures (classic vs span, include_whitespace=false):\n{}",
        failures.join("\n")
    );
}
/// Differential test: same comparison as the no-whitespace variant, but the
/// classic path is run with `include_whitespace = true`.
///
/// Formulas listed in `divergence::WS_BREAKS_CLASSIC_WITH_WS_TOKENS` are
/// skipped. All remaining disagreements are reported in one assertion.
#[test]
fn classic_with_whitespace_tokens_agrees_with_span_on_corpus() {
    let failures: Vec<String> = AGREEING_CORPUS
        .iter()
        .copied()
        .filter(|formula| !divergence::WS_BREAKS_CLASSIC_WITH_WS_TOKENS.contains(formula))
        .filter_map(|formula| {
            let classic_result = classic(formula, true);
            let span_result = span(formula);
            match (&classic_result, &span_result) {
                (Ok(a), Ok(b)) if ast_eq(a, b) => None,
                (Ok(a), Ok(b)) => Some(format!(
                    "{formula}: AST mismatch (include_whitespace=true)\n classic: {:?}\n span: {:?}",
                    a.node_type, b.node_type
                )),
                // Both paths rejecting the formula is not recorded as a failure.
                (Err(_), Err(_)) => None,
                (Ok(_), Err(e)) => {
                    Some(format!("{formula}: classic(ws) OK but span Err: {e}"))
                }
                (Err(e), Ok(_)) => Some(format!(
                    "{formula}: span OK but classic(ws) Err: {e} — \
                     if intentional, add to divergence::WS_BREAKS_CLASSIC_WITH_WS_TOKENS"
                )),
            }
        })
        .collect();
    assert!(
        failures.is_empty(),
        "differential failures (classic include_whitespace=true vs span):\n{}",
        failures.join("\n")
    );
}
/// Checks that `Parser::try_from(&str)` still yields a parser whose `parse()`
/// produces a `BinaryOp` root for `=A1+B1`.
#[test]
fn parser_try_from_str_still_works() {
    let ast = Parser::try_from("=A1+B1").unwrap().parse().unwrap();
    assert!(matches!(ast.node_type, ASTNodeType::BinaryOp { .. }));
}
/// Checks that `Parser::new(tokens, true)` still parses `= 1 + 2` into a
/// `BinaryOp` whose operator is `+`.
#[test]
fn parser_new_with_whitespace_tokens_still_works() {
    let tokens = Tokenizer::new("= 1 + 2").unwrap().items;
    let ast = Parser::new(tokens, true).parse().unwrap();
    match ast.node_type {
        ASTNodeType::BinaryOp { op, .. } => assert_eq!(op, "+"),
        other => panic!("expected BinaryOp, got {other:?}"),
    }
}
/// Checks that a `BatchParser` returns `ast_eq` results when the same formula
/// is parsed twice, and that its result matches the crate-level `parse`.
#[test]
fn batch_parser_still_works() {
    use formualizer_parse::parser::BatchParser;

    let formula = "=A1+B1";
    let mut bp = BatchParser::builder().build();

    // Parse the same formula twice through the same batch parser instance.
    let a = bp.parse(formula).unwrap();
    let b = bp.parse(formula).unwrap();
    assert!(ast_eq(&a, &b));

    let s = parse(formula).unwrap();
    assert_ast_eq("=A1+B1", &a, &s, "batch", "span");
}
/// Regression: whitespace around function arguments must not make
/// `classic(.., false)` and `span` diverge.
#[test]
fn regression_sum_with_inner_whitespace() {
    const FORMULA: &str = "=SUM( A1 , B1 )";
    let classic_ast = classic(FORMULA, false).expect("classic no_ws should parse");
    let span_ast = span(FORMULA).expect("span should parse");
    assert_ast_eq(FORMULA, &classic_ast, &span_ast, "classic no_ws", "span");
}
/// Regression: whitespace inside and after a parenthesized group must not make
/// `classic(.., false)` and `span` diverge.
#[test]
fn regression_paren_group_with_inner_and_trailing_whitespace() {
    const FORMULA: &str = "=( A1 + B1 ) ";
    let classic_ast = classic(FORMULA, false).expect("classic no_ws should parse");
    let span_ast = span(FORMULA).expect("span should parse");
    assert_ast_eq(FORMULA, &classic_ast, &span_ast, "classic no_ws", "span");
}
/// Regression: a basic binary expression with spaces around the operator must
/// parse identically through `classic(.., false)` and `span`.
#[test]
fn regression_classic_path_basic_arith() {
    const FORMULA: &str = "=A1 + B1";
    let classic_ast = classic(FORMULA, false).expect("classic no_ws should parse");
    let span_ast = span(FORMULA).expect("span should parse");
    assert_ast_eq(FORMULA, &classic_ast, &span_ast, "classic no_ws", "span");
}
/// Pinned, known differences between the classic parser run with
/// `include_whitespace = true` and the other parse paths.
mod divergence {
    use super::*;

    /// Corpus formulas the whitespace-token differential test skips.
    /// Currently empty.
    pub(super) const WS_BREAKS_CLASSIC_WITH_WS_TOKENS: &[&str] = &[];

    /// Formulas the test below expects `classic(.., true)` to reject while
    /// `span` and `classic(.., false)` both parse them to `ast_eq` ASTs.
    const CLASSIC_WS_REJECTS_BUT_SPAN_ACCEPTS: &[&str] = &[
        "=SUM( )",
        "=SUM(A1, )",
        "=SUM(A1, B1, )",
    ];

    /// Pins the divergence: every listed formula must still fail under
    /// `classic(.., true)`. If one starts parsing, the test fails with a
    /// message asking for the pin list to be updated.
    #[test]
    fn divergence_classic_with_ws_tokens_rejects_whitespace_before_close_paren() {
        let mut still_broken: Vec<&str> = Vec::new();
        let mut now_accepting: Vec<&str> = Vec::new();

        for &formula in CLASSIC_WS_REJECTS_BUT_SPAN_ACCEPTS {
            // The two agreeing paths must parse and match before the
            // whitespace-token path is probed.
            let span_ast = span(formula).expect("span parses");
            let classic_no_ws = classic(formula, false).expect("classic no_ws parses");
            assert_ast_eq(formula, &classic_no_ws, &span_ast, "classic no_ws", "span");

            if classic(formula, true).is_err() {
                still_broken.push(formula);
            } else {
                now_accepting.push(formula);
            }
        }

        assert!(
            now_accepting.is_empty(),
            "classic Parser::new(.., include_whitespace=true) now parses {now_accepting:?}; \
             remove from CLASSIC_WS_REJECTS_BUT_SPAN_ACCEPTS and consider moving to the agreeing corpus"
        );
        assert_eq!(
            still_broken.len(),
            CLASSIC_WS_REJECTS_BUT_SPAN_ACCEPTS.len(),
            "divergence pin out of sync"
        );
    }
}