Skip to main content

formualizer_parse/
pretty.rs

1use crate::parser::{ASTNode, ASTNodeType, ParserError, parse};
2use crate::tokenizer::Associativity;
3
4/// Pretty-prints an AST node according to canonical formatting rules.
5///
6/// Rules:
7/// - All functions upper-case, no spaces before '('
8/// - Commas followed by single space; no space before ','
9/// - Binary operators surrounded by single spaces
10/// - No superfluous parentheses (keeps semantics)
11/// - References printed via .normalise()
12/// - Array literals: {1, 2; 3, 4}
13pub fn pretty_print(ast: &ASTNode) -> String {
14    pretty_print_node(ast)
15}
16
/// Which operand position of a binary operator a child expression occupies.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Side {
    /// The child is the left-hand operand.
    Left,
    /// The child is the right-hand operand.
    Right,
}
22
23fn infix_info(op: &str) -> (u8, Associativity) {
24    match op {
25        ":" | " " | "," => (8, Associativity::Left),
26        "^" => (5, Associativity::Right),
27        "*" | "/" => (4, Associativity::Left),
28        "+" | "-" => (3, Associativity::Left),
29        "&" => (2, Associativity::Left),
30        "=" | "<" | ">" | "<=" | ">=" | "<>" => (1, Associativity::Left),
31        _ => (0, Associativity::Left),
32    }
33}
34
/// Returns the precedence of a unary operator.
///
/// The percent operator '%' has precedence 7; every other unary operator
/// has precedence 6.
fn unary_precedence(op: &str) -> u8 {
    match op {
        "%" => 7,
        _ => 6,
    }
}
38
39fn node_precedence(ast: &ASTNode) -> u8 {
40    match &ast.node_type {
41        ASTNodeType::BinaryOp { op, .. } => infix_info(op).0,
42        ASTNodeType::UnaryOp { op, .. } => unary_precedence(op),
43        // Treat everything else as an atom.
44        _ => 9,
45    }
46}
47
48fn child_needs_parens(
49    child: &ASTNode,
50    parent_op: &str,
51    parent_prec: u8,
52    parent_assoc: Associativity,
53    side: Side,
54) -> bool {
55    let child_prec = node_precedence(child);
56    if child_prec < parent_prec {
57        return true;
58    }
59    if child_prec > parent_prec {
60        return false;
61    }
62
63    // Same precedence: associativity and mixed operators matter.
64    match side {
65        Side::Left => {
66            if parent_assoc == Associativity::Right {
67                // Right-assoc ops (e.g. '^'): parenthesize left child if it could re-associate.
68                matches!(child.node_type, ASTNodeType::BinaryOp { .. })
69            } else {
70                false
71            }
72        }
73        Side::Right => {
74            if parent_assoc == Associativity::Left {
75                if let ASTNodeType::BinaryOp { op: child_op, .. } = &child.node_type {
76                    if child_op != parent_op {
77                        return true;
78                    }
79
80                    // Even with same op, some operators are not associative.
81                    if parent_op == "-" || parent_op == "/" {
82                        return true;
83                    }
84                }
85                false
86            } else {
87                // Right-assoc ops: parenthesize if mixing ops at same precedence.
88                if let ASTNodeType::BinaryOp { op: child_op, .. } = &child.node_type {
89                    return child_op != parent_op;
90                }
91                false
92            }
93        }
94    }
95}
96
97fn unary_operand_needs_parens(unary_op: &str, operand: &ASTNode) -> bool {
98    match unary_op {
99        "%" => matches!(operand.node_type, ASTNodeType::BinaryOp { .. }),
100        _ => {
101            let operand_prec = node_precedence(operand);
102            operand_prec < unary_precedence(unary_op)
103                && matches!(operand.node_type, ASTNodeType::BinaryOp { .. })
104        }
105    }
106}
107
108fn pretty_child(
109    child: &ASTNode,
110    parent_op: &str,
111    parent_prec: u8,
112    parent_assoc: Associativity,
113    side: Side,
114) -> String {
115    let s = pretty_print_node(child);
116    if child_needs_parens(child, parent_op, parent_prec, parent_assoc, side) {
117        format!("({s})")
118    } else {
119        s
120    }
121}
122
123fn pretty_print_node(ast: &ASTNode) -> String {
124    match &ast.node_type {
125        ASTNodeType::Literal(value) => match value {
126            // Quote and escape text literals to preserve Excel semantics
127            crate::LiteralValue::Text(s) => {
128                let escaped = s.replace('"', "\"\"");
129                format!("\"{escaped}\"")
130            }
131            _ => format!("{value}"),
132        },
133        ASTNodeType::Reference { reference, .. } => reference.normalise(),
134        ASTNodeType::UnaryOp { op, expr } => {
135            let inner = pretty_print_node(expr);
136            let inner = if unary_operand_needs_parens(op, expr) {
137                format!("({inner})")
138            } else {
139                inner
140            };
141
142            if op == "%" {
143                format!("{inner}%")
144            } else {
145                format!("{op}{inner}")
146            }
147        }
148        ASTNodeType::BinaryOp { op, left, right } => {
149            let (prec, assoc) = infix_info(op);
150            let left_s = pretty_child(left, op, prec, assoc, Side::Left);
151            let right_s = pretty_child(right, op, prec, assoc, Side::Right);
152
153            // Special handling for range operator ':'
154            if op == ":" {
155                format!("{left_s}:{right_s}")
156            } else {
157                format!("{left_s} {op} {right_s}")
158            }
159        }
160        ASTNodeType::Function { name, args } => {
161            let args_str = args
162                .iter()
163                .map(pretty_print_node)
164                .collect::<Vec<String>>()
165                .join(", ");
166
167            format!("{}({})", name.to_uppercase(), args_str)
168        }
169        ASTNodeType::Array(rows) => {
170            let rows_str = rows
171                .iter()
172                .map(|row| {
173                    row.iter()
174                        .map(pretty_print_node)
175                        .collect::<Vec<String>>()
176                        .join(", ")
177                })
178                .collect::<Vec<String>>()
179                .join("; ");
180
181            format!("{{{rows_str}}}")
182        }
183    }
184}
185
186/// Produce a canonical Excel formula string for an AST, prefixed with '='.
187///
188/// This is the single entry-point that UI layers should use when displaying
189/// a formula reconstructed from an AST.
190pub fn canonical_formula(ast: &ASTNode) -> String {
191    format!("={}", pretty_print(ast))
192}
193
194/// Tokenizes and parses a formula, then pretty-prints it.
195///
196/// Returns a Result with the pretty-printed formula or a parser error.
197pub fn pretty_parse_render(formula: &str) -> Result<String, ParserError> {
198    // Handle empty formula case
199    if formula.is_empty() {
200        return Ok(String::new());
201    }
202
203    // If formula doesn't start with '=', add it before parsing and remove it after
204    let needs_equals = !formula.starts_with('=');
205    let formula_to_parse = if needs_equals {
206        format!("={formula}")
207    } else {
208        formula.to_string()
209    };
210
211    // Parse and pretty-print
212    let ast = parse(&formula_to_parse)?;
213
214    // Format the result with '=' prefix
215    let pretty_printed = pretty_print(&ast);
216
217    // Return the result with appropriate '=' prefix
218    if needs_equals {
219        Ok(pretty_printed)
220    } else {
221        Ok(format!("={pretty_printed}"))
222    }
223}
224
#[cfg(test)]
mod tests {
    use super::*;

    /// Renders `input`, asserts the result equals `expected`, and returns it.
    fn assert_renders(input: &str, expected: &str) -> String {
        let rendered = pretty_parse_render(input).unwrap();
        assert_eq!(rendered, expected);
        rendered
    }

    /// Asserts that rendering already-rendered text leaves it unchanged.
    fn assert_idempotent(rendered: &str) {
        let again = pretty_parse_render(rendered).unwrap();
        assert_eq!(rendered, again);
    }

    #[test]
    fn test_pretty_print_validation() {
        let pretty = assert_renders("= sum(  a1 ,2 ) ", "=SUM(A1, 2)");
        assert_idempotent(&pretty);
    }

    #[test]
    fn test_ast_canonicalization() {
        // The printer must produce the canonical form and be stable on it.
        let pretty = assert_renders("=sum(  a1, b2  )", "=SUM(A1, B2)");
        assert_idempotent(&pretty);
    }

    #[test]
    fn test_pretty_print_operators() {
        for formula in ["=a1+b2*3", "=a1 + b2 *     3"] {
            assert_renders(formula, "=A1 + B2 * 3");
        }
    }

    #[test]
    fn test_pretty_print_inserts_parentheses_when_needed() {
        assert_renders("=(a1+b2)*c3", "=(A1 + B2) * C3");
    }

    #[test]
    fn test_pretty_print_function_nesting() {
        assert_renders(
            "=if(a1>0, sum(b1:b10), average(c1:c10))",
            "=IF(A1 > 0, SUM(B1:B10), AVERAGE(C1:C10))",
        );
    }

    #[test]
    fn test_pretty_print_arrays() {
        for formula in ["={1,2;3,4}", "={1, 2; 3, 4}"] {
            assert_renders(formula, "={1, 2; 3, 4}");
        }
    }

    #[test]
    fn test_pretty_print_references() {
        assert_renders("=Sheet1!$a$1:$b$2", "=Sheet1!$A$1:$B$2");
        assert_renders("='My Sheet'!a1", "='My Sheet'!A1");
    }

    #[test]
    fn test_pretty_print_text_literals_in_functions() {
        // Quotes around text literals must survive rendering.
        assert_renders(
            "=SUMIFS(A:A, B:B, \"*Parking*\")",
            "=SUMIFS(A:A, B:B, \"*Parking*\")",
        );
    }

    #[test]
    fn test_pretty_print_text_concatenation_and_escaping() {
        // Operators inside text stay quoted; '&' gets canonical spacing.
        assert_renders("=\">=\"&DATE(2024,1,1)", "=\">=\" & DATE(2024, 1, 1)");

        // Embedded quotes stay doubled.
        assert_renders("=\"He said \"\"Hi\"\"\"", "=\"He said \"\"Hi\"\"\"");
    }

    #[test]
    fn test_pretty_print_text_in_arrays() {
        assert_renders(
            "={\"A\", \"B\"; \"C\", \"D\"}",
            "={\"A\", \"B\"; \"C\", \"D\"}",
        );
    }
}
327}