formualizer_parse/
pretty.rs

1use crate::parser::{ASTNode, ASTNodeType, ParserError, parse};
2use crate::tokenizer::Associativity;
3
4/// Pretty-prints an AST node according to canonical formatting rules.
5///
6/// Rules:
7/// - All functions upper-case, no spaces before '('
8/// - Commas followed by single space; no space before ','
9/// - Binary operators surrounded by single spaces
10/// - No superfluous parentheses (keeps semantics)
11/// - References printed via .normalise()
12/// - Array literals: {1, 2; 3, 4}
13pub fn pretty_print(ast: &ASTNode) -> String {
14    pretty_print_node(ast)
15}
16
/// Which operand slot of a binary operator a child expression occupies.
///
/// Used when deciding whether a child of equal precedence needs parentheses.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
enum Side {
    /// The child is the left-hand operand.
    Left,
    /// The child is the right-hand operand.
    Right,
}
22
23fn infix_info(op: &str) -> (u8, Associativity) {
24    match op {
25        ":" | " " | "," => (8, Associativity::Left),
26        "^" => (6, Associativity::Right),
27        "*" | "/" => (4, Associativity::Left),
28        "+" | "-" => (3, Associativity::Left),
29        "&" => (2, Associativity::Left),
30        "=" | "<" | ">" | "<=" | ">=" | "<>" => (1, Associativity::Left),
31        _ => (0, Associativity::Left),
32    }
33}
34
/// Returns the binding strength of a unary operator.
///
/// The postfix percent operator (`%`) gets `7`; any other operator is treated
/// as a prefix unary operator and gets `5`.
fn unary_precedence(op: &str) -> u8 {
    match op {
        // Postfix percent.
        "%" => 7,
        // Prefix unary.
        _ => 5,
    }
}
43
44fn node_precedence(ast: &ASTNode) -> u8 {
45    match &ast.node_type {
46        ASTNodeType::BinaryOp { op, .. } => infix_info(op).0,
47        ASTNodeType::UnaryOp { op, .. } => unary_precedence(op),
48        // Treat everything else as an atom.
49        _ => 9,
50    }
51}
52
53fn child_needs_parens(
54    child: &ASTNode,
55    parent_op: &str,
56    parent_prec: u8,
57    parent_assoc: Associativity,
58    side: Side,
59) -> bool {
60    let child_prec = node_precedence(child);
61    if child_prec < parent_prec {
62        return true;
63    }
64    if child_prec > parent_prec {
65        return false;
66    }
67
68    // Same precedence: associativity and mixed operators matter.
69    match side {
70        Side::Left => {
71            if parent_assoc == Associativity::Right {
72                // Right-assoc ops (e.g. '^'): parenthesize left child if it could re-associate.
73                matches!(child.node_type, ASTNodeType::BinaryOp { .. })
74            } else {
75                false
76            }
77        }
78        Side::Right => {
79            if parent_assoc == Associativity::Left {
80                if let ASTNodeType::BinaryOp { op: child_op, .. } = &child.node_type {
81                    if child_op != parent_op {
82                        return true;
83                    }
84
85                    // Even with same op, some operators are not associative.
86                    if parent_op == "-" || parent_op == "/" {
87                        return true;
88                    }
89                }
90                false
91            } else {
92                // Right-assoc ops: parenthesize if mixing ops at same precedence.
93                if let ASTNodeType::BinaryOp { op: child_op, .. } = &child.node_type {
94                    return child_op != parent_op;
95                }
96                false
97            }
98        }
99    }
100}
101
102fn unary_operand_needs_parens(unary_op: &str, operand: &ASTNode) -> bool {
103    match unary_op {
104        "%" => matches!(operand.node_type, ASTNodeType::BinaryOp { .. }),
105        _ => {
106            let operand_prec = node_precedence(operand);
107            operand_prec < unary_precedence(unary_op)
108                && matches!(operand.node_type, ASTNodeType::BinaryOp { .. })
109        }
110    }
111}
112
113fn pretty_child(
114    child: &ASTNode,
115    parent_op: &str,
116    parent_prec: u8,
117    parent_assoc: Associativity,
118    side: Side,
119) -> String {
120    let s = pretty_print_node(child);
121    if child_needs_parens(child, parent_op, parent_prec, parent_assoc, side) {
122        format!("({s})")
123    } else {
124        s
125    }
126}
127
128fn pretty_print_node(ast: &ASTNode) -> String {
129    match &ast.node_type {
130        ASTNodeType::Literal(value) => match value {
131            // Quote and escape text literals to preserve Excel semantics
132            crate::LiteralValue::Text(s) => {
133                let escaped = s.replace('"', "\"\"");
134                format!("\"{escaped}\"")
135            }
136            _ => format!("{value}"),
137        },
138        ASTNodeType::Reference { reference, .. } => reference.normalise(),
139        ASTNodeType::UnaryOp { op, expr } => {
140            let inner = pretty_print_node(expr);
141            let inner = if unary_operand_needs_parens(op, expr) {
142                format!("({inner})")
143            } else {
144                inner
145            };
146
147            if op == "%" {
148                format!("{inner}%")
149            } else {
150                format!("{op}{inner}")
151            }
152        }
153        ASTNodeType::BinaryOp { op, left, right } => {
154            let (prec, assoc) = infix_info(op);
155            let left_s = pretty_child(left, op, prec, assoc, Side::Left);
156            let right_s = pretty_child(right, op, prec, assoc, Side::Right);
157
158            // Special handling for range operator ':'
159            if op == ":" {
160                format!("{left_s}:{right_s}")
161            } else {
162                format!("{left_s} {op} {right_s}")
163            }
164        }
165        ASTNodeType::Function { name, args } => {
166            let args_str = args
167                .iter()
168                .map(pretty_print_node)
169                .collect::<Vec<String>>()
170                .join(", ");
171
172            format!("{}({})", name.to_uppercase(), args_str)
173        }
174        ASTNodeType::Array(rows) => {
175            let rows_str = rows
176                .iter()
177                .map(|row| {
178                    row.iter()
179                        .map(pretty_print_node)
180                        .collect::<Vec<String>>()
181                        .join(", ")
182                })
183                .collect::<Vec<String>>()
184                .join("; ");
185
186            format!("{{{rows_str}}}")
187        }
188    }
189}
190
191/// Produce a canonical Excel formula string for an AST, prefixed with '='.
192///
193/// This is the single entry-point that UI layers should use when displaying
194/// a formula reconstructed from an AST.
195pub fn canonical_formula(ast: &ASTNode) -> String {
196    format!("={}", pretty_print(ast))
197}
198
199/// Tokenizes and parses a formula, then pretty-prints it.
200///
201/// Returns a Result with the pretty-printed formula or a parser error.
202pub fn pretty_parse_render(formula: &str) -> Result<String, ParserError> {
203    // Handle empty formula case
204    if formula.is_empty() {
205        return Ok(String::new());
206    }
207
208    // If formula doesn't start with '=', add it before parsing and remove it after
209    let needs_equals = !formula.starts_with('=');
210    let formula_to_parse = if needs_equals {
211        format!("={formula}")
212    } else {
213        formula.to_string()
214    };
215
216    // Parse and pretty-print
217    let ast = parse(&formula_to_parse)?;
218
219    // Format the result with '=' prefix
220    let pretty_printed = pretty_print(&ast);
221
222    // Return the result with appropriate '=' prefix
223    if needs_equals {
224        Ok(pretty_printed)
225    } else {
226        Ok(format!("={pretty_printed}"))
227    }
228}
229
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses and re-renders `formula`, panicking if it fails to parse.
    #[track_caller]
    fn render(formula: &str) -> String {
        pretty_parse_render(formula).unwrap()
    }

    /// Asserts that `formula` renders exactly as `expected`.
    #[track_caller]
    fn assert_renders(formula: &str, expected: &str) {
        assert_eq!(render(formula), expected);
    }

    #[test]
    fn test_pretty_print_validation() {
        let pretty = render("= sum(  a1 ,2 ) ");
        assert_eq!(pretty, "=SUM(A1, 2)");

        // Rendering already-canonical output must be idempotent.
        let round = render(&pretty);
        assert_eq!(pretty, round);
    }

    #[test]
    fn test_ast_canonicalization() {
        // The pretty printer must produce the canonical form.
        let pretty = render("=sum(  a1, b2  )");
        assert_eq!(pretty, "=SUM(A1, B2)");

        // Round-trip consistency.
        let repretty = render(&pretty);
        assert_eq!(pretty, repretty);
    }

    #[test]
    fn test_pretty_print_operators() {
        for formula in ["=a1+b2*3", "=a1 + b2 *     3"] {
            assert_renders(formula, "=A1 + B2 * 3");
        }
    }

    #[test]
    fn test_pretty_print_inserts_parentheses_when_needed() {
        assert_renders("=(a1+b2)*c3", "=(A1 + B2) * C3");
    }

    #[test]
    fn test_pretty_print_function_nesting() {
        assert_renders(
            "=if(a1>0, sum(b1:b10), average(c1:c10))",
            "=IF(A1 > 0, SUM(B1:B10), AVERAGE(C1:C10))",
        );
    }

    #[test]
    fn test_pretty_print_arrays() {
        for formula in ["={1,2;3,4}", "={1, 2; 3, 4}"] {
            assert_renders(formula, "={1, 2; 3, 4}");
        }
    }

    #[test]
    fn test_pretty_print_references() {
        assert_renders("=Sheet1!$a$1:$b$2", "=Sheet1!A1:B2");
        assert_renders("='My Sheet'!a1", "='My Sheet'!A1");
    }

    #[test]
    fn test_pretty_print_text_literals_in_functions() {
        // Quotes around text literals must survive rendering.
        assert_renders(
            "=SUMIFS(A:A, B:B, \"*Parking*\")",
            "=SUMIFS(A:A, B:B, \"*Parking*\")",
        );
    }

    #[test]
    fn test_pretty_print_text_concatenation_and_escaping() {
        // Operators inside text stay quoted; '&' gets canonical spacing.
        assert_renders("=\">=\"&DATE(2024,1,1)", "=\">=\" & DATE(2024, 1, 1)");

        // Embedded quotes stay doubled.
        assert_renders("=\"He said \"\"Hi\"\"\"", "=\"He said \"\"Hi\"\"\"");
    }

    #[test]
    fn test_pretty_print_text_in_arrays() {
        assert_renders(
            "={\"A\", \"B\"; \"C\", \"D\"}",
            "={\"A\", \"B\"; \"C\", \"D\"}",
        );
    }
}