// leekscript_tooling/formatter/driver.rs

//! Format driver: emits the syntax tree to a string (round-trip or canonical).
//!
//! Uses sipha's emit API for round-trip when no extras are requested. With
//! `canonical_format`, `parenthesize_expressions`, or `annotate_types`, uses a custom walk.
5
6use sipha::emit::{syntax_root_to_string, EmitOptions};
7use sipha::red::{SyntaxNode, SyntaxToken};
8use sipha::types::{FromSyntaxKind, IntoSyntaxKind};
9use sipha::walk::WalkOptions;
10
11use crate::visitor::{walk, Visitor, WalkResult};
12use leekscript_analysis::{analyze, analyze_with_signatures, TypeMapKey};
13use leekscript_core::syntax::Kind;
14use leekscript_core::Type;
15
16use super::options::{BraceStyle, FormatterOptions, IndentStyle, SemicolonStyle};
17
18/// Compound expression node kinds that get parentheses when `parenthesize_expressions` is true.
19/// We wrap nodes that contain the *entire* expression in the AST:
20/// - `NodeBinaryLevel`: one precedence level (add, mul, compare, etc.) with [left, op, right, ...].
21/// - `NodeUnaryExpr`, `NodeExpr`, `NodeAsCast`, `NodeArray`, `NodeMap`, `NodeInterval` (full expr in one node).
22/// We do NOT wrap `NodeBinaryExpr` (only [op, right]), `NodeMemberExpr`, `NodeCallExpr`, `NodeIndexExpr`.
23fn is_expression_node(kind: Kind) -> bool {
24    matches!(
25        kind,
26        Kind::NodeBinaryLevel // full "left op right" for one precedence level
27            | Kind::NodeUnaryExpr
28            | Kind::NodeAsCast
29            | Kind::NodeArray
30            | Kind::NodeMap
31            | Kind::NodeInterval
32    )
33}
34
35/// Print the syntax tree to a string.
36/// When `canonical_format`, `parenthesize_expressions`, or `annotate_types` is set, runs a custom walk;
37/// otherwise uses sipha's emit API for round-trip.
38#[must_use]
39pub fn format(root: &SyntaxNode, options: &FormatterOptions) -> String {
40    if options.canonical_format {
41        return format_canonical(root, options);
42    }
43    if options.parenthesize_expressions || options.annotate_types {
44        let type_map = if options.annotate_types {
45            let result = if let Some(ref roots) = options.signature_roots {
46                analyze_with_signatures(root, roots)
47            } else {
48                analyze(root)
49            };
50            result.type_map
51        } else {
52            std::collections::HashMap::<TypeMapKey, Type>::new()
53        };
54        format_with_extras(root, options, &type_map)
55    } else {
56        let emit_opts = EmitOptions {
57            include_trivia: options.preserve_comments,
58            skip_kind: Some(Kind::TokEof.into_syntax_kind()),
59        };
60        syntax_root_to_string(root, &emit_opts)
61    }
62}
63
64/// Canonical format: normalize indentation, braces, semicolons. Trivia (comments/whitespace) is not emitted.
65fn format_canonical(root: &SyntaxNode, options: &FormatterOptions) -> String {
66    let mut driver = CanonicalFormatDriver {
67        options,
68        out: String::new(),
69        indent_depth: 0,
70        need_newline: false,
71        statement_semicolon_stack: Vec::new(),
72        last_token_ends_word: false,
73    };
74    let walk_opts = WalkOptions::full();
75    let _ = walk(root, &mut driver, &walk_opts);
76    driver.out
77}
78
/// Reports whether `text` ends with a word character (alphanumeric or `_`),
/// i.e. whether it could fuse with a following word-like token if no space
/// were emitted between them. Empty text yields `false`.
fn token_ends_word(text: &str) -> bool {
    text.ends_with(|c: char| c == '_' || c.is_alphanumeric())
}
84
/// Reports whether `text` begins like a word: an alphanumeric character, `_`,
/// or a quote character (`"` or `'`). Empty text yields `false`.
fn token_starts_word(text: &str) -> bool {
    text.starts_with(|c: char| c.is_alphanumeric() || matches!(c, '_' | '"' | '\''))
}
90
91/// Nodes after which we may need to emit a semicolon (optional in grammar).
92fn is_statement_with_optional_semicolon(kind: Kind) -> bool {
93    matches!(
94        kind,
95        Kind::NodeVarDecl
96            | Kind::NodeExprStmt
97            | Kind::NodeReturnStmt
98            | Kind::NodeBreakStmt
99            | Kind::NodeContinueStmt
100    )
101}
102
103/// Canonical format driver: emits tokens with normalized newlines and indentation.
104struct CanonicalFormatDriver<'a> {
105    options: &'a FormatterOptions,
106    out: String,
107    indent_depth: usize,
108    need_newline: bool,
109    /// Stack of "did we see semicolon for this statement" for optional-semicolon statements.
110    statement_semicolon_stack: Vec<bool>,
111    /// Last non-trivia token text, to decide if we need a space before the next token.
112    last_token_ends_word: bool,
113}
114
115impl CanonicalFormatDriver<'_> {
116    fn emit_indent(&mut self) {
117        match self.options.indent_style {
118            IndentStyle::Tabs => {
119                for _ in 0..self.indent_depth {
120                    self.out.push('\t');
121                }
122            }
123            IndentStyle::Spaces(n) => {
124                let spaces = n.max(1).min(8) as usize;
125                for _ in 0..self.indent_depth {
126                    for _ in 0..spaces {
127                        self.out.push(' ');
128                    }
129                }
130            }
131        }
132    }
133
134    fn maybe_emit_newline_and_indent(&mut self) {
135        if self.need_newline {
136            self.out.push('\n');
137            self.emit_indent();
138            self.need_newline = false;
139        }
140    }
141}
142
143impl Visitor for CanonicalFormatDriver<'_> {
144    fn enter_node(&mut self, node: &SyntaxNode) -> WalkResult {
145        if let Some(kind) = node.kind_as::<Kind>() {
146            if is_statement_with_optional_semicolon(kind) {
147                self.statement_semicolon_stack.push(false);
148            }
149        }
150        WalkResult::Continue(())
151    }
152
153    fn visit_token(&mut self, token: &SyntaxToken) -> WalkResult {
154        let tok_kind = Kind::from_syntax_kind(token.kind());
155        if tok_kind == Some(Kind::TokEof) {
156            return WalkResult::Continue(());
157        }
158        if token.is_trivia() {
159            return WalkResult::Continue(());
160        }
161        self.maybe_emit_newline_and_indent();
162
163        let text = token.text();
164        // Emit space between tokens that would otherwise run together (e.g. "var" "x" -> "var x").
165        if self.last_token_ends_word && token_starts_word(text) {
166            self.out.push(' ');
167        }
168
169        if tok_kind == Some(Kind::TokBraceR) {
170            self.out.push('\n');
171            if self.indent_depth > 0 {
172                self.indent_depth -= 1;
173            }
174            self.emit_indent();
175            self.out.push_str(text);
176            self.last_token_ends_word = false;
177            self.need_newline = true;
178            return WalkResult::Continue(());
179        }
180        if tok_kind == Some(Kind::TokBraceL) {
181            if self.options.brace_style == BraceStyle::NextLine {
182                self.out.push('\n');
183                self.emit_indent();
184            }
185            self.out.push_str(text);
186            self.last_token_ends_word = false;
187            self.indent_depth += 1;
188            self.need_newline = true;
189            return WalkResult::Continue(());
190        }
191        if tok_kind == Some(Kind::TokSemi) {
192            if let Some(seen) = self.statement_semicolon_stack.last_mut() {
193                *seen = true;
194            }
195            if self.options.semicolon_style == SemicolonStyle::Always {
196                self.out.push_str(text);
197            }
198            self.last_token_ends_word = false;
199            self.need_newline = true;
200            return WalkResult::Continue(());
201        }
202
203        self.out.push_str(text);
204        self.last_token_ends_word = token_ends_word(text);
205        if text == "}" {
206            self.need_newline = true;
207        }
208        WalkResult::Continue(())
209    }
210
211    fn leave_node(&mut self, node: &SyntaxNode) -> WalkResult {
212        if let Some(kind) = node.kind_as::<Kind>() {
213            if is_statement_with_optional_semicolon(kind) {
214                if let Some(had_semi) = self.statement_semicolon_stack.pop() {
215                    if self.options.semicolon_style == SemicolonStyle::Always && !had_semi {
216                        self.maybe_emit_newline_and_indent();
217                        self.out.push(';');
218                        self.need_newline = true;
219                    }
220                }
221            }
222        }
223        WalkResult::Continue(())
224    }
225}
226
227/// Custom format pass: walk tree, emit tokens, optionally add parens and type comments.
228fn format_with_extras(
229    root: &SyntaxNode,
230    options: &FormatterOptions,
231    type_map: &std::collections::HashMap<TypeMapKey, Type>,
232) -> String {
233    let mut driver = FormatDriverWithExtras {
234        options,
235        type_map,
236        out: String::new(),
237        paren_stack: Vec::new(),
238        depth: 0,
239        postfix_close_stack: Vec::new(),
240    };
241    let walk_opts = WalkOptions::full();
242    let _ = walk(root, &mut driver, &walk_opts);
243    driver.out
244}
245
246/// Driver that emits tokens and optionally parentheses and type comments.
247struct FormatDriverWithExtras<'a> {
248    options: &'a FormatterOptions,
249    type_map: &'a std::collections::HashMap<TypeMapKey, Type>,
250    out: String,
251    /// Stack of "did we emit open paren for this node" for matching close parens.
252    paren_stack: Vec<bool>,
253    /// Depth in the tree (incremented on `enter_node`, decremented on `leave_node`).
254    depth: usize,
255    /// When to emit ")" for postfix chains (a.b.c → (a.b).c): (`parent_depth`, `children_left_to_leave`).
256    postfix_close_stack: Vec<(usize, usize)>,
257}
258
259fn is_postfix_chain(node: &SyntaxNode) -> bool {
260    let children: Vec<_> = node.child_nodes().collect();
261    if children.len() < 2 {
262        return false;
263    }
264    let first = children[0].kind_as::<Kind>();
265    let first_is_suffix = matches!(
266        first,
267        Some(Kind::NodeMemberExpr | Kind::NodeCallExpr | Kind::NodeIndexExpr)
268    );
269    if first_is_suffix {
270        return false;
271    }
272    children[1..].iter().all(|c| {
273        matches!(
274            c.kind_as::<Kind>(),
275            Some(Kind::NodeMemberExpr | Kind::NodeCallExpr | Kind::NodeIndexExpr)
276        )
277    })
278}
279
280impl Visitor for FormatDriverWithExtras<'_> {
281    fn enter_node(&mut self, node: &SyntaxNode) -> WalkResult {
282        self.depth += 1;
283        if self.options.parenthesize_expressions {
284            if let Some(kind) = node.kind_as::<Kind>() {
285                if kind == Kind::NodeBinaryLevel {
286                    let has_binary = node.child_nodes().any(|c| {
287                        matches!(
288                            c.kind_as::<Kind>(),
289                            Some(Kind::NodeBinaryExpr | Kind::NodeInterval)
290                        )
291                    });
292                    if has_binary {
293                        self.out.push('(');
294                        self.paren_stack.push(true);
295                    } else if is_postfix_chain(node) {
296                        // a.b.c → (a.b).c: wrap first segment, emit ")" after first suffix.
297                        self.out.push('(');
298                        self.paren_stack.push(false); // we close via postfix_close_stack
299                        self.postfix_close_stack.push((self.depth, 2));
300                    } else {
301                        self.paren_stack.push(false);
302                    }
303                    return WalkResult::Continue(());
304                }
305                if is_expression_node(kind) {
306                    self.out.push('(');
307                    self.paren_stack.push(true);
308                    return WalkResult::Continue(());
309                }
310            }
311        }
312        self.paren_stack.push(false);
313        WalkResult::Continue(())
314    }
315
316    fn visit_token(&mut self, token: &SyntaxToken) -> WalkResult {
317        if Kind::from_syntax_kind(token.kind()) == Some(Kind::TokEof) {
318            return WalkResult::Continue(());
319        }
320        if token.is_trivia() && !self.options.preserve_comments {
321            return WalkResult::Continue(());
322        }
323        self.out.push_str(token.text());
324        WalkResult::Continue(())
325    }
326
327    fn leave_node(&mut self, node: &SyntaxNode) -> WalkResult {
328        // Postfix chain: emit ")" after we've left the first suffix (e.g. after .b in a.b.c).
329        if let Some(&(parent_depth, _)) = self.postfix_close_stack.last() {
330            if parent_depth == self.depth - 1 {
331                let (_, k) = self.postfix_close_stack.pop().unwrap();
332                if k == 1 {
333                    self.out.push(')');
334                } else {
335                    self.postfix_close_stack.push((parent_depth, k - 1));
336                }
337            }
338        }
339        self.depth -= 1;
340        let did_paren = self.paren_stack.pop().unwrap_or(false);
341        if did_paren {
342            self.out.push(')');
343        }
344        // Only emit type comments for node kinds we actually record in the type checker (avoids duplicates from wrapper nodes that share a child's span).
345        if self.options.annotate_types {
346            let kind_ok = node.kind_as::<Kind>().is_some_and(|k| {
347                matches!(
348                    k,
349                    Kind::NodePrimaryExpr
350                        | Kind::NodeBinaryExpr
351                        | Kind::NodeUnaryExpr
352                        | Kind::NodeCallExpr
353                        | Kind::NodeMemberExpr
354                        | Kind::NodeIndexExpr
355                        | Kind::NodeVarDecl
356                        | Kind::NodeAsCast
357                )
358            });
359            if kind_ok {
360                let span = node.text_range();
361                let key = (span.start, span.end);
362                if let Some(ty) = self.type_map.get(&key) {
363                    self.out.push_str(" /* ");
364                    self.out.push_str(&ty.for_annotation());
365                    self.out.push_str(" */");
366                }
367            }
368        }
369        WalkResult::Continue(())
370    }
371}
372
373/// Driver that writes the syntax tree to a string by visiting tokens.
374/// You can use [`format`] directly, or build a custom flow with `walk(root, &mut driver, &opts)`.
375pub struct FormatDriver<'a> {
376    options: &'a FormatterOptions,
377    out: String,
378}
379
380impl<'a> FormatDriver<'a> {
381    #[must_use]
382    pub fn new(options: &'a FormatterOptions) -> Self {
383        Self {
384            options,
385            out: String::new(),
386        }
387    }
388
389    #[must_use]
390    pub fn into_string(self) -> String {
391        self.out
392    }
393}
394
395impl Visitor for FormatDriver<'_> {
396    fn visit_token(&mut self, token: &SyntaxToken) -> WalkResult {
397        if Kind::from_syntax_kind(token.kind()) == Some(Kind::TokEof) {
398            return WalkResult::Continue(());
399        }
400        if token.is_trivia() && !self.options.preserve_comments {
401            return WalkResult::Continue(());
402        }
403        self.out.push_str(token.text());
404        WalkResult::Continue(())
405    }
406}
407
#[cfg(test)]
mod tests {
    use leekscript_core::parse;
    use leekscript_core::syntax::Kind;

    use super::format;
    use crate::formatter::FormatterOptions;

    /// Parses `source` (panicking on failure) and formats it with `options`.
    fn render(source: &str, options: &FormatterOptions) -> String {
        let root = parse(source).unwrap().expect("parse");
        format(&root, options)
    }

    /// Default formatting must produce text that parses back to a root node.
    #[test]
    fn format_round_trip_parse() {
        let formatted = render("return 1 + 2;", &FormatterOptions::default());
        let reparsed = parse(&formatted).unwrap().expect("re-parse after format");
        assert!(reparsed.kind_as::<Kind>() == Some(Kind::NodeRoot));
    }

    /// The round-trip path keeps every token of the input.
    #[test]
    fn format_preserves_structure() {
        let options = FormatterOptions {
            preserve_comments: true,
            parenthesize_expressions: false,
            annotate_types: false,
            signature_roots: None,
            ..FormatterOptions::default()
        };
        let formatted = render("var x = 42", &options);
        assert!(!formatted.is_empty());
        for needle in ["var", "x", "42"] {
            assert!(formatted.contains(needle));
        }
    }

    /// Compound expressions gain parentheses (exact shape depends on grammar associativity).
    #[test]
    fn format_parenthesize_expressions() {
        let options = FormatterOptions {
            preserve_comments: true,
            parenthesize_expressions: true,
            annotate_types: false,
            signature_roots: None,
            ..FormatterOptions::default()
        };
        let formatted = render("return a + b + c * d;", &options);
        assert!(
            formatted.contains('(') && formatted.contains(')'),
            "expected parentheses in output: {formatted:?}"
        );
    }

    /// Expressions are followed by a type comment (e.g. integer for literals and their sum).
    #[test]
    fn format_annotate_types() {
        let options = FormatterOptions {
            preserve_comments: true,
            parenthesize_expressions: false,
            annotate_types: true,
            signature_roots: None,
            ..FormatterOptions::default()
        };
        let formatted = render("var x = 1 + 2", &options);
        assert!(formatted.contains("/* integer */"));
    }

    /// Canonical mode keeps the tokens and indents nested code with tabs.
    #[test]
    fn format_canonical_indent() {
        let options = FormatterOptions {
            canonical_format: true,
            indent_style: super::IndentStyle::Tabs,
            semicolon_style: super::SemicolonStyle::Always,
            ..FormatterOptions::default()
        };
        let formatted = render("var x=1;function f(){return x;}", &options);
        assert!(formatted.contains("var"));
        assert!(formatted.contains("x"));
        assert!(formatted.contains("1"));
        assert!(formatted.contains(';'));
        assert!(
            formatted.contains('\t'),
            "canonical format should use tabs: {formatted:?}"
        );
        assert!(formatted.contains("return"));
    }
}