Skip to main content

gdscript_fmt/
lib.rs

//! `gdscript-fmt` — the GDScript source formatter (Phase-6 Workstream 3).
//!
//! A pure `fn(source, &FmtConfig) -> String`: no engine model, no filesystem, `wasm32`-safe.
//! It re-emits the lexer/pre-pass token stream, normalizing **block indentation** (to the
//! configured unit), **trailing whitespace**, and the **final newline** — every *significant*
//! token (keywords, identifiers, literals — including multi-line strings, which are single tokens)
//! is emitted **verbatim**, so meaning cannot change.
//!
//! **Safe by construction.** In `safe_mode` (the default) the formatter (a) refuses to touch a
//! file with syntax errors, and (b) re-lexes its own output and **falls back to the original** if
//! the significant token sequence changed. So it never corrupts code, even input it doesn't fully
//! understand. The result is idempotent: `format(format(x)) == format(x)`.
//!
//! Intra-line spacing normalization and line-reflow (full `gdformat` parity, the Wadler/Prettier
//! `Doc`-IR pretty-printer) are the documented next step — see `TECH_DEBT.md`. Today the formatter
//! owns indentation + whitespace, which is the most common formatting need and the safest subset.
17#![cfg_attr(docsrs, feature(doc_cfg))]
18
19use gdscript_syntax::SyntaxKind;
20
/// Knobs controlling the formatter. The defaults follow the Godot convention (tab indentation)
/// and leave the safety net switched on.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FmtConfig {
    /// `true`: one tab per indent level (the Godot convention).
    /// `false`: [`indent_size`](Self::indent_size) spaces per indent level.
    pub use_tabs: bool,
    /// Number of spaces that make up one indent level; only consulted when `use_tabs` is `false`.
    pub indent_size: usize,
    /// Target line width for reflow. **Reserved** — line-wrapping is not yet implemented, so this
    /// value currently has no effect on the output.
    pub line_width: usize,
    /// When set, the formatter returns the input verbatim instead of its own output whenever the
    /// input has syntax errors, the output's significant tokens differ from the input's, or the
    /// output no longer parses. Keep on unless you have a reason not to: it is the guarantee that
    /// the formatter never changes meaning.
    pub safe_mode: bool,
}
34
35impl Default for FmtConfig {
36    fn default() -> Self {
37        Self {
38            use_tabs: true,
39            indent_size: 4,
40            line_width: 100,
41            safe_mode: true,
42        }
43    }
44}
45
46impl FmtConfig {
47    /// One level of indentation as a string.
48    #[must_use]
49    fn indent_unit(&self) -> String {
50        if self.use_tabs {
51            "\t".to_owned()
52        } else {
53            " ".repeat(self.indent_size)
54        }
55    }
56}
57
58/// Format `source`, returning the tidied text. In `safe_mode` (the default) this returns `source`
59/// unchanged rather than risk a meaning-changing edit (a syntax error in the input, or output whose
60/// significant tokens differ from the input's).
61#[must_use]
62pub fn format(source: &str, config: &FmtConfig) -> String {
63    let input_parses = gdscript_syntax::parse(source).errors().is_empty();
64    // Safe mode: never reformat around a syntax error — we'd risk mis-indenting a mis-parsed block.
65    if config.safe_mode && !input_parses {
66        return source.to_owned();
67    }
68    let out = reindent(source, config);
69    if config.safe_mode {
70        // The safety net is two-layered, because each catches what the other cannot:
71        // (1) significant-token equality catches a dropped / reordered / corrupted *token*;
72        if !same_significant_tokens(source, &out) {
73            return source.to_owned();
74        }
75        // (2) a parse-validity recheck catches a meaning-changing *indentation* edit — indentation
76        //     lives entirely in trivia/synthetic layout, so it is invisible to (1). If the input
77        //     parsed clean, the output must too, else we fall back to the verbatim source.
78        if input_parses && !gdscript_syntax::parse(&out).errors().is_empty() {
79            return source.to_owned();
80        }
81    }
82    out
83}
84
85/// Re-emit the pre-pass token stream with normalized indentation + trailing whitespace + a single
86/// final newline. Significant tokens (and continuation lines inside bracketed expressions) are
87/// emitted verbatim; only a *logical* line's leading indentation is rewritten, to its block depth.
88fn reindent(source: &str, config: &FmtConfig) -> String {
89    let raw = gdscript_syntax::tokenize(source);
90    let (toks, _diags) = gdscript_syntax::run_prepass(&raw, source);
91    let unit = config.indent_unit();
92
93    let mut out = String::with_capacity(source.len() + 16);
94    let mut depth: usize = 0;
95    // `true` while we are at the start of a logical line, before its first significant token — the
96    // point at which we (re)emit the indentation, once `depth` is final.
97    let mut line_start = true;
98    // A synthetic `Newline` (zero-width) precedes the real `NewlinePhys` that carries the line's
99    // bytes; this flag swallows that one `NewlinePhys` so the break is emitted exactly once. A
100    // `NewlinePhys` *not* so flagged is either a bracketed-continuation physical newline (kept
101    // verbatim, interior preserved) or the terminator of a comment-only/blank line the prepass
102    // copies verbatim *without* a synthetic `Newline` — those two are told apart by `bracket_depth`.
103    let mut just_broke = false;
104    // Open-bracket nesting depth of the significant tokens emitted so far. The prepass suppresses
105    // synthetic line breaks inside brackets, so a `NewlinePhys` with `bracket_depth == 0` always
106    // ends a logical (or comment-only) line, and the *next* line's indentation must be re-emitted.
107    let mut bracket_depth: usize = 0;
108
109    for t in &toks {
110        let text = &source[t.range];
111        match t.kind {
112            SyntaxKind::Indent => depth += 1,
113            SyntaxKind::Dedent => depth = depth.saturating_sub(1),
114            // A synthetic line break: ends the logical line; the next one is re-indented.
115            SyntaxKind::Newline => {
116                trim_trailing_inline_ws(&mut out);
117                out.push('\n');
118                line_start = true;
119                just_broke = true;
120            }
121            SyntaxKind::NewlinePhys => {
122                if just_broke {
123                    just_broke = false; // its bytes belong to the synthetic break already emitted
124                } else {
125                    trim_trailing_inline_ws(&mut out);
126                    out.push('\n');
127                    // Outside brackets this newline ends a comment-only / blank line the prepass
128                    // copied verbatim (no synthetic `Newline`), so the next line must be
129                    // re-indented. Inside brackets it is a real continuation — leave it verbatim.
130                    if bracket_depth == 0 {
131                        line_start = true;
132                    }
133                }
134            }
135            SyntaxKind::Whitespace => {
136                if line_start {
137                    // A logical line's leading indentation — dropped; the normalized indentation is
138                    // emitted at the first significant token (so `depth` is final by then).
139                } else {
140                    out.push_str(text);
141                }
142            }
143            // A significant token or a comment.
144            _ => {
145                if line_start {
146                    for _ in 0..depth {
147                        out.push_str(&unit);
148                    }
149                    line_start = false;
150                }
151                just_broke = false;
152                out.push_str(text);
153                match t.kind {
154                    SyntaxKind::LParen | SyntaxKind::LBrack | SyntaxKind::LBrace => {
155                        bracket_depth += 1;
156                    }
157                    SyntaxKind::RParen | SyntaxKind::RBrack | SyntaxKind::RBrace => {
158                        bracket_depth = bracket_depth.saturating_sub(1);
159                    }
160                    _ => {}
161                }
162            }
163        }
164    }
165    // Trim a trailing blank/whitespace run and guarantee exactly one final newline.
166    let trimmed = out.trim_end();
167    let mut result = String::with_capacity(trimmed.len() + 1);
168    result.push_str(trimmed);
169    if !result.is_empty() {
170        result.push('\n');
171    }
172    result
173}
174
/// Strip the run of spaces and tabs at the very end of `out` (i.e. of the line currently being
/// built). Any other character — including a newline or other Unicode whitespace — stops the trim.
fn trim_trailing_inline_ws(out: &mut String) {
    let kept = out
        .trim_end_matches(|c: char| matches!(c, ' ' | '\t'))
        .len();
    out.truncate(kept);
}
181
182/// Whether two sources lex to the same sequence of significant (non-trivia) tokens — the
183/// meaning-preservation check. Whitespace / newline / comment trivia are ignored (that is what the
184/// formatter is allowed to change); literals (including multi-line strings) are significant, so a
185/// corrupted string would be caught here.
186fn same_significant_tokens(a: &str, b: &str) -> bool {
187    fn sig(s: &str) -> Vec<(SyntaxKind, &str)> {
188        gdscript_syntax::tokenize(s)
189            .into_iter()
190            .filter(|t| !t.kind.is_trivia())
191            .map(|t| (t.kind, &s[t.range]))
192            .collect()
193    }
194    sig(a) == sig(b)
195}
196
#[cfg(test)]
mod tests {
    use super::*;

    /// Run the formatter with the default configuration (tabs, safe mode on).
    fn fmt(input: &str) -> String {
        let defaults = FmtConfig::default();
        format(input, &defaults)
    }

    /// `parse(src).errors()` must be empty — the formatter must never emit code that fails to parse.
    fn parses_clean(src: &str) -> bool {
        gdscript_syntax::parse(src).errors().is_empty()
    }

    /// Formatting already-formatted output must be a no-op.
    fn assert_idempotent(out: &str) {
        assert_eq!(fmt(out), out, "must be idempotent");
    }

    #[test]
    fn normalizes_indentation_to_tabs() {
        // Each four-space level collapses to a single tab.
        let four_spaces = "func f():\n    if true:\n        return 1\n";
        let tabbed = "func f():\n\tif true:\n\t\treturn 1\n";
        assert_eq!(fmt(four_spaces), tabbed);
    }

    #[test]
    fn trims_trailing_whitespace_and_adds_final_newline() {
        // Input has trailing spaces on line one and lacks a final newline.
        let untidy = "var x = 1   \nvar y = 2";
        let tidy = "var x = 1\nvar y = 2\n";
        assert_eq!(fmt(untidy), tidy);
    }

    #[test]
    fn is_idempotent() {
        let first_pass = fmt("func f():\n  var a = 1\n  if a:\n      return a\n");
        let second_pass = fmt(&first_pass);
        assert_eq!(second_pass, first_pass, "formatting must be idempotent");
    }

    #[test]
    fn already_formatted_is_unchanged() {
        let canonical = "func f():\n\tvar a = 1\n\treturn a\n";
        assert_eq!(fmt(canonical), canonical);
    }

    #[test]
    fn preserves_significant_tokens_including_strings() {
        let input = "func f():\n\tvar s = \"a + b\"\n\treturn s\n";
        let formatted = fmt(input);
        assert!(super::same_significant_tokens(input, &formatted));
        assert!(formatted.contains("\"a + b\""));
    }

    #[test]
    fn multiline_string_content_is_untouched() {
        // A multi-line string is one token, so its interior must come through byte-for-byte.
        let out =
            fmt("func f():\n\tvar s = \"\"\"line1\n        keep   \nline2\"\"\"\n\treturn s\n");
        let interior_survived = out.contains("line1\n        keep   \nline2");
        assert!(interior_survived, "got: {out:?}");
    }

    #[test]
    fn safe_mode_returns_input_on_syntax_error() {
        // Malformed parameter list: safe mode must hand the text back untouched.
        let malformed = "func f(:\n\treturn";
        assert_eq!(fmt(malformed), malformed);
    }

    #[test]
    fn empty_input_stays_empty() {
        for blank in ["", "\n\n\n"] {
            assert_eq!(fmt(blank), "");
        }
    }

    #[test]
    fn spaces_option_indents_with_spaces() {
        let two_spaces = FmtConfig {
            indent_size: 2,
            use_tabs: false,
            ..FmtConfig::default()
        };
        let formatted = format("func f():\n\treturn 1\n", &two_spaces);
        assert_eq!(formatted, "func f():\n  return 1\n");
    }

    #[test]
    fn comment_between_statements_does_not_corrupt_the_next_line() {
        // The prepass copies a comment-only line verbatim (no synthetic Newline). The line that
        // FOLLOWS it must still be re-indented to the block depth rather than keep its old spacing.
        let out = fmt("func g():\n  var a = 1\n  # c\n  var x = 1\n  var y = 2\n");
        let expected = "func g():\n\tvar a = 1\n\t# c\n\tvar x = 1\n\tvar y = 2\n";
        assert_eq!(out, expected);
        assert!(
            parses_clean(&out),
            "formatter must not emit mixed indent: {out:?}"
        );
        assert_idempotent(&out);
    }

    #[test]
    fn leading_body_comment_does_not_corrupt_the_body() {
        // When a comment is the FIRST line of a block it ends up at column 0: the prepass emits
        // `Indent` only at the first *code* line, so the depth is not yet known. This is a
        // documented cosmetic limitation, NOT a corruption — the CODE must still be indented
        // correctly and parse clean.
        let out = fmt("func g():\n  # c\n  var x = 1\n  var y = 2\n");
        let expected = "func g():\n# c\n\tvar x = 1\n\tvar y = 2\n";
        assert_eq!(out, expected);
        assert!(
            parses_clean(&out),
            "code must be correctly indented: {out:?}"
        );
        assert_idempotent(&out);
    }

    #[test]
    fn doc_comment_between_statements_is_reindented_and_does_not_corrupt() {
        // A doc comment that comes AFTER a code line (so the depth is known) is re-indented like
        // any other line, and the line after it must not be mis-indented either.
        let out = fmt("func g():\n  var a = 1\n  ## doc\n  var x = 1\n");
        let expected = "func g():\n\tvar a = 1\n\t## doc\n\tvar x = 1\n";
        assert_eq!(out, expected);
        assert!(parses_clean(&out), "{out:?}");
    }

    #[test]
    fn bracketed_continuation_interior_is_preserved() {
        // A physical newline INSIDE brackets is a true continuation: the spacing of the lines it
        // separates stays verbatim instead of being treated as a comment-line terminator that
        // triggers re-indentation.
        let input = "func f():\n\tvar a = [\n\t\t1,\n\t\t2,\n\t]\n\treturn a\n";
        let out = fmt(input);
        assert!(parses_clean(&out), "{out:?}");
        assert!(super::same_significant_tokens(input, &out));
        assert_idempotent(&out);
    }
}