gdscript_fmt/lib.rs
1//! `gdscript-fmt` — the GDScript source formatter (Phase-6 Workstream 3).
2//!
3//! A pure `fn(source, &FmtConfig) -> String`: no engine model, no filesystem, `wasm32`-safe.
4//! It re-emits the lexer/pre-pass token stream, normalizing **block indentation** (to the
5//! configured unit), **trailing whitespace**, and the **final newline** — every *significant*
6//! token (keywords, identifiers, literals — including multi-line strings, which are single tokens)
7//! is emitted **verbatim**, so meaning cannot change.
8//!
//! **Safe by construction.** In `safe_mode` (the default) the formatter (a) refuses to touch a
//! file with syntax errors, and (b) re-lexes and re-parses its own output and **falls back to the
//! original** if the significant token sequence changed or the output no longer parses cleanly.
//! So it never corrupts code, even input it doesn't fully understand. The result is idempotent:
//! `format(format(x)) == format(x)`.
13//!
14//! Intra-line spacing normalization and line-reflow (full `gdformat` parity, the Wadler/Prettier
15//! `Doc`-IR pretty-printer) are the documented next step — see `TECH_DEBT.md`. Today the formatter
16//! owns indentation + whitespace, which is the most common formatting need and the safest subset.
17#![cfg_attr(docsrs, feature(doc_cfg))]
18
19use gdscript_syntax::SyntaxKind;
20
/// Knobs controlling how [`format`] rewrites a source file.
///
/// The [`Default`] configuration follows the Godot convention (tab indentation) and leaves the
/// safety net enabled.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FmtConfig {
    /// `true` (the Godot convention) emits one tab per indent level; `false` emits
    /// [`indent_size`](Self::indent_size) spaces per level instead.
    pub use_tabs: bool,
    /// Width of one indent level, in spaces. Only consulted when `use_tabs` is `false`.
    pub indent_size: usize,
    /// Intended maximum line width for reflow. **Reserved**: nothing reads this yet, because
    /// line-wrapping is not implemented.
    pub line_width: usize,
    /// When `true`, the formatter returns the input verbatim instead of its own output if the
    /// input has syntax errors, if the output's significant tokens differ from the input's, or
    /// if the output no longer parses cleanly. Leave it on unless you have a reason not to: it
    /// is the guarantee that formatting never changes meaning.
    pub safe_mode: bool,
}
34
35impl Default for FmtConfig {
36 fn default() -> Self {
37 Self {
38 use_tabs: true,
39 indent_size: 4,
40 line_width: 100,
41 safe_mode: true,
42 }
43 }
44}
45
46impl FmtConfig {
47 /// One level of indentation as a string.
48 #[must_use]
49 fn indent_unit(&self) -> String {
50 if self.use_tabs {
51 "\t".to_owned()
52 } else {
53 " ".repeat(self.indent_size)
54 }
55 }
56}
57
58/// Format `source`, returning the tidied text. In `safe_mode` (the default) this returns `source`
59/// unchanged rather than risk a meaning-changing edit (a syntax error in the input, or output whose
60/// significant tokens differ from the input's).
61#[must_use]
62pub fn format(source: &str, config: &FmtConfig) -> String {
63 let input_parses = gdscript_syntax::parse(source).errors().is_empty();
64 // Safe mode: never reformat around a syntax error — we'd risk mis-indenting a mis-parsed block.
65 if config.safe_mode && !input_parses {
66 return source.to_owned();
67 }
68 let out = reindent(source, config);
69 if config.safe_mode {
70 // The safety net is two-layered, because each catches what the other cannot:
71 // (1) significant-token equality catches a dropped / reordered / corrupted *token*;
72 if !same_significant_tokens(source, &out) {
73 return source.to_owned();
74 }
75 // (2) a parse-validity recheck catches a meaning-changing *indentation* edit — indentation
76 // lives entirely in trivia/synthetic layout, so it is invisible to (1). If the input
77 // parsed clean, the output must too, else we fall back to the verbatim source.
78 if input_parses && !gdscript_syntax::parse(&out).errors().is_empty() {
79 return source.to_owned();
80 }
81 }
82 out
83}
84
85/// Re-emit the pre-pass token stream with normalized indentation + trailing whitespace + a single
86/// final newline. Significant tokens (and continuation lines inside bracketed expressions) are
87/// emitted verbatim; only a *logical* line's leading indentation is rewritten, to its block depth.
88fn reindent(source: &str, config: &FmtConfig) -> String {
89 let raw = gdscript_syntax::tokenize(source);
90 let (toks, _diags) = gdscript_syntax::run_prepass(&raw, source);
91 let unit = config.indent_unit();
92
93 let mut out = String::with_capacity(source.len() + 16);
94 let mut depth: usize = 0;
95 // `true` while we are at the start of a logical line, before its first significant token — the
96 // point at which we (re)emit the indentation, once `depth` is final.
97 let mut line_start = true;
98 // A synthetic `Newline` (zero-width) precedes the real `NewlinePhys` that carries the line's
99 // bytes; this flag swallows that one `NewlinePhys` so the break is emitted exactly once. A
100 // `NewlinePhys` *not* so flagged is either a bracketed-continuation physical newline (kept
101 // verbatim, interior preserved) or the terminator of a comment-only/blank line the prepass
102 // copies verbatim *without* a synthetic `Newline` — those two are told apart by `bracket_depth`.
103 let mut just_broke = false;
104 // Open-bracket nesting depth of the significant tokens emitted so far. The prepass suppresses
105 // synthetic line breaks inside brackets, so a `NewlinePhys` with `bracket_depth == 0` always
106 // ends a logical (or comment-only) line, and the *next* line's indentation must be re-emitted.
107 let mut bracket_depth: usize = 0;
108
109 for t in &toks {
110 let text = &source[t.range];
111 match t.kind {
112 SyntaxKind::Indent => depth += 1,
113 SyntaxKind::Dedent => depth = depth.saturating_sub(1),
114 // A synthetic line break: ends the logical line; the next one is re-indented.
115 SyntaxKind::Newline => {
116 trim_trailing_inline_ws(&mut out);
117 out.push('\n');
118 line_start = true;
119 just_broke = true;
120 }
121 SyntaxKind::NewlinePhys => {
122 if just_broke {
123 just_broke = false; // its bytes belong to the synthetic break already emitted
124 } else {
125 trim_trailing_inline_ws(&mut out);
126 out.push('\n');
127 // Outside brackets this newline ends a comment-only / blank line the prepass
128 // copied verbatim (no synthetic `Newline`), so the next line must be
129 // re-indented. Inside brackets it is a real continuation — leave it verbatim.
130 if bracket_depth == 0 {
131 line_start = true;
132 }
133 }
134 }
135 SyntaxKind::Whitespace => {
136 if line_start {
137 // A logical line's leading indentation — dropped; the normalized indentation is
138 // emitted at the first significant token (so `depth` is final by then).
139 } else {
140 out.push_str(text);
141 }
142 }
143 // A significant token or a comment.
144 _ => {
145 if line_start {
146 for _ in 0..depth {
147 out.push_str(&unit);
148 }
149 line_start = false;
150 }
151 just_broke = false;
152 out.push_str(text);
153 match t.kind {
154 SyntaxKind::LParen | SyntaxKind::LBrack | SyntaxKind::LBrace => {
155 bracket_depth += 1;
156 }
157 SyntaxKind::RParen | SyntaxKind::RBrack | SyntaxKind::RBrace => {
158 bracket_depth = bracket_depth.saturating_sub(1);
159 }
160 _ => {}
161 }
162 }
163 }
164 }
165 // Trim a trailing blank/whitespace run and guarantee exactly one final newline.
166 let trimmed = out.trim_end();
167 let mut result = String::with_capacity(trimmed.len() + 1);
168 result.push_str(trimmed);
169 if !result.is_empty() {
170 result.push('\n');
171 }
172 result
173}
174
/// Strip every space and tab from the tail of `out`, stopping at the first other character
/// (a newline included), so only the current line's trailing blanks are removed.
fn trim_trailing_inline_ws(out: &mut String) {
    let kept = out
        .trim_end_matches(|c: char| matches!(c, ' ' | '\t'))
        .len();
    out.truncate(kept);
}
181
182/// Whether two sources lex to the same sequence of significant (non-trivia) tokens — the
183/// meaning-preservation check. Whitespace / newline / comment trivia are ignored (that is what the
184/// formatter is allowed to change); literals (including multi-line strings) are significant, so a
185/// corrupted string would be caught here.
186fn same_significant_tokens(a: &str, b: &str) -> bool {
187 fn sig(s: &str) -> Vec<(SyntaxKind, &str)> {
188 gdscript_syntax::tokenize(s)
189 .into_iter()
190 .filter(|t| !t.kind.is_trivia())
191 .map(|t| (t.kind, &s[t.range]))
192 .collect()
193 }
194 sig(a) == sig(b)
195}
196
#[cfg(test)]
mod tests {
    use super::*;

    /// Format with the default configuration (tabs, safe mode on).
    fn fmt(src: &str) -> String {
        format(src, &FmtConfig::default())
    }

    #[test]
    fn normalizes_indentation_to_tabs() {
        // Four-space indentation becomes one tab per level.
        let src = "func f():\n    if true:\n        return 1\n";
        assert_eq!(fmt(src), "func f():\n\tif true:\n\t\treturn 1\n");
    }

    #[test]
    fn trims_trailing_whitespace_and_adds_final_newline() {
        let src = "var x = 1   \nvar y = 2"; // trailing spaces + no final newline
        assert_eq!(fmt(src), "var x = 1\nvar y = 2\n");
    }

    #[test]
    fn is_idempotent() {
        // The fixture must be well-formed (the `return` nested under the `if`) and not already
        // tab-indented; otherwise safe mode hands the input back and the check is vacuous.
        let src = "func f():\n  var a = 1\n  if a:\n    return a\n";
        let once = fmt(src);
        assert_ne!(once, src, "fixture must actually be reformatted");
        assert_eq!(fmt(&once), once, "formatting must be idempotent");
    }

    #[test]
    fn already_formatted_is_unchanged() {
        let src = "func f():\n\tvar a = 1\n\treturn a\n";
        assert_eq!(fmt(src), src);
    }

    #[test]
    fn preserves_significant_tokens_including_strings() {
        let src = "func f():\n\tvar s = \"a + b\"\n\treturn s\n";
        let out = fmt(src);
        assert!(super::same_significant_tokens(src, &out));
        assert!(out.contains("\"a + b\""));
    }

    #[test]
    fn multiline_string_content_is_untouched() {
        // The interior of a multi-line string must survive verbatim (it is a single token).
        let src = "func f():\n\tvar s = \"\"\"line1\n keep \nline2\"\"\"\n\treturn s\n";
        let out = fmt(src);
        assert!(
            out.contains("line1\n keep \nline2"),
            "got: {out:?}"
        );
    }

    #[test]
    fn safe_mode_returns_input_on_syntax_error() {
        let src = "func f(:\n\treturn"; // malformed
        assert_eq!(fmt(src), src);
    }

    #[test]
    fn empty_input_stays_empty() {
        assert_eq!(fmt(""), "");
        assert_eq!(fmt("\n\n\n"), "");
    }

    #[test]
    fn spaces_option_indents_with_spaces() {
        let cfg = FmtConfig {
            use_tabs: false,
            indent_size: 2,
            ..FmtConfig::default()
        };
        let src = "func f():\n\treturn 1\n";
        // `indent_size: 2` means exactly two spaces per level.
        assert_eq!(format(src, &cfg), "func f():\n  return 1\n");
    }

    /// `parse(src).errors()` must be empty — the formatter must never emit code that fails to parse.
    fn parses_clean(src: &str) -> bool {
        gdscript_syntax::parse(src).errors().is_empty()
    }

    #[test]
    fn comment_between_statements_does_not_corrupt_the_next_line() {
        // A comment-only line is copied verbatim by the prepass (no synthetic Newline); the line
        // AFTER it must still be re-indented to the block depth, not left at its original spacing.
        let src = "func g():\n var a = 1\n # c\n var x = 1\n var y = 2\n";
        let out = fmt(src);
        assert_eq!(
            out,
            "func g():\n\tvar a = 1\n\t# c\n\tvar x = 1\n\tvar y = 2\n"
        );
        assert!(
            parses_clean(&out),
            "formatter must not emit mixed indent: {out:?}"
        );
        assert_eq!(fmt(&out), out, "must be idempotent");
    }

    #[test]
    fn leading_body_comment_does_not_corrupt_the_body() {
        // A comment that is the FIRST line of a block lands at column 0 (the prepass emits `Indent`
        // only at the first *code* line, so the block depth isn't known yet — a documented cosmetic
        // limitation, NOT a corruption). The CODE must still be correctly indented + parse clean.
        let src = "func g():\n # c\n var x = 1\n var y = 2\n";
        let out = fmt(src);
        assert_eq!(out, "func g():\n# c\n\tvar x = 1\n\tvar y = 2\n");
        assert!(
            parses_clean(&out),
            "code must be correctly indented: {out:?}"
        );
        assert_eq!(fmt(&out), out, "must be idempotent");
    }

    #[test]
    fn doc_comment_between_statements_is_reindented_and_does_not_corrupt() {
        // A doc comment AFTER a code line (depth known) is re-indented like any line, and the line
        // following it must not be mis-indented.
        let src = "func g():\n var a = 1\n ## doc\n var x = 1\n";
        let out = fmt(src);
        assert_eq!(out, "func g():\n\tvar a = 1\n\t## doc\n\tvar x = 1\n");
        assert!(parses_clean(&out), "{out:?}");
    }

    #[test]
    fn bracketed_continuation_interior_is_preserved() {
        // A physical newline INSIDE brackets is a real continuation — its interior spacing must be
        // kept verbatim (not treated like a comment-line terminator that re-indents the next line).
        let src = "func f():\n\tvar a = [\n\t\t1,\n\t\t2,\n\t]\n\treturn a\n";
        let out = fmt(src);
        assert!(parses_clean(&out), "{out:?}");
        assert!(super::same_significant_tokens(src, &out));
        assert_eq!(fmt(&out), out, "must be idempotent");
    }
}
330}