axon/cli_fmt.rs
1//! §Fase 39.f — `axon fmt` subcommand (Rust binary parity).
2//!
3//! Token-level round-trip formatter. Direct port of the Python
4//! `axon.compiler.formatter.format_source` (Fase 14.d MVP).
5//!
6//! The algorithm walks every token (effective + comment) emitted by
7//! the lexer in source order, re-emits each at its original
8//! `(line, column)` position, padding with newlines + spaces to
9//! preserve layout. Comments survive verbatim (regular, outer doc,
10//! inner doc — line + block). The final output is right-trimmed per
11//! line + ends with exactly one `\n`.
12//!
13//! ## Idempotence
14//!
15//! `format_source(format_source(x)) == format_source(x)` for every
16//! input. The MVP intentionally preserves the author's existing
17//! layout; canonicalisation rules (indent width, brace style) are
18//! deferred to a future fase — same scope as the Python MVP.
19//!
20//! ## Why a token-level formatter
21//!
22//! The lexer's lossless channel (Fase 14.a) already records every
23//! comment with its exact `(line, column)`. Combined with the
24//! effective tokens, the source byte stream can be reconstructed
25//! deterministically. This is enough to call `axon fmt --check` a
26//! lossless contract: if the formatter changes nothing beyond the
27//! documented cosmetic normalisation, the file is canonical.
28
29use axon_frontend::lexer::Lexer;
30use axon_frontend::tokens::{Token, TokenType};
31
32/// §Fase 39.f — re-render a lexer token to source form. The lexer
33/// strips delimiters from certain token kinds (string literals
34/// store only their content sans `"..."`); the formatter MUST
35/// re-add them to produce re-lexable output.
36fn render_token_to_source(tok: &Token) -> String {
37 match tok.ttype {
38 TokenType::StringLit => {
39 // Re-quote with proper escape sequences for backslash +
40 // double-quote + newline + tab. Match the lexer's
41 // `scan_string` escape set verbatim.
42 let mut out = String::with_capacity(tok.value.len() + 2);
43 out.push('"');
44 for c in tok.value.chars() {
45 match c {
46 '\\' => out.push_str("\\\\"),
47 '"' => out.push_str("\\\""),
48 '\n' => out.push_str("\\n"),
49 '\t' => out.push_str("\\t"),
50 other => out.push(other),
51 }
52 }
53 out.push('"');
54 out
55 }
56 _ => tok.value.clone(),
57 }
58}
59
60/// Format an AXON source string. Returns the canonicalised source
61/// or a `LexerError` message when the input doesn't tokenise.
62///
63/// Cosmetic normalisations applied:
64/// - every line right-trimmed of trailing whitespace
65/// - file ends with exactly one `\n`
66///
67/// Beyond those two, the output is byte-identical to the input
68/// modulo position re-emission.
69pub fn format_source(src: &str) -> Result<String, String> {
70 let tokens = Lexer::new(src, "<fmt>")
71 .tokenize()
72 .map_err(|e| format!("lex error at {}:{}: {}", e.line, e.column, e.message))?;
73
74 let mut pieces: Vec<String> = Vec::new();
75 let mut cur_line: u32 = 1;
76 let mut cur_col: u32 = 1;
77
78 for tok in tokens {
79 // Skip EOF sentinel.
80 if matches!(tok.ttype, axon_frontend::tokens::TokenType::Eof) {
81 continue;
82 }
83
84 // Catch up to the token's line by emitting newlines.
85 if tok.line > cur_line {
86 for _ in 0..(tok.line - cur_line) {
87 pieces.push("\n".to_string());
88 }
89 cur_line = tok.line;
90 cur_col = 1;
91 }
92
93 // Catch up to the token's column by emitting spaces.
94 if tok.column > cur_col {
95 for _ in 0..(tok.column - cur_col) {
96 pieces.push(" ".to_string());
97 }
98 cur_col = tok.column;
99 }
100
101 // §Fase 39.f — re-render token to source form. The lexer
102 // strips delimiters from string literals (`"..."` → just
103 // the content), so the formatter MUST re-add them to
104 // produce re-lexable output. Same defensive shape for
105 // other token kinds whose value is sans-delimiter.
106 let rendered = render_token_to_source(&tok);
107 pieces.push(rendered.clone());
108
109 // Block comments can contain newlines — update the cursor
110 // accordingly so the next token positions correctly.
111 if rendered.contains('\n') {
112 let parts: Vec<&str> = rendered.split('\n').collect();
113 cur_line += (parts.len() - 1) as u32;
114 cur_col = parts.last().map(|s| s.len() as u32).unwrap_or(0) + 1;
115 } else {
116 cur_col += rendered.chars().count() as u32;
117 }
118 }
119
120 let raw: String = pieces.join("");
121 // Right-trim every line, collapse trailing blank lines to a
122 // single final \n.
123 let trimmed: String = raw
124 .split('\n')
125 .map(|line| line.trim_end())
126 .collect::<Vec<_>>()
127 .join("\n");
128 let mut result = trimmed.trim_end_matches('\n').to_string();
129 result.push('\n');
130 Ok(result)
131}
132
#[cfg(test)]
mod tests {
    use super::*;

    /// Empty input still yields the mandatory final newline.
    #[test]
    fn fase39f_fmt_empty_source_returns_newline() {
        let formatted = format_source("").expect("empty source lexes");
        assert_eq!(formatted, "\n");
    }

    /// Whitespace after the last token on a line is dropped.
    #[test]
    fn fase39f_fmt_trailing_whitespace_trimmed() {
        let formatted = format_source("persona Alice \n").expect("lexes");
        assert!(!formatted.contains("Alice "));
        assert!(formatted.contains("Alice"));
    }

    /// A run of blank lines at end of file collapses to one `\n`.
    #[test]
    fn fase39f_fmt_multiple_trailing_newlines_collapsed() {
        let formatted = format_source("persona Alice\n\n\n\n").expect("lexes");
        assert!(formatted.ends_with("\n"));
        assert!(!formatted.ends_with("\n\n"));
    }

    /// Formatting already-formatted output changes nothing.
    #[test]
    fn fase39f_fmt_idempotent_on_well_formed_source() {
        let input = "persona Alice {\n confidence_threshold: 0.85\n}\n";
        let first = format_source(input).expect("first pass");
        let second = format_source(&first).expect("second pass");
        assert_eq!(first, second, "format_source MUST be idempotent");
    }

    /// Regular `//` comments survive the round trip.
    #[test]
    fn fase39f_fmt_line_comment_preserved() {
        let formatted =
            format_source("persona Alice {\n // a comment\n}\n").expect("lexes");
        assert!(formatted.contains("// a comment"));
    }

    /// Regular `/* ... */` comments survive the round trip.
    #[test]
    fn fase39f_fmt_block_comment_preserved() {
        let formatted =
            format_source("persona Alice {\n /* block */\n}\n").expect("lexes");
        assert!(formatted.contains("/* block */"));
    }

    /// Outer doc (`///`) comments survive the round trip.
    #[test]
    fn fase39f_fmt_doc_line_comment_preserved() {
        let formatted =
            format_source("/// outer doc\npersona Alice {\n}\n").expect("lexes");
        assert!(formatted.contains("/// outer doc"));
    }

    /// Inner doc (`//!`) comments survive the round trip.
    #[test]
    fn fase39f_fmt_inner_doc_preserved() {
        let formatted =
            format_source("//! inner doc\npersona Alice {\n}\n").expect("lexes");
        assert!(formatted.contains("//! inner doc"));
    }

    /// An unterminated string literal fails to lex, so formatting
    /// reports an error instead of producing output.
    #[test]
    fn fase39f_fmt_lex_error_returns_err() {
        let outcome = format_source("persona Alice { name: \"unclosed\n");
        assert!(outcome.is_err());
    }

    /// `--check` mode (handled by main.rs) compares formatted vs
    /// original; for an already-formatted source the two may differ
    /// only in trailing-whitespace canonicalisation.
    #[test]
    fn fase39f_fmt_check_mode_well_formed_returns_unchanged() {
        let input = "persona Alice {\n confidence_threshold: 0.85\n}\n";
        let formatted = format_source(input).expect("lexes");
        assert_eq!(formatted.trim_end(), input.trim_end());
    }
}
216}