Skip to main content

axon/
cli_fmt.rs

1//! §Fase 39.f — `axon fmt` subcommand (Rust binary parity).
2//!
3//! Token-level round-trip formatter. Direct port of the Python
4//! `axon.compiler.formatter.format_source` (Fase 14.d MVP).
5//!
6//! The algorithm walks every token (effective + comment) emitted by
7//! the lexer in source order, re-emits each at its original
8//! `(line, column)` position, padding with newlines + spaces to
9//! preserve layout. Comments survive verbatim (regular, outer doc,
10//! inner doc — line + block). The final output is right-trimmed per
11//! line + ends with exactly one `\n`.
12//!
13//! ## Idempotence
14//!
15//! `format_source(format_source(x)) == format_source(x)` for every
16//! input. The MVP intentionally preserves the author's existing
17//! layout; canonicalisation rules (indent width, brace style) are
18//! deferred to a future fase — same scope as the Python MVP.
19//!
20//! ## Why a token-level formatter
21//!
22//! The lexer's lossless channel (Fase 14.a) already records every
23//! comment with its exact `(line, column)`. Combined with the
24//! effective tokens, the source byte stream can be reconstructed
25//! deterministically. This is enough to call `axon fmt --check` a
26//! lossless contract: if the formatter changes nothing beyond the
27//! documented cosmetic normalisation, the file is canonical.
28
29use axon_frontend::lexer::Lexer;
30use axon_frontend::tokens::{Token, TokenType};
31
32/// §Fase 39.f — re-render a lexer token to source form. The lexer
33/// strips delimiters from certain token kinds (string literals
34/// store only their content sans `"..."`); the formatter MUST
35/// re-add them to produce re-lexable output.
36fn render_token_to_source(tok: &Token) -> String {
37    match tok.ttype {
38        TokenType::StringLit => {
39            // Re-quote with proper escape sequences for backslash +
40            // double-quote + newline + tab. Match the lexer's
41            // `scan_string` escape set verbatim.
42            let mut out = String::with_capacity(tok.value.len() + 2);
43            out.push('"');
44            for c in tok.value.chars() {
45                match c {
46                    '\\' => out.push_str("\\\\"),
47                    '"' => out.push_str("\\\""),
48                    '\n' => out.push_str("\\n"),
49                    '\t' => out.push_str("\\t"),
50                    other => out.push(other),
51                }
52            }
53            out.push('"');
54            out
55        }
56        _ => tok.value.clone(),
57    }
58}
59
60/// Format an AXON source string. Returns the canonicalised source
61/// or a `LexerError` message when the input doesn't tokenise.
62///
63/// Cosmetic normalisations applied:
64///   - every line right-trimmed of trailing whitespace
65///   - file ends with exactly one `\n`
66///
67/// Beyond those two, the output is byte-identical to the input
68/// modulo position re-emission.
69pub fn format_source(src: &str) -> Result<String, String> {
70    let tokens = Lexer::new(src, "<fmt>")
71        .tokenize()
72        .map_err(|e| format!("lex error at {}:{}: {}", e.line, e.column, e.message))?;
73
74    let mut pieces: Vec<String> = Vec::new();
75    let mut cur_line: u32 = 1;
76    let mut cur_col: u32 = 1;
77
78    for tok in tokens {
79        // Skip EOF sentinel.
80        if matches!(tok.ttype, axon_frontend::tokens::TokenType::Eof) {
81            continue;
82        }
83
84        // Catch up to the token's line by emitting newlines.
85        if tok.line > cur_line {
86            for _ in 0..(tok.line - cur_line) {
87                pieces.push("\n".to_string());
88            }
89            cur_line = tok.line;
90            cur_col = 1;
91        }
92
93        // Catch up to the token's column by emitting spaces.
94        if tok.column > cur_col {
95            for _ in 0..(tok.column - cur_col) {
96                pieces.push(" ".to_string());
97            }
98            cur_col = tok.column;
99        }
100
101        // §Fase 39.f — re-render token to source form. The lexer
102        // strips delimiters from string literals (`"..."` → just
103        // the content), so the formatter MUST re-add them to
104        // produce re-lexable output. Same defensive shape for
105        // other token kinds whose value is sans-delimiter.
106        let rendered = render_token_to_source(&tok);
107        pieces.push(rendered.clone());
108
109        // Block comments can contain newlines — update the cursor
110        // accordingly so the next token positions correctly.
111        if rendered.contains('\n') {
112            let parts: Vec<&str> = rendered.split('\n').collect();
113            cur_line += (parts.len() - 1) as u32;
114            cur_col = parts.last().map(|s| s.len() as u32).unwrap_or(0) + 1;
115        } else {
116            cur_col += rendered.chars().count() as u32;
117        }
118    }
119
120    let raw: String = pieces.join("");
121    // Right-trim every line, collapse trailing blank lines to a
122    // single final \n.
123    let trimmed: String = raw
124        .split('\n')
125        .map(|line| line.trim_end())
126        .collect::<Vec<_>>()
127        .join("\n");
128    let mut result = trimmed.trim_end_matches('\n').to_string();
129    result.push('\n');
130    Ok(result)
131}
132
#[cfg(test)]
mod tests {
    use super::*;

    /// Small, already-tidy program shared by the idempotence and
    /// `--check` tests.
    const WELL_FORMED: &str = "persona Alice {\n  confidence_threshold: 0.85\n}\n";

    /// Formats `src` and asserts that `fragment` appears verbatim in
    /// the result.
    fn assert_survives(src: &str, fragment: &str) {
        let formatted = format_source(src).expect("lexes");
        assert!(formatted.contains(fragment));
    }

    #[test]
    fn fase39f_fmt_empty_source_returns_newline() {
        // Edge case: nothing to emit, so only the final \n remains.
        let formatted = format_source("").expect("empty source lexes");
        assert_eq!(formatted, "\n");
    }

    #[test]
    fn fase39f_fmt_trailing_whitespace_trimmed() {
        let formatted = format_source("persona Alice   \n").expect("lexes");
        assert!(formatted.contains("Alice"));
        assert!(!formatted.contains("Alice   "));
    }

    #[test]
    fn fase39f_fmt_multiple_trailing_newlines_collapsed() {
        let formatted = format_source("persona Alice\n\n\n\n").expect("lexes");
        // Exactly one newline at the end: present, but not doubled.
        assert!(formatted.ends_with('\n'));
        assert!(!formatted.ends_with("\n\n"));
    }

    #[test]
    fn fase39f_fmt_idempotent_on_well_formed_source() {
        let first = format_source(WELL_FORMED).expect("first pass");
        let second = format_source(&first).expect("second pass");
        assert_eq!(first, second, "format_source MUST be idempotent");
    }

    #[test]
    fn fase39f_fmt_line_comment_preserved() {
        assert_survives("persona Alice {\n  // a comment\n}\n", "// a comment");
    }

    #[test]
    fn fase39f_fmt_block_comment_preserved() {
        assert_survives("persona Alice {\n  /* block */\n}\n", "/* block */");
    }

    #[test]
    fn fase39f_fmt_doc_line_comment_preserved() {
        assert_survives("/// outer doc\npersona Alice {\n}\n", "/// outer doc");
    }

    #[test]
    fn fase39f_fmt_inner_doc_preserved() {
        assert_survives("//! inner doc\npersona Alice {\n}\n", "//! inner doc");
    }

    #[test]
    fn fase39f_fmt_lex_error_returns_err() {
        // Unterminated string literal — should fail to lex.
        let outcome = format_source("persona Alice { name: \"unclosed\n");
        assert!(outcome.is_err());
    }

    #[test]
    fn fase39f_fmt_check_mode_well_formed_returns_unchanged() {
        // The --check mode (handled by main.rs) compares formatted
        // vs original; for an already-formatted source the two may
        // differ only in trailing whitespace, which is ignored here.
        let formatted = format_source(WELL_FORMED).expect("lexes");
        assert_eq!(formatted.trim_end(), WELL_FORMED.trim_end());
    }
}