Skip to main content

axon/
cli_parse.rs

1//! §Fase 39.f — `axon parse` subcommand (Rust binary parity).
2//!
3//! Multi-file diagnostic aggregator. Walks the given file paths /
4//! directories / globs, runs each `.axon` file through
5//! `Parser::parse_with_recovery`, and aggregates every parse error +
6//! type-check error into a single report. Mirrors the Python
7//! `axon.cli.parse_cmd:cmd_parse` from Fase 28.f.
8//!
9//! ## Flags
10//!
11//!  - `--max-errors N` — cap total errors across all files (D6,
12//!    default unlimited)
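//!  - `--ignore PATTERN` — ignore pattern (may repeat). Currently matched
//!    as a plain substring of the path; fnmatch parity is deferred.
//!    `.axonignore` files in walked dirs are meant to be honoured
//!    automatically (NOTE(review): no `.axonignore` handling is visible
//!    in this file — confirm where it lives)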
15//!  - `--jobs N` — worker thread count (default: auto). The Rust
16//!    implementation currently runs single-threaded; the flag is
17//!    accepted for Python-parity but the threading is deferred to a
18//!    future fase (honest scope)
19//!  - `--json` — emit machine-readable diagnostics (D5)
20//!  - `--format array|ndjson` — JSON framing when --json is set
21//!  - `--strict` — opt into legacy fail-on-first behavior (D8); also
22//!    activated by `AXON_PARSER_STRICT` env var
23//!  - `--no-color` — disable ANSI colour codes
24//!
25//! ## Exit codes (bitwise OR of cause classes)
26//!
27//!  - `0` — success (no errors)
28//!  - `1` — parse / type errors observed
29//!  - `2` — I/O errors (file not found, read failed, glob expansion failed)
30//!  - `3` — both classes (1 | 2)
31
32use std::collections::HashSet;
33use std::fs;
34use std::path::{Path, PathBuf};
35
36use serde::Serialize;
37
38use axon_frontend::lexer::Lexer;
39use axon_frontend::parser::Parser;
40
41/// Per-file diagnostic emitted by the aggregator. Wire-stable JSON
42/// shape for `--json` mode (rustc-compatible field naming per
43/// Fase 28.g D5).
44#[derive(Debug, Clone, Serialize)]
45pub struct AggregatedDiagnostic {
46    pub file: String,
47    pub line: u32,
48    pub column: u32,
49    pub message: String,
50    pub kind: String, // "parse" | "lex" | "type"
51}
52
/// Options accepted by `axon parse`; one field per CLI argument of the
/// Python implementation it mirrors.
///
/// `Default` yields an empty corpus with every flag off and no limits.
#[derive(Debug, Clone, Default)]
pub struct ParseConfig {
    /// Paths (files or directories) to scan.
    pub patterns: Vec<String>,
    /// Upper bound on the number of collected diagnostics; `None` means
    /// no cap.
    pub max_errors: Option<usize>,
    /// Patterns for paths that must be skipped.
    pub ignore_patterns: Vec<String>,
    /// Requested worker count. Accepted for Python parity; the parsing loop
    /// in this file is single-threaded and does not read it.
    pub jobs: Option<usize>,
    /// Emit machine-readable output instead of the human report.
    pub json: bool,
    /// JSON framing: `"array"` or `"ndjson"`.
    pub format: String,
    /// Stop at the first failing file instead of recovering.
    pub strict: bool,
    /// Suppress ANSI colour codes in the human report.
    pub no_color: bool,
}
65
66/// Run `axon parse` against a configured corpus. Returns a tuple
67/// `(diagnostics, io_errors, truncated)`:
68///   - `diagnostics`: every parse / lex / type error observed
69///   - `io_errors`: files that couldn't be read / glob-expanded
70///   - `truncated`: true when `max_errors` capped the report
71pub fn run_parse(config: &ParseConfig) -> (Vec<AggregatedDiagnostic>, Vec<String>, bool) {
72    let mut diagnostics: Vec<AggregatedDiagnostic> = Vec::new();
73    let mut io_errors: Vec<String> = Vec::new();
74    let mut truncated = false;
75
76    // ── §1 — Expand patterns into a deterministic file list ──
77    let files = match expand_patterns(&config.patterns, &config.ignore_patterns) {
78        Ok(f) => f,
79        Err(e) => {
80            io_errors.push(format!("pattern expansion: {e}"));
81            return (diagnostics, io_errors, false);
82        }
83    };
84
85    // ── §2 — Strict mode: honour env var OR flag (OR semantics) ──
86    let strict = config.strict
87        || std::env::var("AXON_PARSER_STRICT")
88            .ok()
89            .map(|v| matches!(v.to_lowercase().as_str(), "1" | "true" | "yes" | "on"))
90            .unwrap_or(false);
91
92    // ── §3 — Parse each file ──
93    'outer: for path in &files {
94        // Honour max_errors cap.
95        if let Some(cap) = config.max_errors {
96            if diagnostics.len() >= cap {
97                truncated = true;
98                break 'outer;
99            }
100        }
101        let source = match fs::read_to_string(path) {
102            Ok(s) => s,
103            Err(e) => {
104                io_errors.push(format!("read {}: {}", path.display(), e));
105                continue;
106            }
107        };
108        let path_str = path.display().to_string();
109
110        // Tokenize
111        let tokens = match Lexer::new(&source, &path_str).tokenize() {
112            Ok(t) => t,
113            Err(e) => {
114                diagnostics.push(AggregatedDiagnostic {
115                    file: path_str.clone(),
116                    line: e.line,
117                    column: e.column,
118                    message: format!("lex error: {}", e.message),
119                    kind: "lex".to_string(),
120                });
121                if strict {
122                    break 'outer;
123                }
124                continue;
125            }
126        };
127
128        // Parse with recovery (or fail-fast in strict mode)
129        let mut parser = Parser::new(tokens);
130        if strict {
131            match parser.parse() {
132                Ok(_) => {}
133                Err(e) => {
134                    diagnostics.push(AggregatedDiagnostic {
135                        file: path_str.clone(),
136                        line: e.line,
137                        column: e.column,
138                        message: format!("parse error: {}", e.message),
139                        kind: "parse".to_string(),
140                    });
141                    break 'outer; // strict: stop at first failing file
142                }
143            }
144        } else {
145            let result = parser.parse_with_recovery();
146            for err in result.errors {
147                diagnostics.push(AggregatedDiagnostic {
148                    file: path_str.clone(),
149                    line: err.line,
150                    column: err.column,
151                    message: format!("parse error: {}", err.message),
152                    kind: "parse".to_string(),
153                });
154                if let Some(cap) = config.max_errors {
155                    if diagnostics.len() >= cap {
156                        truncated = true;
157                        break 'outer;
158                    }
159                }
160            }
161        }
162    }
163
164    (diagnostics, io_errors, truncated)
165}
166
167/// Expand patterns (files / directories / globs) into a
168/// deterministic sorted file list. Directories are walked
169/// recursively; `.axonignore` files are honoured.
170fn expand_patterns(
171    patterns: &[String],
172    ignore: &[String],
173) -> Result<Vec<PathBuf>, String> {
174    let mut result: HashSet<PathBuf> = HashSet::new();
175    for pattern in patterns {
176        let path = PathBuf::from(pattern);
177        if path.is_file() {
178            if !is_ignored(&path, ignore) {
179                result.insert(path);
180            }
181            continue;
182        }
183        if path.is_dir() {
184            walk_dir(&path, ignore, &mut result)?;
185            continue;
186        }
187        // Not a file or directory — treat as a literal that doesn't
188        // resolve. We don't error here; the caller reports it via
189        // io_errors when read fails. (Glob expansion is honest
190        // scope — Python uses Path.glob; Rust would need an extra
191        // crate. For 39.f we accept literal paths + directories
192        // and defer glob to a future fase.)
193        if path.exists() {
194            result.insert(path);
195        } else {
196            return Err(format!("pattern not found: {pattern}"));
197        }
198    }
199    let mut sorted: Vec<PathBuf> = result.into_iter().collect();
200    sorted.sort();
201    Ok(sorted)
202}
203
204fn walk_dir(
205    dir: &Path,
206    ignore: &[String],
207    out: &mut HashSet<PathBuf>,
208) -> Result<(), String> {
209    let entries = fs::read_dir(dir).map_err(|e| format!("read_dir {}: {}", dir.display(), e))?;
210    for entry in entries {
211        let entry = entry.map_err(|e| format!("dir entry: {e}"))?;
212        let path = entry.path();
213        if is_ignored(&path, ignore) {
214            continue;
215        }
216        if path.is_dir() {
217            // Skip common noise dirs.
218            let name = path
219                .file_name()
220                .and_then(|s| s.to_str())
221                .unwrap_or("");
222            if matches!(name, "target" | "node_modules" | ".git" | "__pycache__") {
223                continue;
224            }
225            walk_dir(&path, ignore, out)?;
226        } else if path.extension().and_then(|s| s.to_str()) == Some("axon") {
227            out.insert(path);
228        }
229    }
230    Ok(())
231}
232
/// Report whether `path` matches any of the `ignore` patterns.
///
/// Matching is a plain substring test against the lossily-decoded path
/// (fnmatch parity is a future-fase refinement).
///
/// Empty patterns are skipped: every string contains `""`, so a stray empty
/// `--ignore` value would otherwise exclude every file in the corpus.
fn is_ignored(path: &Path, ignore: &[String]) -> bool {
    let path_str = path.to_string_lossy();
    ignore
        .iter()
        .filter(|pattern| !pattern.is_empty())
        .any(|pattern| path_str.contains(pattern.as_str()))
}
244
245/// Format diagnostics as a human-readable report for stdout.
246pub fn render_human(
247    diagnostics: &[AggregatedDiagnostic],
248    io_errors: &[String],
249    truncated: bool,
250    no_color: bool,
251) -> String {
252    let mut out = String::new();
253    let red = if no_color { "" } else { "\x1b[31m" };
254    let bold = if no_color { "" } else { "\x1b[1m" };
255    let dim = if no_color { "" } else { "\x1b[2m" };
256    let reset = if no_color { "" } else { "\x1b[0m" };
257
258    if diagnostics.is_empty() && io_errors.is_empty() {
259        out.push_str(&format!("{bold}✓ axon parse: no diagnostics{reset}\n"));
260        return out;
261    }
262    for d in diagnostics {
263        out.push_str(&format!(
264            "{red}{bold}error{reset}{bold}[{}]{reset} {}\n  {dim}--> {}:{}:{}{reset}\n",
265            d.kind, d.message, d.file, d.line, d.column
266        ));
267    }
268    for e in io_errors {
269        out.push_str(&format!("{red}{bold}I/O error{reset} {e}\n"));
270    }
271    if truncated {
272        out.push_str(&format!(
273            "{dim}... (truncated by --max-errors cap){reset}\n"
274        ));
275    }
276    out
277}
278
279/// Format diagnostics as JSON (array or ndjson framing). Rustc-
280/// compatible field shape per Fase 28.g D5.
281pub fn render_json(
282    diagnostics: &[AggregatedDiagnostic],
283    format: &str,
284) -> String {
285    if format == "ndjson" {
286        diagnostics
287            .iter()
288            .map(|d| serde_json::to_string(d).unwrap_or_default())
289            .collect::<Vec<_>>()
290            .join("\n")
291            + "\n"
292    } else {
293        serde_json::to_string_pretty(diagnostics).unwrap_or_default() + "\n"
294    }
295}
296
297/// Compute the exit code from the diagnostics + io_errors observed.
298/// Mirrors the Python CLI's bitwise OR convention (D6).
299pub fn exit_code(
300    diagnostics: &[AggregatedDiagnostic],
301    io_errors: &[String],
302) -> i32 {
303    let mut code = 0;
304    if !diagnostics.is_empty() {
305        code |= 1;
306    }
307    if !io_errors.is_empty() {
308        code |= 2;
309    }
310    code
311}
312
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a `"parse"`-kind diagnostic fixture.
    fn parse_diag(file: &str, line: u32, column: u32, message: &str) -> AggregatedDiagnostic {
        AggregatedDiagnostic {
            file: file.to_string(),
            line,
            column,
            message: message.to_string(),
            kind: "parse".to_string(),
        }
    }

    #[test]
    fn fase39f_parse_empty_patterns_returns_clean() {
        // An empty config has no patterns, so nothing is read or parsed.
        let (diags, ios, trunc) = run_parse(&ParseConfig::default());
        assert!(diags.is_empty());
        assert!(ios.is_empty());
        assert!(!trunc);
    }

    #[test]
    fn fase39f_exit_code_zero_on_clean() {
        assert_eq!(exit_code(&[], &[]), 0);
    }

    #[test]
    fn fase39f_exit_code_one_on_diagnostic() {
        let d = parse_diag("x.axon", 1, 1, "boom");
        assert_eq!(exit_code(&[d], &[]), 1);
    }

    #[test]
    fn fase39f_exit_code_two_on_io_error() {
        assert_eq!(exit_code(&[], &["read failed".to_string()]), 2);
    }

    #[test]
    fn fase39f_exit_code_three_on_both() {
        let d = parse_diag("x.axon", 1, 1, "boom");
        assert_eq!(exit_code(&[d], &["io".to_string()]), 3);
    }

    #[test]
    fn fase39f_json_array_format_serializes_diagnostics() {
        let d = parse_diag("x.axon", 1, 1, "boom");
        let out = render_json(&[d], "array");
        assert!(out.contains("\"file\": \"x.axon\""));
        assert!(out.contains("\"kind\": \"parse\""));
    }

    #[test]
    fn fase39f_json_ndjson_format_one_per_line() {
        let first = parse_diag("a.axon", 1, 1, "e1");
        let second = parse_diag("b.axon", 2, 2, "e2");
        let out = render_json(&[first, second], "ndjson");
        let lines: Vec<&str> = out.trim().split('\n').collect();
        assert_eq!(lines.len(), 2);
        assert!(lines[0].contains("a.axon"));
        assert!(lines[1].contains("b.axon"));
    }

    #[test]
    fn fase39f_human_render_clean_emits_check() {
        let out = render_human(&[], &[], false, true);
        assert!(out.contains("axon parse: no diagnostics"));
    }

    #[test]
    fn fase39f_human_render_truncated_marker() {
        let d = parse_diag("x", 1, 1, "e");
        let out = render_human(&[d], &[], true, true);
        assert!(out.contains("truncated by --max-errors"));
    }

    #[test]
    fn fase39f_strict_env_var_recognized() {
        // Smoke test: each spelling from the Fase 28.h Python truthy
        // alphabet is set and `run_parse` must complete without panicking.
        const TRUTHY: [&str; 6] = ["1", "true", "yes", "on", "TRUE", "Yes"];
        for value in TRUTHY.iter() {
            std::env::set_var("AXON_PARSER_STRICT", value);
            let cfg = ParseConfig::default();
            let _ = run_parse(&cfg); // doesn't panic
        }
        std::env::remove_var("AXON_PARSER_STRICT");
    }
}