// alef-e2e 0.16.2

//! Post-generation formatter support for e2e test projects.
//!
//! Reads formatter commands from `E2eConfig.format` and runs them for each
//! language that had files generated. The `{dir}` placeholder in the command
//! is replaced with the actual output directory.

use crate::config::E2eConfig;
use alef_core::backend::GeneratedFile;
use std::collections::HashSet;
use std::path::Path;
use tracing::warn;

/// Look up the built-in formatter command template for an e2e language.
///
/// The returned template is used when the language's directory was generated
/// and `e2e_config.format` carries no override for it. Every template contains
/// a `{dir}` placeholder that the caller replaces with the real output
/// directory before executing the command. Languages without a built-in
/// default yield `None`.
///
/// Built-in templates:
///
/// * `rust` — `(cd {dir} && cargo fmt --all)`. The e2e crate is a standalone
///   (non-workspace-member) crate, and `--manifest-path` is not a global cargo
///   flag (it is an unstable, cargo-fmt-only flag on nightly), so changing
///   into the crate directory is the portable way to format it. `Cargo.toml`
///   normalisation is deliberately left to the `normalize_rust_toml` post-pass
///   (taplo fmt) rather than cargo-sort: cargo-sort moves leading top-level
///   comments to the bottom of the file, which pushes the alef header (and its
///   `# alef:hash:` line) past the 10-line detection window and silently
///   breaks `alef verify`.
/// * `python` — `ruff check --fix {dir} && ruff format {dir}`. Lint autofixes
///   (unused imports, import sorting, TypeAlias annotations) run first, then
///   whitespace normalisation, so prek's `ruff check` and `ruff format` hooks
///   are both no-ops after generation.
/// * `node` — `pnpm dlx oxfmt {dir}`. Normalises the TypeScript test files so
///   prek's oxfmt hook is a no-op; otherwise hashes would be computed over raw
///   codegen output that prek later reformats, and `alef verify` would report
///   the files as stale.
/// * `wasm` — identical to `node`; the wasm e2e suite uses the same TypeScript
///   toolchain and has the same oxfmt normalisation requirements.
fn default_formatter(lang: &str) -> Option<&'static str> {
    const CARGO_FMT: &str = "(cd {dir} && cargo fmt --all)";
    const RUFF: &str = "ruff check --fix {dir} && ruff format {dir}";
    const OXFMT: &str = "pnpm dlx oxfmt {dir}";

    // (language name, command template) pairs; names are matched exactly.
    const DEFAULTS: &[(&str, &str)] = &[
        ("rust", CARGO_FMT),
        ("python", RUFF),
        ("node", OXFMT),
        ("wasm", OXFMT),
    ];

    DEFAULTS
        .iter()
        .find(|entry| entry.0 == lang)
        .map(|entry| entry.1)
}

/// Run a best-effort TOML normalization pass on the rust e2e crate's
/// `Cargo.toml` after the language formatter has finished.
///
/// Runs `taplo fmt Cargo.toml` with `dir` as the working directory so that
/// downstream `prek` setups that include a taplo hook produce no further
/// changes after `alef e2e generate`. Without this pass, prek would rewrite
/// the manifest (array wrapping, indentation) after `finalize_hashes` has
/// captured the pre-prek content, causing `alef verify` to report the file as
/// stale.
///
/// `cargo sort` is intentionally **not** invoked here — it relocates the
/// alef header comments to the bottom of the file, pushing both the
/// "auto-generated by alef" marker and the embedded `# alef:hash:` line past
/// the 10-line detection window used by `alef_core::hash::{extract_hash,
/// inject_hash_line}`. The result is silently broken verification (no hash
/// found, file treated as fresh) — strictly worse than the prek-rewrite the
/// invocation was meant to prevent. Consumers whose CI runs `cargo-sort`
/// must either exclude `e2e/**/Cargo.toml` from the hook or place the alef
/// header inside a `[package.metadata.alef]` section that cargo-sort
/// preserves.
///
/// Taplo is spawned directly (no intermediate shell), so `dir` is never
/// interpreted as shell syntax: paths containing spaces or shell
/// metacharacters are passed through verbatim as the working directory.
/// Taplo's stdout and stderr are discarded.
///
/// The pass is best-effort: a missing `taplo` binary, a missing directory, or
/// a non-zero exit status is ignored. This is intentional — alef cannot
/// assume a particular host toolchain, and the calling project's own CI is
/// responsible for enforcing that taplo is present when they ship the
/// corresponding prek hook.
fn normalize_rust_toml(dir: &str) {
    // Spawn failures (taplo not installed, `dir` missing) and non-zero exits
    // are deliberately discarded; see the doc comment above.
    let _ = std::process::Command::new("taplo")
        .args(["fmt", "Cargo.toml"])
        .current_dir(dir)
        .stdout(std::process::Stdio::null())
        .stderr(std::process::Stdio::null())
        .status();
}

/// Run per-language formatters for all languages that had files generated.
///
/// The set of languages is derived from `files`: every path that lies under
/// `e2e_config.effective_output()` contributes its first path component after
/// that prefix. Paths outside the output directory are ignored. Languages are
/// processed in sorted order so that execution order and log output are the
/// same on every run.
///
/// For each language the command template is taken from
/// `e2e_config.format[lang]` when present, otherwise from
/// [`default_formatter`] (built-in defaults exist for rust, python, node and
/// wasm). Languages with neither are skipped. The `{dir}` placeholder is
/// replaced with `{output}/{lang}` and the resulting command is executed via
/// `sh -c`. The substitution is textual and unquoted, so a custom template
/// must quote `{dir}` itself if the output path may contain whitespace or
/// shell metacharacters.
///
/// Failures (spawn errors and non-zero exit statuses) are logged as warnings
/// and do not abort the process. For `rust`, the `normalize_rust_toml`
/// post-pass runs afterwards regardless of which template was used and
/// regardless of whether the formatter succeeded.
pub fn run_formatters(files: &[GeneratedFile], e2e_config: &E2eConfig) {
    let output = e2e_config.effective_output();
    let output_prefix = Path::new(output);

    // E2e files are written to `{output}/{lang}/...`, so the first path
    // component after the output prefix is the language name.
    let discovered: HashSet<String> = files
        .iter()
        .filter_map(|file| {
            let relative = file.path.strip_prefix(output_prefix).ok()?;
            let first = relative.components().next()?;
            Some(first.as_os_str().to_string_lossy().into_owned())
        })
        .collect();

    // `HashSet` iteration order is unspecified; sort so formatters run (and
    // log) in a stable order across invocations.
    let mut languages: Vec<String> = discovered.into_iter().collect();
    languages.sort_unstable();

    for lang in &languages {
        // User override takes precedence; then built-in default; otherwise
        // there is nothing to run for this language.
        let cmd_template: &str = match e2e_config.format.get(lang.as_str()) {
            Some(custom) => custom.as_str(),
            None => match default_formatter(lang.as_str()) {
                Some(builtin) => builtin,
                None => continue,
            },
        };

        let dir = format!("{}/{}", output, lang);
        let cmd = cmd_template.replace("{dir}", &dir);

        eprintln!("  Formatting {lang}: {cmd}");
        let status = std::process::Command::new("sh").args(["-c", &cmd]).status();

        match status {
            Ok(s) if s.success() => {}
            Ok(s) => {
                warn!("Formatter for {lang} exited with {s}: {cmd}");
            }
            Err(e) => {
                warn!("Failed to run formatter for {lang}: {e}");
            }
        }

        // Rust-only TOML normalization pass: run taplo on the e2e crate's
        // Cargo.toml so that downstream prek hooks that include taplo produce
        // no further changes after generation. The user's
        // `e2e_config.format[rust]` override (if any) typically only covers
        // cargo fmt — which leaves `Cargo.toml` array wrapping at its raw
        // codegen layout. Without this pass, prek would rewrite the manifest
        // after `finalize_hashes` has captured pre-prek content, causing
        // `alef verify` to report `e2e/rust/Cargo.toml` as stale.
        if lang == "rust" {
            normalize_rust_toml(&dir);
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// The rust default must format from inside the crate directory and must
    /// not rely on `--manifest-path` or `cargo sort`.
    #[test]
    fn test_default_formatter_rust_uses_cd_into_dir() {
        let cmd = default_formatter("rust").expect("rust must have a default formatter");

        let enters_dir = cmd.contains("cd {dir}");
        let runs_cargo_fmt = cmd.contains("cargo fmt");
        assert!(
            enters_dir && runs_cargo_fmt,
            "rust formatter must cd into {{dir}} before invoking cargo fmt so it works on \
             standalone (non-workspace-member) e2e crates: {cmd}"
        );

        let uses_manifest_path = cmd.contains("--manifest-path");
        assert!(
            !uses_manifest_path,
            "rust formatter must not use --manifest-path; cargo-fmt does not accept it as a \
             global flag: {cmd}"
        );

        let has_placeholder = cmd.contains("{dir}");
        assert!(
            has_placeholder,
            "rust formatter must include {{dir}} placeholder: {cmd}"
        );

        // cargo-sort is banned from the default rust formatter because it
        // moves the alef header comments to the end of Cargo.toml. That puts
        // the `# alef:hash:` line outside the 10-line detection window and
        // silently breaks `alef verify`. TOML normalisation belongs to taplo,
        // via `normalize_rust_toml`.
        let runs_cargo_sort = cmd.contains("cargo sort");
        assert!(
            !runs_cargo_sort,
            "rust formatter must NOT run cargo sort — it scrambles the alef header \
             location in Cargo.toml. Use taplo via normalize_rust_toml instead: {cmd}"
        );
    }

    /// The python default must run lint autofixes as well as formatting.
    #[test]
    fn test_default_formatter_python_uses_ruff_check_and_format() {
        let cmd = default_formatter("python").expect("python must have a default formatter");

        let runs_check_fix = cmd.contains("ruff check --fix");
        assert!(
            runs_check_fix,
            "python formatter must run ruff check --fix before ruff format: {cmd}"
        );

        let runs_format = cmd.contains("ruff format");
        assert!(runs_format, "python formatter must run ruff format: {cmd}");

        let has_placeholder = cmd.contains("{dir}");
        assert!(
            has_placeholder,
            "python formatter must include {{dir}} placeholder: {cmd}"
        );
    }

    /// The node default must format with oxfmt.
    #[test]
    fn test_default_formatter_node_uses_oxfmt() {
        let cmd = default_formatter("node").expect("node must have a default formatter");

        let uses_oxfmt = cmd.contains("oxfmt");
        assert!(uses_oxfmt, "node formatter must use oxfmt: {cmd}");

        let has_placeholder = cmd.contains("{dir}");
        assert!(
            has_placeholder,
            "node formatter must include {{dir}} placeholder: {cmd}"
        );
    }

    /// The wasm default must format with oxfmt, like node.
    #[test]
    fn test_default_formatter_wasm_uses_oxfmt() {
        let cmd = default_formatter("wasm").expect("wasm must have a default formatter");

        let uses_oxfmt = cmd.contains("oxfmt");
        assert!(uses_oxfmt, "wasm formatter must use oxfmt: {cmd}");

        let has_placeholder = cmd.contains("{dir}");
        assert!(
            has_placeholder,
            "wasm formatter must include {{dir}} placeholder: {cmd}"
        );
    }

    /// Languages without a built-in default must yield `None`.
    #[test]
    fn test_default_formatter_unknown_lang_returns_none() {
        for lang in ["gleam", "zig", "java"] {
            assert!(default_formatter(lang).is_none());
        }
    }

    /// `normalize_rust_toml` is best-effort: missing tools and non-existent
    /// directories must NOT panic and must NOT abort the surrounding pipeline.
    /// Verified by pointing it at a path guaranteed not to exist.
    #[test]
    fn test_normalize_rust_toml_is_best_effort_on_missing_dir() {
        // Must return cleanly even though the directory does not exist: the
        // function discards every failure of its taplo invocation.
        normalize_rust_toml("/nonexistent/alef-e2e-test/dir");
    }
}