// alef-cli 0.16.36
//
// CLI for the alef polyglot binding generator
use std::collections::{BTreeSet, HashSet};
use std::fs;
use std::path::{Path, PathBuf};
use tracing::info;

/// Markers that indicate a file was auto-generated by alef.
///
/// `has_alef_header` treats a file as alef-generated when any of these strings
/// occurs (case-sensitive substring match) in the file's leading lines.
///
/// NOTE(review): `"DO NOT EDIT"` does not mention alef, so headers written by
/// other code generators presumably match as well — confirm that such files
/// can never live under a directory the cleanup walks.
const ALEF_HEADER_MARKERS: &[&str] = &[
    "auto-generated by alef",
    "AUTO-GENERATED by alef",
    "Generated by alef",
    "DO NOT EDIT",
];

/// Clean up orphan alef-generated files that are no longer in the current generation output.
///
/// Strategy: walk only the directories where the current run actually wrote files
/// (the parent dir of every entry in `current_gen_paths`). For each file in those
/// directories whose first lines contain an alef-generated header marker, if its
/// canonicalized absolute path is NOT in the current run's path set, delete it.
///
/// Walking only the parents of just-written files is what keeps the cleanup safe
/// when callers (e.g. `alef generate`) emit only a subset of categories: scaffold
/// dirs that the current run did not touch are never visited, so untouched files
/// in those dirs (e.g. user-customized package manifests) are preserved.
pub fn cleanup_orphaned_files(current_gen_paths: &HashSet<PathBuf>) -> anyhow::Result<usize> {
    if current_gen_paths.is_empty() {
        return Ok(0);
    }

    // Normalize current_gen_paths so the comparison below is consistent. canonicalize()
    // resolves `.` / `..` / symlinks. If a file does not exist (yet), fall back to the
    // raw absolute path. The set is what we compare against during the walk.
    let normalized: HashSet<PathBuf> = current_gen_paths
        .iter()
        .map(|p| p.canonicalize().unwrap_or_else(|_| p.clone()))
        .collect();

    // Collect the set of parent directories actually touched in this run.
    // Canonicalize so cross-platform path resolution (e.g. macOS /tmp vs.
    // /private/tmp symlinks) does not silently break the descend-check
    // comparisons in `cleanup_dir_recursive`, which always sees canonicalized
    // subdirectory paths.
    let touched_dirs: BTreeSet<PathBuf> = current_gen_paths
        .iter()
        .filter_map(|p| p.parent().map(|d| d.canonicalize().unwrap_or_else(|_| d.to_path_buf())))
        .collect();

    let mut removed_count = 0;
    let mut visited_dirs: HashSet<PathBuf> = HashSet::new();

    for dir in &touched_dirs {
        if !dir.exists() {
            continue;
        }
        let canonical_dir = dir.canonicalize().unwrap_or_else(|_| dir.clone());
        if !visited_dirs.insert(canonical_dir.clone()) {
            continue;
        }
        removed_count += cleanup_dir_recursive(&canonical_dir, &normalized, &touched_dirs)?;
    }

    Ok(removed_count)
}

/// Walk `dir` and remove orphan alef-generated files. Recurses into subdirectories
/// that are themselves touched, that contain a touched path, or that live beneath
/// a touched directory. The third clause catches orphans in subtrees a backend
/// previously owned but no longer writes to (e.g. the kotlin-android backend
/// dropped its `src/main/java/` Java DTO emit and left stale alef-marked Java
/// files behind). The alef-header gate in `has_alef_header` is the safety net
/// that prevents deletion of user-customised files.
fn cleanup_dir_recursive(
    dir: &Path,
    normalized_gen_paths: &HashSet<PathBuf>,
    touched_dirs: &BTreeSet<PathBuf>,
) -> anyhow::Result<usize> {
    let mut removed_count = 0;
    for entry in fs::read_dir(dir)? {
        let entry = entry?;
        let path = entry.path();

        if path.is_dir() {
            // Recurse if the subdirectory itself is touched, contains a touched
            // path, OR is a descendant of any touched dir. Combined with the
            // alef-header check below, this lets us sweep stale binding output
            // in subtrees that the current run no longer writes to without
            // touching user files.
            let canonical_sub = path.canonicalize().unwrap_or_else(|_| path.clone());
            let descend = touched_dirs
                .iter()
                .any(|td| td == &canonical_sub || td.starts_with(&canonical_sub) || canonical_sub.starts_with(td));
            if descend {
                removed_count += cleanup_dir_recursive(&path, normalized_gen_paths, touched_dirs)?;
            }
            continue;
        }

        if !has_alef_header(&path)? {
            continue;
        }

        let canonical_path = path.canonicalize().unwrap_or_else(|_| path.clone());
        if !normalized_gen_paths.contains(&canonical_path) {
            info!("Removing stale alef-generated file: {}", path.display());
            fs::remove_file(&path)?;
            removed_count += 1;
        }
    }

    Ok(removed_count)
}

/// Check if a file has an alef-generated header marker.
///
/// Only a bounded prefix of the file (at most 4 KiB) is read, and a marker from
/// `ALEF_HEADER_MARKERS` must appear within the first five lines of that
/// prefix. Bounding the read matters because the cleanup probes every file
/// beneath the touched directories, including large or binary ones.
///
/// Returns `Ok(false)` — never an error — when the file cannot be opened or
/// read, or when the inspected prefix is not valid UTF-8.
fn has_alef_header(path: &Path) -> anyhow::Result<bool> {
    /// Upper bound on how many bytes of each file are inspected.
    const HEADER_PROBE_BYTES: usize = 4096;
    /// Number of leading lines in which a marker must appear.
    const HEADER_PROBE_LINES: usize = 5;

    let file = match fs::File::open(path) {
        Ok(file) => file,
        // Unreadable entries are simply not candidates for deletion.
        Err(_) => return Ok(false),
    };

    let mut head = Vec::with_capacity(HEADER_PROBE_BYTES);
    let mut limited = std::io::Read::take(file, HEADER_PROBE_BYTES as u64);
    if std::io::Read::read_to_end(&mut limited, &mut head).is_err() {
        return Ok(false);
    }

    let text = match std::str::from_utf8(&head) {
        Ok(text) => text,
        // `error_len() == None` means the data merely ends in the middle of a
        // multi-byte character. When the prefix is full that is an artefact of
        // the cut-off, so the valid part in front of it is still usable.
        Err(err) if err.error_len().is_none() && head.len() == HEADER_PROBE_BYTES => {
            match std::str::from_utf8(&head[..err.valid_up_to()]) {
                Ok(prefix) => prefix,
                Err(_) => return Ok(false),
            }
        }
        // Genuinely invalid UTF-8 near the top: treat as binary and skip it.
        Err(_) => return Ok(false),
    };

    // Markers never contain a newline, so testing line by line is equivalent to
    // searching the joined leading lines.
    let found = text
        .lines()
        .take(HEADER_PROBE_LINES)
        .any(|line| ALEF_HEADER_MARKERS.iter().any(|marker| line.contains(marker)));

    Ok(found)
}

#[cfg(test)]
mod tests {
    use super::cleanup_orphaned_files;
    use std::collections::HashSet;
    use std::fs;
    use std::path::Path;

    /// Write `contents` to `target`, panicking with `what` if the write fails.
    fn write_fixture(target: &Path, contents: &str, what: &str) {
        fs::write(target, contents).expect(what);
    }

    /// A marked file missing from the generated set is deleted, while the
    /// marked file that is part of the set stays in place.
    #[test]
    fn cleanup_removes_orphan_with_generated_by_alef_header() {
        let scratch = tempfile::tempdir().expect("tempdir");
        let kotlin_pkg = scratch.path().join("packages/kotlin/src/main/kotlin/dev/demo");
        fs::create_dir_all(&kotlin_pkg).expect("create package dir");

        let kept = kotlin_pkg.join("GraphQLRouteConfig.kt");
        let orphan = kotlin_pkg.join("DefaultClient.kt");
        write_fixture(
            &kept,
            "// Generated by alef. Do not edit by hand.\n\nclass GraphQLRouteConfig\n",
            "write current file",
        );
        write_fixture(
            &orphan,
            "// Generated by alef. Do not edit by hand.\n\nclass GraphQLRouteConfig\n",
            "write stale file",
        );

        let mut generated = HashSet::new();
        generated.insert(kept.clone());

        let removed = cleanup_orphaned_files(&generated).expect("cleanup");

        assert_eq!(removed, 1);
        assert!(kept.exists());
        assert!(!orphan.exists());
    }

    /// Regression: orphans in a sibling subtree of a touched directory must be
    /// swept. This models the kotlin-android case where the backend wrote
    /// Java DTOs into `src/main/java/` in older versions, then dropped that
    /// emit but kept writing Kotlin to `src/main/kotlin/`. The stale Java
    /// orphans live in a subtree the current run never writes to, but they
    /// are descendants of the package root that IS touched (via
    /// `build.gradle.kts` etc.).
    #[test]
    fn cleanup_removes_orphan_in_sibling_subtree_of_touched_dir() {
        let scratch = tempfile::tempdir().expect("tempdir");
        let root = scratch.path().join("packages/kotlin-android");
        let kotlin_src = root.join("src/main/kotlin/dev/demo/android");
        let java_src = root.join("src/main/java/dev/demo");
        fs::create_dir_all(&kotlin_src).expect("create kotlin dir");
        fs::create_dir_all(&java_src).expect("create java dir");

        let gradle_file = root.join("build.gradle.kts");
        let bridge_file = kotlin_src.join("DemoBridge.kt");
        let orphan_java = java_src.join("CrawlEngineHandle.java");
        // Hand-written file in the same orphan subtree — must survive.
        let handwritten_java = java_src.join("UserCode.java");

        let fixtures = [
            (
                &gradle_file,
                "// Generated by alef. Do not edit by hand.\n\nplugins {}\n",
                "write build.gradle.kts",
            ),
            (
                &bridge_file,
                "// Generated by alef. Do not edit by hand.\n\nobject DemoBridge\n",
                "write bridge.kt",
            ),
            (
                &orphan_java,
                "// This file is auto-generated by alef — DO NOT EDIT.\n\npublic class CrawlEngineHandle {}\n",
                "write stale java",
            ),
            (
                &handwritten_java,
                "// hand-written\npublic class UserCode {}\n",
                "write user java",
            ),
        ];
        for (target, contents, what) in fixtures.iter() {
            write_fixture(target, contents, what);
        }

        let mut generated = HashSet::new();
        generated.insert(gradle_file.clone());
        generated.insert(bridge_file.clone());

        let removed = cleanup_orphaned_files(&generated).expect("cleanup");

        assert_eq!(removed, 1, "exactly the alef-marked orphan must be removed");
        assert!(gradle_file.exists(), "current build.gradle.kts must survive");
        assert!(bridge_file.exists(), "current bridge.kt must survive");
        assert!(!orphan_java.exists(), "stale java orphan must be removed");
        assert!(
            handwritten_java.exists(),
            "user-written java must survive (no alef header)"
        );
    }
}