use std::collections::{BTreeSet, HashSet};
use std::fs;
use std::path::{Path, PathBuf};
use tracing::info;
/// Reports whether the file at `path` carries an alef hash.
///
/// The file is read in full as UTF-8 text and handed to
/// `alef_core::hash::extract_hash`; the result is `true` only when that call
/// yields a hash. Any read failure (missing file, directory, non-UTF-8
/// content, permission error) is treated as "no hash" and returns `false`.
pub(crate) fn has_alef_hash(path: &Path) -> bool {
    match fs::read_to_string(path) {
        Ok(text) => alef_core::hash::extract_hash(&text).is_some(),
        // Unreadable files are never considered alef-generated.
        Err(_) => false,
    }
}
/// Removes stale alef-generated files that the current run no longer produces.
///
/// The parent directory of every path in `current_gen_paths` becomes a scan
/// root. Each root is walked with `cleanup_dir_recursive`, which deletes files
/// that carry an alef hash (see `has_alef_hash`) but are not listed in
/// `current_gen_paths`. Files without an alef hash are never touched.
///
/// Paths are compared in canonical form; when canonicalization fails the path
/// is used exactly as given.
///
/// Returns the number of files removed. An empty `current_gen_paths` is a
/// no-op that returns `Ok(0)`.
///
/// NOTE(review): a bare relative file name (e.g. `foo.rs`) has an empty parent
/// path, which fails the `exists` check below, so its directory is not
/// scanned — confirm this is intended.
///
/// # Errors
///
/// Propagates any error returned by `cleanup_dir_recursive` (directory listing
/// or file removal failures).
pub fn cleanup_orphaned_files(current_gen_paths: &HashSet<PathBuf>) -> anyhow::Result<usize> {
    if current_gen_paths.is_empty() {
        return Ok(0);
    }

    // Canonical form of every file that must be kept.
    let normalized: HashSet<PathBuf> = current_gen_paths
        .iter()
        .map(|p| p.canonicalize().unwrap_or_else(|_| p.clone()))
        .collect();

    // Canonical parent directories of the kept files. The ordered set yields
    // an ancestor directory before any directory nested inside it.
    let touched_dirs: BTreeSet<PathBuf> = current_gen_paths
        .iter()
        .filter_map(|p| p.parent().map(|d| d.canonicalize().unwrap_or_else(|_| d.to_path_buf())))
        .collect();

    let mut removed_count = 0;
    // Roots that have already been walked recursively. A touched directory at
    // or below one of these was fully covered by that walk (the recursive walk
    // descends into every directory that is a prefix of a touched directory),
    // so walking it again would only re-read every file for no effect.
    let mut scanned_roots: Vec<PathBuf> = Vec::new();

    for dir in &touched_dirs {
        if !dir.exists() {
            continue;
        }
        let canonical_dir = dir.canonicalize().unwrap_or_else(|_| dir.clone());
        // `starts_with` is also true for equal paths, so this doubles as the
        // duplicate-root check.
        if scanned_roots.iter().any(|root| canonical_dir.starts_with(root)) {
            continue;
        }
        removed_count += cleanup_dir_recursive(&canonical_dir, &normalized, &touched_dirs)?;
        scanned_roots.push(canonical_dir);
    }

    Ok(removed_count)
}
/// Walks `dir` recursively and deletes alef-generated files that are not part
/// of the current generation run.
///
/// * `dir` – directory to scan.
/// * `normalized_gen_paths` – canonical paths of the files that must be kept.
/// * `touched_dirs` – canonical directories that received generated files; a
///   subdirectory is entered only when its canonical path is a prefix of, or
///   is prefixed by, one of these.
///
/// A file is removed only when `has_alef_hash` reports a hash for it and its
/// canonical path is absent from `normalized_gen_paths`.
///
/// Each canonical directory is walked at most once per call, so directory
/// symlinks that point back into the scanned tree cannot cause repeated or
/// runaway traversal.
///
/// Returns the number of files removed.
///
/// # Errors
///
/// Fails if a directory cannot be listed, an entry cannot be read, or a stale
/// file cannot be removed.
fn cleanup_dir_recursive(
    dir: &Path,
    normalized_gen_paths: &HashSet<PathBuf>,
    touched_dirs: &BTreeSet<PathBuf>,
) -> anyhow::Result<usize> {
    let mut visited_dirs: HashSet<PathBuf> = HashSet::new();
    // Seed with the root so a symlink pointing back at it is not followed.
    visited_dirs.insert(dir.canonicalize().unwrap_or_else(|_| dir.to_path_buf()));
    cleanup_dir_walk(dir, normalized_gen_paths, touched_dirs, &mut visited_dirs)
}

/// Recursive worker for `cleanup_dir_recursive`; `visited_dirs` holds the
/// canonical path of every directory already entered during this walk.
fn cleanup_dir_walk(
    dir: &Path,
    normalized_gen_paths: &HashSet<PathBuf>,
    touched_dirs: &BTreeSet<PathBuf>,
    visited_dirs: &mut HashSet<PathBuf>,
) -> anyhow::Result<usize> {
    let mut removed_count = 0;

    for entry in fs::read_dir(dir)? {
        let entry = entry?;
        let path = entry.path();

        if path.is_dir() {
            let canonical_sub = path.canonicalize().unwrap_or_else(|_| path.clone());
            // `starts_with` compares whole components and is true for equal
            // paths, so no separate equality test is needed.
            let descend = touched_dirs
                .iter()
                .any(|td| td.starts_with(&canonical_sub) || canonical_sub.starts_with(td));
            // `insert` returns false when this canonical directory was already
            // walked (e.g. reached earlier through a symlink or its real path).
            if descend && visited_dirs.insert(canonical_sub) {
                removed_count += cleanup_dir_walk(&path, normalized_gen_paths, touched_dirs, visited_dirs)?;
            }
            continue;
        }

        // Only files that carry an alef hash are candidates for removal.
        if !has_alef_hash(&path) {
            continue;
        }

        let canonical_path = path.canonicalize().unwrap_or_else(|_| path.clone());
        if !normalized_gen_paths.contains(&canonical_path) {
            info!("Removing stale alef-generated file: {}", path.display());
            fs::remove_file(&path)?;
            removed_count += 1;
        }
    }

    Ok(removed_count)
}
#[cfg(test)]
mod tests {
    use super::{cleanup_orphaned_files, has_alef_hash};
    use std::collections::HashSet;
    use std::fs;

    /// 64-character hex string used as the hash value in `alef:hash:` headers.
    const TEST_HASH: &str = "0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef";

    /// A file with an alef hash header that is not in the current set is removed;
    /// the current file in the same directory is kept.
    #[test]
    fn cleanup_removes_orphan_with_alef_hash_header() {
        let tempdir = tempfile::tempdir().expect("tempdir");
        let package_dir = tempdir.path().join("packages/kotlin/src/main/kotlin/dev/demo");
        fs::create_dir_all(&package_dir).expect("create package dir");

        let current_file = package_dir.join("GraphQLRouteConfig.kt");
        let stale_file = package_dir.join("DefaultClient.kt");
        let alef_header = format!("// alef:hash:{TEST_HASH}\n\n");
        fs::write(&current_file, format!("{alef_header}class GraphQLRouteConfig\n")).expect("write current file");
        fs::write(&stale_file, format!("{alef_header}class DefaultClient\n")).expect("write stale file");

        let current_gen_paths = HashSet::from([current_file.clone()]);
        let removed = cleanup_orphaned_files(&current_gen_paths).expect("cleanup");

        assert_eq!(removed, 1);
        assert!(current_file.exists());
        assert!(!stale_file.exists());
    }

    /// A file that only has a generic "DO NOT EDIT" marker (no alef hash line)
    /// must never be deleted.
    #[test]
    fn cleanup_preserves_file_with_loose_marker_but_no_hash() {
        let tempdir = tempfile::tempdir().expect("tempdir");
        let package_dir = tempdir.path().join("packages/go/include");
        fs::create_dir_all(&package_dir).expect("create dir");

        let vendored = package_dir.join("kreuzcrawl.h");
        fs::write(
            &vendored,
            "// DO NOT EDIT — generated by cgo. See CGO_ENABLED.\n#ifndef KREUZCRAWL_H\n#define KREUZCRAWL_H\n#endif\n",
        )
        .expect("write vendored header");

        let alef_file = package_dir.join("bindings.go");
        fs::write(&alef_file, format!("// alef:hash:{TEST_HASH}\npackage main\n")).expect("write alef file");

        let current_gen_paths = HashSet::from([alef_file.clone()]);
        let removed = cleanup_orphaned_files(&current_gen_paths).expect("cleanup");

        assert_eq!(removed, 0, "vendored file without alef:hash must not be deleted");
        assert!(vendored.exists(), "vendored cgo header must survive");
        assert!(alef_file.exists(), "current alef file must survive");
    }

    /// `has_alef_hash` matches the `alef:hash:` line and ignores loose markers.
    #[test]
    fn has_alef_hash_detects_hash_line() {
        let tempdir = tempfile::tempdir().expect("tempdir");
        let with_hash = tempdir.path().join("with_hash.rs");
        let without_hash = tempdir.path().join("without_hash.rs");

        fs::write(&with_hash, format!("// alef:hash:{TEST_HASH}\nfn main() {{}}\n")).expect("write");
        fs::write(
            &without_hash,
            "// auto-generated by alef\n// DO NOT EDIT\nfn main() {}\n",
        )
        .expect("write");

        assert!(has_alef_hash(&with_hash), "must detect alef:hash: line");
        assert!(!has_alef_hash(&without_hash), "must not match loose markers");
    }

    /// An orphan in a subtree that received no current files is still removed
    /// when an ancestor directory did receive one; hand-written files survive.
    #[test]
    fn cleanup_removes_orphan_in_sibling_subtree_of_touched_dir() {
        let tempdir = tempfile::tempdir().expect("tempdir");
        let package_root = tempdir.path().join("packages/kotlin-android");
        let kotlin_dir = package_root.join("src/main/kotlin/dev/demo/android");
        let java_dir = package_root.join("src/main/java/dev/demo");
        fs::create_dir_all(&kotlin_dir).expect("create kotlin dir");
        fs::create_dir_all(&java_dir).expect("create java dir");

        let alef_header = format!("// alef:hash:{TEST_HASH}\n");
        let build_gradle = package_root.join("build.gradle.kts");
        let bridge_kt = kotlin_dir.join("DemoBridge.kt");
        let stale_java = java_dir.join("CrawlEngineHandle.java");
        let user_java = java_dir.join("UserCode.java");

        fs::write(&build_gradle, format!("{alef_header}plugins {{}}\n")).expect("write build.gradle.kts");
        fs::write(&bridge_kt, format!("{alef_header}object DemoBridge\n")).expect("write bridge.kt");
        fs::write(
            &stale_java,
            format!("{alef_header}public class CrawlEngineHandle {{}}\n"),
        )
        .expect("write stale java");
        fs::write(&user_java, "// hand-written\npublic class UserCode {}\n").expect("write user java");

        let current_gen_paths = HashSet::from([build_gradle.clone(), bridge_kt.clone()]);
        let removed = cleanup_orphaned_files(&current_gen_paths).expect("cleanup");

        assert_eq!(removed, 1, "exactly the alef-marked orphan must be removed");
        assert!(build_gradle.exists(), "current build.gradle.kts must survive");
        assert!(bridge_kt.exists(), "current bridge.kt must survive");
        assert!(!stale_java.exists(), "stale java orphan must be removed");
        assert!(user_java.exists(), "user-written java must survive (no alef hash)");
    }
}