use crate::core::backend::GeneratedFile;
use crate::core::config::{Language, ResolvedCrateConfig};
use crate::core::hash;
use crate::core::ir::{ApiSurface, TypeRef};
use crate::core::validation::{ValidatedApiSurface, ValidationCode, ValidationDiagnostic, ValidationSeverity};
use anyhow::Context as _;
use base64::Engine;
use rayon::prelude::*;
use std::path::Path;
use tracing::{debug, info};
use crate::cli::cache;
use crate::cli::registry;
/// Generates bindings for each requested language and records a per-language cache hash.
///
/// The API surface is validated first; validation failures abort the whole run.
/// A language is skipped when `clean` is false and the cache reports its hash
/// (derived from the serialized IR, the language name and the serialized config)
/// as already stored. Skipped languages do not appear in the returned list.
///
/// # Errors
///
/// Fails when validation fails, when the IR or config cannot be serialized,
/// when a backend fails to generate bindings, or when the language hash cannot
/// be written.
pub fn generate(
    api: &ApiSurface,
    config: &ResolvedCrateConfig,
    languages: &[Language],
    clean: bool,
) -> anyhow::Result<Vec<(Language, Vec<GeneratedFile>)>> {
    let validated_api = validate_generation_api(api, config, languages)?;

    // Only a warning: generation still proceeds without FFI in the list.
    if !languages.contains(&Language::Ffi) {
        let ffi_dependent = languages
            .iter()
            .copied()
            .filter(|lang| matches!(lang, Language::Go | Language::Java | Language::Csharp));
        for lang in ffi_dependent {
            tracing::warn!(
                "Language {:?} requires FFI to be in the languages list for proper code generation",
                lang
            );
        }
    }

    // Both serialized forms feed the per-language cache key.
    let ir_json = serde_json::to_string(api)?;
    let config_toml =
        toml::to_string(config).with_context(|| "failed to serialize resolved crate config for cache key")?;

    // Stage 1: decide which languages actually need regeneration.
    let pending: Vec<_> = languages
        .par_iter()
        .filter_map(|&lang| {
            let lang_str = lang.to_string();
            let lang_hash = cache::compute_lang_hash(&ir_json, &lang_str, &config_toml);
            let up_to_date = !clean && cache::is_lang_cached(&config.name, &lang_str, &lang_hash);
            if up_to_date {
                debug!(" {}: cached, skipping", lang_str);
                None
            } else {
                Some((lang, lang_str, lang_hash))
            }
        })
        .collect();

    // Stage 2: run the backends and persist the hash together with the output paths.
    pending
        .par_iter()
        .map(|(lang, lang_str, lang_hash)| {
            let backend = registry::get_backend(*lang);
            info!(" {}: generating...", lang_str);
            let files = backend
                .generate_bindings_checked(validated_api, config)
                .with_context(|| format!("failed to generate bindings for {lang_str}"))?;
            let base_dir = std::env::current_dir().unwrap_or_default();
            let output_paths: Vec<std::path::PathBuf> =
                files.iter().map(|file| base_dir.join(&file.path)).collect();
            cache::write_lang_hash(&config.name, lang_str, lang_hash, &output_paths)
                .with_context(|| format!("failed to write language hash for {lang_str}"))?;
            Ok((*lang, files))
        })
        .collect::<anyhow::Result<Vec<_>>>()
}
/// Generates type stubs for each requested language.
///
/// Languages without a registered backend contribute nothing, and languages
/// whose backend produced no stub files are dropped from the result.
///
/// # Errors
///
/// Fails when validation fails or when any backend's stub generation fails.
pub fn generate_stubs(
    api: &ApiSurface,
    config: &ResolvedCrateConfig,
    languages: &[Language],
) -> anyhow::Result<Vec<(Language, Vec<GeneratedFile>)>> {
    let validated_api = validate_generation_api(api, config, languages)?;

    let mut per_language = languages
        .par_iter()
        .map(|&lang| -> anyhow::Result<(Language, Vec<GeneratedFile>)> {
            let files = match registry::try_get_backend(lang) {
                Some(backend) => backend.generate_type_stubs_checked(validated_api, config)?,
                None => Vec::new(),
            };
            Ok((lang, files))
        })
        .collect::<anyhow::Result<Vec<_>>>()?;

    // Keep only languages that actually produced stubs.
    per_language.retain(|(_, files)| !files.is_empty());
    Ok(per_language)
}
/// Generates service API files for every requested language whose backend
/// advertises `supports_service_api`.
///
/// Returns an empty list when the validated API declares no services.
/// Languages whose backend produced no files are dropped from the result.
///
/// # Errors
///
/// Fails when validation fails or when any backend's service API generation fails.
pub fn generate_service_api(
    api: &ApiSurface,
    config: &ResolvedCrateConfig,
    languages: &[Language],
) -> anyhow::Result<Vec<(Language, Vec<GeneratedFile>)>> {
    let validated_api = validate_generation_api(api, config, languages)?;
    if validated_api.api().services.is_empty() {
        return Ok(Vec::new());
    }

    let mut per_language = languages
        .par_iter()
        .copied()
        .filter(|lang| match registry::try_get_backend(*lang) {
            Some(backend) => backend.capabilities().supports_service_api,
            None => false,
        })
        .map(|lang| -> anyhow::Result<(Language, Vec<GeneratedFile>)> {
            let files = registry::get_backend(lang).generate_service_api_checked(validated_api, config)?;
            Ok((lang, files))
        })
        .collect::<anyhow::Result<Vec<_>>>()?;

    per_language.retain(|(_, files)| !files.is_empty());
    Ok(per_language)
}
/// Generates public API files for each requested language.
///
/// Languages without a registered backend contribute nothing, and languages
/// whose backend produced no files are dropped from the result.
///
/// # Errors
///
/// Fails when validation fails or when any backend's public API generation fails.
pub fn generate_public_api(
    api: &ApiSurface,
    config: &ResolvedCrateConfig,
    languages: &[Language],
) -> anyhow::Result<Vec<(Language, Vec<GeneratedFile>)>> {
    let validated_api = validate_generation_api(api, config, languages)?;

    let mut per_language = languages
        .par_iter()
        .map(|&lang| -> anyhow::Result<(Language, Vec<GeneratedFile>)> {
            let files = match registry::try_get_backend(lang) {
                Some(backend) => backend.generate_public_api_checked(validated_api, config)?,
                None => Vec::new(),
            };
            Ok((lang, files))
        })
        .collect::<anyhow::Result<Vec<_>>>()?;

    per_language.retain(|(_, files)| !files.is_empty());
    Ok(per_language)
}
fn validate_generation_api<'a>(
api: &'a ApiSurface,
config: &ResolvedCrateConfig,
languages: &[Language],
) -> anyhow::Result<ValidatedApiSurface<'a>> {
let bridged_trait_names: ahash::AHashSet<&str> = config
.trait_bridges
.iter()
.map(|bridge| bridge.trait_name.as_str())
.collect();
let validation_report =
crate::core::validation::validate_api_surface_with_bridged_traits(api, &bridged_trait_names);
let language_diagnostics = language_backend_readiness_diagnostics(api, config, languages);
for diagnostic in validation_report.warnings() {
tracing::warn!("{diagnostic}");
}
for diagnostic in language_diagnostics
.iter()
.filter(|diagnostic| diagnostic.severity == ValidationSeverity::Warning)
{
tracing::warn!("{diagnostic}");
}
let fatal: Vec<_> = validation_report
.errors()
.filter(|diagnostic| {
crate::core::validation::is_critical_unsuppressible(diagnostic.code)
|| !config
.suppress_validation_codes
.iter()
.any(|code| code == &diagnostic.code.to_string())
})
.collect();
let fatal_language_diagnostics: Vec<_> = language_diagnostics
.iter()
.filter(|diagnostic| diagnostic.severity == ValidationSeverity::Error)
.collect();
for diagnostic in validation_report.errors().filter(|diagnostic| {
!crate::core::validation::is_critical_unsuppressible(diagnostic.code)
&& config
.suppress_validation_codes
.iter()
.any(|code| code == &diagnostic.code.to_string())
}) {
tracing::warn!("[suppressed] {diagnostic}");
}
if !fatal.is_empty() || !fatal_language_diagnostics.is_empty() {
let formatted = fatal
.iter()
.copied()
.chain(fatal_language_diagnostics.iter().copied())
.map(|diagnostic| {
let path = diagnostic
.item_path
.as_deref()
.map(|p| format!(" item `{p}`"))
.unwrap_or_default();
format!("- [{}]{path} {}", diagnostic.code, diagnostic.reason)
})
.collect::<Vec<_>>()
.join("\n");
anyhow::bail!("{formatted}");
}
ValidatedApiSurface::new_with_bridged_traits(api, &config.suppress_validation_codes, &bridged_trait_names)
.map_err(|report| anyhow::anyhow!(report.format_errors()))
}
/// Collects the language-specific readiness diagnostics: service API capability
/// checks first, followed by FFI JSON return checks.
fn language_backend_readiness_diagnostics(
    api: &ApiSurface,
    config: &ResolvedCrateConfig,
    languages: &[Language],
) -> Vec<ValidationDiagnostic> {
    let mut collected = service_api_capability_diagnostics(api, config, languages);
    collected.extend(ffi_json_return_diagnostics(api, config, languages));
    collected
}
/// Emits an error diagnostic for each requested language that needs service API
/// generation but whose registered backend does not support it.
///
/// Produces nothing when the API has no services. Languages without a
/// registered backend are ignored here.
fn service_api_capability_diagnostics(
    api: &ApiSurface,
    config: &ResolvedCrateConfig,
    languages: &[Language],
) -> Vec<ValidationDiagnostic> {
    let mut diagnostics = Vec::new();
    if api.services.is_empty() {
        return diagnostics;
    }

    for &language in languages {
        if !service_api_requested_for_language(api, config, language) {
            continue;
        }
        let Some(backend) = registry::try_get_backend(language) else {
            continue;
        };
        if backend.capabilities().supports_service_api {
            continue;
        }
        diagnostics.push(ValidationDiagnostic {
            severity: ValidationSeverity::Error,
            code: ValidationCode::UnsupportedBackendCapability,
            crate_name: config.name.clone(),
            language: Some(language),
            item_path: Some("service_api".to_string()),
            reason: format!(
                "configured services require service API generation, but backend `{}` does not support it",
                backend.name()
            ),
            suggested_fix: "remove the language from this generation run, opt it out in service config, or implement service API support for the backend".to_string(),
        });
    }
    diagnostics
}
/// Returns true when at least one service in `api` applies to `language`.
///
/// A service applies when it has no matching entry in `config.services`
/// (matched by `owner_type`), or when its entry does not list the language in
/// `skip_languages`.
fn service_api_requested_for_language(api: &ApiSurface, config: &ResolvedCrateConfig, language: Language) -> bool {
    let language_name = language.to_string();
    api.services.iter().any(|service| {
        let matching_config = config
            .services
            .iter()
            .find(|service_config| service_config.owner_type == service.name);
        match matching_config {
            Some(service_config) => !service_config.skip_languages.contains(&language_name),
            None => true,
        }
    })
}
fn ffi_json_return_diagnostics(
api: &ApiSurface,
config: &ResolvedCrateConfig,
languages: &[Language],
) -> Vec<ValidationDiagnostic> {
let readiness_languages: Vec<_> = languages
.iter()
.copied()
.filter(|language| ffi_json_return_readiness_applies(*language))
.collect();
if readiness_languages.is_empty() {
return Vec::new();
}
let mut diagnostics = Vec::new();
for function in &api.functions {
if function.binding_excluded {
continue;
}
if non_serde_named_in_ffi_json_return(api, &function.return_type) {
for language in &readiness_languages {
diagnostics.push(ffi_json_return_diagnostic(
config,
*language,
&format!("function {}", function.name),
&function.return_type,
));
}
}
}
for typ in &api.types {
if typ.binding_excluded {
continue;
}
for method in &typ.methods {
if method.binding_excluded {
continue;
}
if non_serde_named_in_ffi_json_return(api, &method.return_type) {
for language in &readiness_languages {
diagnostics.push(ffi_json_return_diagnostic(
config,
*language,
&format!("method {}.{}", typ.name, method.name),
&method.return_type,
));
}
}
}
}
diagnostics
}
/// Returns true for the languages subject to the FFI JSON return readiness check.
fn ffi_json_return_readiness_applies(language: Language) -> bool {
    match language {
        Language::Ffi
        | Language::Go
        | Language::Java
        | Language::Jni
        | Language::Csharp
        | Language::KotlinAndroid
        | Language::Swift
        | Language::R
        | Language::Zig => true,
        _ => false,
    }
}
/// Returns true when `ty` is a `Vec` or `Map` (optionally wrapped in
/// `Optional`) that contains a named type lacking serde support.
///
/// Any other top-level shape, including a bare named type, yields false.
fn non_serde_named_in_ffi_json_return(api: &ApiSurface, ty: &TypeRef) -> bool {
    // Look through optional wrappers first.
    if let TypeRef::Optional(inner) = ty {
        return non_serde_named_in_ffi_json_return(api, inner);
    }
    match ty {
        TypeRef::Vec(element) => named_lacks_serde(api, element),
        TypeRef::Map(key, value) => named_lacks_serde(api, key) || named_lacks_serde(api, value),
        _ => false,
    }
}
/// Returns true when `ty` is, or contains, a named type or enum that is known
/// to `api` and has `has_serde == false`.
///
/// Types are looked up before enums. Names that match neither are treated as
/// having serde support (false).
fn named_lacks_serde(api: &ApiSurface, ty: &TypeRef) -> bool {
    match ty {
        TypeRef::Named(name) => api
            .types
            .iter()
            .find(|typ| typ.name == *name)
            .map(|typ| !typ.has_serde)
            .or_else(|| {
                api.enums
                    .iter()
                    .find(|enum_def| enum_def.name == *name)
                    .map(|enum_def| !enum_def.has_serde)
            })
            .unwrap_or(false),
        TypeRef::Map(key, value) => named_lacks_serde(api, key) || named_lacks_serde(api, value),
        TypeRef::Optional(inner) | TypeRef::Vec(inner) => named_lacks_serde(api, inner),
        _ => false,
    }
}
/// Builds the error diagnostic reported when an item's return type cannot be
/// marshalled for FFI-dependent generation.
fn ffi_json_return_diagnostic(
    config: &ResolvedCrateConfig,
    language: Language,
    item_path: &str,
    return_type: &TypeRef,
) -> ValidationDiagnostic {
    let reason = format!(
        "FFI-dependent generation cannot safely marshal return type `{}` because a nested named type lacks serde metadata",
        type_ref_label(return_type)
    );
    let suggested_fix = String::from(
        "derive Serialize/Deserialize on the named return type, expose a binding-safe DTO, or exclude/bridge the item explicitly",
    );
    ValidationDiagnostic {
        severity: ValidationSeverity::Error,
        code: ValidationCode::BackendStubPath,
        crate_name: config.name.clone(),
        language: Some(language),
        item_path: Some(item_path.to_owned()),
        reason,
        suggested_fix,
    }
}
/// Renders a human-readable label for a type reference, e.g. `Vec<Payload>`
/// or `Map<K, V>`. Shapes without a dedicated rendering use their `Debug` output.
fn type_ref_label(ty: &TypeRef) -> String {
    // Shared rendering for single-parameter wrappers.
    let wrapped = |outer: &str, inner: &TypeRef| format!("{outer}<{}>", type_ref_label(inner));
    match ty {
        TypeRef::Named(name) => name.clone(),
        TypeRef::Vec(inner) => wrapped("Vec", inner),
        TypeRef::Optional(inner) => wrapped("Option", inner),
        TypeRef::Map(key, value) => {
            let key_label = type_ref_label(key);
            let value_label = type_ref_label(value);
            format!("Map<{key_label}, {value_label}>")
        }
        other => format!("{other:?}"),
    }
}
// Tests for the language readiness checks that `validate_generation_api` runs
// before any backend is invoked.
#[cfg(test)]
mod validation_tests {
use super::*;
use crate::core::config::service::ServiceConfig;
use crate::core::ir::{MethodDef, ServiceDef, TypeDef};
// Builds a static, synchronous, parameterless method with the given name and return type.
fn method_def(name: &str, return_type: TypeRef) -> MethodDef {
MethodDef {
name: name.to_string(),
params: Vec::new(),
return_type,
is_async: false,
is_static: true,
error_type: None,
doc: String::new(),
receiver: None,
sanitized: false,
trait_source: None,
returns_ref: false,
returns_cow: false,
return_newtype_wrapper: None,
has_default_impl: false,
binding_excluded: false,
binding_exclusion_reason: None,
}
}
// A function returning `Vec<Payload>` where `Payload` has `has_serde: false`
// must be rejected when generating for the FFI language.
#[test]
fn ffi_dependent_generation_rejects_vec_named_return_without_serde_metadata() {
let api = ApiSurface {
crate_name: "sample-lib".to_string(),
types: vec![TypeDef {
name: "Payload".to_string(),
rust_path: "sample_lib::Payload".to_string(),
has_serde: false,
..TypeDef::default()
}],
functions: vec![crate::core::ir::FunctionDef {
name: "list_payloads".to_string(),
rust_path: "sample_lib::list_payloads".to_string(),
original_rust_path: String::new(),
params: Vec::new(),
return_type: TypeRef::Vec(Box::new(TypeRef::Named("Payload".to_string()))),
is_async: false,
error_type: None,
doc: String::new(),
cfg: None,
sanitized: false,
return_sanitized: false,
returns_ref: false,
returns_cow: false,
return_newtype_wrapper: None,
binding_excluded: false,
binding_exclusion_reason: None,
}],
..ApiSurface::default()
};
let config = ResolvedCrateConfig {
name: "sample-lib".to_string(),
..ResolvedCrateConfig::default()
};
let error = validate_generation_api(&api, &config, &[Language::Ffi]).expect_err("missing serde must fail");
// The message must carry both the diagnostic code and the offending item path.
assert!(
error.to_string().contains("backend_stub_path") && error.to_string().contains("function list_payloads"),
"expected FFI backend-readiness error, got {error}"
);
}
// A configured service (no skipped languages) combined with the KotlinAndroid
// language is expected to fail with an unsupported-capability error.
#[test]
fn service_api_generation_rejects_selected_backend_without_capability() {
let api = ApiSurface {
crate_name: "sample-lib".to_string(),
types: vec![TypeDef {
name: "App".to_string(),
rust_path: "sample_lib::App".to_string(),
..TypeDef::default()
}],
services: vec![ServiceDef {
name: "App".to_string(),
rust_path: "sample_lib::App".to_string(),
constructor: method_def("new", TypeRef::Named("App".to_string())),
configurators: Vec::new(),
registrations: Vec::new(),
entrypoints: Vec::new(),
doc: String::new(),
cfg: None,
}],
..ApiSurface::default()
};
let config = ResolvedCrateConfig {
name: "sample-lib".to_string(),
services: vec![ServiceConfig {
owner_type: "App".to_string(),
constructor: Some("new".to_string()),
configurators: Vec::new(),
registrations: Vec::new(),
entrypoints: Vec::new(),
skip_languages: Vec::new(),
host_app_inner_accessor: None,
}],
..ResolvedCrateConfig::default()
};
let error = validate_generation_api(&api, &config, &[Language::KotlinAndroid])
.expect_err("unsupported service backend must fail");
assert!(
error.to_string().contains("unsupported_backend_capability")
&& error.to_string().contains("kotlin_android"),
"expected unsupported backend capability error, got {error}"
);
}
}
#[cfg(unix)]
fn apply_shebang_chmod(path: &std::path::Path, content: &str) -> anyhow::Result<()> {
use std::os::unix::fs::PermissionsExt;
if content.starts_with("#!") {
let perms = std::fs::Permissions::from_mode(0o755);
std::fs::set_permissions(path, perms).with_context(|| format!("failed to chmod 755 {}", path.display()))?;
}
Ok(())
}
#[cfg(not(unix))]
fn apply_shebang_chmod(_path: &std::path::Path, _content: &str) -> anyhow::Result<()> {
Ok(())
}
/// Writes every generated file under `base_dir`, creating parent directories first.
///
/// `.jar` files are base64-decoded and written as binary. All other files are
/// normalized via `normalize_content`. A file is left untouched when its
/// on-disk content already matches (for text files, ignoring the hash line);
/// shebang permissions are still applied in that case.
///
/// Returns the total number of files considered, including unchanged ones.
///
/// # Errors
///
/// Fails on directory creation, base64 decoding, write, or chmod errors.
pub fn write_files(files: &[(Language, Vec<GeneratedFile>)], base_dir: &Path) -> anyhow::Result<usize> {
    let all_files: Vec<&GeneratedFile> = files.iter().flat_map(|(_, lang_files)| lang_files.iter()).collect();

    // Create each distinct parent directory once, up front, before the parallel writes.
    let parent_dirs: std::collections::BTreeSet<std::path::PathBuf> = all_files
        .iter()
        .filter_map(|file| base_dir.join(&file.path).parent().map(Path::to_path_buf))
        .collect();
    for dir in &parent_dirs {
        std::fs::create_dir_all(dir).with_context(|| format!("failed to create directory {}", dir.display()))?;
    }

    all_files.par_iter().try_for_each(|file| -> anyhow::Result<()> {
        let full_path = base_dir.join(&file.path);

        if full_path.extension().is_some_and(|ext| ext == "jar") {
            let binary_content = base64::engine::general_purpose::STANDARD
                .decode(&file.content)
                .with_context(|| format!("failed to decode base64 for {}", full_path.display()))?;
            let unchanged = std::fs::read(&full_path).is_ok_and(|existing| existing == binary_content);
            if unchanged {
                debug!(" unchanged: {}", full_path.display());
                return Ok(());
            }
            std::fs::write(&full_path, &binary_content)
                .with_context(|| format!("failed to write binary file {}", full_path.display()))?;
            debug!(" wrote: {}", full_path.display());
            return Ok(());
        }

        let normalized = normalize_content(&full_path, &file.content);
        // Compare bodies without the hash line so a differing hash alone does not force a rewrite.
        let unchanged = std::fs::read_to_string(&full_path).is_ok_and(|existing| {
            crate::core::hash::strip_hash_line(&existing) == crate::core::hash::strip_hash_line(&normalized)
        });
        if unchanged {
            apply_shebang_chmod(&full_path, &normalized)?;
            debug!(" unchanged: {}", full_path.display());
        } else {
            std::fs::write(&full_path, &normalized)
                .with_context(|| format!("failed to write generated file {}", full_path.display()))?;
            apply_shebang_chmod(&full_path, &normalized)?;
            debug!(" wrote: {}", full_path.display());
        }
        Ok(())
    })?;

    Ok(all_files.len())
}
/// Rewrites the hash line of each alef-marked file in `paths` to the inputs hash
/// computed from `sources_hash` and `alef_toml_bytes`.
///
/// Files that cannot be read as UTF-8 text, or that lack an alef marker in
/// their first ten lines, are skipped. Files whose content would not change
/// are not rewritten.
///
/// Returns the number of files actually rewritten.
///
/// # Errors
///
/// Fails when a file that needs updating cannot be written.
pub fn finalize_hashes(
    paths: &std::collections::HashSet<std::path::PathBuf>,
    sources_hash: &str,
    alef_toml_bytes: &[u8],
) -> anyhow::Result<usize> {
    use std::sync::atomic::{AtomicUsize, Ordering};

    const MARKERS: [&str; 2] = ["auto-generated by alef", "Generated by alef"];

    let inputs_hash = hash::compute_inputs_hash(sources_hash, alef_toml_bytes);
    let rewritten = AtomicUsize::new(0);

    paths.par_iter().try_for_each(|path| -> anyhow::Result<()> {
        let Ok(content) = std::fs::read_to_string(path) else {
            return Ok(());
        };
        let has_marker = content
            .lines()
            .take(10)
            .any(|line| MARKERS.iter().any(|marker| line.contains(*marker)));
        if !has_marker {
            return Ok(());
        }

        let stripped = hash::strip_hash_line(&content);
        let final_content = hash::inject_hash_line(&stripped, &inputs_hash);
        if final_content != content {
            std::fs::write(path, &final_content)
                .with_context(|| format!("failed to finalize hash for {}", path.display()))?;
            rewritten.fetch_add(1, Ordering::Relaxed);
        }
        Ok(())
    })?;

    Ok(rewritten.into_inner())
}
pub fn diff_files(files: &[(Language, Vec<GeneratedFile>)], base_dir: &Path) -> anyhow::Result<Vec<String>> {
let all_items: Vec<_> = files
.iter()
.flat_map(|(lang, lang_files)| lang_files.iter().map(move |f| (*lang, f)))
.collect();
let diffs: Vec<String> = all_items
.par_iter()
.filter_map(|(lang, file)| {
let full_path = base_dir.join(&file.path);
let existing = std::fs::read_to_string(&full_path).unwrap_or_default();
let is_rust = file.path.extension().is_some_and(|ext| ext == "rs");
let generated = normalize_content(&file.path, &file.content);
let on_disk = if is_rust {
format_rust_content(&full_path, &existing)
} else {
existing
};
let on_disk_body = hash::strip_hash_line(&on_disk);
if normalize_whitespace(&on_disk_body) != normalize_whitespace(&generated) {
Some(format!("[{lang}] {}", file.path.display()))
} else {
None
}
})
.collect();
Ok(diffs)
}
/// Normalizes generated content for `path`.
///
/// `.rs` content is first passed through `format_rust_content`. The result is
/// then whitespace-normalized, using the Markdown blank-line policy for `.md` files.
pub fn normalize_content(path: &Path, content: &str) -> String {
    let extension = path.extension();
    let is_markdown = extension.is_some_and(|ext| ext == "md");
    if extension.is_some_and(|ext| ext == "rs") {
        let formatted = format_rust_content(path, content);
        normalize_whitespace_with_policy(&formatted, is_markdown)
    } else {
        normalize_whitespace_with_policy(content, is_markdown)
    }
}
/// Whitespace-normalizes `content` using the non-Markdown policy
/// (`is_markdown = false`) of `normalize_whitespace_with_policy`.
fn normalize_whitespace(content: &str) -> String {
normalize_whitespace_with_policy(content, false)
}
/// Normalizes whitespace line by line.
///
/// - Trailing whitespace is removed from every line.
/// - Runs of blank lines are capped at one (Markdown) or two (everything else).
/// - The result ends with exactly one newline.
///
/// Empty input yields an empty string.
fn normalize_whitespace_with_policy(content: &str, is_markdown: bool) -> String {
    if content.is_empty() {
        return String::new();
    }

    let blank_limit: usize = if is_markdown { 1 } else { 2 };
    let mut out = String::with_capacity(content.len());
    let mut blank_run = 0usize;

    for raw_line in content.lines() {
        let line = raw_line.trim_end();
        blank_run = if line.is_empty() { blank_run + 1 } else { 0 };
        if blank_run > blank_limit {
            // Excess blank line within a run: drop it.
            continue;
        }
        out.push_str(line);
        out.push('\n');
    }

    // Collapse any trailing newlines down to a single one.
    let body_len = out.trim_end_matches('\n').len();
    out.truncate(body_len);
    out.push('\n');
    out
}
/// Produces scaffold files for the given API, config and languages.
///
/// Thin wrapper that forwards all arguments to `crate::scaffold::scaffold`
/// and returns its result unchanged.
pub fn scaffold(
api: &ApiSurface,
config: &ResolvedCrateConfig,
languages: &[Language],
) -> anyhow::Result<Vec<GeneratedFile>> {
crate::scaffold::scaffold(api, config, languages)
}
/// Produces README files for the given API, config and languages.
///
/// Thin wrapper that forwards all arguments to `crate::readme::generate_readmes`
/// and returns its result unchanged.
pub fn readme(
api: &ApiSurface,
config: &ResolvedCrateConfig,
languages: &[Language],
) -> anyhow::Result<Vec<GeneratedFile>> {
crate::readme::generate_readmes(api, config, languages)
}
/// Writes scaffold files under `base_dir` with `overwrite` set to `false`.
///
/// See `write_scaffold_files_with_overwrite` for the skip and write rules.
pub fn write_scaffold_files(files: &[GeneratedFile], base_dir: &Path) -> anyhow::Result<usize> {
write_scaffold_files_with_overwrite(files, base_dir, false)
}
/// Writes scaffold files under `base_dir` and returns how many were written.
///
/// An existing file is skipped when `overwrite` is false and the file does not
/// carry a generated header. `.jar` files are base64-decoded and written as
/// binary; other files are normalized via `normalize_content`. Files whose
/// on-disk content already matches (for text, ignoring the hash line) are not
/// rewritten and not counted, though shebang permissions are still applied.
///
/// # Errors
///
/// Fails on directory creation, base64 decoding, write, or chmod errors.
pub fn write_scaffold_files_with_overwrite(
    files: &[GeneratedFile],
    base_dir: &Path,
    overwrite: bool,
) -> anyhow::Result<usize> {
    let mut count = 0;

    for file in files {
        let full_path = base_dir.join(&file.path);

        // User-owned scaffold files are created once and then left alone.
        if !overwrite && !file.generated_header && full_path.exists() {
            debug!(" skipped (already exists): {}", full_path.display());
            continue;
        }

        if let Some(parent) = full_path.parent() {
            std::fs::create_dir_all(parent)
                .with_context(|| format!("failed to create directory {}", parent.display()))?;
        }

        if full_path.extension().is_some_and(|ext| ext == "jar") {
            let binary_content = base64::engine::general_purpose::STANDARD
                .decode(&file.content)
                .with_context(|| format!("failed to decode base64 for {}", full_path.display()))?;
            if std::fs::read(&full_path).is_ok_and(|existing| existing == binary_content) {
                debug!(" unchanged: {}", full_path.display());
                continue;
            }
            std::fs::write(&full_path, &binary_content)
                .with_context(|| format!("failed to write binary file {}", full_path.display()))?;
            count += 1;
            debug!(" wrote (binary): {}", full_path.display());
            continue;
        }

        let normalized = normalize_content(&full_path, &file.content);
        let unchanged = std::fs::read_to_string(&full_path).is_ok_and(|existing| {
            crate::core::hash::strip_hash_line(&existing) == crate::core::hash::strip_hash_line(&normalized)
        });
        if !unchanged {
            std::fs::write(&full_path, &normalized)
                .with_context(|| format!("failed to write generated file {}", full_path.display()))?;
        }
        apply_shebang_chmod(&full_path, &normalized)?;
        if unchanged {
            debug!(" unchanged: {}", full_path.display());
        } else {
            count += 1;
            debug!(" wrote: {}", full_path.display());
        }
    }

    Ok(count)
}
/// Deletes alef-owned files under `roots` that are not listed in `keep`.
///
/// A file counts as alef-owned when it can be read as UTF-8 text and
/// `crate::core::hash::extract_hash` finds a hash in it. Directories named in
/// the skip list below are never descended into. After the sweep, the parent
/// directory of each removed file is removed if `remove_dir` succeeds on it.
///
/// Unreadable directories, entries whose type cannot be determined, and files
/// that fail to delete are skipped rather than reported as errors.
///
/// Returns the number of files removed.
pub fn sweep_orphans(
roots: &[std::path::PathBuf],
keep: &std::collections::HashSet<std::path::PathBuf>,
) -> anyhow::Result<usize> {
// Ownership test: readable as text and carrying an extractable alef hash.
fn is_alef_owned(path: &std::path::Path) -> bool {
let Ok(content) = std::fs::read_to_string(path) else {
return false;
};
crate::core::hash::extract_hash(&content).is_some()
}
let mut removed = 0usize;
// Parents of deleted files; candidates for directory cleanup afterwards.
let mut touched_dirs: std::collections::BTreeSet<std::path::PathBuf> = std::collections::BTreeSet::new();
for root in roots {
if !root.exists() {
continue;
}
// Iterative depth-first walk using an explicit stack.
let mut stack = vec![root.clone()];
while let Some(dir) = stack.pop() {
let entries = match std::fs::read_dir(&dir) {
Ok(it) => it,
Err(_) => continue,
};
for entry in entries.flatten() {
let path = entry.path();
let file_type = match entry.file_type() {
Ok(ft) => ft,
Err(_) => continue,
};
if file_type.is_dir() {
let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
// Directories with these names are never entered.
if matches!(
name,
".git"
| "target"
| "node_modules"
| "vendor"
| "_build"
| "deps"
| ".venv"
| "venv"
| "build"
| "dist"
| "Pods"
) {
continue;
}
stack.push(path);
continue;
}
if !file_type.is_file() {
continue;
}
// `keep` is matched on the exact path produced by joining the walked directory and entry name.
if keep.contains(&path) {
continue;
}
if !is_alef_owned(&path) {
continue;
}
if let Err(err) = std::fs::remove_file(&path) {
debug!(" sweep skip (remove failed): {} ({err})", path.display());
continue;
}
debug!(" swept orphan: {}", path.display());
if let Some(parent) = path.parent() {
touched_dirs.insert(parent.to_path_buf());
}
removed += 1;
}
}
}
// Deepest directories first; `remove_dir` errors are deliberately ignored.
let mut dirs: Vec<_> = touched_dirs.into_iter().collect();
dirs.sort_by_key(|p| std::cmp::Reverse(p.components().count()));
for dir in dirs {
let _ = std::fs::remove_dir(&dir);
}
if removed > 0 {
info!("Swept {removed} orphan generated file(s)");
}
Ok(removed)
}
/// Collects the paths of all alef-owned files under `root`.
///
/// A file counts as alef-owned when it can be read as UTF-8 text and
/// `crate::core::hash::extract_hash` finds a hash in it. Directories named in
/// `SKIPPED_DIRS` are not descended into. A missing root yields an empty set;
/// unreadable directories and entries are silently skipped.
pub fn collect_alef_headered_paths(root: &std::path::Path) -> std::collections::HashSet<std::path::PathBuf> {
    const SKIPPED_DIRS: &[&str] = &[
        ".git",
        "target",
        "node_modules",
        "vendor",
        "_build",
        "deps",
        ".venv",
        "venv",
        "build",
        "dist",
        "Pods",
    ];

    fn is_alef_owned(path: &std::path::Path) -> bool {
        std::fs::read_to_string(path).is_ok_and(|content| crate::core::hash::extract_hash(&content).is_some())
    }

    let mut found = std::collections::HashSet::new();
    if !root.exists() {
        return found;
    }

    // Iterative walk with an explicit work list.
    let mut pending = vec![root.to_path_buf()];
    while let Some(dir) = pending.pop() {
        let Ok(entries) = std::fs::read_dir(&dir) else {
            continue;
        };
        for entry in entries.flatten() {
            let path = entry.path();
            let Ok(kind) = entry.file_type() else {
                continue;
            };
            if kind.is_dir() {
                let skip = {
                    let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
                    SKIPPED_DIRS.contains(&name)
                };
                if !skip {
                    pending.push(path);
                }
            } else if kind.is_file() && is_alef_owned(&path) {
                found.insert(path);
            }
        }
    }
    found
}
/// Determines the Rust edition to use for `path`.
///
/// Starting at `path` itself (if it is a directory) or its parent, walks up the
/// ancestors until a `Cargo.toml` file is found and returns the `[package]`
/// edition parsed from it. The search stops at the first manifest found.
/// Falls back to `"2024"` when no manifest exists, when it cannot be read, or
/// when it declares no parsable edition.
fn detect_crate_edition(path: &Path) -> String {
    const DEFAULT_EDITION: &str = "2024";

    let start = if path.is_dir() { Some(path) } else { path.parent() };
    let Some(start) = start else {
        return DEFAULT_EDITION.to_string();
    };

    for dir in start.ancestors() {
        let manifest = dir.join("Cargo.toml");
        if !manifest.is_file() {
            continue;
        }
        // Nearest manifest wins, even if it yields no edition.
        return std::fs::read_to_string(&manifest)
            .ok()
            .and_then(|text| parse_package_edition(&text))
            .unwrap_or_else(|| DEFAULT_EDITION.to_string());
    }

    DEFAULT_EDITION.to_string()
}
/// Extracts the `edition` value from the `[package]` table of a `Cargo.toml`.
///
/// This is a lightweight line scanner, not a full TOML parser. It recognises
/// `edition = "2021"`, `edition = '2021'` and a bare `edition = 2021`, and
/// tolerates a trailing `# comment` on both the table header and the key line.
///
/// Returns `None` when there is no `[package]` table, when the key is absent,
/// or when the value is not exactly four ASCII digits (for example
/// `edition.workspace = true`).
fn parse_package_edition(toml_text: &str) -> Option<String> {
    let mut in_package = false;
    for line in toml_text.lines() {
        // Drop any trailing comment. Edition values never contain `#`, so a
        // plain split is sufficient for the lines this scanner cares about.
        let trimmed = line.split('#').next().unwrap_or("").trim();
        if trimmed.starts_with('[') {
            in_package = trimmed == "[package]";
            continue;
        }
        if !in_package {
            continue;
        }
        // Requiring `=` right after the key rejects `edition.workspace` and
        // keys that merely start with `edition`.
        let Some(value) = trimmed
            .strip_prefix("edition")
            .and_then(|rest| rest.trim_start().strip_prefix('='))
        else {
            continue;
        };
        // Accept basic ("...") and literal ('...') TOML strings.
        let value = value.trim().trim_matches(|c| c == '"' || c == '\'');
        if value.len() == 4 && value.chars().all(|c| c.is_ascii_digit()) {
            return Some(value.to_string());
        }
    }
    None
}
/// Formats Rust source by piping it through an external `rustfmt` process.
///
/// The edition comes from `detect_crate_edition(path)` and the current working
/// directory is passed as `--config-path`. Formatting is best-effort: if
/// `rustfmt` cannot be spawned, exits unsuccessfully, or emits non-UTF-8
/// output, the original `content` is returned unchanged.
pub fn format_rust_content(path: &Path, content: &str) -> String {
    use std::io::Write;
    use std::process::{Command, Stdio};

    let edition = detect_crate_edition(path);
    let config_dir = std::env::current_dir().unwrap_or_default();

    let mut command = Command::new("rustfmt");
    command
        .arg("--edition")
        .arg(&edition)
        .arg("--config-path")
        .arg(&config_dir)
        .stdin(Stdio::piped())
        .stdout(Stdio::piped())
        .stderr(Stdio::piped());

    let mut child = match command.spawn() {
        Ok(child) => child,
        Err(e) => {
            debug!("rustfmt not available: {e}");
            return content.to_string();
        }
    };

    // The stdin handle is dropped at the end of this block, closing the pipe
    // before we wait for output. A failed write is ignored here.
    if let Some(mut stdin) = child.stdin.take() {
        let _ = stdin.write_all(content.as_bytes());
    }

    let output = match child.wait_with_output() {
        Ok(output) => output,
        Err(e) => {
            debug!("rustfmt process error: {e}");
            return content.to_string();
        }
    };
    if !output.status.success() {
        debug!("rustfmt failed: {}", String::from_utf8_lossy(&output.stderr));
        return content.to_string();
    }
    String::from_utf8(output.stdout).unwrap_or_else(|_| content.to_string())
}
#[cfg(test)]
mod write_scaffold_normalize_tests {
use super::*;
use crate::core::backend::GeneratedFile;
use std::path::PathBuf;
fn make_file(name: &str, content: &str) -> GeneratedFile {
GeneratedFile {
path: PathBuf::from(name),
content: content.to_owned(),
generated_header: false,
}
}
#[test]
fn test_scaffold_write_normalizes_trailing_whitespace_and_newline() {
let dir = tempfile::tempdir().expect("tempdir");
let base = dir.path();
let content = "line one \nline two\n\n";
let files = vec![make_file("out.py", content)];
write_scaffold_files_with_overwrite(&files, base, true).expect("write ok");
let written = std::fs::read_to_string(base.join("out.py")).expect("read ok");
assert_eq!(
written, "line one\nline two\n",
"trailing whitespace must be stripped and single newline ensured"
);
}
#[test]
fn test_scaffold_write_adds_missing_trailing_newline() {
let dir = tempfile::tempdir().expect("tempdir");
let base = dir.path();
let files = vec![make_file("out.gleam", "pub fn main() {}")];
write_scaffold_files_with_overwrite(&files, base, true).expect("write ok");
let written = std::fs::read_to_string(base.join("out.gleam")).expect("read ok");
assert!(
written.ends_with('\n'),
"file must end with newline, got: {:?}",
written
);
}
#[test]
fn test_scaffold_write_does_not_add_double_trailing_newline() {
let dir = tempfile::tempdir().expect("tempdir");
let base = dir.path();
let files = vec![make_file("out.zig", "const x = 1;\n")];
write_scaffold_files_with_overwrite(&files, base, true).expect("write ok");
let written = std::fs::read_to_string(base.join("out.zig")).expect("read ok");
assert!(!written.ends_with("\n\n"), "must not have double trailing newline");
assert!(written.ends_with('\n'));
}
#[test]
fn test_normalize_content_strips_trailing_whitespace_when_rustfmt_fails() {
let path = PathBuf::from("packages/r/src/rust/src/lib.rs");
let content = "extendr_module! {\n fn convert(\n \n title: String = \"\",\n );\n}\n";
let normalized = normalize_content(&path, content);
for (i, line) in normalized.lines().enumerate() {
assert_eq!(
line.trim_end(),
line,
"line {i} has trailing whitespace after normalize: {line:?}"
);
}
assert!(normalized.ends_with('\n'), "must end with newline");
}
#[test]
fn test_sweep_orphans_removes_only_alef_marked_files_outside_keep_set() {
let dir = tempfile::tempdir().expect("tempdir");
let base = dir.path();
let nested = base.join("e2e/elixir/test");
std::fs::create_dir_all(&nested).expect("mkdir");
let alef_marker = "# This file is auto-generated by alef — DO NOT EDIT.\n# alef:hash:abc\n";
let kept = nested.join("keep_test.exs");
let orphan = nested.join("orphan_test.exs");
let user_owned = nested.join("user_helper.exs");
std::fs::write(&kept, format!("{alef_marker}defmodule Keep do\nend\n")).unwrap();
std::fs::write(&orphan, format!("{alef_marker}defmodule Orphan do\nend\n")).unwrap();
std::fs::write(&user_owned, "defmodule UserHelper do\nend\n").unwrap();
let mut keep = std::collections::HashSet::new();
keep.insert(kept.clone());
let removed = sweep_orphans(&[base.to_path_buf()], &keep).expect("sweep ok");
assert_eq!(removed, 1, "should remove exactly one orphan");
assert!(kept.exists(), "kept alef-marked file must remain");
assert!(!orphan.exists(), "orphan alef-marked file must be removed");
assert!(user_owned.exists(), "user-owned (no marker) file must remain");
}
#[test]
fn test_sweep_orphans_skips_dependency_directories() {
let dir = tempfile::tempdir().expect("tempdir");
let base = dir.path();
let alef_marker = "// auto-generated by alef\n// alef:hash:def\n";
for skip_dir in ["target", "node_modules", "_build", "vendor"] {
let nested = base.join(skip_dir).join("nested");
std::fs::create_dir_all(&nested).expect("mkdir");
std::fs::write(nested.join("orphan.rs"), alef_marker).unwrap();
}
let keep: std::collections::HashSet<std::path::PathBuf> = std::collections::HashSet::new();
let removed = sweep_orphans(&[base.to_path_buf()], &keep).expect("sweep ok");
assert_eq!(removed, 0, "must not descend into dependency directories");
}
#[test]
fn sweep_orphans_preserves_loose_marker_file_without_hash() {
let dir = tempfile::tempdir().expect("tempdir");
let base = dir.path();
let include_dir = base.join("packages/go/include");
std::fs::create_dir_all(&include_dir).expect("mkdir");
let vendored = include_dir.join("sample_crawler.h");
std::fs::write(
&vendored,
"// DO NOT EDIT — vendored cgo header\n#ifndef FOO_H\n#define FOO_H\n\ntypedef void CrawlEngine;\n\n#endif\n",
)
.unwrap();
let keep: std::collections::HashSet<std::path::PathBuf> = std::collections::HashSet::new();
let removed = sweep_orphans(&[base.to_path_buf()], &keep).expect("sweep ok");
assert_eq!(removed, 0, "vendored file without alef:hash must not be deleted");
assert!(vendored.exists(), "vendored cgo header must survive sweep_orphans");
}
#[test]
fn sweep_orphans_removes_file_with_alef_hash() {
let dir = tempfile::tempdir().expect("tempdir");
let base = dir.path();
let out_dir = base.join("e2e/rust/src");
std::fs::create_dir_all(&out_dir).expect("mkdir");
const HASH: &str = "0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef";
let alef_file = out_dir.join("lib.rs");
std::fs::write(
&alef_file,
format!(
"// alef:hash:{HASH}\n// This file is auto-generated by alef — DO NOT EDIT.\npub fn hello() {{}}\n"
),
)
.unwrap();
let keep: std::collections::HashSet<std::path::PathBuf> = std::collections::HashSet::new();
let removed = sweep_orphans(&[base.to_path_buf()], &keep).expect("sweep ok");
assert_eq!(removed, 1, "alef-owned file not in keep set must be deleted");
assert!(!alef_file.exists(), "alef:hash file must be removed by sweep_orphans");
}
/// `collect_alef_headered_paths` must report a file that has the alef banner and
/// hash line, and must not report a plain user-written file next to it.
#[test]
fn test_collect_alef_headered_paths_finds_headered_files() {
    const GENERATED_SOURCE: &str =
        "# This file is auto-generated by alef — DO NOT EDIT.\n# alef:hash:abc123\nprint('hello')\n";
    const USER_SOURCE: &str = "print('user code')\n";

    let tmp = tempfile::tempdir().expect("tempdir");
    let root = tmp.path();

    let python_dir = root.join("python");
    std::fs::create_dir_all(&python_dir).expect("mkdir");

    // Two siblings in the same directory: one generated, one hand-written.
    let generated_path = python_dir.join("test_chat.py");
    let user_path = python_dir.join("conftest.py");
    std::fs::write(&generated_path, GENERATED_SOURCE).unwrap();
    std::fs::write(&user_path, USER_SOURCE).unwrap();

    let found = collect_alef_headered_paths(root);

    assert!(found.contains(&generated_path), "alef-headered file must be collected");
    assert!(!found.contains(&user_path), "user-owned file must not be collected");
}
/// A root path that does not exist must produce an empty result from
/// `collect_alef_headered_paths`.
#[test]
fn test_collect_alef_headered_paths_missing_root_returns_empty() {
    use std::path::Path;

    let missing_root = Path::new("/nonexistent/test_apps");
    let found = collect_alef_headered_paths(missing_root);

    assert!(found.is_empty(), "missing root must yield empty set");
}
/// `finalize_hashes` must embed `compute_inputs_hash(sources_hash, alef_toml)`,
/// and that embedded value must still match after the file body is rewritten.
///
/// The second half previously rebuilt the file from the pre-finalize text (which
/// has no hash line), discarded the extracted value, and compared
/// `compute_inputs_hash` with itself, so it could never fail. It now rewrites the
/// *finalised* text and checks the hash that is actually embedded on disk.
#[test]
fn test_finalize_hashes_embeds_inputs_hash_not_content_hash() {
    let dir = tempfile::tempdir().expect("tempdir");
    let base = dir.path();
    let file_path = base.join("lib.rs");

    // Write the file twice with different bytes to mimic "emit, then format":
    // the content on disk at finalize time differs from what was first emitted.
    let content_before_format = "// This file is auto-generated by alef — DO NOT EDIT.\nfn hello() {}\n";
    std::fs::write(&file_path, content_before_format).expect("write pre-format content");
    let content_after_format = "// This file is auto-generated by alef — DO NOT EDIT.\nfn hello() {}\n\n";
    std::fs::write(&file_path, content_after_format).expect("write post-format content");

    let sources_hash = "deadbeef";
    let alef_toml_bytes = b"[workspace]\nlanguages = [\"rust\"]\n";
    let mut paths = std::collections::HashSet::new();
    paths.insert(file_path.clone());
    finalize_hashes(&paths, sources_hash, alef_toml_bytes).expect("finalize ok");

    let finalised = std::fs::read_to_string(&file_path).expect("read finalised");
    let embedded = crate::core::hash::extract_hash(&finalised).expect("hash must be present");
    let expected = crate::core::hash::compute_inputs_hash(sources_hash, alef_toml_bytes);
    assert_eq!(
        embedded, expected,
        "embedded hash must equal compute_inputs_hash, not a content-derived hash"
    );

    // Simulate a formatter touching the finalised file: the body changes but the
    // header (including the hash line) is kept, so the embedded hash must still
    // equal the inputs hash.
    let reformatted = format!("{finalised}\n// formatter added this line\n");
    std::fs::write(&file_path, &reformatted).expect("simulate post-finalize formatter rewrite");
    let after_reformat = std::fs::read_to_string(&file_path).expect("read after reformat");
    let still_embedded =
        crate::core::hash::extract_hash(&after_reformat).expect("hash line must survive a formatter rewrite");
    assert_eq!(
        still_embedded, expected,
        "inputs hash must be stable across formatter rewrites"
    );
}
/// Running `finalize_hashes` twice with the same inputs must report one updated
/// file on the first call and zero on the second.
#[test]
fn test_finalize_hashes_is_idempotent_with_inputs_hash() {
    use std::collections::HashSet;

    let tmp = tempfile::tempdir().expect("tempdir");
    let target = tmp.path().join("lib.rs");
    std::fs::write(
        &target,
        "// This file is auto-generated by alef — DO NOT EDIT.\nfn hello() {}\n",
    )
    .expect("write initial content");

    let sources_hash = "sources";
    let alef_toml_bytes = b"[workspace]\nlanguages = [\"rust\"]\n";

    let mut targets = HashSet::new();
    targets.insert(target.clone());

    let first_pass = finalize_hashes(&targets, sources_hash, alef_toml_bytes).expect("first finalize");
    assert_eq!(first_pass, 1, "first finalize must write the hash line");

    // Same inputs again: nothing on disk should need rewriting.
    let second_pass = finalize_hashes(&targets, sources_hash, alef_toml_bytes).expect("second finalize");
    assert_eq!(second_pass, 0, "second finalize must be a no-op (same inputs hash)");
}
/// For a `.go` file, `finalize_hashes` must embed the inputs hash and leave the
/// run of two blank lines in the body intact.
#[test]
fn test_finalize_hashes_non_rust_file_gets_inputs_hash() {
    use crate::core::hash::{compute_inputs_hash, extract_hash, strip_hash_line};
    use std::collections::HashSet;

    // Go source with two consecutive blank lines between `package` and `func`.
    const GO_SOURCE: &str =
        "// This file is auto-generated by alef — DO NOT EDIT.\npackage foo\n\n\nfunc Hello() {}\n";

    let tmp = tempfile::tempdir().expect("tempdir");
    let go_file = tmp.path().join("binding.go");
    std::fs::write(&go_file, GO_SOURCE).expect("write gofmt output");

    let sources_hash = "deadbeef";
    let alef_toml_bytes = b"[workspace]\nlanguages = [\"go\"]\n";

    let mut targets = HashSet::new();
    targets.insert(go_file.clone());
    finalize_hashes(&targets, sources_hash, alef_toml_bytes).expect("finalize ok");

    let finalised = std::fs::read_to_string(&go_file).expect("read finalised");
    let embedded = extract_hash(&finalised).expect("hash must be present");
    let expected = compute_inputs_hash(sources_hash, alef_toml_bytes);
    assert_eq!(
        embedded, expected,
        "embedded hash must equal compute_inputs_hash for Go files"
    );

    // With the hash line removed again, the blank-line run must still be there.
    let stripped = strip_hash_line(&finalised);
    assert!(
        stripped.contains("\n\n\n"),
        "two consecutive blank lines must survive finalize_hashes: got:\n{stripped:?}"
    );
}
/// A file whose banner reads "Generated by alef. Do not edit by hand." must be
/// processed by `finalize_hashes` and receive the inputs hash.
#[test]
fn test_finalize_hashes_recognizes_generated_by_alef_header() {
    use crate::core::hash::{compute_inputs_hash, extract_hash};
    use std::collections::HashSet;

    const SWIFT_SOURCE: &str =
        "// Generated by alef. Do not edit by hand.\n// swift-format-ignore-file\n\nimport Foundation\n";

    let tmp = tempfile::tempdir().expect("tempdir");
    let swift_file = tmp.path().join("Helpers.swift");
    std::fs::write(&swift_file, SWIFT_SOURCE).expect("write swift content");

    let sources_hash = "deadbeef";
    let alef_toml_bytes = b"[workspace]\nlanguages = [\"swift\"]\n";

    let mut targets = HashSet::new();
    targets.insert(swift_file.clone());

    // The return value is the number of files the call reports as updated.
    let updated = finalize_hashes(&targets, sources_hash, alef_toml_bytes).expect("finalize ok");
    assert_eq!(
        updated, 1,
        "finalize_hashes must process files with 'Generated by alef' header"
    );

    let finalised = std::fs::read_to_string(&swift_file).expect("read finalised");
    let embedded = extract_hash(&finalised).expect("hash must be present");
    let expected = compute_inputs_hash(sources_hash, alef_toml_bytes);
    assert_eq!(
        embedded, expected,
        "embedded hash must equal compute_inputs_hash for Swift files with 'Generated by alef' header"
    );
}
/// Contrasts the two overwrite modes of `write_scaffold_files_with_overwrite` on
/// an existing README: `false` leaves the on-disk bytes alone, `true` replaces
/// them with the normalized generated content.
#[test]
fn readme_overwrite_false_preserves_existing_content_producing_divergence() {
    // Same table, two separator styles: padded (already on disk) vs compact (generated).
    const PADDED: &str =
        "# My README\n\n| Document | Size |\n| ------------------- | ----- |\n| Lists (Timeline) | 129KB |\n";
    const COMPACT: &str = "# My README\n\n| Document | Size |\n|----------|------|\n| Lists (Timeline) | 129KB |\n";

    let tmp = tempfile::tempdir().expect("tempdir");
    let root = tmp.path();
    let readme_path = root.join("README.md");
    std::fs::write(&readme_path, PADDED).expect("write padded README");

    let files = vec![make_file("README.md", COMPACT)];
    let read_readme = || std::fs::read_to_string(&readme_path).expect("read");

    // Pass 1: overwrite disabled, so the padded content must still be on disk.
    write_scaffold_files_with_overwrite(&files, root, false).expect("write ok (overwrite=false)");
    let after_false = read_readme();
    assert_eq!(
        after_false, PADDED,
        "overwrite=false must not touch an existing README — padded content preserved (bug state)"
    );

    // Pass 2: overwrite enabled, so the compact content must replace the padded one.
    write_scaffold_files_with_overwrite(&files, root, true).expect("write ok (overwrite=true)");
    let after_true = read_readme();
    assert!(
        after_true.contains("|----------|"),
        "overwrite=true must write compact-separator content, got:\n{after_true}"
    );
    assert!(
        !after_true.contains("| ------------------- |"),
        "overwrite=true must NOT preserve rumdl-fmt-padded separators, got:\n{after_true}"
    );

    let normalized = normalize_content(&std::path::PathBuf::from("README.md"), COMPACT);
    assert_eq!(
        after_true, normalized,
        "alef readme and alef all must produce identical on-disk bytes for README files"
    );
}
/// With `overwrite = false`, an existing `.gitattributes` must be left untouched
/// and the write count must be zero, for a file with `generated_header: false`.
#[test]
fn seed_file_with_generated_header_false_is_preserved_on_overwrite_false() {
    use std::path::PathBuf;

    const HAND_WRITTEN: &str = "# hand-crafted\n* text=auto eol=lf\n";

    let tmp = tempfile::tempdir().expect("tempdir");
    let root = tmp.path();
    let seed_path = root.join(".gitattributes");
    std::fs::write(&seed_path, HAND_WRITTEN).expect("write original");

    // Scaffold output aimed at the same path as the pre-existing file.
    let scaffolded = GeneratedFile {
        path: PathBuf::from(".gitattributes"),
        content: String::from("# Generated by alef scaffold.\ne2e/** linguist-generated=true\n"),
        generated_header: false,
    };

    let written = write_scaffold_files_with_overwrite(&[scaffolded], root, false).expect("write ok");
    assert_eq!(
        written, 0,
        "overwrite=false must not write any file when seed already exists"
    );

    let on_disk = std::fs::read_to_string(&seed_path).expect("read");
    assert_eq!(
        on_disk, HAND_WRITTEN,
        "overwrite=false must not touch an existing seed file (generated_header: false)"
    );
}
/// `detect_crate_edition` must return the `edition` declared in the `Cargo.toml`
/// located one directory above the source file's directory.
#[test]
fn test_detect_crate_edition_reads_from_cargo_toml() {
    const MANIFEST: &str = "[package]\nname = \"my-crate\"\nversion = \"0.1.0\"\nedition = \"2021\"\n";

    let tmp = tempfile::tempdir().expect("tempdir");
    let crate_root = tmp.path();
    std::fs::write(crate_root.join("Cargo.toml"), MANIFEST).expect("write Cargo.toml");

    // Only the `src/` directory is created; `lib.rs` itself is never written.
    let src_dir = crate_root.join("src");
    std::fs::create_dir_all(&src_dir).expect("mkdir src");
    let lib_rs = src_dir.join("lib.rs");

    let detected = detect_crate_edition(&lib_rs);
    assert_eq!(detected, "2021", "should detect edition 2021 from Cargo.toml");
}
/// When the temp directory holding the source path contains no `Cargo.toml`,
/// `detect_crate_edition` is expected to return `"2024"`.
#[test]
fn test_detect_crate_edition_defaults_to_2024_when_no_cargo_toml() {
    let tmp = tempfile::tempdir().expect("tempdir");

    // The path is only constructed; no file is created at it.
    let lonely_source = tmp.path().join("orphan.rs");
    let detected = detect_crate_edition(&lonely_source);

    assert_eq!(detected, "2024", "should default to 2024 when no Cargo.toml found");
}
/// A `Cargo.toml` with a `[package]` table but no `edition` key must make
/// `detect_crate_edition` return `"2024"`.
#[test]
fn test_detect_crate_edition_defaults_to_2024_when_edition_absent_from_cargo_toml() {
    const MANIFEST_WITHOUT_EDITION: &str = "[package]\nname = \"no-edition-crate\"\nversion = \"0.1.0\"\n";

    let tmp = tempfile::tempdir().expect("tempdir");
    let crate_root = tmp.path();
    let manifest_path = crate_root.join("Cargo.toml");
    std::fs::write(manifest_path, MANIFEST_WITHOUT_EDITION).expect("write Cargo.toml");

    // Source path sits directly beside the manifest.
    let lib_rs = crate_root.join("lib.rs");
    let detected = detect_crate_edition(&lib_rs);

    assert_eq!(detected, "2024", "should default to 2024 when edition field absent");
}
/// `parse_package_edition` must return the `edition` value found under `[package]`.
#[test]
fn test_parse_package_edition_extracts_value() {
    let manifest = "[package]\nname = \"x\"\nedition = \"2021\"\n";

    let parsed = parse_package_edition(manifest);

    assert_eq!(parsed.as_deref(), Some("2021"));
}
/// An `edition` key that appears under `[workspace]` rather than `[package]`
/// must not be picked up by `parse_package_edition`.
#[test]
fn test_parse_package_edition_ignores_other_sections() {
    // `edition` sits in `[workspace]`; `[package]` only has a name.
    let manifest = "[workspace]\nedition = \"2021\"\n[package]\nname = \"x\"\n";

    let parsed = parse_package_edition(manifest);

    assert_eq!(parsed, None);
}
/// On Unix, a scaffold file whose content starts with `#!` must end up with the
/// owner-executable permission bit set after being written.
#[cfg(unix)]
#[test]
fn test_scaffold_write_sets_executable_bit_for_shebang_files() {
    use std::os::unix::fs::PermissionsExt;
    use std::path::PathBuf;

    const SCRIPT_NAME: &str = "run_tests.sh";

    let tmp = tempfile::tempdir().expect("tempdir");
    let root = tmp.path();

    let script = GeneratedFile {
        path: PathBuf::from(SCRIPT_NAME),
        content: String::from("#!/usr/bin/env bash\nset -euo pipefail\necho hello\n"),
        generated_header: false,
    };
    write_scaffold_files_with_overwrite(&[script], root, true).expect("write ok");

    let mode = std::fs::metadata(root.join(SCRIPT_NAME))
        .expect("metadata")
        .permissions()
        .mode();

    // 0o100 is the owner-execute bit of the Unix mode.
    assert!(
        (mode & 0o100) != 0,
        "shebang file must have owner-executable bit set, got mode {mode:#o}"
    );
}
/// On Unix, a scaffold file whose content does not start with `#!` must have no
/// executable permission bit set after being written, even with a `.sh` name.
#[cfg(unix)]
#[test]
fn test_scaffold_write_does_not_set_executable_bit_for_non_shebang_files() {
    use std::os::unix::fs::PermissionsExt;
    use std::path::PathBuf;

    const FILE_NAME: &str = "plain.sh";

    let tmp = tempfile::tempdir().expect("tempdir");
    let root = tmp.path();

    // First line is a comment, not a shebang.
    let plain = GeneratedFile {
        path: PathBuf::from(FILE_NAME),
        content: String::from("# not a shebang\nsome content\n"),
        generated_header: false,
    };
    write_scaffold_files_with_overwrite(&[plain], root, true).expect("write ok");

    let mode = std::fs::metadata(root.join(FILE_NAME))
        .expect("metadata")
        .permissions()
        .mode();

    // 0o111 covers the execute bit for owner, group and others.
    assert!(
        (mode & 0o111) == 0,
        "non-shebang file must not have any executable bit set, got mode {mode:#o}"
    );
}
}