/// Prefix that introduces the embedded hash value on the hash comment line.
///
/// `inject_hash_line` writes it, `extract_hash` searches for it, and
/// `strip_hash_line` drops every line that contains it.
const HASH_PREFIX: &str = "alef:hash:";
/// Banner text without any comment syntax; `header` prefixes each line with
/// the comment marker of the requested style.
///
/// The leading `\` in the literal suppresses the newline after the opening
/// quote, so the text starts directly with "This file…".
const HEADER_BODY: &str = "\
This file is auto-generated by alef — DO NOT EDIT.
To regenerate: alef generate
To verify freshness: alef verify --exit-code
Issues & docs: https://github.com/kreuzberg-dev/alef";
/// Comment syntax used by `header` when rendering the generated-file banner.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CommentStyle {
/// One `// ` line comment per banner line.
DoubleSlash,
/// One `# ` line comment per banner line.
Hash,
/// A single `/*` … ` */` block whose inner lines start with ` * `.
Block,
}
/// Renders the generated-file banner in the requested comment syntax.
///
/// Each line of the banner text becomes one comment line terminated by `\n`.
/// The block style additionally wraps the lines in a `/*` opening line and a
/// ` */` closing line. The returned string always ends with a newline.
pub fn header(style: CommentStyle) -> String {
    // (opening line, per-line prefix, closing line) for each style.
    let (open, prefix, close) = match style {
        CommentStyle::DoubleSlash => ("", "// ", ""),
        CommentStyle::Hash => ("", "# ", ""),
        CommentStyle::Block => ("/*\n", " * ", " */\n"),
    };

    let mut banner = String::from(open);
    for text in HEADER_BODY.lines() {
        banner.push_str(prefix);
        banner.push_str(text);
        banner.push('\n');
    }
    banner.push_str(close);
    banner
}
/// Substring that identifies the banner line after which `inject_hash_line`
/// places the hash comment.
const HEADER_MARKER: &str = "auto-generated by alef";
/// Returns the BLAKE3 digest of `content`'s UTF-8 bytes as a hex string.
pub fn hash_content(content: &str) -> String {
    let digest = blake3::hash(content.as_bytes());
    digest.to_hex().to_string()
}
/// Hashes the paths and contents of `sources` into a single BLAKE3 hex digest.
///
/// The paths are sorted before hashing, so the order of `sources` does not
/// affect the result. Each file contributes the tag `src\0`, its path
/// (lossily converted to UTF-8), a `\0` separator, and its raw bytes.
///
/// # Errors
///
/// Returns the I/O error from the first file (in sorted order) that cannot be
/// read.
pub fn compute_sources_hash(sources: &[std::path::PathBuf]) -> std::io::Result<String> {
    let mut ordered: Vec<&std::path::PathBuf> = sources.iter().collect();
    ordered.sort();

    let mut hasher = blake3::Hasher::new();
    for path in ordered {
        let bytes = std::fs::read(path)?;
        let label = path.to_string_lossy();
        hasher
            .update(b"src\0")
            .update(label.as_bytes())
            .update(b"\0")
            .update(&bytes);
    }
    Ok(hasher.finalize().to_hex().to_string())
}
/// Hashes every source file that belongs to a crate configuration.
///
/// The crate's own `sources` and the `sources` of each entry in
/// `source_crates` are gathered, sorted, and de-duplicated, so neither ordering
/// nor repeated paths influence the digest. Each remaining file contributes
/// the tag `src\0`, its path (lossily converted to UTF-8), a `\0` separator,
/// and its raw bytes.
///
/// # Errors
///
/// Returns the I/O error from the first file (in sorted order) that cannot be
/// read.
pub fn compute_crate_sources_hash(crate_cfg: &crate::config::resolved::ResolvedCrateConfig) -> std::io::Result<String> {
    let nested = crate_cfg.source_crates.iter().flat_map(|sc| sc.sources.iter());
    let mut paths: Vec<&std::path::PathBuf> = crate_cfg.sources.iter().chain(nested).collect();
    paths.sort();
    paths.dedup();

    let mut hasher = blake3::Hasher::new();
    for path in paths {
        let bytes = std::fs::read(path)?;
        let label = path.to_string_lossy();
        hasher
            .update(b"src\0")
            .update(label.as_bytes())
            .update(b"\0")
            .update(&bytes);
    }
    Ok(hasher.finalize().to_hex().to_string())
}
/// Computes the hash embedded in a generated file.
///
/// The digest covers `sources_hash` and `content` with any `alef:hash:` line
/// removed, so the value is the same whether or not a hash line has already
/// been injected. Domain tags (`sources\0`, `\0content\0`) separate the two
/// inputs.
pub fn compute_file_hash(sources_hash: &str, content: &str) -> String {
    let body = strip_hash_line(content);
    let mut hasher = blake3::Hasher::new();
    hasher
        .update(b"sources\0")
        .update(sources_hash.as_bytes())
        .update(b"\0content\0")
        .update(body.as_bytes());
    hasher.finalize().to_hex().to_string()
}
/// Inserts an `alef:hash:<hash>` comment line directly after the header marker.
///
/// Only the first ten lines are searched for the marker line; at most one hash
/// line is inserted. Content without a marker in that window gets no hash line.
///
/// The comment syntax of the hash line follows the marker line:
///
/// * `<!-- … -->` for lines starting with `<!--`
/// * `// …` for lines starting with `//`
/// * `# …` for lines starting with `#`
/// * `/* … */` when the marker line itself closes a block comment (ends with
///   `*/`), because the following line is already outside that comment
/// * ` * …` when the marker line opens (`/*`) or continues (`*`) a block comment
/// * `// …` for anything else
///
/// Lines are re-joined with `\n`; a trailing newline is kept only when
/// `content` ended with one.
pub fn inject_hash_line(content: &str, hash: &str) -> String {
    let mut result = String::with_capacity(content.len() + 80);
    let mut injected = false;
    for (i, line) in content.lines().enumerate() {
        result.push_str(line);
        result.push('\n');
        if !injected && i < 10 && line.contains(HEADER_MARKER) {
            let trimmed = line.trim();
            let hash_line = if trimmed.starts_with("<!--") {
                format!("<!-- {HASH_PREFIX}{hash} -->")
            } else if trimmed.starts_with("//") {
                format!("// {HASH_PREFIX}{hash}")
            } else if trimmed.starts_with('#') {
                format!("# {HASH_PREFIX}{hash}")
            } else if trimmed.ends_with("*/") {
                // The block comment is closed on the marker line, so the hash
                // needs its own self-contained comment to stay a comment.
                format!("/* {HASH_PREFIX}{hash} */")
            } else if trimmed.starts_with("/*") || trimmed.starts_with('*') {
                // `trimmed` has no leading whitespace, so a continuation line
                // such as " * text" is recognised by its leading `*`.
                format!(" * {HASH_PREFIX}{hash}")
            } else {
                format!("// {HASH_PREFIX}{hash}")
            };
            result.push_str(&hash_line);
            result.push('\n');
            injected = true;
        }
    }
    // `lines()` drops the final newline; mirror the input's trailing-newline state.
    if !content.ends_with('\n') && result.ends_with('\n') {
        result.pop();
    }
    result
}
pub fn extract_hash(content: &str) -> Option<String> {
for (i, line) in content.lines().enumerate() {
if i >= 10 {
break;
}
if let Some(pos) = line.find(HASH_PREFIX) {
let rest = &line[pos + HASH_PREFIX.len()..];
let hex = rest.trim().trim_end_matches("*/").trim_end_matches("-->").trim();
if !hex.is_empty() {
return Some(hex.to_string());
}
}
}
None
}
/// Returns `content` with every line containing `alef:hash:` removed.
///
/// The remaining lines are re-joined with `\n`. The result ends with a newline
/// only when `content` did.
pub fn strip_hash_line(content: &str) -> String {
    let mut kept = String::with_capacity(content.len());
    content
        .lines()
        .filter(|row| !row.contains(HASH_PREFIX))
        .for_each(|row| {
            kept.push_str(row);
            kept.push('\n');
        });

    // Every kept line was terminated above; undo the last terminator when the
    // input itself had no trailing newline.
    let input_terminated = content.ends_with('\n');
    if kept.ends_with('\n') && !input_terminated {
        kept.pop();
    }
    kept
}
#[cfg(test)]
mod tests {
use super::*;
/// The `//` header contains both the first and the last banner line.
#[test]
fn test_header_double_slash() {
    let rendered = header(CommentStyle::DoubleSlash);
    for expected in [
        "// This file is auto-generated by alef",
        "// Issues & docs: https://github.com/kreuzberg-dev/alef",
    ] {
        assert!(rendered.contains(expected));
    }
}
/// The `#` header contains the first banner line with a `# ` prefix.
#[test]
fn test_header_hash() {
    let rendered = header(CommentStyle::Hash);
    let first_line = "# This file is auto-generated by alef";
    assert!(rendered.contains(first_line));
}
/// The block header opens with `/*`, prefixes banner lines with ` * `, and
/// closes with ` */`.
#[test]
fn test_header_block() {
    let rendered = header(CommentStyle::Block);
    let opens_block = rendered.starts_with("/*\n");
    let closes_block = rendered.ends_with(" */\n");
    assert!(opens_block);
    assert!(rendered.contains(" * This file is auto-generated by alef"));
    assert!(closes_block);
}
/// A hash injected under a `//` header can be read back unchanged.
#[test]
fn test_inject_and_extract_rust() {
    let source = format!("{}use foo;\n", header(CommentStyle::DoubleSlash));
    let digest = hash_content(&source);
    let stamped = inject_hash_line(&source, &digest);
    assert!(stamped.contains(HASH_PREFIX));
    assert_eq!(extract_hash(&stamped), Some(digest));
}
/// A hash injected under a `#` header uses a `#` comment and can be read back.
#[test]
fn test_inject_and_extract_python() {
    let source = format!("{}import foo\n", header(CommentStyle::Hash));
    let digest = hash_content(&source);
    let stamped = inject_hash_line(&source, &digest);
    let expected_prefix = format!("# {HASH_PREFIX}");
    assert!(stamped.contains(&expected_prefix));
    assert_eq!(extract_hash(&stamped), Some(digest));
}
/// A hash injected under a block-comment header can be read back unchanged.
#[test]
fn test_inject_and_extract_c_block() {
    let source = format!("{}#include <stdio.h>\n", header(CommentStyle::Block));
    let digest = hash_content(&source);
    let stamped = inject_hash_line(&source, &digest);
    assert!(stamped.contains(HASH_PREFIX));
    assert_eq!(extract_hash(&stamped), Some(digest));
}
/// The header may start on the second line (after `<?php`); the hash is still
/// injected and the opening tag stays first.
#[test]
fn test_inject_php_line2() {
    let source = format!("<?php\n{}namespace Foo;\n", header(CommentStyle::DoubleSlash));
    let digest = hash_content(&source);
    let stamped = inject_hash_line(&source, &digest);

    let rows: Vec<&str> = stamped.lines().collect();
    assert_eq!(rows[0], "<?php");
    assert!(rows[1].contains(HEADER_MARKER));
    assert!(rows.iter().any(|row| row.contains(HASH_PREFIX)));
    assert_eq!(extract_hash(&stamped), Some(digest));
}
/// Content without the header marker is returned as-is and carries no hash.
#[test]
fn test_no_header_returns_unchanged() {
    let plain = "fn main() {}\n";
    let output = inject_hash_line(plain, "abc123");
    assert_eq!(output, plain);
    assert_eq!(extract_hash(&output), None);
}
/// Stripping removes exactly the hash line and keeps the surrounding lines.
#[test]
fn test_strip_hash_line() {
    let with_hash = "// auto-generated by alef\n// alef:hash:abc123\nuse foo;\n";
    let without_hash = "// auto-generated by alef\nuse foo;\n";
    assert_eq!(strip_hash_line(with_hash), without_hash);
}
/// Injecting and then stripping the hash line reproduces the original text and
/// therefore the original content hash.
#[test]
fn test_roundtrip() {
    let pristine = format!("{}import sys\n", header(CommentStyle::Hash));
    let digest = hash_content(&pristine);
    let restored = strip_hash_line(&inject_hash_line(&pristine, &digest));
    assert_eq!(restored, pristine);
    assert_eq!(hash_content(&restored), digest);
}
use std::path::{Path, PathBuf};
use tempfile::tempdir;
/// Test helper: writes `content` to `name` inside `dir` and returns the full
/// path. Panics if the file cannot be written.
fn write_file(dir: &Path, name: &str, content: &str) -> PathBuf {
    let target = dir.join(name);
    std::fs::write(&target, content).unwrap();
    target
}
/// Identical content stored at two different paths yields different hashes,
/// because the path is part of the hashed input.
#[test]
fn sources_hash_changes_when_path_changes_even_if_content_same() {
    let tmp = tempdir().unwrap();
    let root = tmp.path();
    let original = write_file(root, "a.rs", "fn a() {}");
    std::fs::create_dir_all(root.join("moved")).unwrap();
    let relocated = write_file(root, "moved/a.rs", "fn a() {}");

    let hash_original = compute_sources_hash(&[original]).unwrap();
    let hash_relocated = compute_sources_hash(&[relocated]).unwrap();
    assert_ne!(
        hash_original, hash_relocated,
        "same content at a different path can produce different IR (rust_path differs)"
    );
}
/// A source path that does not exist surfaces as an error.
#[test]
fn sources_hash_errors_on_missing_source() {
    let tmp = tempdir().unwrap();
    let missing = tmp.path().join("does-not-exist.rs");
    let outcome = compute_sources_hash(&[missing]);
    assert!(outcome.is_err());
}
/// Hashing the same unchanged files twice gives the same digest.
#[test]
fn sources_hash_stable_across_runs() {
    let tmp = tempdir().unwrap();
    let inputs = vec![
        write_file(tmp.path(), "a.rs", "fn a() {}"),
        write_file(tmp.path(), "b.rs", "fn b() {}"),
    ];
    let first = compute_sources_hash(&inputs).unwrap();
    let second = compute_sources_hash(&inputs).unwrap();
    assert_eq!(first, second);
}
/// The order in which source paths are supplied does not affect the digest.
#[test]
fn sources_hash_path_order_independent() {
    let tmp = tempdir().unwrap();
    let first = write_file(tmp.path(), "a.rs", "fn a() {}");
    let second = write_file(tmp.path(), "b.rs", "fn b() {}");

    let forward = compute_sources_hash(&[first.clone(), second.clone()]).unwrap();
    let reverse = compute_sources_hash(&[second, first]).unwrap();
    assert_eq!(forward, reverse);
}
/// Rewriting a source file with different content changes the digest.
#[test]
fn sources_hash_changes_with_content() {
    let tmp = tempdir().unwrap();
    let path = write_file(tmp.path(), "a.rs", "fn a() {}");
    let before = compute_sources_hash(std::slice::from_ref(&path)).unwrap();

    std::fs::write(&path, "fn a() { let _ = 1; }").unwrap();
    let after = compute_sources_hash(&[path]).unwrap();
    assert_ne!(before, after);
}
/// The file hash is the same whether or not the content already carries an
/// `alef:hash:` line.
#[test]
fn file_hash_idempotent_under_strip_hash_line() {
    let sources_hash = "abc123";
    let without_line = "// auto-generated by alef\nfn body() {}\n";
    let with_line = "// auto-generated by alef\n// alef:hash:deadbeef\nfn body() {}\n";

    let hash_without = compute_file_hash(sources_hash, without_line);
    let hash_with = compute_file_hash(sources_hash, with_line);
    assert_eq!(hash_without, hash_with, "hash must ignore an existing alef:hash: line");
}
/// A different sources hash produces a different file hash for the same content.
#[test]
fn file_hash_changes_when_sources_change() {
    let body = "// auto-generated by alef\nfn body() {}\n";
    let from_a = compute_file_hash("sources_a", body);
    let from_b = compute_file_hash("sources_b", body);
    assert_ne!(from_a, from_b);
}
/// Different content produces a different file hash for the same sources hash.
#[test]
fn file_hash_changes_when_content_changes() {
    let sources_hash = "abc123";
    let for_a = compute_file_hash(sources_hash, "fn a() {}\n");
    let for_b = compute_file_hash(sources_hash, "fn b() {}\n");
    assert_ne!(for_a, for_b);
}
/// Checks that the file hash is a 64-character hex digest.
///
/// NOTE(review): the test name refers to version independence; the function
/// takes no version input, but this assertion only verifies the output length.
#[test]
fn file_hash_independent_of_alef_version() {
    let digest = compute_file_hash("sources_hash", "fn a() {}\n");
    assert_eq!(digest.len(), 64, "blake3 hex output is 64 chars");
}
#[test]
fn crate_sources_hash_differs_across_crates_with_disjoint_sources() {
use crate::config::resolved::ResolvedCrateConfig;
let dir = tempdir().unwrap();
let a = write_file(dir.path(), "a.rs", "fn a() {}");
let b = write_file(dir.path(), "b.rs", "fn b() {}");
let make_cfg = |name: &str, sources: Vec<std::path::PathBuf>| ResolvedCrateConfig {
name: name.to_string(),
sources,
source_crates: vec![],
version_from: "Cargo.toml".to_string(),
core_import: None,
workspace_root: None,
skip_core_import: false,
error_type: None,
error_constructor: None,
features: vec![],
path_mappings: Default::default(),
extra_dependencies: Default::default(),
auto_path_mappings: true,
languages: vec![],
python: None,
node: None,
ruby: None,
php: None,
elixir: None,
wasm: None,
ffi: None,
gleam: None,
go: None,
java: None,
dart: None,
kotlin: None,
swift: None,
csharp: None,
r: None,
zig: None,
exclude: Default::default(),
include: Default::default(),
output_paths: Default::default(),
explicit_output: Default::default(),
lint: Default::default(),
test: Default::default(),
setup: Default::default(),
update: Default::default(),
clean: Default::default(),
build_commands: Default::default(),
generate: Default::default(),
generate_overrides: Default::default(),
format: Default::default(),
format_overrides: Default::default(),
dto: Default::default(),
tools: Default::default(),
opaque_types: Default::default(),
sync: None,
publish: None,
e2e: None,
adapters: vec![],
trait_bridges: vec![],
scaffold: None,
readme: None,
custom_files: Default::default(),
custom_modules: Default::default(),
custom_registrations: Default::default(),
};
let cfg_a = make_cfg("alpha", vec![a]);
let cfg_b = make_cfg("beta", vec![b]);
let hash_a = compute_crate_sources_hash(&cfg_a).unwrap();
let hash_b = compute_crate_sources_hash(&cfg_b).unwrap();
assert_ne!(
hash_a, hash_b,
"crates with disjoint sources must produce different hashes"
);
}
#[test]
fn crate_sources_hash_includes_source_crates() {
use crate::config::{SourceCrate, resolved::ResolvedCrateConfig};
let dir = tempdir().unwrap();
let a = write_file(dir.path(), "a.rs", "fn a() {}");
let b = write_file(dir.path(), "b.rs", "fn b() {}");
let make_cfg =
|sources: Vec<std::path::PathBuf>, source_crate_sources: Vec<std::path::PathBuf>| -> ResolvedCrateConfig {
let source_crates = if source_crate_sources.is_empty() {
vec![]
} else {
vec![SourceCrate {
name: "extra-crate".to_string(),
sources: source_crate_sources,
}]
};
ResolvedCrateConfig {
name: "test".to_string(),
sources,
source_crates,
version_from: "Cargo.toml".to_string(),
core_import: None,
workspace_root: None,
skip_core_import: false,
error_type: None,
error_constructor: None,
features: vec![],
path_mappings: Default::default(),
extra_dependencies: Default::default(),
auto_path_mappings: true,
languages: vec![],
python: None,
node: None,
ruby: None,
php: None,
elixir: None,
wasm: None,
ffi: None,
gleam: None,
go: None,
java: None,
dart: None,
kotlin: None,
swift: None,
csharp: None,
r: None,
zig: None,
exclude: Default::default(),
include: Default::default(),
output_paths: Default::default(),
explicit_output: Default::default(),
lint: Default::default(),
test: Default::default(),
setup: Default::default(),
update: Default::default(),
clean: Default::default(),
build_commands: Default::default(),
generate: Default::default(),
generate_overrides: Default::default(),
format: Default::default(),
format_overrides: Default::default(),
dto: Default::default(),
tools: Default::default(),
opaque_types: Default::default(),
sync: None,
publish: None,
e2e: None,
adapters: vec![],
trait_bridges: vec![],
scaffold: None,
readme: None,
custom_files: Default::default(),
custom_modules: Default::default(),
custom_registrations: Default::default(),
}
};
let cfg_without_extra = make_cfg(vec![a.clone()], vec![]);
let cfg_with_extra = make_cfg(vec![a.clone()], vec![b.clone()]);
let hash_without = compute_crate_sources_hash(&cfg_without_extra).unwrap();
let hash_with = compute_crate_sources_hash(&cfg_with_extra).unwrap();
assert_ne!(
hash_without, hash_with,
"adding a source_crate source file must change the hash"
);
}
#[test]
fn compute_crate_sources_hash_dedupes_overlapping_paths() {
use crate::config::{SourceCrate, resolved::ResolvedCrateConfig};
let dir = tempdir().unwrap();
let a = write_file(dir.path(), "a.rs", "fn a() {}");
let b = write_file(dir.path(), "b.rs", "fn b() {}");
let make_cfg =
|sources: Vec<std::path::PathBuf>, source_crate_sources: Vec<std::path::PathBuf>| -> ResolvedCrateConfig {
let source_crates = if source_crate_sources.is_empty() {
vec![]
} else {
vec![SourceCrate {
name: "extra-crate".to_string(),
sources: source_crate_sources,
}]
};
ResolvedCrateConfig {
name: "test".to_string(),
sources,
source_crates,
version_from: "Cargo.toml".to_string(),
core_import: None,
workspace_root: None,
skip_core_import: false,
error_type: None,
error_constructor: None,
features: vec![],
path_mappings: Default::default(),
extra_dependencies: Default::default(),
auto_path_mappings: true,
languages: vec![],
python: None,
node: None,
ruby: None,
php: None,
elixir: None,
wasm: None,
ffi: None,
gleam: None,
go: None,
java: None,
dart: None,
kotlin: None,
swift: None,
csharp: None,
r: None,
zig: None,
exclude: Default::default(),
include: Default::default(),
output_paths: Default::default(),
explicit_output: Default::default(),
lint: Default::default(),
test: Default::default(),
setup: Default::default(),
update: Default::default(),
clean: Default::default(),
build_commands: Default::default(),
generate: Default::default(),
generate_overrides: Default::default(),
format: Default::default(),
format_overrides: Default::default(),
dto: Default::default(),
tools: Default::default(),
opaque_types: Default::default(),
sync: None,
publish: None,
e2e: None,
adapters: vec![],
trait_bridges: vec![],
scaffold: None,
readme: None,
custom_files: Default::default(),
custom_modules: Default::default(),
custom_registrations: Default::default(),
}
};
let cfg_with_dupes = make_cfg(vec![a.clone(), a.clone(), b.clone()], vec![a.clone()]);
let cfg_unique = make_cfg(vec![a.clone(), b.clone()], vec![]);
let hash_dup = compute_crate_sources_hash(&cfg_with_dupes).unwrap();
let hash_unique = compute_crate_sources_hash(&cfg_unique).unwrap();
assert_eq!(
hash_dup, hash_unique,
"duplicate source paths must not affect the per-crate sources hash"
);
}
#[test]
fn compute_crate_sources_hash_is_order_independent() {
use crate::config::resolved::ResolvedCrateConfig;
let dir = tempdir().unwrap();
let a = write_file(dir.path(), "a.rs", "fn a() {}");
let b = write_file(dir.path(), "b.rs", "fn b() {}");
let c = write_file(dir.path(), "c.rs", "fn c() {}");
let make_cfg = |sources: Vec<std::path::PathBuf>| -> ResolvedCrateConfig {
ResolvedCrateConfig {
name: "test".to_string(),
sources,
source_crates: vec![],
version_from: "Cargo.toml".to_string(),
core_import: None,
workspace_root: None,
skip_core_import: false,
error_type: None,
error_constructor: None,
features: vec![],
path_mappings: Default::default(),
extra_dependencies: Default::default(),
auto_path_mappings: true,
languages: vec![],
python: None,
node: None,
ruby: None,
php: None,
elixir: None,
wasm: None,
ffi: None,
gleam: None,
go: None,
java: None,
dart: None,
kotlin: None,
swift: None,
csharp: None,
r: None,
zig: None,
exclude: Default::default(),
include: Default::default(),
output_paths: Default::default(),
explicit_output: Default::default(),
lint: Default::default(),
test: Default::default(),
setup: Default::default(),
update: Default::default(),
clean: Default::default(),
build_commands: Default::default(),
generate: Default::default(),
generate_overrides: Default::default(),
format: Default::default(),
format_overrides: Default::default(),
dto: Default::default(),
tools: Default::default(),
opaque_types: Default::default(),
sync: None,
publish: None,
e2e: None,
adapters: vec![],
trait_bridges: vec![],
scaffold: None,
readme: None,
custom_files: Default::default(),
custom_modules: Default::default(),
custom_registrations: Default::default(),
}
};
let cfg1 = make_cfg(vec![a.clone(), b.clone(), c.clone()]);
let cfg2 = make_cfg(vec![c.clone(), a.clone(), b.clone()]);
let cfg3 = make_cfg(vec![b.clone(), c.clone(), a.clone()]);
let h1 = compute_crate_sources_hash(&cfg1).unwrap();
let h2 = compute_crate_sources_hash(&cfg2).unwrap();
let h3 = compute_crate_sources_hash(&cfg3).unwrap();
assert_eq!(h1, h2, "reordering sources must not change the hash");
assert_eq!(h2, h3, "reordering sources must not change the hash");
}
/// The hash written into a file equals the hash recomputed from that file, so
/// a later verification pass reproduces the embedded value.
#[test]
fn file_hash_round_trip_via_inject_extract() {
    let sources_hash = "abc123";
    let generated = "// auto-generated by alef\nfn body() {}\n";

    let expected = compute_file_hash(sources_hash, generated);
    let stamped = inject_hash_line(generated, &expected);

    let embedded = extract_hash(&stamped).expect("hash line should be present");
    let recomputed = compute_file_hash(sources_hash, &stamped);
    assert_eq!(embedded, expected);
    assert_eq!(recomputed, expected);
    assert_eq!(embedded, recomputed, "verify must reproduce the embedded hash");
}
}