Skip to main content

alef_core/
hash.rs

1//! Content hashing and generated-file headers.
2//!
3//! Every file produced by alef gets a standard header that identifies it as
4//! generated, tells agents/developers how to fix issues, and embeds a blake3
5//! hash so `alef verify` can detect staleness without external state.
6//!
7//! # Hash semantics
8//!
9//! As of alef v0.9.0, the embedded `alef:hash:<hex>` value is an
10//! **input-deterministic** fingerprint produced by [`compute_generation_hash`]:
11//!
12//! ```text
13//! blake3(sorted(rust_source_files) + alef.toml + alef_version)
14//! ```
15//!
16//! Every file emitted by a single `alef generate` run carries the same hash —
17//! it identifies the inputs that produced the run, not the byte-content of the
18//! individual file. `alef verify` recomputes the same input hash and compares
19//! it to the disk hash without inspecting any file body, which makes verify
20//! immune to downstream formatters (rustfmt, rubocop, dotnet format, spotless,
21//! biome, mix format, php-cs-fixer, etc.) reformatting alef-generated content.
22//!
23//! Pre-v0.9.0 alef used per-file output hashing (`hash_content` over the
24//! normalised generated content). That function is still exported for the
25//! handful of callers that hash arbitrary content (IR cache, language hash for
26//! incremental skipping), but it is no longer the verify path.
27
28use std::path::Path;
29
/// Token that introduces the hex digest on the embedded hash line
/// (`alef:hash:<hex>`).
const HASH_PREFIX: &str = "alef:hash:";

/// Plain text of the standard header, one entry per line, without any
/// comment delimiters. [`header`] decorates each line with the comment
/// syntax of the requested style.
const HEADER_BODY: &str = "This file is auto-generated by alef — DO NOT EDIT.\n\
To regenerate: alef generate\n\
To verify freshness: alef verify --exit-code\n\
Issues & docs: https://github.com/kreuzberg-dev/alef";
39
/// Selects the comment syntax [`header`] uses when rendering the generated
/// header.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CommentStyle {
    /// Line comments introduced by `//`
    /// (Rust, Go, Java, C#, TypeScript, C, PHP).
    DoubleSlash,
    /// Line comments introduced by `#`
    /// (Python, Ruby, Elixir, R, TOML, Shell, Makefile).
    Hash,
    /// A single `/* ... */` block comment (C headers).
    Block,
}
50
51/// Return the standard alef header as a comment block.
52///
53/// ```text
54/// // This file is auto-generated by alef — DO NOT EDIT.
55/// // To regenerate: alef generate
56/// // To verify freshness: alef verify --exit-code
57/// // Issues & docs: https://github.com/kreuzberg-dev/alef
58/// ```
59pub fn header(style: CommentStyle) -> String {
60    match style {
61        CommentStyle::DoubleSlash => HEADER_BODY.lines().map(|l| format!("// {l}\n")).collect(),
62        CommentStyle::Hash => HEADER_BODY.lines().map(|l| format!("# {l}\n")).collect(),
63        CommentStyle::Block => {
64            let mut out = String::from("/*\n");
65            for line in HEADER_BODY.lines() {
66                out.push_str(&format!(" * {line}\n"));
67            }
68            out.push_str(" */\n");
69            out
70        }
71    }
72}
73
/// The marker string that `inject_hash_line` searches for to decide where the
/// hash line goes. The first line of the text rendered by [`header`] contains it.
76const HEADER_MARKER: &str = "auto-generated by alef";
77
78/// Blake3 hash of a content string, returned as hex.
79///
80/// Used by the IR / language caches and any caller that needs a hash of an
81/// in-memory string. **Not used for the embedded `alef:hash:` header** — that
82/// is computed once per `alef generate` run by [`compute_generation_hash`].
83pub fn hash_content(content: &str) -> String {
84    blake3::hash(content.as_bytes()).to_hex().to_string()
85}
86
87/// Compute the input-deterministic generation hash that alef embeds into the
88/// header of every generated file.
89///
90/// The hash is a fingerprint of everything that determines the output of
91/// `alef generate`:
92///
93/// - All Rust source files alef parses to build the IR (sorted by path so the
94///   order in `alef.toml`'s `[crate].sources` doesn't matter)
95/// - The contents of `alef.toml` itself (any config change → new hash)
96/// - The alef CLI version (`CARGO_PKG_VERSION` of `alef-cli` at build time)
97///
98/// `alef verify` recomputes this hash and compares it against the
99/// `alef:hash:<hex>` line in each generated file. Because the hash never
100/// touches the generated bytes, downstream formatters (rustfmt, rubocop,
101/// spotless, dotnet format, php-cs-fixer, biome, mix format, …) can reformat
102/// alef-generated files freely without breaking verify.
103///
104/// # Errors
105/// Returns an error if any source file or the config file is missing or
106/// unreadable.
107pub fn compute_generation_hash(
108    sources: &[std::path::PathBuf],
109    config_path: &Path,
110    alef_version: &str,
111) -> std::io::Result<String> {
112    let mut hasher = blake3::Hasher::new();
113
114    // Sort by path so the hash is stable regardless of source-file ordering.
115    let mut sorted: Vec<&std::path::PathBuf> = sources.iter().collect();
116    sorted.sort();
117
118    for source in sorted {
119        let content = std::fs::read(source)?;
120        // Mix the path in too — the same content at a different path can
121        // produce different IR (different `rust_path` on extracted types).
122        hasher.update(b"src\0");
123        hasher.update(source.to_string_lossy().as_bytes());
124        hasher.update(b"\0");
125        hasher.update(&content);
126    }
127
128    let config_content = std::fs::read(config_path)?;
129    hasher.update(b"config\0");
130    hasher.update(&config_content);
131
132    hasher.update(b"alef\0");
133    hasher.update(alef_version.as_bytes());
134
135    Ok(hasher.finalize().to_hex().to_string())
136}
137
138/// Inject an `alef:hash:<hex>` line immediately after the first header marker
139/// line found in the first 10 lines.  The comment syntax is inferred from the
140/// marker line itself.
141///
142/// If no marker line is found, the content is returned unchanged.
143pub fn inject_hash_line(content: &str, hash: &str) -> String {
144    let mut result = String::with_capacity(content.len() + 80);
145    let mut injected = false;
146
147    for (i, line) in content.lines().enumerate() {
148        result.push_str(line);
149        result.push('\n');
150
151        if !injected && i < 10 && line.contains(HEADER_MARKER) {
152            let trimmed = line.trim();
153            let hash_line = if trimmed.starts_with("<!--") {
154                // XML comment: inject hash line as XML comment
155                format!("<!-- {HASH_PREFIX}{hash} -->")
156            } else if trimmed.starts_with("//") {
157                format!("// {HASH_PREFIX}{hash}")
158            } else if trimmed.starts_with('#') {
159                format!("# {HASH_PREFIX}{hash}")
160            } else if trimmed.starts_with("/*") || trimmed.starts_with(" *") || trimmed.ends_with("*/") {
161                format!(" * {HASH_PREFIX}{hash}")
162            } else {
163                format!("// {HASH_PREFIX}{hash}")
164            };
165            result.push_str(&hash_line);
166            result.push('\n');
167            injected = true;
168        }
169    }
170
171    // Preserve original trailing-newline behavior.
172    if !content.ends_with('\n') && result.ends_with('\n') {
173        result.pop();
174    }
175
176    result
177}
178
179/// Extract the hash from an `alef:hash:<hex>` token in the first 10 lines.
180pub fn extract_hash(content: &str) -> Option<String> {
181    for (i, line) in content.lines().enumerate() {
182        if i >= 10 {
183            break;
184        }
185        if let Some(pos) = line.find(HASH_PREFIX) {
186            let rest = &line[pos + HASH_PREFIX.len()..];
187            // Trim trailing comment closers and whitespace.
188            let hex = rest.trim().trim_end_matches("*/").trim_end_matches("-->").trim();
189            if !hex.is_empty() {
190                return Some(hex.to_string());
191            }
192        }
193    }
194    None
195}
196
197/// Strip the `alef:hash:` line from content (for fallback comparison).
198pub fn strip_hash_line(content: &str) -> String {
199    let mut result = String::with_capacity(content.len());
200    for line in content.lines() {
201        if line.contains(HASH_PREFIX) {
202            continue;
203        }
204        result.push_str(line);
205        result.push('\n');
206    }
207    // Preserve original trailing-newline behavior.
208    if !content.ends_with('\n') && result.ends_with('\n') {
209        result.pop();
210    }
211    result
212}
213
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;
    use tempfile::tempdir;

    /// Hash `content`, inject that hash into it, and return `(hash, injected)`.
    fn hash_and_inject(content: &str) -> (String, String) {
        let digest = hash_content(content);
        let with_hash = inject_hash_line(content, &digest);
        (digest, with_hash)
    }

    /// Write `content` to `dir/name` and return the resulting path.
    fn write_file(dir: &Path, name: &str, content: &str) -> PathBuf {
        let target = dir.join(name);
        std::fs::write(&target, content).unwrap();
        target
    }

    // ----- header rendering --------------------------------------------------

    #[test]
    fn test_header_double_slash() {
        let rendered = header(CommentStyle::DoubleSlash);
        for expected in [
            "// This file is auto-generated by alef",
            "// Issues & docs: https://github.com/kreuzberg-dev/alef",
        ] {
            assert!(rendered.contains(expected));
        }
    }

    #[test]
    fn test_header_hash() {
        let rendered = header(CommentStyle::Hash);
        assert!(rendered.contains("# This file is auto-generated by alef"));
    }

    #[test]
    fn test_header_block() {
        let rendered = header(CommentStyle::Block);
        assert!(rendered.starts_with("/*\n"));
        assert!(rendered.contains(" * This file is auto-generated by alef"));
        assert!(rendered.ends_with(" */\n"));
    }

    // ----- inject / extract / strip -----------------------------------------

    #[test]
    fn test_inject_and_extract_rust() {
        let content = format!("{}use foo;\n", header(CommentStyle::DoubleSlash));
        let (digest, with_hash) = hash_and_inject(&content);
        assert!(with_hash.contains(HASH_PREFIX));
        assert_eq!(extract_hash(&with_hash), Some(digest));
    }

    #[test]
    fn test_inject_and_extract_python() {
        let content = format!("{}import foo\n", header(CommentStyle::Hash));
        let (digest, with_hash) = hash_and_inject(&content);
        let python_hash_comment = format!("# {HASH_PREFIX}");
        assert!(with_hash.contains(&python_hash_comment));
        assert_eq!(extract_hash(&with_hash), Some(digest));
    }

    #[test]
    fn test_inject_and_extract_c_block() {
        let content = format!("{}#include <stdio.h>\n", header(CommentStyle::Block));
        let (digest, with_hash) = hash_and_inject(&content);
        assert!(with_hash.contains(HASH_PREFIX));
        assert_eq!(extract_hash(&with_hash), Some(digest));
    }

    #[test]
    fn test_inject_php_line2() {
        // The PHP open tag pushes the header marker down to the second line.
        let content = format!("<?php\n{}namespace Foo;\n", header(CommentStyle::DoubleSlash));
        let (digest, with_hash) = hash_and_inject(&content);

        let lines: Vec<&str> = with_hash.lines().collect();
        assert_eq!(lines[0], "<?php");
        assert!(lines[1].contains(HEADER_MARKER));
        assert!(lines.iter().any(|l| l.contains(HASH_PREFIX)));
        assert_eq!(extract_hash(&with_hash), Some(digest));
    }

    #[test]
    fn test_no_header_returns_unchanged() {
        let content = "fn main() {}\n";
        let result = inject_hash_line(content, "abc123");
        assert_eq!(result, content);
        assert_eq!(extract_hash(&result), None);
    }

    #[test]
    fn test_strip_hash_line() {
        let with_hash = "// auto-generated by alef\n// alef:hash:abc123\nuse foo;\n";
        assert_eq!(strip_hash_line(with_hash), "// auto-generated by alef\nuse foo;\n");
    }

    #[test]
    fn test_roundtrip() {
        let original = format!("{}import sys\n", header(CommentStyle::Hash));
        let (digest, with_hash) = hash_and_inject(&original);
        let stripped = strip_hash_line(&with_hash);
        assert_eq!(stripped, original);
        assert_eq!(hash_content(&stripped), digest);
    }

    // ----- compute_generation_hash tests -----------------------------------

    #[test]
    fn generation_hash_stable_across_runs() {
        let tmp = tempdir().unwrap();
        let root = tmp.path();
        let sources = vec![
            write_file(root, "a.rs", "fn a() {}"),
            write_file(root, "b.rs", "fn b() {}"),
        ];
        let cfg = write_file(root, "alef.toml", "name = \"x\"");

        let first = compute_generation_hash(&sources, &cfg, "0.9.0").unwrap();
        let second = compute_generation_hash(&sources, &cfg, "0.9.0").unwrap();
        assert_eq!(first, second, "same inputs must produce same hash");
    }

    #[test]
    fn generation_hash_path_order_independent() {
        let tmp = tempdir().unwrap();
        let root = tmp.path();
        let src_a = write_file(root, "a.rs", "fn a() {}");
        let src_b = write_file(root, "b.rs", "fn b() {}");
        let cfg = write_file(root, "alef.toml", "name = \"x\"");

        let forward = compute_generation_hash(&[src_a.clone(), src_b.clone()], &cfg, "0.9.0").unwrap();
        let reverse = compute_generation_hash(&[src_b, src_a], &cfg, "0.9.0").unwrap();
        assert_eq!(forward, reverse, "source ordering must not affect the hash");
    }

    #[test]
    fn generation_hash_changes_when_alef_version_changes() {
        let tmp = tempdir().unwrap();
        let root = tmp.path();
        let sources = [write_file(root, "a.rs", "fn a() {}")];
        let cfg = write_file(root, "alef.toml", "name = \"x\"");

        let with_old = compute_generation_hash(&sources, &cfg, "0.9.0").unwrap();
        let with_new = compute_generation_hash(&sources, &cfg, "0.9.1").unwrap();
        assert_ne!(with_old, with_new, "different alef versions must produce different hashes");
    }

    #[test]
    fn generation_hash_changes_when_config_changes() {
        let tmp = tempdir().unwrap();
        let root = tmp.path();
        let sources = [write_file(root, "a.rs", "fn a() {}")];
        let cfg_x = write_file(root, "alef-a.toml", "name = \"x\"");
        let cfg_y = write_file(root, "alef-b.toml", "name = \"y\"");

        let with_x = compute_generation_hash(&sources, &cfg_x, "0.9.0").unwrap();
        let with_y = compute_generation_hash(&sources, &cfg_y, "0.9.0").unwrap();
        assert_ne!(with_x, with_y, "different config must produce different hashes");
    }

    #[test]
    fn generation_hash_changes_when_source_content_changes() {
        let tmp = tempdir().unwrap();
        let root = tmp.path();
        let src = write_file(root, "a.rs", "fn a() {}");
        let cfg = write_file(root, "alef.toml", "name = \"x\"");

        let before = compute_generation_hash(std::slice::from_ref(&src), &cfg, "0.9.0").unwrap();
        std::fs::write(&src, "fn a() { let _ = 1; }").unwrap();
        let after = compute_generation_hash(&[src], &cfg, "0.9.0").unwrap();
        assert_ne!(before, after, "modified source must produce different hash");
    }

    #[test]
    fn generation_hash_changes_when_path_changes_even_if_content_same() {
        let tmp = tempdir().unwrap();
        let root = tmp.path();
        let at_root = write_file(root, "a.rs", "fn a() {}");
        std::fs::create_dir_all(root.join("moved")).unwrap();
        let moved = write_file(root, "moved/a.rs", "fn a() {}");
        let cfg = write_file(root, "alef.toml", "name = \"x\"");

        let hash_at_root = compute_generation_hash(&[at_root], &cfg, "0.9.0").unwrap();
        let hash_moved = compute_generation_hash(&[moved], &cfg, "0.9.0").unwrap();
        assert_ne!(
            hash_at_root, hash_moved,
            "same content at a different path can produce different IR (rust_path differs), so the hash must reflect path"
        );
    }

    #[test]
    fn generation_hash_errors_on_missing_source() {
        let tmp = tempdir().unwrap();
        let root = tmp.path();
        let cfg = write_file(root, "alef.toml", "name = \"x\"");
        let missing = root.join("does-not-exist.rs");

        let outcome = compute_generation_hash(&[missing], &cfg, "0.9.0");
        assert!(outcome.is_err(), "missing source must surface as an error");
    }
}