Skip to main content

alef_core/
hash.rs

1//! Content hashing and generated-file headers.
2//!
3//! Every file produced by alef gets a standard header that identifies it as
4//! generated, tells agents/developers how to fix issues, and embeds a blake3
5//! hash so `alef verify` can detect staleness without external state.
6//!
7//! # Hash semantics
8//!
9//! As of alef v0.10.1, the embedded `alef:hash:<hex>` value is a **per-file
10//! source+output fingerprint** produced by [`compute_file_hash`]:
11//!
12//! ```text
13//! blake3(sources_hash || file_content_without_hash_line)
14//! ```
15//!
16//! Where `sources_hash` is [`compute_sources_hash`] over the sorted Rust source
17//! files alef parses to build the IR. The hash deliberately does **not**
18//! include the alef version or `alef.toml`: any input change that affects the
19//! generated bytes is already reflected by hashing the file content itself,
20//! and excluding the alef version makes `alef verify` idempotent across
21//! `alef` upgrades — a CI run on a tagged repo continues to pass after the
22//! alef CLI is bumped, as long as the rust sources and emitted file contents
23//! are unchanged.
24//!
25//! `alef generate` finalises the embedded hash *after* downstream formatters
//! (rustfmt, rubocop, dotnet format, spotless, oxfmt, mix format, php-cs-fixer,
//! …) have run, so the embedded hash describes the actual
28//! on-disk byte-content. `alef verify` reads the file, strips the
29//! `alef:hash:` line, recomputes the same hash, and compares — no
30//! regeneration, no writes.
31//!
32//! Pre-v0.10.1 alef used a single input-deterministic hash that incorporated
33//! the alef CLI version, which forced every consumer repo to re-run
34//! `alef generate` after every alef bump even when nothing else changed.
35
/// Token that precedes the hex digest on the embedded hash line; the inject,
/// extract and strip helpers in this module all key off it.
const HASH_PREFIX: &str = "alef:hash:";
/// Regenerate command shown in the header when the config does not override it.
const DEFAULT_REGENERATE_COMMAND: &str = "alef generate";
/// Verify command shown in the header when the config does not override it.
const DEFAULT_VERIFY_COMMAND: &str = "alef verify --exit-code";
/// Issues/docs URL shown in the header when the config does not override it.
const DEFAULT_ISSUES_URL: &str = "https://github.com/kreuzberg-dev/alef";
40
/// Comment style for the generated header.
///
/// Selects the comment syntax the header body is wrapped in when it is
/// rendered: a per-line prefix for the two line-comment styles, or a single
/// `/* … */` block for [`CommentStyle::Block`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CommentStyle {
    /// `// line comment`  (Rust, Go, Java, C#, TypeScript, C, PHP)
    DoubleSlash,
    /// `# line comment`   (Python, Ruby, Elixir, R, TOML, Shell, Makefile)
    Hash,
    /// `/* block comment */` (C headers)
    Block,
}
51
52/// Return the standard alef header as a comment block.
53///
54/// ```text
55/// // This file is auto-generated by alef — DO NOT EDIT.
56/// // To regenerate: alef generate
57/// // To verify freshness: alef verify --exit-code
58/// // Issues & docs: https://github.com/kreuzberg-dev/alef
59/// ```
60pub fn header(style: CommentStyle) -> String {
61    render_header(style, &default_header_body())
62}
63
64/// Return the standard alef header using metadata from a resolved crate config.
65pub fn header_for_config(style: CommentStyle, config: &crate::config::ResolvedCrateConfig) -> String {
66    let header_config = config.scaffold.as_ref().and_then(|s| s.generated_header.as_ref());
67    let body = match header_config {
68        Some(header) => {
69            let regenerate = header
70                .regenerate_command
71                .as_deref()
72                .unwrap_or(DEFAULT_REGENERATE_COMMAND);
73            let verify = header.verify_command.as_deref().unwrap_or(DEFAULT_VERIFY_COMMAND);
74            let issues_url = header.issues_url.as_deref().unwrap_or(DEFAULT_ISSUES_URL);
75            format!(
76                "This file is auto-generated by alef — DO NOT EDIT.\n\
77To regenerate: {regenerate}\n\
78To verify freshness: {verify}\n\
79Issues & docs: {issues_url}"
80            )
81        }
82        None => default_header_body(),
83    };
84    render_header(style, &body)
85}
86
87fn default_header_body() -> String {
88    format!(
89        "This file is auto-generated by alef — DO NOT EDIT.\n\
90To regenerate: {DEFAULT_REGENERATE_COMMAND}\n\
91To verify freshness: {DEFAULT_VERIFY_COMMAND}\n\
92Issues & docs: {DEFAULT_ISSUES_URL}"
93    )
94}
95
96fn render_header(style: CommentStyle, body: &str) -> String {
97    match style {
98        CommentStyle::DoubleSlash => body.lines().map(|l| format!("// {l}\n")).collect(),
99        CommentStyle::Hash => body.lines().map(|l| format!("# {l}\n")).collect(),
100        CommentStyle::Block => {
101            let mut out = String::from("/*\n");
102            for line in body.lines() {
103                out.push_str(&format!(" * {line}\n"));
104            }
105            out.push_str(" */\n");
106            out
107        }
108    }
109}
110
/// Marker substring that identifies the alef header line.
///
/// `inject_hash_line` searches for it to decide where the hash line goes.
/// It is part of the first line of every header body built in this module
/// (for the block comment style that line follows the opening `/*`).
const HEADER_MARKER: &str = "auto-generated by alef";
114
115/// Blake3 hash of a content string, returned as hex.
116///
117/// Used by the IR / language caches and any caller that needs a hash of an
118/// in-memory string. **Not used for the embedded `alef:hash:` header** — that
119/// is computed by [`compute_file_hash`].
120pub fn hash_content(content: &str) -> String {
121    blake3::hash(content.as_bytes()).to_hex().to_string()
122}
123
124/// Compute a stable hash over the Rust source files that alef extracts.
125///
126/// This is the "source side" of the per-file verify hash. Sources are sorted
127/// by path so the hash is stable regardless of ordering in
128/// `alef.toml`'s `[crate].sources`. The path is mixed in alongside the
129/// content because the same byte-content at a different path produces
130/// different IR (the `rust_path` on extracted types differs).
131///
132/// Used by [`compute_file_hash`]; not by itself the value embedded in any
133/// file header.
134///
135/// # Errors
136/// Returns an error if any source file is missing or unreadable.
137pub fn compute_sources_hash(sources: &[std::path::PathBuf]) -> std::io::Result<String> {
138    let mut hasher = blake3::Hasher::new();
139    let mut sorted: Vec<&std::path::PathBuf> = sources.iter().collect();
140    sorted.sort();
141    for source in sorted {
142        let content = std::fs::read(source)?;
143        hasher.update(b"src\0");
144        hasher.update(source.to_string_lossy().as_bytes());
145        hasher.update(b"\0");
146        hasher.update(&content);
147    }
148    Ok(hasher.finalize().to_hex().to_string())
149}
150
151/// Compute a stable hex-encoded Blake3 hash over all Rust source files
152/// belonging to a [`crate::config::resolved::ResolvedCrateConfig`].
153///
154/// Returns a hex string so callers can feed the result directly to
155/// [`compute_file_hash`], matching [`compute_sources_hash`]'s return type.
156///
157/// The hash covers the union of:
158/// - `crate_cfg.sources` (direct sources on the crate)
159/// - every `source_crates[*].sources` entry
160///
161/// All paths are sorted before hashing so the result is independent of the
162/// order they appear in `alef.toml`.  The path string is mixed in alongside
163/// the file content because the same byte-content at a different path produces
164/// different IR (the `rust_path` on extracted types differs).
165///
166/// # Phase 3 migration note
167///
168/// Phase 3 callers should migrate from the per-file `compute_sources_hash` to
169/// this function when they have a `ResolvedCrateConfig` available, so that
170/// multi-source-crate workspaces produce a single stable hash across all
171/// contributing source files.
172///
173/// # Errors
174///
175/// Returns an error if any source file is missing or unreadable.
176pub fn compute_crate_sources_hash(crate_cfg: &crate::config::resolved::ResolvedCrateConfig) -> std::io::Result<String> {
177    let mut all_sources: Vec<&std::path::PathBuf> = Vec::new();
178
179    for src in &crate_cfg.sources {
180        all_sources.push(src);
181    }
182    for sc in &crate_cfg.source_crates {
183        for src in &sc.sources {
184            all_sources.push(src);
185        }
186    }
187
188    // Stable sort by path so the hash is order-independent.
189    all_sources.sort();
190    all_sources.dedup();
191
192    let mut hasher = blake3::Hasher::new();
193    for source in all_sources {
194        let content = std::fs::read(source)?;
195        hasher.update(b"src\0");
196        hasher.update(source.to_string_lossy().as_bytes());
197        hasher.update(b"\0");
198        hasher.update(&content);
199    }
200    Ok(hasher.finalize().to_hex().to_string())
201}
202
203/// Compute the per-file verify hash that alef embeds in each generated file.
204///
205/// `sources_hash` comes from [`compute_sources_hash`]. `content` is the file
206/// content; any pre-existing `alef:hash:` line is stripped before hashing so
207/// the function is idempotent — calling it on file content that already has a
208/// hash line returns the same value as calling it on the same content with no
209/// hash line. This makes the verify path symmetric with the generate path:
210///
211/// - **Generate**: write the file, run formatters, then call this with the
212///   on-disk content and inject the result.
213/// - **Verify**: read the file, extract the existing hash line, call this
214///   with the on-disk content, compare.
215pub fn compute_file_hash(sources_hash: &str, content: &str) -> String {
216    let stripped = strip_hash_line(content);
217    let mut hasher = blake3::Hasher::new();
218    hasher.update(b"sources\0");
219    hasher.update(sources_hash.as_bytes());
220    hasher.update(b"\0content\0");
221    hasher.update(stripped.as_bytes());
222    hasher.finalize().to_hex().to_string()
223}
224
225/// Inject an `alef:hash:<hex>` line immediately after the first header marker
226/// line found in the first 10 lines.  The comment syntax is inferred from the
227/// marker line itself.
228///
229/// If no marker line is found, the content is returned unchanged.
230pub fn inject_hash_line(content: &str, hash: &str) -> String {
231    let mut result = String::with_capacity(content.len() + 80);
232    let mut injected = false;
233
234    for (i, line) in content.lines().enumerate() {
235        result.push_str(line);
236        result.push('\n');
237
238        if !injected && i < 10 && line.contains(HEADER_MARKER) {
239            let trimmed = line.trim();
240            let hash_line = if trimmed.starts_with("<!--") {
241                // XML comment: inject hash line as XML comment
242                format!("<!-- {HASH_PREFIX}{hash} -->")
243            } else if trimmed.starts_with("//") {
244                format!("// {HASH_PREFIX}{hash}")
245            } else if trimmed.starts_with('#') {
246                format!("# {HASH_PREFIX}{hash}")
247            } else if trimmed.starts_with("/*") || trimmed.starts_with(" *") || trimmed.ends_with("*/") {
248                format!(" * {HASH_PREFIX}{hash}")
249            } else {
250                format!("// {HASH_PREFIX}{hash}")
251            };
252            result.push_str(&hash_line);
253            result.push('\n');
254            injected = true;
255        }
256    }
257
258    // Preserve original trailing-newline behavior.
259    if !content.ends_with('\n') && result.ends_with('\n') {
260        result.pop();
261    }
262
263    result
264}
265
266/// Extract the hash from an `alef:hash:<hex>` token in the first 10 lines.
267pub fn extract_hash(content: &str) -> Option<String> {
268    for (i, line) in content.lines().enumerate() {
269        if i >= 10 {
270            break;
271        }
272        if let Some(pos) = line.find(HASH_PREFIX) {
273            let rest = &line[pos + HASH_PREFIX.len()..];
274            // Trim trailing comment closers and whitespace.
275            let hex = rest.trim().trim_end_matches("*/").trim_end_matches("-->").trim();
276            if !hex.is_empty() {
277                return Some(hex.to_string());
278            }
279        }
280    }
281    None
282}
283
284/// Strip the `alef:hash:` line from content (for fallback comparison).
285pub fn strip_hash_line(content: &str) -> String {
286    let mut result = String::with_capacity(content.len());
287    for line in content.lines() {
288        if line.contains(HASH_PREFIX) {
289            continue;
290        }
291        result.push_str(line);
292        result.push('\n');
293    }
294    // Preserve original trailing-newline behavior.
295    if !content.ends_with('\n') && result.ends_with('\n') {
296        result.pop();
297    }
298    result
299}
300
301#[cfg(test)]
302mod tests {
303    use super::*;
304
305    #[test]
306    fn test_header_double_slash() {
307        let h = header(CommentStyle::DoubleSlash);
308        assert!(h.contains("// This file is auto-generated by alef"));
309        assert!(h.contains("// Issues & docs: https://github.com/kreuzberg-dev/alef"));
310    }
311
312    #[test]
313    fn test_header_for_config_uses_configured_metadata() {
314        let cfg: crate::config::NewAlefConfig = toml::from_str(
315            r#"
316[workspace]
317languages = ["python"]
318
319[workspace.generated_header]
320issues_url = "https://docs.example.invalid/alef"
321regenerate_command = "task generate"
322verify_command = "task verify"
323
324[[crates]]
325name = "demo"
326sources = ["src/lib.rs"]
327"#,
328        )
329        .unwrap();
330        let resolved = cfg.resolve().unwrap().remove(0);
331
332        let h = header_for_config(CommentStyle::DoubleSlash, &resolved);
333
334        assert!(h.contains("// To regenerate: task generate"));
335        assert!(h.contains("// To verify freshness: task verify"));
336        assert!(h.contains("// Issues & docs: https://docs.example.invalid/alef"));
337    }
338
339    #[test]
340    fn test_header_hash() {
341        let h = header(CommentStyle::Hash);
342        assert!(h.contains("# This file is auto-generated by alef"));
343    }
344
345    #[test]
346    fn test_header_block() {
347        let h = header(CommentStyle::Block);
348        assert!(h.starts_with("/*\n"));
349        assert!(h.contains(" * This file is auto-generated by alef"));
350        assert!(h.ends_with(" */\n"));
351    }
352
353    #[test]
354    fn test_inject_and_extract_rust() {
355        let h = header(CommentStyle::DoubleSlash);
356        let content = format!("{h}use foo;\n");
357        let hash = hash_content(&content);
358        let injected = inject_hash_line(&content, &hash);
359        assert!(injected.contains(HASH_PREFIX));
360        assert_eq!(extract_hash(&injected), Some(hash));
361    }
362
363    #[test]
364    fn test_inject_and_extract_python() {
365        let h = header(CommentStyle::Hash);
366        let content = format!("{h}import foo\n");
367        let hash = hash_content(&content);
368        let injected = inject_hash_line(&content, &hash);
369        assert!(injected.contains(&format!("# {HASH_PREFIX}")));
370        assert_eq!(extract_hash(&injected), Some(hash));
371    }
372
373    #[test]
374    fn test_inject_and_extract_c_block() {
375        let h = header(CommentStyle::Block);
376        let content = format!("{h}#include <stdio.h>\n");
377        let hash = hash_content(&content);
378        let injected = inject_hash_line(&content, &hash);
379        assert!(injected.contains(HASH_PREFIX));
380        assert_eq!(extract_hash(&injected), Some(hash));
381    }
382
383    #[test]
384    fn test_inject_php_line2() {
385        let h = header(CommentStyle::DoubleSlash);
386        let content = format!("<?php\n{h}namespace Foo;\n");
387        let hash = hash_content(&content);
388        let injected = inject_hash_line(&content, &hash);
389        let lines: Vec<&str> = injected.lines().collect();
390        assert_eq!(lines[0], "<?php");
391        assert!(lines[1].contains(HEADER_MARKER));
392        assert!(lines.iter().any(|l| l.contains(HASH_PREFIX)));
393        assert_eq!(extract_hash(&injected), Some(hash));
394    }
395
396    #[test]
397    fn test_no_header_returns_unchanged() {
398        let content = "fn main() {}\n";
399        let injected = inject_hash_line(content, "abc123");
400        assert_eq!(injected, content);
401        assert_eq!(extract_hash(&injected), None);
402    }
403
404    #[test]
405    fn test_strip_hash_line() {
406        let content = "// auto-generated by alef\n// alef:hash:abc123\nuse foo;\n";
407        let stripped = strip_hash_line(content);
408        assert_eq!(stripped, "// auto-generated by alef\nuse foo;\n");
409    }
410
411    #[test]
412    fn test_roundtrip() {
413        let h = header(CommentStyle::Hash);
414        let original = format!("{h}import sys\n");
415        let hash = hash_content(&original);
416        let injected = inject_hash_line(&original, &hash);
417        let stripped = strip_hash_line(&injected);
418        assert_eq!(stripped, original);
419        assert_eq!(hash_content(&stripped), hash);
420    }
421
422    // ----- compute_sources_hash / compute_file_hash --------------------------
423
424    use std::path::{Path, PathBuf};
425    use tempfile::tempdir;
426
427    fn write_file(dir: &Path, name: &str, content: &str) -> PathBuf {
428        let path = dir.join(name);
429        std::fs::write(&path, content).unwrap();
430        path
431    }
432
433    #[test]
434    fn sources_hash_changes_when_path_changes_even_if_content_same() {
435        let dir = tempdir().unwrap();
436        let s_a = write_file(dir.path(), "a.rs", "fn a() {}");
437        std::fs::create_dir_all(dir.path().join("moved")).unwrap();
438        let s_b = write_file(dir.path(), "moved/a.rs", "fn a() {}");
439        let h_a = compute_sources_hash(&[s_a]).unwrap();
440        let h_b = compute_sources_hash(&[s_b]).unwrap();
441        assert_ne!(
442            h_a, h_b,
443            "same content at a different path can produce different IR (rust_path differs)"
444        );
445    }
446
447    #[test]
448    fn sources_hash_errors_on_missing_source() {
449        let dir = tempdir().unwrap();
450        let bogus = dir.path().join("does-not-exist.rs");
451        assert!(compute_sources_hash(&[bogus]).is_err());
452    }
453
454    #[test]
455    fn sources_hash_stable_across_runs() {
456        let dir = tempdir().unwrap();
457        let s1 = write_file(dir.path(), "a.rs", "fn a() {}");
458        let s2 = write_file(dir.path(), "b.rs", "fn b() {}");
459        let sources = vec![s1, s2];
460        let h1 = compute_sources_hash(&sources).unwrap();
461        let h2 = compute_sources_hash(&sources).unwrap();
462        assert_eq!(h1, h2);
463    }
464
465    #[test]
466    fn sources_hash_path_order_independent() {
467        let dir = tempdir().unwrap();
468        let s1 = write_file(dir.path(), "a.rs", "fn a() {}");
469        let s2 = write_file(dir.path(), "b.rs", "fn b() {}");
470        let h_forward = compute_sources_hash(&[s1.clone(), s2.clone()]).unwrap();
471        let h_reverse = compute_sources_hash(&[s2, s1]).unwrap();
472        assert_eq!(h_forward, h_reverse);
473    }
474
475    #[test]
476    fn sources_hash_changes_with_content() {
477        let dir = tempdir().unwrap();
478        let s = write_file(dir.path(), "a.rs", "fn a() {}");
479        let h_before = compute_sources_hash(std::slice::from_ref(&s)).unwrap();
480        std::fs::write(&s, "fn a() { let _ = 1; }").unwrap();
481        let h_after = compute_sources_hash(&[s]).unwrap();
482        assert_ne!(h_before, h_after);
483    }
484
485    #[test]
486    fn file_hash_idempotent_under_strip_hash_line() {
487        // The defining property: hash(content with hash line) == hash(content without hash line).
488        // This is what makes the verify path symmetric with the generate path.
489        let sources_hash = "abc123";
490        let bare = "// auto-generated by alef\nfn body() {}\n";
491        let with_line = "// auto-generated by alef\n// alef:hash:deadbeef\nfn body() {}\n";
492
493        let h1 = compute_file_hash(sources_hash, bare);
494        let h2 = compute_file_hash(sources_hash, with_line);
495        assert_eq!(h1, h2, "hash must ignore an existing alef:hash: line");
496    }
497
498    #[test]
499    fn file_hash_changes_when_sources_change() {
500        let content = "// auto-generated by alef\nfn body() {}\n";
501        let h_a = compute_file_hash("sources_a", content);
502        let h_b = compute_file_hash("sources_b", content);
503        assert_ne!(h_a, h_b);
504    }
505
506    #[test]
507    fn file_hash_changes_when_content_changes() {
508        let sources_hash = "abc123";
509        let h_a = compute_file_hash(sources_hash, "fn a() {}\n");
510        let h_b = compute_file_hash(sources_hash, "fn b() {}\n");
511        assert_ne!(h_a, h_b);
512    }
513
514    #[test]
515    fn file_hash_independent_of_alef_version() {
516        // Idempotency property: the hash is purely a function of (sources, content).
517        // Bumping the alef CLI version must not change it. Encoded by the type
518        // signature — there is no version parameter — but make it explicit so
519        // a future regression that re-introduces a version dimension is caught.
520        let h = compute_file_hash("sources_hash", "fn a() {}\n");
521        assert_eq!(h.len(), 64, "blake3 hex output is 64 chars");
522    }
523
524    #[test]
525    fn crate_sources_hash_differs_across_crates_with_disjoint_sources() {
526        use crate::config::resolved::ResolvedCrateConfig;
527
528        let dir = tempdir().unwrap();
529        let a = write_file(dir.path(), "a.rs", "fn a() {}");
530        let b = write_file(dir.path(), "b.rs", "fn b() {}");
531
532        // Build two minimal ResolvedCrateConfig values using the builder pattern
533        // isn't available, so we construct via serde round-trip from JSON to avoid
534        // requiring Default on the struct.  Instead, use helper that constructs the
535        // minimal required fields directly.
536        let make_cfg = |name: &str, sources: Vec<std::path::PathBuf>| ResolvedCrateConfig {
537            name: name.to_string(),
538            sources,
539            source_crates: vec![],
540            version_from: "Cargo.toml".to_string(),
541            core_import: None,
542            workspace_root: None,
543            skip_core_import: false,
544            error_type: None,
545            error_constructor: None,
546            features: vec![],
547            path_mappings: Default::default(),
548            extra_dependencies: Default::default(),
549            auto_path_mappings: true,
550            languages: vec![],
551            python: None,
552            node: None,
553            ruby: None,
554            php: None,
555            elixir: None,
556            wasm: None,
557            ffi: None,
558            go: None,
559            java: None,
560            dart: None,
561            kotlin: None,
562            kotlin_android: None,
563            swift: None,
564            gleam: None,
565            csharp: None,
566            r: None,
567            zig: None,
568            exclude: Default::default(),
569            include: Default::default(),
570            output_paths: Default::default(),
571            explicit_output: Default::default(),
572            lint: Default::default(),
573            test: Default::default(),
574            setup: Default::default(),
575            update: Default::default(),
576            clean: Default::default(),
577            build_commands: Default::default(),
578            generate: Default::default(),
579            generate_overrides: Default::default(),
580            format: Default::default(),
581            format_overrides: Default::default(),
582            dto: Default::default(),
583            tools: Default::default(),
584            opaque_types: Default::default(),
585            sync: None,
586            publish: None,
587            e2e: None,
588            adapters: vec![],
589            trait_bridges: vec![],
590            scaffold: None,
591            readme: None,
592            custom_files: Default::default(),
593            custom_modules: Default::default(),
594            custom_registrations: Default::default(),
595        };
596
597        let cfg_a = make_cfg("alpha", vec![a]);
598        let cfg_b = make_cfg("beta", vec![b]);
599
600        let hash_a = compute_crate_sources_hash(&cfg_a).unwrap();
601        let hash_b = compute_crate_sources_hash(&cfg_b).unwrap();
602
603        assert_ne!(
604            hash_a, hash_b,
605            "crates with disjoint sources must produce different hashes"
606        );
607    }
608
609    #[test]
610    fn crate_sources_hash_includes_source_crates() {
611        use crate::config::{SourceCrate, resolved::ResolvedCrateConfig};
612
613        let dir = tempdir().unwrap();
614        let a = write_file(dir.path(), "a.rs", "fn a() {}");
615        let b = write_file(dir.path(), "b.rs", "fn b() {}");
616
617        let make_cfg =
618            |sources: Vec<std::path::PathBuf>, source_crate_sources: Vec<std::path::PathBuf>| -> ResolvedCrateConfig {
619                let source_crates = if source_crate_sources.is_empty() {
620                    vec![]
621                } else {
622                    vec![SourceCrate {
623                        name: "extra-crate".to_string(),
624                        sources: source_crate_sources,
625                    }]
626                };
627                ResolvedCrateConfig {
628                    name: "test".to_string(),
629                    sources,
630                    source_crates,
631                    version_from: "Cargo.toml".to_string(),
632                    core_import: None,
633                    workspace_root: None,
634                    skip_core_import: false,
635                    error_type: None,
636                    error_constructor: None,
637                    features: vec![],
638                    path_mappings: Default::default(),
639                    extra_dependencies: Default::default(),
640                    auto_path_mappings: true,
641                    languages: vec![],
642                    python: None,
643                    node: None,
644                    ruby: None,
645                    php: None,
646                    elixir: None,
647                    wasm: None,
648                    ffi: None,
649                    go: None,
650                    java: None,
651                    dart: None,
652                    kotlin: None,
653                    kotlin_android: None,
654                    swift: None,
655                    gleam: None,
656                    csharp: None,
657                    r: None,
658                    zig: None,
659                    exclude: Default::default(),
660                    include: Default::default(),
661                    output_paths: Default::default(),
662                    explicit_output: Default::default(),
663                    lint: Default::default(),
664                    test: Default::default(),
665                    setup: Default::default(),
666                    update: Default::default(),
667                    clean: Default::default(),
668                    build_commands: Default::default(),
669                    generate: Default::default(),
670                    generate_overrides: Default::default(),
671                    format: Default::default(),
672                    format_overrides: Default::default(),
673                    dto: Default::default(),
674                    tools: Default::default(),
675                    opaque_types: Default::default(),
676                    sync: None,
677                    publish: None,
678                    e2e: None,
679                    adapters: vec![],
680                    trait_bridges: vec![],
681                    scaffold: None,
682                    readme: None,
683                    custom_files: Default::default(),
684                    custom_modules: Default::default(),
685                    custom_registrations: Default::default(),
686                }
687            };
688
689        let cfg_without_extra = make_cfg(vec![a.clone()], vec![]);
690        let cfg_with_extra = make_cfg(vec![a.clone()], vec![b.clone()]);
691
692        let hash_without = compute_crate_sources_hash(&cfg_without_extra).unwrap();
693        let hash_with = compute_crate_sources_hash(&cfg_with_extra).unwrap();
694
695        assert_ne!(
696            hash_without, hash_with,
697            "adding a source_crate source file must change the hash"
698        );
699    }
700
701    #[test]
702    fn compute_crate_sources_hash_dedupes_overlapping_paths() {
703        use crate::config::{SourceCrate, resolved::ResolvedCrateConfig};
704        // A source path appearing in both `sources` and a `source_crates` entry
705        // (or repeated within `sources`) is hashed once: the hash equals the
706        // hash of the same crate config with the duplicates removed.
707        let dir = tempdir().unwrap();
708        let a = write_file(dir.path(), "a.rs", "fn a() {}");
709        let b = write_file(dir.path(), "b.rs", "fn b() {}");
710
711        let make_cfg =
712            |sources: Vec<std::path::PathBuf>, source_crate_sources: Vec<std::path::PathBuf>| -> ResolvedCrateConfig {
713                let source_crates = if source_crate_sources.is_empty() {
714                    vec![]
715                } else {
716                    vec![SourceCrate {
717                        name: "extra-crate".to_string(),
718                        sources: source_crate_sources,
719                    }]
720                };
721                ResolvedCrateConfig {
722                    name: "test".to_string(),
723                    sources,
724                    source_crates,
725                    version_from: "Cargo.toml".to_string(),
726                    core_import: None,
727                    workspace_root: None,
728                    skip_core_import: false,
729                    error_type: None,
730                    error_constructor: None,
731                    features: vec![],
732                    path_mappings: Default::default(),
733                    extra_dependencies: Default::default(),
734                    auto_path_mappings: true,
735                    languages: vec![],
736                    python: None,
737                    node: None,
738                    ruby: None,
739                    php: None,
740                    elixir: None,
741                    wasm: None,
742                    ffi: None,
743                    go: None,
744                    java: None,
745                    dart: None,
746                    kotlin: None,
747                    kotlin_android: None,
748                    swift: None,
749                    gleam: None,
750                    csharp: None,
751                    r: None,
752                    zig: None,
753                    exclude: Default::default(),
754                    include: Default::default(),
755                    output_paths: Default::default(),
756                    explicit_output: Default::default(),
757                    lint: Default::default(),
758                    test: Default::default(),
759                    setup: Default::default(),
760                    update: Default::default(),
761                    clean: Default::default(),
762                    build_commands: Default::default(),
763                    generate: Default::default(),
764                    generate_overrides: Default::default(),
765                    format: Default::default(),
766                    format_overrides: Default::default(),
767                    dto: Default::default(),
768                    tools: Default::default(),
769                    opaque_types: Default::default(),
770                    sync: None,
771                    publish: None,
772                    e2e: None,
773                    adapters: vec![],
774                    trait_bridges: vec![],
775                    scaffold: None,
776                    readme: None,
777                    custom_files: Default::default(),
778                    custom_modules: Default::default(),
779                    custom_registrations: Default::default(),
780                }
781            };
782
783        // `sources` lists `a` twice and `source_crates` also references `a`.
784        let cfg_with_dupes = make_cfg(vec![a.clone(), a.clone(), b.clone()], vec![a.clone()]);
785        let cfg_unique = make_cfg(vec![a.clone(), b.clone()], vec![]);
786
787        let hash_dup = compute_crate_sources_hash(&cfg_with_dupes).unwrap();
788        let hash_unique = compute_crate_sources_hash(&cfg_unique).unwrap();
789        assert_eq!(
790            hash_dup, hash_unique,
791            "duplicate source paths must not affect the per-crate sources hash"
792        );
793    }
794
795    #[test]
796    fn compute_crate_sources_hash_is_order_independent() {
797        use crate::config::resolved::ResolvedCrateConfig;
798        // Reordering `sources` (or the entries inside a `source_crates` entry)
799        // does not change the per-crate sources hash.
800        let dir = tempdir().unwrap();
801        let a = write_file(dir.path(), "a.rs", "fn a() {}");
802        let b = write_file(dir.path(), "b.rs", "fn b() {}");
803        let c = write_file(dir.path(), "c.rs", "fn c() {}");
804
805        let make_cfg = |sources: Vec<std::path::PathBuf>| -> ResolvedCrateConfig {
806            ResolvedCrateConfig {
807                name: "test".to_string(),
808                sources,
809                source_crates: vec![],
810                version_from: "Cargo.toml".to_string(),
811                core_import: None,
812                workspace_root: None,
813                skip_core_import: false,
814                error_type: None,
815                error_constructor: None,
816                features: vec![],
817                path_mappings: Default::default(),
818                extra_dependencies: Default::default(),
819                auto_path_mappings: true,
820                languages: vec![],
821                python: None,
822                node: None,
823                ruby: None,
824                php: None,
825                elixir: None,
826                wasm: None,
827                ffi: None,
828                go: None,
829                java: None,
830                dart: None,
831                kotlin: None,
832                kotlin_android: None,
833                swift: None,
834                gleam: None,
835                csharp: None,
836                r: None,
837                zig: None,
838                exclude: Default::default(),
839                include: Default::default(),
840                output_paths: Default::default(),
841                explicit_output: Default::default(),
842                lint: Default::default(),
843                test: Default::default(),
844                setup: Default::default(),
845                update: Default::default(),
846                clean: Default::default(),
847                build_commands: Default::default(),
848                generate: Default::default(),
849                generate_overrides: Default::default(),
850                format: Default::default(),
851                format_overrides: Default::default(),
852                dto: Default::default(),
853                tools: Default::default(),
854                opaque_types: Default::default(),
855                sync: None,
856                publish: None,
857                e2e: None,
858                adapters: vec![],
859                trait_bridges: vec![],
860                scaffold: None,
861                readme: None,
862                custom_files: Default::default(),
863                custom_modules: Default::default(),
864                custom_registrations: Default::default(),
865            }
866        };
867
868        let cfg1 = make_cfg(vec![a.clone(), b.clone(), c.clone()]);
869        let cfg2 = make_cfg(vec![c.clone(), a.clone(), b.clone()]);
870        let cfg3 = make_cfg(vec![b.clone(), c.clone(), a.clone()]);
871
872        let h1 = compute_crate_sources_hash(&cfg1).unwrap();
873        let h2 = compute_crate_sources_hash(&cfg2).unwrap();
874        let h3 = compute_crate_sources_hash(&cfg3).unwrap();
875        assert_eq!(h1, h2, "reordering sources must not change the hash");
876        assert_eq!(h2, h3, "reordering sources must not change the hash");
877    }
878
879    #[test]
880    fn file_hash_round_trip_via_inject_extract() {
881        // Simulate the full generate/verify cycle:
882        // 1. generate: compute hash from stripped content, inject into header
883        // 2. verify: read back, extract hash, recompute from content, compare
884        let sources_hash = "abc123";
885        let raw = "// auto-generated by alef\nfn body() {}\n";
886        let file_hash = compute_file_hash(sources_hash, raw);
887        let on_disk = inject_hash_line(raw, &file_hash);
888
889        let extracted = extract_hash(&on_disk).expect("hash line should be present");
890        let recomputed = compute_file_hash(sources_hash, &on_disk);
891        assert_eq!(extracted, file_hash);
892        assert_eq!(recomputed, file_hash);
893        assert_eq!(extracted, recomputed, "verify must reproduce the embedded hash");
894    }
895}