Skip to main content

alef_core/
hash.rs

1//! Content hashing and generated-file headers.
2//!
3//! Every file produced by alef gets a standard header that identifies it as
4//! generated, tells agents/developers how to fix issues, and embeds a blake3
5//! hash so `alef verify` can detect staleness without external state.
6//!
7//! # Hash semantics
8//!
9//! As of alef v0.10.1, the embedded `alef:hash:<hex>` value is a **per-file
10//! source+output fingerprint** produced by [`compute_file_hash`]:
11//!
12//! ```text
13//! blake3(sources_hash || file_content_without_hash_line)
14//! ```
15//!
16//! Where `sources_hash` is [`compute_sources_hash`] over the sorted Rust source
17//! files alef parses to build the IR. The hash deliberately does **not**
18//! include the alef version or `alef.toml`: any input change that affects the
19//! generated bytes is already reflected by hashing the file content itself,
20//! and excluding the alef version makes `alef verify` idempotent across
21//! `alef` upgrades — a CI run on a tagged repo continues to pass after the
22//! alef CLI is bumped, as long as the rust sources and emitted file contents
23//! are unchanged.
24//!
//! `alef generate` finalises the embedded hash *after* downstream formatters
//! (rustfmt, rubocop, dotnet format, spotless, oxfmt, mix format, php-cs-fixer,
//! …) have run, so the embedded hash describes the actual on-disk
//! byte-content. `alef verify` reads the file, strips the
//! `alef:hash:` line, recomputes the same hash, and compares — no
//! regeneration, no writes.
31//!
32//! Pre-v0.10.1 alef used a single input-deterministic hash that incorporated
33//! the alef CLI version, which forced every consumer repo to re-run
34//! `alef generate` after every alef bump even when nothing else changed.
35
/// Token that introduces the embedded digest on the hash line
/// (`alef:hash:<hex>`).
const HASH_PREFIX: &str = "alef:hash:";
37
/// Text of the standard generated-file header, one logical line per `\n`,
/// with no comment delimiters attached.
///
/// [`header`] wraps each of these lines in language-specific comment syntax.
const HEADER_BODY: &str = concat!(
    "This file is auto-generated by alef — DO NOT EDIT.\n",
    "To regenerate: alef generate\n",
    "To verify freshness: alef verify --exit-code\n",
    "Issues & docs: https://github.com/kreuzberg-dev/alef",
);
45
/// Comment syntax used when rendering the generated-file header.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CommentStyle {
    /// Each header line is prefixed with `// `
    /// (Rust, Go, Java, C#, TypeScript, C, PHP).
    DoubleSlash,
    /// Each header line is prefixed with `# `
    /// (Python, Ruby, Elixir, R, TOML, Shell, Makefile).
    Hash,
    /// The header is wrapped in a single `/* … */` block (C headers).
    Block,
}
56
57/// Return the standard alef header as a comment block.
58///
59/// ```text
60/// // This file is auto-generated by alef — DO NOT EDIT.
61/// // To regenerate: alef generate
62/// // To verify freshness: alef verify --exit-code
63/// // Issues & docs: https://github.com/kreuzberg-dev/alef
64/// ```
65pub fn header(style: CommentStyle) -> String {
66    match style {
67        CommentStyle::DoubleSlash => HEADER_BODY.lines().map(|l| format!("// {l}\n")).collect(),
68        CommentStyle::Hash => HEADER_BODY.lines().map(|l| format!("# {l}\n")).collect(),
69        CommentStyle::Block => {
70            let mut out = String::from("/*\n");
71            for line in HEADER_BODY.lines() {
72                out.push_str(&format!(" * {line}\n"));
73            }
74            out.push_str(" */\n");
75            out
76        }
77    }
78}
79
/// Substring that identifies the alef header line inside a generated file.
///
/// `inject_hash_line` inserts the `alef:hash:` line directly after the first
/// line (among the first 10) that contains this marker. The marker is part of
/// the first line of `HEADER_BODY`; in the file itself it may sit below other
/// lines, such as the `/*` opener of a block comment or a `<?php` tag.
const HEADER_MARKER: &str = "auto-generated by alef";
83
84/// Blake3 hash of a content string, returned as hex.
85///
86/// Used by the IR / language caches and any caller that needs a hash of an
87/// in-memory string. **Not used for the embedded `alef:hash:` header** — that
88/// is computed by [`compute_file_hash`].
89pub fn hash_content(content: &str) -> String {
90    blake3::hash(content.as_bytes()).to_hex().to_string()
91}
92
93/// Compute a stable hash over the Rust source files that alef extracts.
94///
95/// This is the "source side" of the per-file verify hash. Sources are sorted
96/// by path so the hash is stable regardless of ordering in
97/// `alef.toml`'s `[crate].sources`. The path is mixed in alongside the
98/// content because the same byte-content at a different path produces
99/// different IR (the `rust_path` on extracted types differs).
100///
101/// Used by [`compute_file_hash`]; not by itself the value embedded in any
102/// file header.
103///
104/// # Errors
105/// Returns an error if any source file is missing or unreadable.
106pub fn compute_sources_hash(sources: &[std::path::PathBuf]) -> std::io::Result<String> {
107    let mut hasher = blake3::Hasher::new();
108    let mut sorted: Vec<&std::path::PathBuf> = sources.iter().collect();
109    sorted.sort();
110    for source in sorted {
111        let content = std::fs::read(source)?;
112        hasher.update(b"src\0");
113        hasher.update(source.to_string_lossy().as_bytes());
114        hasher.update(b"\0");
115        hasher.update(&content);
116    }
117    Ok(hasher.finalize().to_hex().to_string())
118}
119
120/// Compute a stable hex-encoded Blake3 hash over all Rust source files
121/// belonging to a [`crate::config::resolved::ResolvedCrateConfig`].
122///
123/// Returns a hex string so callers can feed the result directly to
124/// [`compute_file_hash`], matching [`compute_sources_hash`]'s return type.
125///
126/// The hash covers the union of:
127/// - `crate_cfg.sources` (direct sources on the crate)
128/// - every `source_crates[*].sources` entry
129///
130/// All paths are sorted before hashing so the result is independent of the
131/// order they appear in `alef.toml`.  The path string is mixed in alongside
132/// the file content because the same byte-content at a different path produces
133/// different IR (the `rust_path` on extracted types differs).
134///
135/// # Phase 3 migration note
136///
137/// Phase 3 callers should migrate from the per-file `compute_sources_hash` to
138/// this function when they have a `ResolvedCrateConfig` available, so that
139/// multi-source-crate workspaces produce a single stable hash across all
140/// contributing source files.
141///
142/// # Errors
143///
144/// Returns an error if any source file is missing or unreadable.
145pub fn compute_crate_sources_hash(crate_cfg: &crate::config::resolved::ResolvedCrateConfig) -> std::io::Result<String> {
146    let mut all_sources: Vec<&std::path::PathBuf> = Vec::new();
147
148    for src in &crate_cfg.sources {
149        all_sources.push(src);
150    }
151    for sc in &crate_cfg.source_crates {
152        for src in &sc.sources {
153            all_sources.push(src);
154        }
155    }
156
157    // Stable sort by path so the hash is order-independent.
158    all_sources.sort();
159    all_sources.dedup();
160
161    let mut hasher = blake3::Hasher::new();
162    for source in all_sources {
163        let content = std::fs::read(source)?;
164        hasher.update(b"src\0");
165        hasher.update(source.to_string_lossy().as_bytes());
166        hasher.update(b"\0");
167        hasher.update(&content);
168    }
169    Ok(hasher.finalize().to_hex().to_string())
170}
171
172/// Compute the per-file verify hash that alef embeds in each generated file.
173///
174/// `sources_hash` comes from [`compute_sources_hash`]. `content` is the file
175/// content; any pre-existing `alef:hash:` line is stripped before hashing so
176/// the function is idempotent — calling it on file content that already has a
177/// hash line returns the same value as calling it on the same content with no
178/// hash line. This makes the verify path symmetric with the generate path:
179///
180/// - **Generate**: write the file, run formatters, then call this with the
181///   on-disk content and inject the result.
182/// - **Verify**: read the file, extract the existing hash line, call this
183///   with the on-disk content, compare.
184pub fn compute_file_hash(sources_hash: &str, content: &str) -> String {
185    let stripped = strip_hash_line(content);
186    let mut hasher = blake3::Hasher::new();
187    hasher.update(b"sources\0");
188    hasher.update(sources_hash.as_bytes());
189    hasher.update(b"\0content\0");
190    hasher.update(stripped.as_bytes());
191    hasher.finalize().to_hex().to_string()
192}
193
194/// Inject an `alef:hash:<hex>` line immediately after the first header marker
195/// line found in the first 10 lines.  The comment syntax is inferred from the
196/// marker line itself.
197///
198/// If no marker line is found, the content is returned unchanged.
199pub fn inject_hash_line(content: &str, hash: &str) -> String {
200    let mut result = String::with_capacity(content.len() + 80);
201    let mut injected = false;
202
203    for (i, line) in content.lines().enumerate() {
204        result.push_str(line);
205        result.push('\n');
206
207        if !injected && i < 10 && line.contains(HEADER_MARKER) {
208            let trimmed = line.trim();
209            let hash_line = if trimmed.starts_with("<!--") {
210                // XML comment: inject hash line as XML comment
211                format!("<!-- {HASH_PREFIX}{hash} -->")
212            } else if trimmed.starts_with("//") {
213                format!("// {HASH_PREFIX}{hash}")
214            } else if trimmed.starts_with('#') {
215                format!("# {HASH_PREFIX}{hash}")
216            } else if trimmed.starts_with("/*") || trimmed.starts_with(" *") || trimmed.ends_with("*/") {
217                format!(" * {HASH_PREFIX}{hash}")
218            } else {
219                format!("// {HASH_PREFIX}{hash}")
220            };
221            result.push_str(&hash_line);
222            result.push('\n');
223            injected = true;
224        }
225    }
226
227    // Preserve original trailing-newline behavior.
228    if !content.ends_with('\n') && result.ends_with('\n') {
229        result.pop();
230    }
231
232    result
233}
234
235/// Extract the hash from an `alef:hash:<hex>` token in the first 10 lines.
236pub fn extract_hash(content: &str) -> Option<String> {
237    for (i, line) in content.lines().enumerate() {
238        if i >= 10 {
239            break;
240        }
241        if let Some(pos) = line.find(HASH_PREFIX) {
242            let rest = &line[pos + HASH_PREFIX.len()..];
243            // Trim trailing comment closers and whitespace.
244            let hex = rest.trim().trim_end_matches("*/").trim_end_matches("-->").trim();
245            if !hex.is_empty() {
246                return Some(hex.to_string());
247            }
248        }
249    }
250    None
251}
252
253/// Strip the `alef:hash:` line from content (for fallback comparison).
254pub fn strip_hash_line(content: &str) -> String {
255    let mut result = String::with_capacity(content.len());
256    for line in content.lines() {
257        if line.contains(HASH_PREFIX) {
258            continue;
259        }
260        result.push_str(line);
261        result.push('\n');
262    }
263    // Preserve original trailing-newline behavior.
264    if !content.ends_with('\n') && result.ends_with('\n') {
265        result.pop();
266    }
267    result
268}
269
#[cfg(test)]
mod tests {
    use super::*;

    use crate::config::{SourceCrate, resolved::ResolvedCrateConfig};
    use std::path::{Path, PathBuf};
    use tempfile::tempdir;

    // ----- helpers -----------------------------------------------------------

    /// Write `content` to `dir/name` and return the resulting path.
    fn write_file(dir: &Path, name: &str, content: &str) -> PathBuf {
        let path = dir.join(name);
        std::fs::write(&path, content).unwrap();
        path
    }

    /// Build a `ResolvedCrateConfig` for the source-hash tests.
    ///
    /// Only `name`, `sources` and `source_crates` vary between tests; every
    /// other field is set to an empty / `None` / default value. When
    /// `source_crate_sources` is non-empty it becomes the `sources` of a
    /// single `SourceCrate` named `"extra-crate"`; otherwise `source_crates`
    /// is empty.
    fn minimal_crate_cfg(
        name: &str,
        sources: Vec<PathBuf>,
        source_crate_sources: Vec<PathBuf>,
    ) -> ResolvedCrateConfig {
        let source_crates = if source_crate_sources.is_empty() {
            vec![]
        } else {
            vec![SourceCrate {
                name: "extra-crate".to_string(),
                sources: source_crate_sources,
            }]
        };
        ResolvedCrateConfig {
            name: name.to_string(),
            sources,
            source_crates,
            version_from: "Cargo.toml".to_string(),
            core_import: None,
            workspace_root: None,
            skip_core_import: false,
            error_type: None,
            error_constructor: None,
            features: vec![],
            path_mappings: Default::default(),
            extra_dependencies: Default::default(),
            auto_path_mappings: true,
            languages: vec![],
            python: None,
            node: None,
            ruby: None,
            php: None,
            elixir: None,
            wasm: None,
            ffi: None,
            go: None,
            java: None,
            dart: None,
            kotlin: None,
            swift: None,
            csharp: None,
            r: None,
            zig: None,
            exclude: Default::default(),
            include: Default::default(),
            output_paths: Default::default(),
            explicit_output: Default::default(),
            lint: Default::default(),
            test: Default::default(),
            setup: Default::default(),
            update: Default::default(),
            clean: Default::default(),
            build_commands: Default::default(),
            generate: Default::default(),
            generate_overrides: Default::default(),
            format: Default::default(),
            format_overrides: Default::default(),
            dto: Default::default(),
            tools: Default::default(),
            opaque_types: Default::default(),
            sync: None,
            publish: None,
            e2e: None,
            adapters: vec![],
            trait_bridges: vec![],
            scaffold: None,
            readme: None,
            custom_files: Default::default(),
            custom_modules: Default::default(),
            custom_registrations: Default::default(),
        }
    }

    // ----- header / inject / extract / strip ---------------------------------

    #[test]
    fn test_header_double_slash() {
        let h = header(CommentStyle::DoubleSlash);
        assert!(h.contains("// This file is auto-generated by alef"));
        assert!(h.contains("// Issues & docs: https://github.com/kreuzberg-dev/alef"));
    }

    #[test]
    fn test_header_hash() {
        let h = header(CommentStyle::Hash);
        assert!(h.contains("# This file is auto-generated by alef"));
    }

    #[test]
    fn test_header_block() {
        let h = header(CommentStyle::Block);
        assert!(h.starts_with("/*\n"));
        assert!(h.contains(" * This file is auto-generated by alef"));
        assert!(h.ends_with(" */\n"));
    }

    #[test]
    fn test_inject_and_extract_rust() {
        let h = header(CommentStyle::DoubleSlash);
        let content = format!("{h}use foo;\n");
        let hash = hash_content(&content);
        let injected = inject_hash_line(&content, &hash);
        assert!(injected.contains(HASH_PREFIX));
        assert_eq!(extract_hash(&injected), Some(hash));
    }

    #[test]
    fn test_inject_and_extract_python() {
        let h = header(CommentStyle::Hash);
        let content = format!("{h}import foo\n");
        let hash = hash_content(&content);
        let injected = inject_hash_line(&content, &hash);
        assert!(injected.contains(&format!("# {HASH_PREFIX}")));
        assert_eq!(extract_hash(&injected), Some(hash));
    }

    #[test]
    fn test_inject_and_extract_c_block() {
        let h = header(CommentStyle::Block);
        let content = format!("{h}#include <stdio.h>\n");
        let hash = hash_content(&content);
        let injected = inject_hash_line(&content, &hash);
        assert!(injected.contains(HASH_PREFIX));
        assert_eq!(extract_hash(&injected), Some(hash));
    }

    #[test]
    fn test_inject_php_line2() {
        let h = header(CommentStyle::DoubleSlash);
        let content = format!("<?php\n{h}namespace Foo;\n");
        let hash = hash_content(&content);
        let injected = inject_hash_line(&content, &hash);
        let lines: Vec<&str> = injected.lines().collect();
        assert_eq!(lines[0], "<?php");
        assert!(lines[1].contains(HEADER_MARKER));
        assert!(lines.iter().any(|l| l.contains(HASH_PREFIX)));
        assert_eq!(extract_hash(&injected), Some(hash));
    }

    #[test]
    fn test_no_header_returns_unchanged() {
        let content = "fn main() {}\n";
        let injected = inject_hash_line(content, "abc123");
        assert_eq!(injected, content);
        assert_eq!(extract_hash(&injected), None);
    }

    #[test]
    fn test_strip_hash_line() {
        let content = "// auto-generated by alef\n// alef:hash:abc123\nuse foo;\n";
        let stripped = strip_hash_line(content);
        assert_eq!(stripped, "// auto-generated by alef\nuse foo;\n");
    }

    #[test]
    fn test_roundtrip() {
        let h = header(CommentStyle::Hash);
        let original = format!("{h}import sys\n");
        let hash = hash_content(&original);
        let injected = inject_hash_line(&original, &hash);
        let stripped = strip_hash_line(&injected);
        assert_eq!(stripped, original);
        assert_eq!(hash_content(&stripped), hash);
    }

    // ----- compute_sources_hash / compute_file_hash --------------------------

    #[test]
    fn sources_hash_changes_when_path_changes_even_if_content_same() {
        let dir = tempdir().unwrap();
        let s_a = write_file(dir.path(), "a.rs", "fn a() {}");
        std::fs::create_dir_all(dir.path().join("moved")).unwrap();
        let s_b = write_file(dir.path(), "moved/a.rs", "fn a() {}");
        let h_a = compute_sources_hash(&[s_a]).unwrap();
        let h_b = compute_sources_hash(&[s_b]).unwrap();
        assert_ne!(
            h_a, h_b,
            "same content at a different path can produce different IR (rust_path differs)"
        );
    }

    #[test]
    fn sources_hash_errors_on_missing_source() {
        let dir = tempdir().unwrap();
        let bogus = dir.path().join("does-not-exist.rs");
        assert!(compute_sources_hash(&[bogus]).is_err());
    }

    #[test]
    fn sources_hash_stable_across_runs() {
        let dir = tempdir().unwrap();
        let s1 = write_file(dir.path(), "a.rs", "fn a() {}");
        let s2 = write_file(dir.path(), "b.rs", "fn b() {}");
        let sources = vec![s1, s2];
        let h1 = compute_sources_hash(&sources).unwrap();
        let h2 = compute_sources_hash(&sources).unwrap();
        assert_eq!(h1, h2);
    }

    #[test]
    fn sources_hash_path_order_independent() {
        let dir = tempdir().unwrap();
        let s1 = write_file(dir.path(), "a.rs", "fn a() {}");
        let s2 = write_file(dir.path(), "b.rs", "fn b() {}");
        let h_forward = compute_sources_hash(&[s1.clone(), s2.clone()]).unwrap();
        let h_reverse = compute_sources_hash(&[s2, s1]).unwrap();
        assert_eq!(h_forward, h_reverse);
    }

    #[test]
    fn sources_hash_changes_with_content() {
        let dir = tempdir().unwrap();
        let s = write_file(dir.path(), "a.rs", "fn a() {}");
        let h_before = compute_sources_hash(std::slice::from_ref(&s)).unwrap();
        std::fs::write(&s, "fn a() { let _ = 1; }").unwrap();
        let h_after = compute_sources_hash(&[s]).unwrap();
        assert_ne!(h_before, h_after);
    }

    #[test]
    fn file_hash_idempotent_under_strip_hash_line() {
        // The defining property: hash(content with hash line) == hash(content without hash line).
        // This is what makes the verify path symmetric with the generate path.
        let sources_hash = "abc123";
        let bare = "// auto-generated by alef\nfn body() {}\n";
        let with_line = "// auto-generated by alef\n// alef:hash:deadbeef\nfn body() {}\n";

        let h1 = compute_file_hash(sources_hash, bare);
        let h2 = compute_file_hash(sources_hash, with_line);
        assert_eq!(h1, h2, "hash must ignore an existing alef:hash: line");
    }

    #[test]
    fn file_hash_changes_when_sources_change() {
        let content = "// auto-generated by alef\nfn body() {}\n";
        let h_a = compute_file_hash("sources_a", content);
        let h_b = compute_file_hash("sources_b", content);
        assert_ne!(h_a, h_b);
    }

    #[test]
    fn file_hash_changes_when_content_changes() {
        let sources_hash = "abc123";
        let h_a = compute_file_hash(sources_hash, "fn a() {}\n");
        let h_b = compute_file_hash(sources_hash, "fn b() {}\n");
        assert_ne!(h_a, h_b);
    }

    #[test]
    fn file_hash_independent_of_alef_version() {
        // Idempotency property: the hash is purely a function of (sources, content).
        // Bumping the alef CLI version must not change it. Encoded by the type
        // signature — there is no version parameter — but make it explicit so
        // a future regression that re-introduces a version dimension is caught.
        let h = compute_file_hash("sources_hash", "fn a() {}\n");
        assert_eq!(h.len(), 64, "blake3 hex output is 64 chars");
    }

    // ----- compute_crate_sources_hash ----------------------------------------

    #[test]
    fn crate_sources_hash_differs_across_crates_with_disjoint_sources() {
        let dir = tempdir().unwrap();
        let a = write_file(dir.path(), "a.rs", "fn a() {}");
        let b = write_file(dir.path(), "b.rs", "fn b() {}");

        let cfg_a = minimal_crate_cfg("alpha", vec![a], vec![]);
        let cfg_b = minimal_crate_cfg("beta", vec![b], vec![]);

        let hash_a = compute_crate_sources_hash(&cfg_a).unwrap();
        let hash_b = compute_crate_sources_hash(&cfg_b).unwrap();

        assert_ne!(
            hash_a, hash_b,
            "crates with disjoint sources must produce different hashes"
        );
    }

    #[test]
    fn crate_sources_hash_includes_source_crates() {
        let dir = tempdir().unwrap();
        let a = write_file(dir.path(), "a.rs", "fn a() {}");
        let b = write_file(dir.path(), "b.rs", "fn b() {}");

        let cfg_without_extra = minimal_crate_cfg("test", vec![a.clone()], vec![]);
        let cfg_with_extra = minimal_crate_cfg("test", vec![a], vec![b]);

        let hash_without = compute_crate_sources_hash(&cfg_without_extra).unwrap();
        let hash_with = compute_crate_sources_hash(&cfg_with_extra).unwrap();

        assert_ne!(
            hash_without, hash_with,
            "adding a source_crate source file must change the hash"
        );
    }

    #[test]
    fn compute_crate_sources_hash_dedupes_overlapping_paths() {
        // A source path appearing in both `sources` and a `source_crates` entry
        // (or repeated within `sources`) is hashed once: the hash equals the
        // hash of the same crate config with the duplicates removed.
        let dir = tempdir().unwrap();
        let a = write_file(dir.path(), "a.rs", "fn a() {}");
        let b = write_file(dir.path(), "b.rs", "fn b() {}");

        // `sources` lists `a` twice and `source_crates` also references `a`.
        let cfg_with_dupes = minimal_crate_cfg("test", vec![a.clone(), a.clone(), b.clone()], vec![a.clone()]);
        let cfg_unique = minimal_crate_cfg("test", vec![a, b], vec![]);

        let hash_dup = compute_crate_sources_hash(&cfg_with_dupes).unwrap();
        let hash_unique = compute_crate_sources_hash(&cfg_unique).unwrap();
        assert_eq!(
            hash_dup, hash_unique,
            "duplicate source paths must not affect the per-crate sources hash"
        );
    }

    #[test]
    fn compute_crate_sources_hash_is_order_independent() {
        // Reordering `sources` does not change the per-crate sources hash.
        let dir = tempdir().unwrap();
        let a = write_file(dir.path(), "a.rs", "fn a() {}");
        let b = write_file(dir.path(), "b.rs", "fn b() {}");
        let c = write_file(dir.path(), "c.rs", "fn c() {}");

        let cfg1 = minimal_crate_cfg("test", vec![a.clone(), b.clone(), c.clone()], vec![]);
        let cfg2 = minimal_crate_cfg("test", vec![c.clone(), a.clone(), b.clone()], vec![]);
        let cfg3 = minimal_crate_cfg("test", vec![b, c, a], vec![]);

        let h1 = compute_crate_sources_hash(&cfg1).unwrap();
        let h2 = compute_crate_sources_hash(&cfg2).unwrap();
        let h3 = compute_crate_sources_hash(&cfg3).unwrap();
        assert_eq!(h1, h2, "reordering sources must not change the hash");
        assert_eq!(h2, h3, "reordering sources must not change the hash");
    }

    // ----- generate / verify round trip --------------------------------------

    #[test]
    fn file_hash_round_trip_via_inject_extract() {
        // Simulate the full generate/verify cycle:
        // 1. generate: compute hash from stripped content, inject into header
        // 2. verify: read back, extract hash, recompute from content, compare
        let sources_hash = "abc123";
        let raw = "// auto-generated by alef\nfn body() {}\n";
        let file_hash = compute_file_hash(sources_hash, raw);
        let on_disk = inject_hash_line(raw, &file_hash);

        let extracted = extract_hash(&on_disk).expect("hash line should be present");
        let recomputed = compute_file_hash(sources_hash, &on_disk);
        assert_eq!(extracted, file_hash);
        assert_eq!(recomputed, file_hash);
        assert_eq!(extracted, recomputed, "verify must reproduce the embedded hash");
    }
}