Skip to main content

alef_core/
hash.rs

1//! Content hashing and generated-file headers.
2//!
3//! Every file produced by alef gets a standard header that identifies it as
4//! generated, tells agents/developers how to fix issues, and embeds a blake3
5//! hash so `alef verify` can detect staleness without external state.
6//!
7//! # Hash semantics
8//!
9//! As of alef v0.10.1, the embedded `alef:hash:<hex>` value is a **per-file
10//! source+output fingerprint** produced by [`compute_file_hash`]:
11//!
12//! ```text
13//! blake3(sources_hash || file_content_without_hash_line)
14//! ```
15//!
16//! Where `sources_hash` is [`compute_sources_hash`] over the sorted Rust source
17//! files alef parses to build the IR. The hash deliberately does **not**
18//! include the alef version or `alef.toml`: any input change that affects the
19//! generated bytes is already reflected by hashing the file content itself,
20//! and excluding the alef version makes `alef verify` idempotent across
21//! `alef` upgrades — a CI run on a tagged repo continues to pass after the
22//! alef CLI is bumped, as long as the rust sources and emitted file contents
23//! are unchanged.
24//!
25//! `alef generate` finalises the embedded hash *after* downstream formatters
//! (rustfmt, rubocop, dotnet format, spotless, oxfmt, mix format, php-cs-fixer,
//! …) have run, so the embedded hash describes the actual
28//! on-disk byte-content. `alef verify` reads the file, strips the
29//! `alef:hash:` line, recomputes the same hash, and compares — no
30//! regeneration, no writes.
31//!
32//! Pre-v0.10.1 alef used a single input-deterministic hash that incorporated
33//! the alef CLI version, which forced every consumer repo to re-run
34//! `alef generate` after every alef bump even when nothing else changed.
35
/// Token that precedes the hex digest on the embedded hash line
/// (`alef:hash:<hex>`); used when injecting, extracting and stripping it.
const HASH_PREFIX: &str = "alef:hash:";
/// Regenerate command printed in the header when the config gives no override.
const DEFAULT_REGENERATE_COMMAND: &str = "alef generate";
/// Verify command printed in the header when the config gives no override.
const DEFAULT_VERIFY_COMMAND: &str = "alef verify --exit-code";
/// Issues/docs URL printed in the header when the config gives no override.
const DEFAULT_ISSUES_URL: &str = "https://github.com/kreuzberg-dev/alef";
40
/// Comment style for the generated header.
///
/// `render_header` uses the variant to decide how each line of the header
/// body is wrapped in comment syntax.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CommentStyle {
    /// `// line comment`  (Rust, Go, Java, C#, TypeScript, C, PHP).
    /// Every body line is emitted as `// <line>`.
    DoubleSlash,
    /// `# line comment`   (Python, Ruby, Elixir, R, TOML, Shell, Makefile).
    /// Every body line is emitted as `# <line>`.
    Hash,
    /// `/* block comment */` (C headers).
    /// The body is wrapped in a `/*` line and a ` */` line, with every body
    /// line emitted as ` * <line>` in between.
    Block,
}
51
52/// Return the standard alef header as a comment block.
53///
54/// ```text
55/// // This file is auto-generated by alef — DO NOT EDIT.
56/// // To regenerate: alef generate
57/// // To verify freshness: alef verify --exit-code
58/// // Issues & docs: https://github.com/kreuzberg-dev/alef
59/// ```
60pub fn header(style: CommentStyle) -> String {
61    render_header(style, &default_header_body())
62}
63
64/// Return the standard alef header using metadata from a resolved crate config.
65pub fn header_for_config(style: CommentStyle, config: &crate::config::ResolvedCrateConfig) -> String {
66    let header_config = config.scaffold.as_ref().and_then(|s| s.generated_header.as_ref());
67    let body = match header_config {
68        Some(header) => {
69            let regenerate = header
70                .regenerate_command
71                .as_deref()
72                .unwrap_or(DEFAULT_REGENERATE_COMMAND);
73            let verify = header.verify_command.as_deref().unwrap_or(DEFAULT_VERIFY_COMMAND);
74            let issues_url = header.issues_url.as_deref().unwrap_or(DEFAULT_ISSUES_URL);
75            format!(
76                "This file is auto-generated by alef — DO NOT EDIT.\n\
77To regenerate: {regenerate}\n\
78To verify freshness: {verify}\n\
79Issues & docs: {issues_url}"
80            )
81        }
82        None => default_header_body(),
83    };
84    render_header(style, &body)
85}
86
87fn default_header_body() -> String {
88    format!(
89        "This file is auto-generated by alef — DO NOT EDIT.\n\
90To regenerate: {DEFAULT_REGENERATE_COMMAND}\n\
91To verify freshness: {DEFAULT_VERIFY_COMMAND}\n\
92Issues & docs: {DEFAULT_ISSUES_URL}"
93    )
94}
95
96fn render_header(style: CommentStyle, body: &str) -> String {
97    match style {
98        CommentStyle::DoubleSlash => body.lines().map(|l| format!("// {l}\n")).collect(),
99        CommentStyle::Hash => body.lines().map(|l| format!("# {l}\n")).collect(),
100        CommentStyle::Block => {
101            let mut out = String::from("/*\n");
102            for line in body.lines() {
103                out.push_str(&format!(" * {line}\n"));
104            }
105            out.push_str(" */\n");
106            out
107        }
108    }
109}
110
/// Substring that `inject_hash_line` searches for (within the first 10 lines)
/// to find the header line after which the hash line is inserted.
///
/// The header bodies built in this module start with a sentence containing
/// it. For `CommentStyle::Block` that sentence is the second rendered line,
/// after the opening `/*`.
const HEADER_MARKER: &str = "auto-generated by alef";
114
115/// Blake3 hash of a content string, returned as hex.
116///
117/// Used by the IR / language caches and any caller that needs a hash of an
118/// in-memory string. **Not used for the embedded `alef:hash:` header** — that
119/// is computed by [`compute_file_hash`].
120pub fn hash_content(content: &str) -> String {
121    blake3::hash(content.as_bytes()).to_hex().to_string()
122}
123
124/// Compute a stable hash over the Rust source files that alef extracts.
125///
126/// This is the "source side" of the per-file verify hash. Sources are sorted
127/// by path so the hash is stable regardless of ordering in
128/// `alef.toml`'s `[crate].sources`. The path is mixed in alongside the
129/// content because the same byte-content at a different path produces
130/// different IR (the `rust_path` on extracted types differs).
131///
132/// Used by [`compute_file_hash`]; not by itself the value embedded in any
133/// file header.
134///
135/// # Errors
136/// Returns an error if any source file is missing or unreadable.
137pub fn compute_sources_hash(sources: &[std::path::PathBuf]) -> std::io::Result<String> {
138    let mut hasher = blake3::Hasher::new();
139    let mut sorted: Vec<&std::path::PathBuf> = sources.iter().collect();
140    sorted.sort();
141    for source in sorted {
142        let content = std::fs::read(source)?;
143        hasher.update(b"src\0");
144        hasher.update(source.to_string_lossy().as_bytes());
145        hasher.update(b"\0");
146        hasher.update(&content);
147    }
148    Ok(hasher.finalize().to_hex().to_string())
149}
150
151/// Compute a stable hex-encoded Blake3 hash over all Rust source files
152/// belonging to a [`crate::config::resolved::ResolvedCrateConfig`].
153///
154/// Returns a hex string so callers can feed the result directly to
155/// [`compute_file_hash`], matching [`compute_sources_hash`]'s return type.
156///
157/// The hash covers the union of:
158/// - `crate_cfg.sources` (direct sources on the crate)
159/// - every `source_crates[*].sources` entry
160///
161/// All paths are sorted before hashing so the result is independent of the
162/// order they appear in `alef.toml`.  The path string is mixed in alongside
163/// the file content because the same byte-content at a different path produces
164/// different IR (the `rust_path` on extracted types differs).
165///
166/// # Phase 3 migration note
167///
168/// Phase 3 callers should migrate from the per-file `compute_sources_hash` to
169/// this function when they have a `ResolvedCrateConfig` available, so that
170/// multi-source-crate workspaces produce a single stable hash across all
171/// contributing source files.
172///
173/// # Errors
174///
175/// Returns an error if any source file is missing or unreadable.
176pub fn compute_crate_sources_hash(crate_cfg: &crate::config::resolved::ResolvedCrateConfig) -> std::io::Result<String> {
177    let mut all_sources: Vec<&std::path::PathBuf> = Vec::new();
178
179    for src in &crate_cfg.sources {
180        all_sources.push(src);
181    }
182    for sc in &crate_cfg.source_crates {
183        for src in &sc.sources {
184            all_sources.push(src);
185        }
186    }
187
188    // Stable sort by path so the hash is order-independent.
189    all_sources.sort();
190    all_sources.dedup();
191
192    let mut hasher = blake3::Hasher::new();
193    for source in all_sources {
194        let content = std::fs::read(source)?;
195        hasher.update(b"src\0");
196        hasher.update(source.to_string_lossy().as_bytes());
197        hasher.update(b"\0");
198        hasher.update(&content);
199    }
200    Ok(hasher.finalize().to_hex().to_string())
201}
202
203/// Compute the per-file verify hash that alef embeds in each generated file.
204///
205/// `sources_hash` comes from [`compute_sources_hash`]. `content` is the file
206/// content; any pre-existing `alef:hash:` line is stripped before hashing so
207/// the function is idempotent — calling it on file content that already has a
208/// hash line returns the same value as calling it on the same content with no
209/// hash line. This makes the verify path symmetric with the generate path:
210///
211/// - **Generate**: write the file, run formatters, then call this with the
212///   on-disk content and inject the result.
213/// - **Verify**: read the file, extract the existing hash line, call this
214///   with the on-disk content, compare.
215pub fn compute_file_hash(sources_hash: &str, content: &str) -> String {
216    let stripped = strip_hash_line(content);
217    let mut hasher = blake3::Hasher::new();
218    hasher.update(b"sources\0");
219    hasher.update(sources_hash.as_bytes());
220    hasher.update(b"\0content\0");
221    hasher.update(stripped.as_bytes());
222    hasher.finalize().to_hex().to_string()
223}
224
225/// Inject an `alef:hash:<hex>` line immediately after the first header marker
226/// line found in the first 10 lines.  The comment syntax is inferred from the
227/// marker line itself.
228///
229/// If no marker line is found, the content is returned unchanged.
230pub fn inject_hash_line(content: &str, hash: &str) -> String {
231    let mut result = String::with_capacity(content.len() + 80);
232    let mut injected = false;
233
234    for (i, line) in content.lines().enumerate() {
235        result.push_str(line);
236        result.push('\n');
237
238        if !injected && i < 10 && line.contains(HEADER_MARKER) {
239            let trimmed = line.trim();
240            let hash_line = if trimmed.starts_with("<!--") {
241                // XML comment: inject hash line as XML comment
242                format!("<!-- {HASH_PREFIX}{hash} -->")
243            } else if trimmed.starts_with("//") {
244                format!("// {HASH_PREFIX}{hash}")
245            } else if trimmed.starts_with('#') {
246                format!("# {HASH_PREFIX}{hash}")
247            } else if trimmed.starts_with("/*") || trimmed.starts_with(" *") || trimmed.ends_with("*/") {
248                format!(" * {HASH_PREFIX}{hash}")
249            } else {
250                format!("// {HASH_PREFIX}{hash}")
251            };
252            result.push_str(&hash_line);
253            result.push('\n');
254            injected = true;
255        }
256    }
257
258    // Preserve original trailing-newline behavior.
259    if !content.ends_with('\n') && result.ends_with('\n') {
260        result.pop();
261    }
262
263    result
264}
265
266/// Extract the hash from an `alef:hash:<hex>` token in the first 10 lines.
267pub fn extract_hash(content: &str) -> Option<String> {
268    for (i, line) in content.lines().enumerate() {
269        if i >= 10 {
270            break;
271        }
272        if let Some(pos) = line.find(HASH_PREFIX) {
273            let rest = &line[pos + HASH_PREFIX.len()..];
274            // Trim trailing comment closers and whitespace.
275            let hex = rest.trim().trim_end_matches("*/").trim_end_matches("-->").trim();
276            if !hex.is_empty() {
277                return Some(hex.to_string());
278            }
279        }
280    }
281    None
282}
283
284/// Strip the `alef:hash:` line from content (for fallback comparison).
285pub fn strip_hash_line(content: &str) -> String {
286    let mut result = String::with_capacity(content.len());
287    for line in content.lines() {
288        if line.contains(HASH_PREFIX) {
289            continue;
290        }
291        result.push_str(line);
292        result.push('\n');
293    }
294    // Preserve original trailing-newline behavior.
295    if !content.ends_with('\n') && result.ends_with('\n') {
296        result.pop();
297    }
298    result
299}
300
301#[cfg(test)]
302mod tests {
303    use super::*;
304
305    #[test]
306    fn test_header_double_slash() {
307        let h = header(CommentStyle::DoubleSlash);
308        assert!(h.contains("// This file is auto-generated by alef"));
309        assert!(h.contains("// Issues & docs: https://github.com/kreuzberg-dev/alef"));
310    }
311
312    #[test]
313    fn test_header_for_config_uses_configured_metadata() {
314        let cfg: crate::config::NewAlefConfig = toml::from_str(
315            r#"
316[workspace]
317languages = ["python"]
318
319[workspace.generated_header]
320issues_url = "https://docs.example.invalid/alef"
321regenerate_command = "task generate"
322verify_command = "task verify"
323
324[[crates]]
325name = "demo"
326sources = ["src/lib.rs"]
327"#,
328        )
329        .unwrap();
330        let resolved = cfg.resolve().unwrap().remove(0);
331
332        let h = header_for_config(CommentStyle::DoubleSlash, &resolved);
333
334        assert!(h.contains("// To regenerate: task generate"));
335        assert!(h.contains("// To verify freshness: task verify"));
336        assert!(h.contains("// Issues & docs: https://docs.example.invalid/alef"));
337    }
338
339    #[test]
340    fn test_header_hash() {
341        let h = header(CommentStyle::Hash);
342        assert!(h.contains("# This file is auto-generated by alef"));
343    }
344
345    #[test]
346    fn test_header_block() {
347        let h = header(CommentStyle::Block);
348        assert!(h.starts_with("/*\n"));
349        assert!(h.contains(" * This file is auto-generated by alef"));
350        assert!(h.ends_with(" */\n"));
351    }
352
353    #[test]
354    fn test_inject_and_extract_rust() {
355        let h = header(CommentStyle::DoubleSlash);
356        let content = format!("{h}use foo;\n");
357        let hash = hash_content(&content);
358        let injected = inject_hash_line(&content, &hash);
359        assert!(injected.contains(HASH_PREFIX));
360        assert_eq!(extract_hash(&injected), Some(hash));
361    }
362
363    #[test]
364    fn test_inject_and_extract_python() {
365        let h = header(CommentStyle::Hash);
366        let content = format!("{h}import foo\n");
367        let hash = hash_content(&content);
368        let injected = inject_hash_line(&content, &hash);
369        assert!(injected.contains(&format!("# {HASH_PREFIX}")));
370        assert_eq!(extract_hash(&injected), Some(hash));
371    }
372
373    #[test]
374    fn test_inject_and_extract_c_block() {
375        let h = header(CommentStyle::Block);
376        let content = format!("{h}#include <stdio.h>\n");
377        let hash = hash_content(&content);
378        let injected = inject_hash_line(&content, &hash);
379        assert!(injected.contains(HASH_PREFIX));
380        assert_eq!(extract_hash(&injected), Some(hash));
381    }
382
383    #[test]
384    fn test_inject_php_line2() {
385        let h = header(CommentStyle::DoubleSlash);
386        let content = format!("<?php\n{h}namespace Foo;\n");
387        let hash = hash_content(&content);
388        let injected = inject_hash_line(&content, &hash);
389        let lines: Vec<&str> = injected.lines().collect();
390        assert_eq!(lines[0], "<?php");
391        assert!(lines[1].contains(HEADER_MARKER));
392        assert!(lines.iter().any(|l| l.contains(HASH_PREFIX)));
393        assert_eq!(extract_hash(&injected), Some(hash));
394    }
395
396    #[test]
397    fn test_no_header_returns_unchanged() {
398        let content = "fn main() {}\n";
399        let injected = inject_hash_line(content, "abc123");
400        assert_eq!(injected, content);
401        assert_eq!(extract_hash(&injected), None);
402    }
403
404    #[test]
405    fn test_strip_hash_line() {
406        let content = "// auto-generated by alef\n// alef:hash:abc123\nuse foo;\n";
407        let stripped = strip_hash_line(content);
408        assert_eq!(stripped, "// auto-generated by alef\nuse foo;\n");
409    }
410
411    #[test]
412    fn test_roundtrip() {
413        let h = header(CommentStyle::Hash);
414        let original = format!("{h}import sys\n");
415        let hash = hash_content(&original);
416        let injected = inject_hash_line(&original, &hash);
417        let stripped = strip_hash_line(&injected);
418        assert_eq!(stripped, original);
419        assert_eq!(hash_content(&stripped), hash);
420    }
421
422    // ----- compute_sources_hash / compute_file_hash --------------------------
423
424    use std::path::{Path, PathBuf};
425    use tempfile::tempdir;
426
427    fn write_file(dir: &Path, name: &str, content: &str) -> PathBuf {
428        let path = dir.join(name);
429        std::fs::write(&path, content).unwrap();
430        path
431    }
432
433    #[test]
434    fn sources_hash_changes_when_path_changes_even_if_content_same() {
435        let dir = tempdir().unwrap();
436        let s_a = write_file(dir.path(), "a.rs", "fn a() {}");
437        std::fs::create_dir_all(dir.path().join("moved")).unwrap();
438        let s_b = write_file(dir.path(), "moved/a.rs", "fn a() {}");
439        let h_a = compute_sources_hash(&[s_a]).unwrap();
440        let h_b = compute_sources_hash(&[s_b]).unwrap();
441        assert_ne!(
442            h_a, h_b,
443            "same content at a different path can produce different IR (rust_path differs)"
444        );
445    }
446
447    #[test]
448    fn sources_hash_errors_on_missing_source() {
449        let dir = tempdir().unwrap();
450        let bogus = dir.path().join("does-not-exist.rs");
451        assert!(compute_sources_hash(&[bogus]).is_err());
452    }
453
454    #[test]
455    fn sources_hash_stable_across_runs() {
456        let dir = tempdir().unwrap();
457        let s1 = write_file(dir.path(), "a.rs", "fn a() {}");
458        let s2 = write_file(dir.path(), "b.rs", "fn b() {}");
459        let sources = vec![s1, s2];
460        let h1 = compute_sources_hash(&sources).unwrap();
461        let h2 = compute_sources_hash(&sources).unwrap();
462        assert_eq!(h1, h2);
463    }
464
465    #[test]
466    fn sources_hash_path_order_independent() {
467        let dir = tempdir().unwrap();
468        let s1 = write_file(dir.path(), "a.rs", "fn a() {}");
469        let s2 = write_file(dir.path(), "b.rs", "fn b() {}");
470        let h_forward = compute_sources_hash(&[s1.clone(), s2.clone()]).unwrap();
471        let h_reverse = compute_sources_hash(&[s2, s1]).unwrap();
472        assert_eq!(h_forward, h_reverse);
473    }
474
475    #[test]
476    fn sources_hash_changes_with_content() {
477        let dir = tempdir().unwrap();
478        let s = write_file(dir.path(), "a.rs", "fn a() {}");
479        let h_before = compute_sources_hash(std::slice::from_ref(&s)).unwrap();
480        std::fs::write(&s, "fn a() { let _ = 1; }").unwrap();
481        let h_after = compute_sources_hash(&[s]).unwrap();
482        assert_ne!(h_before, h_after);
483    }
484
485    #[test]
486    fn file_hash_idempotent_under_strip_hash_line() {
487        // The defining property: hash(content with hash line) == hash(content without hash line).
488        // This is what makes the verify path symmetric with the generate path.
489        let sources_hash = "abc123";
490        let bare = "// auto-generated by alef\nfn body() {}\n";
491        let with_line = "// auto-generated by alef\n// alef:hash:deadbeef\nfn body() {}\n";
492
493        let h1 = compute_file_hash(sources_hash, bare);
494        let h2 = compute_file_hash(sources_hash, with_line);
495        assert_eq!(h1, h2, "hash must ignore an existing alef:hash: line");
496    }
497
498    #[test]
499    fn file_hash_changes_when_sources_change() {
500        let content = "// auto-generated by alef\nfn body() {}\n";
501        let h_a = compute_file_hash("sources_a", content);
502        let h_b = compute_file_hash("sources_b", content);
503        assert_ne!(h_a, h_b);
504    }
505
506    #[test]
507    fn file_hash_changes_when_content_changes() {
508        let sources_hash = "abc123";
509        let h_a = compute_file_hash(sources_hash, "fn a() {}\n");
510        let h_b = compute_file_hash(sources_hash, "fn b() {}\n");
511        assert_ne!(h_a, h_b);
512    }
513
514    #[test]
515    fn file_hash_independent_of_alef_version() {
516        // Idempotency property: the hash is purely a function of (sources, content).
517        // Bumping the alef CLI version must not change it. Encoded by the type
518        // signature — there is no version parameter — but make it explicit so
519        // a future regression that re-introduces a version dimension is caught.
520        let h = compute_file_hash("sources_hash", "fn a() {}\n");
521        assert_eq!(h.len(), 64, "blake3 hex output is 64 chars");
522    }
523
    #[test]
    fn crate_sources_hash_differs_across_crates_with_disjoint_sources() {
        use crate::config::resolved::ResolvedCrateConfig;

        let dir = tempdir().unwrap();
        let a = write_file(dir.path(), "a.rs", "fn a() {}");
        let b = write_file(dir.path(), "b.rs", "fn b() {}");

        // Build minimal `ResolvedCrateConfig` values by spelling out every
        // field in a struct literal: `name` and `sources` vary per call, all
        // other fields get a fixed placeholder. (Presumably done this way
        // because the struct offers no builder or `Default` — confirm against
        // its definition.)
        let make_cfg = |name: &str, sources: Vec<std::path::PathBuf>| ResolvedCrateConfig {
            name: name.to_string(),
            sources,
            source_crates: vec![],
            version_from: "Cargo.toml".to_string(),
            core_import: None,
            workspace_root: None,
            skip_core_import: false,
            error_type: None,
            error_constructor: None,
            features: vec![],
            path_mappings: Default::default(),
            extra_dependencies: Default::default(),
            auto_path_mappings: true,
            languages: vec![],
            python: None,
            node: None,
            ruby: None,
            php: None,
            elixir: None,
            wasm: None,
            ffi: None,
            go: None,
            java: None,
            dart: None,
            kotlin: None,
            kotlin_android: None,
            jni: None,
            swift: None,
            gleam: None,
            csharp: None,
            r: None,
            zig: None,
            exclude: Default::default(),
            include: Default::default(),
            output_paths: Default::default(),
            explicit_output: Default::default(),
            lint: Default::default(),
            test: Default::default(),
            setup: Default::default(),
            update: Default::default(),
            clean: Default::default(),
            build_commands: Default::default(),
            generate: Default::default(),
            generate_overrides: Default::default(),
            format: Default::default(),
            format_overrides: Default::default(),
            dto: Default::default(),
            tools: Default::default(),
            opaque_types: Default::default(),
            sync: None,
            citation: None,
            publish: None,
            e2e: None,
            adapters: vec![],
            trait_bridges: vec![],
            scaffold: None,
            readme: None,
            custom_files: Default::default(),
            custom_modules: Default::default(),
            custom_registrations: Default::default(),
        };

        // Two crates whose source lists share no file.
        let cfg_a = make_cfg("alpha", vec![a]);
        let cfg_b = make_cfg("beta", vec![b]);

        let hash_a = compute_crate_sources_hash(&cfg_a).unwrap();
        let hash_b = compute_crate_sources_hash(&cfg_b).unwrap();

        assert_ne!(
            hash_a, hash_b,
            "crates with disjoint sources must produce different hashes"
        );
    }
610
    #[test]
    fn crate_sources_hash_includes_source_crates() {
        use crate::config::{SourceCrate, resolved::ResolvedCrateConfig};

        // Files listed under `source_crates` must feed the hash just like the
        // crate's direct `sources`.
        let dir = tempdir().unwrap();
        let a = write_file(dir.path(), "a.rs", "fn a() {}");
        let b = write_file(dir.path(), "b.rs", "fn b() {}");

        // Builds a config from direct sources plus an optional single
        // `source_crates` entry; an empty second list means no entry at all.
        // Every other field gets a fixed placeholder value.
        let make_cfg =
            |sources: Vec<std::path::PathBuf>, source_crate_sources: Vec<std::path::PathBuf>| -> ResolvedCrateConfig {
                let source_crates = if source_crate_sources.is_empty() {
                    vec![]
                } else {
                    vec![SourceCrate {
                        name: "extra-crate".to_string(),
                        sources: source_crate_sources,
                    }]
                };
                ResolvedCrateConfig {
                    name: "test".to_string(),
                    sources,
                    source_crates,
                    version_from: "Cargo.toml".to_string(),
                    core_import: None,
                    workspace_root: None,
                    skip_core_import: false,
                    error_type: None,
                    error_constructor: None,
                    features: vec![],
                    path_mappings: Default::default(),
                    extra_dependencies: Default::default(),
                    auto_path_mappings: true,
                    languages: vec![],
                    python: None,
                    node: None,
                    ruby: None,
                    php: None,
                    elixir: None,
                    wasm: None,
                    ffi: None,
                    go: None,
                    java: None,
                    dart: None,
                    kotlin: None,
                    kotlin_android: None,
                    jni: None,
                    swift: None,
                    gleam: None,
                    csharp: None,
                    r: None,
                    zig: None,
                    exclude: Default::default(),
                    include: Default::default(),
                    output_paths: Default::default(),
                    explicit_output: Default::default(),
                    lint: Default::default(),
                    test: Default::default(),
                    setup: Default::default(),
                    update: Default::default(),
                    clean: Default::default(),
                    build_commands: Default::default(),
                    generate: Default::default(),
                    generate_overrides: Default::default(),
                    format: Default::default(),
                    format_overrides: Default::default(),
                    dto: Default::default(),
                    tools: Default::default(),
                    opaque_types: Default::default(),
                    sync: None,
                    citation: None,
                    publish: None,
                    e2e: None,
                    adapters: vec![],
                    trait_bridges: vec![],
                    scaffold: None,
                    readme: None,
                    custom_files: Default::default(),
                    custom_modules: Default::default(),
                    custom_registrations: Default::default(),
                }
            };

        // Same direct source `a`; only the second config adds `b` through a
        // `source_crates` entry.
        let cfg_without_extra = make_cfg(vec![a.clone()], vec![]);
        let cfg_with_extra = make_cfg(vec![a.clone()], vec![b.clone()]);

        let hash_without = compute_crate_sources_hash(&cfg_without_extra).unwrap();
        let hash_with = compute_crate_sources_hash(&cfg_with_extra).unwrap();

        assert_ne!(
            hash_without, hash_with,
            "adding a source_crate source file must change the hash"
        );
    }
704
    #[test]
    fn compute_crate_sources_hash_dedupes_overlapping_paths() {
        use crate::config::{SourceCrate, resolved::ResolvedCrateConfig};
        // A source path appearing in both `sources` and a `source_crates` entry
        // (or repeated within `sources`) is hashed once: the hash equals the
        // hash of the same crate config with the duplicates removed.
        let dir = tempdir().unwrap();
        let a = write_file(dir.path(), "a.rs", "fn a() {}");
        let b = write_file(dir.path(), "b.rs", "fn b() {}");

        // Builds a config from direct sources plus an optional single
        // `source_crates` entry; an empty second list means no entry at all.
        // Every other field gets a fixed placeholder value.
        let make_cfg =
            |sources: Vec<std::path::PathBuf>, source_crate_sources: Vec<std::path::PathBuf>| -> ResolvedCrateConfig {
                let source_crates = if source_crate_sources.is_empty() {
                    vec![]
                } else {
                    vec![SourceCrate {
                        name: "extra-crate".to_string(),
                        sources: source_crate_sources,
                    }]
                };
                ResolvedCrateConfig {
                    name: "test".to_string(),
                    sources,
                    source_crates,
                    version_from: "Cargo.toml".to_string(),
                    core_import: None,
                    workspace_root: None,
                    skip_core_import: false,
                    error_type: None,
                    error_constructor: None,
                    features: vec![],
                    path_mappings: Default::default(),
                    extra_dependencies: Default::default(),
                    auto_path_mappings: true,
                    languages: vec![],
                    python: None,
                    node: None,
                    ruby: None,
                    php: None,
                    elixir: None,
                    wasm: None,
                    ffi: None,
                    go: None,
                    java: None,
                    dart: None,
                    kotlin: None,
                    kotlin_android: None,
                    jni: None,
                    swift: None,
                    gleam: None,
                    csharp: None,
                    r: None,
                    zig: None,
                    exclude: Default::default(),
                    include: Default::default(),
                    output_paths: Default::default(),
                    explicit_output: Default::default(),
                    lint: Default::default(),
                    test: Default::default(),
                    setup: Default::default(),
                    update: Default::default(),
                    clean: Default::default(),
                    build_commands: Default::default(),
                    generate: Default::default(),
                    generate_overrides: Default::default(),
                    format: Default::default(),
                    format_overrides: Default::default(),
                    dto: Default::default(),
                    tools: Default::default(),
                    opaque_types: Default::default(),
                    sync: None,
                    citation: None,
                    publish: None,
                    e2e: None,
                    adapters: vec![],
                    trait_bridges: vec![],
                    scaffold: None,
                    readme: None,
                    custom_files: Default::default(),
                    custom_modules: Default::default(),
                    custom_registrations: Default::default(),
                }
            };

        // `sources` lists `a` twice and `source_crates` also references `a`.
        let cfg_with_dupes = make_cfg(vec![a.clone(), a.clone(), b.clone()], vec![a.clone()]);
        // Reference config: the same set of files, each listed exactly once.
        let cfg_unique = make_cfg(vec![a.clone(), b.clone()], vec![]);

        let hash_dup = compute_crate_sources_hash(&cfg_with_dupes).unwrap();
        let hash_unique = compute_crate_sources_hash(&cfg_unique).unwrap();
        assert_eq!(
            hash_dup, hash_unique,
            "duplicate source paths must not affect the per-crate sources hash"
        );
    }
800
#[test]
fn compute_crate_sources_hash_is_order_independent() {
    use crate::config::resolved::ResolvedCrateConfig;

    // The per-crate sources hash must come out the same no matter which
    // order the `sources` entries are listed in. This test exercises three
    // rotations of the same three files.
    let tmp = tempdir().unwrap();
    let file_a = write_file(tmp.path(), "a.rs", "fn a() {}");
    let file_b = write_file(tmp.path(), "b.rs", "fn b() {}");
    let file_c = write_file(tmp.path(), "c.rs", "fn c() {}");

    // Builds a crate config in which only `sources` varies; every other
    // field is pinned to the same fixed value for all configs.
    let config_for = |sources: Vec<std::path::PathBuf>| -> ResolvedCrateConfig {
        ResolvedCrateConfig {
            name: String::from("test"),
            sources,
            source_crates: Vec::new(),
            version_from: String::from("Cargo.toml"),
            core_import: None,
            workspace_root: None,
            skip_core_import: false,
            error_type: None,
            error_constructor: None,
            features: Vec::new(),
            path_mappings: Default::default(),
            extra_dependencies: Default::default(),
            auto_path_mappings: true,
            languages: Vec::new(),
            python: None,
            node: None,
            ruby: None,
            php: None,
            elixir: None,
            wasm: None,
            ffi: None,
            go: None,
            java: None,
            dart: None,
            kotlin: None,
            kotlin_android: None,
            jni: None,
            swift: None,
            gleam: None,
            csharp: None,
            r: None,
            zig: None,
            exclude: Default::default(),
            include: Default::default(),
            output_paths: Default::default(),
            explicit_output: Default::default(),
            lint: Default::default(),
            test: Default::default(),
            setup: Default::default(),
            update: Default::default(),
            clean: Default::default(),
            build_commands: Default::default(),
            generate: Default::default(),
            generate_overrides: Default::default(),
            format: Default::default(),
            format_overrides: Default::default(),
            dto: Default::default(),
            tools: Default::default(),
            opaque_types: Default::default(),
            sync: None,
            citation: None,
            publish: None,
            e2e: None,
            adapters: Vec::new(),
            trait_bridges: Vec::new(),
            scaffold: None,
            readme: None,
            custom_files: Default::default(),
            custom_modules: Default::default(),
            custom_registrations: Default::default(),
        }
    };

    // Same three files, three different listing orders.
    let configs = [
        config_for(vec![file_a.clone(), file_b.clone(), file_c.clone()]),
        config_for(vec![file_c.clone(), file_a.clone(), file_b.clone()]),
        config_for(vec![file_b.clone(), file_c.clone(), file_a.clone()]),
    ];

    // Hash every config up front, then compare neighbours: (1st, 2nd) and
    // (2nd, 3rd). Equality is transitive, so this covers all pairs.
    let hashes: Vec<_> = configs
        .iter()
        .map(|cfg| compute_crate_sources_hash(cfg).unwrap())
        .collect();
    for pair in hashes.windows(2) {
        assert_eq!(pair[0], pair[1], "reordering sources must not change the hash");
    }
}
886
#[test]
fn file_hash_round_trip_via_inject_extract() {
    // End-to-end generate → verify round trip:
    //   generate: hash the hash-less content, then embed that hash in the file;
    //   verify:   read the embedded hash back, re-hash the on-disk content,
    //             and check that both values agree.
    let sources_hash = "abc123";
    let generated = "// auto-generated by alef\nfn body() {}\n";

    // "generate" side.
    let file_hash = compute_file_hash(sources_hash, generated);
    let written = inject_hash_line(generated, &file_hash);

    // "verify" side: the content now carries the hash line.
    let extracted = extract_hash(&written).expect("hash line should be present");
    let recomputed = compute_file_hash(sources_hash, &written);

    assert_eq!(extracted, file_hash);
    assert_eq!(recomputed, file_hash);
    assert_eq!(extracted, recomputed, "verify must reproduce the embedded hash");
}
903}