// alef 0.25.12
//
// Opinionated polyglot binding generator for Rust libraries
// Documentation
//! Content hashing and generated-file headers.
//!
//! Every file produced by alef gets a standard header that identifies it as
//! generated, tells agents/developers how to fix issues, and embeds a blake3
//! hash so `alef verify` can detect staleness without external state.
//!
//! # Hash semantics
//!
//! As of alef v0.21.0, the embedded `alef:hash:<hex>` value is a
//! **generation-inputs fingerprint** produced by [`compute_inputs_hash`]:
//!
//! ```text
//! blake3(
//!   "alef:inputs\0"
//!   || ALEF_REV || "\0"
//!   || sources_hash || "\0"
//!   || alef_toml_bytes
//! )
//! ```
//!
//! Where `sources_hash` is [`compute_sources_hash`] over the sorted Rust source
//! files alef parses to build the IR, and `alef_toml_bytes` is the raw content
//! of the `alef.toml` file. The hash answers **"was this file generated from the
//! current alef inputs?"** — post-generation formatter drift (rustfmt, ruff,
//! rumdl-fmt, oxfmt, etc.) is irrelevant because the hash is not derived from
//! the emitted file content.
//!
//! `alef verify` re-derives the same inputs hash from the current `alef.toml`
//! and Rust sources, mixes in nothing from the generated file's content, and
//! compares the result to the hash line embedded in that file — pure
//! read+compare, no regeneration, no writes.
//!
//! # Migration from v0.10.1 — v0.20.x
//!
//! Pre-v0.21.0 alef embedded `blake3(sources_hash || file_content_without_hash_line)`.
//! Any file regenerated with v0.21.0+ will carry a new hash value; `alef verify`
//! from v0.21.0+ rejects old-format hashes. Run `alef generate` once after
//! upgrading to stamp all files with the new inputs hash.

/// Token that precedes the hex digest on the embedded hash line
/// (`alef:hash:<hex>`); `inject_hash_line` writes it, `extract_hash` and
/// `strip_hash_line` search for it.
const HASH_PREFIX: &str = "alef:hash:";
/// Regenerate command shown in the header when the config does not override it.
const DEFAULT_REGENERATE_COMMAND: &str = "alef generate";
/// Verify command shown in the header when the config does not override it.
const DEFAULT_VERIFY_COMMAND: &str = "alef verify --exit-code";

/// Comment style for the generated header.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CommentStyle {
    /// `// line comment`  (Rust, Go, Java, C#, TypeScript, C, PHP)
    DoubleSlash,
    /// `# line comment`   (Python, Ruby, Elixir, R, TOML, Shell, Makefile)
    Hash,
    /// `/* block comment */` (C headers)
    Block,
}

/// Build the default alef "generated file" banner in the requested comment style.
///
/// The banner uses the built-in regenerate/verify commands and carries no
/// issues/docs link. With [`CommentStyle::DoubleSlash`] it renders as:
///
/// ```text
/// // This file is auto-generated by alef — DO NOT EDIT.
/// // To regenerate: alef generate
/// // To verify freshness: alef verify --exit-code
/// ```
pub fn header(style: CommentStyle) -> String {
    let body = default_header_body();
    render_header(style, &body)
}

/// Build the alef banner for a specific crate, honouring header overrides from
/// its resolved config.
///
/// Each piece is resolved independently:
///
/// * regenerate / verify commands — the value from
///   `scaffold.generated_header` when present, otherwise the built-in default;
/// * issues/docs link — `scaffold.generated_header.issues_url` when present,
///   otherwise whatever `configured_header_url` finds in the package metadata
///   (possibly nothing, in which case the link line is omitted).
pub fn header_for_config(style: CommentStyle, config: &crate::core::config::ResolvedCrateConfig) -> String {
    // `Option<&_>` is `Copy`, so the same handle can be consulted once per field.
    let overrides = config.scaffold.as_ref().and_then(|s| s.generated_header.as_ref());

    let regenerate = overrides
        .and_then(|h| h.regenerate_command.as_deref())
        .unwrap_or(DEFAULT_REGENERATE_COMMAND);
    let verify = overrides
        .and_then(|h| h.verify_command.as_deref())
        .unwrap_or(DEFAULT_VERIFY_COMMAND);
    let issues_url = overrides
        .and_then(|h| h.issues_url.as_deref())
        .or_else(|| configured_header_url(config));

    render_header(style, &header_body(regenerate, verify, issues_url))
}

/// Assemble the uncommented header text, one statement per line and without a
/// trailing newline.
///
/// The first three lines are always present; an `Issues & docs:` line is
/// appended only when `issues_url` is `Some`.
fn header_body(regenerate: &str, verify: &str, issues_url: Option<&str>) -> String {
    let mut lines = vec![
        String::from("This file is auto-generated by alef — DO NOT EDIT."),
        format!("To regenerate: {regenerate}"),
        format!("To verify freshness: {verify}"),
    ];
    if let Some(url) = issues_url {
        lines.push(format!("Issues & docs: {url}"));
    }
    lines.join("\n")
}

/// Pick the link to advertise in the header from the crate's package metadata.
///
/// Prefers the `issues` URL and falls back to `documentation`; yields `None`
/// when there is no package metadata or neither field is set.
fn configured_header_url(config: &crate::core::config::ResolvedCrateConfig) -> Option<&str> {
    let metadata = config.package_metadata.as_ref()?;
    metadata.issues.as_deref().or(metadata.documentation.as_deref())
}

/// Header text built from the default regenerate/verify commands, with no
/// issues/docs link line.
fn default_header_body() -> String {
    let no_link: Option<&str> = None;
    header_body(DEFAULT_REGENERATE_COMMAND, DEFAULT_VERIFY_COMMAND, no_link)
}

/// Wrap each line of `body` in the comment syntax selected by `style`.
///
/// Line styles emit `<prefix> <line>\n` per body line. The block style
/// additionally emits a leading `/*` line and a trailing ` */` line, even when
/// `body` is empty.
fn render_header(style: CommentStyle, body: &str) -> String {
    // (text emitted before the lines, per-line prefix, text emitted after the lines)
    let (opener, prefix, closer) = match style {
        CommentStyle::DoubleSlash => ("", "// ", ""),
        CommentStyle::Hash => ("", "# ", ""),
        CommentStyle::Block => ("/*\n", " * ", " */\n"),
    };

    let mut rendered = String::from(opener);
    for text in body.lines() {
        rendered.push_str(prefix);
        rendered.push_str(text);
        rendered.push('\n');
    }
    rendered.push_str(closer);
    rendered
}

/// Marker substring that `inject_hash_line` searches for (within the first 10
/// lines) to decide where the hash line goes. This is the wording produced by
/// the standard header ("auto-generated by alef"). Note that `extract_hash`
/// does not use the marker; it searches for `HASH_PREFIX` directly.
const HEADER_MARKER: &str = "auto-generated by alef";
/// Alternative marker wording, "Generated by alef", recognized for custom
/// headers (Swift, Kotlin, Dart, Gleam, Zig, JNI).
const ALT_HEADER_MARKER: &str = "Generated by alef";

/// Hex-encoded Blake3 digest of an in-memory string.
///
/// Used by the IR / language caches and any caller that needs a hash of an
/// in-memory string. **Not used for the embedded `alef:hash:` header** — that
/// value is the generation-inputs fingerprint produced by
/// [`compute_inputs_hash`].
pub fn hash_content(content: &str) -> String {
    let digest = blake3::hash(content.as_bytes());
    digest.to_hex().to_string()
}

/// Fingerprint the Rust source files that alef extracts from.
///
/// Paths are sorted before hashing, so the result does not depend on the order
/// of `alef.toml`'s `[crate].sources`. For each file the hasher receives
/// `"src\0" || path || "\0" || file bytes`; the path is mixed in because the
/// same bytes at a different path produce different IR (the `rust_path` on
/// extracted types differs). Paths are hashed via their lossy UTF-8 form.
///
/// The result is an input to [`compute_inputs_hash`] (and to the legacy
/// [`compute_file_hash`]); it is not itself the value embedded in any file
/// header.
///
/// # Errors
/// Returns an error if any source file is missing or unreadable.
pub fn compute_sources_hash(sources: &[std::path::PathBuf]) -> std::io::Result<String> {
    let mut ordered: Vec<&std::path::PathBuf> = sources.iter().collect();
    ordered.sort();

    let mut hasher = blake3::Hasher::new();
    for path in ordered {
        // Read first so an I/O failure returns before anything else happens for this file.
        let bytes = std::fs::read(path)?;
        hasher
            .update(b"src\0")
            .update(path.to_string_lossy().as_bytes())
            .update(b"\0")
            .update(&bytes);
    }
    Ok(hasher.finalize().to_hex().to_string())
}

/// Fingerprint every Rust source file belonging to a
/// [`crate::core::config::resolved::ResolvedCrateConfig`], returned as a
/// hex-encoded Blake3 digest.
///
/// The return type matches [`compute_sources_hash`], so the result can be
/// passed straight to [`compute_inputs_hash`] as its `sources_hash` argument.
///
/// The hash covers the union of:
/// - `crate_cfg.sources` (direct sources on the crate)
/// - every `source_crates[*].sources` entry
///
/// Paths are sorted and de-duplicated before hashing, so the result is
/// independent of the order in which they appear in `alef.toml` and a path
/// listed more than once is hashed only once. The path string is mixed in
/// alongside the file content because the same byte-content at a different
/// path produces different IR (the `rust_path` on extracted types differs).
///
/// # Phase 3 migration note
///
/// Phase 3 callers should migrate from the per-file `compute_sources_hash` to
/// this function when they have a `ResolvedCrateConfig` available, so that
/// multi-source-crate workspaces produce a single stable hash across all
/// contributing source files.
///
/// # Errors
///
/// Returns an error if any source file is missing or unreadable.
pub fn compute_crate_sources_hash(
    crate_cfg: &crate::core::config::resolved::ResolvedCrateConfig,
) -> std::io::Result<String> {
    // Gather direct sources first, then those of every contributing source crate.
    let mut paths: Vec<&std::path::PathBuf> = Vec::new();
    paths.extend(&crate_cfg.sources);
    for source_crate in &crate_cfg.source_crates {
        paths.extend(&source_crate.sources);
    }

    // Sorting makes the hash order-independent and places duplicates next to
    // each other so `dedup` can drop them.
    paths.sort();
    paths.dedup();

    let mut hasher = blake3::Hasher::new();
    for path in paths {
        let bytes = std::fs::read(path)?;
        hasher
            .update(b"src\0")
            .update(path.to_string_lossy().as_bytes())
            .update(b"\0")
            .update(&bytes);
    }
    Ok(hasher.finalize().to_hex().to_string())
}

/// Compute the generation-inputs fingerprint that alef embeds in each
/// generated file.
///
/// The digest is
/// `blake3("alef:inputs\0" || ALEF_REV || "\0" || sources_hash || "\0" || alef_toml_bytes)`,
/// hex-encoded. It covers the alef revision, the Rust source fingerprint, and
/// the raw `alef.toml` bytes. It does **not** include the emitted file
/// content, so post-generation formatter rewrites (rustfmt, ruff, rumdl-fmt,
/// oxfmt, …) never invalidate the embedded hash.
///
/// - **Generate**: compute once per run, inject into every generated file header.
/// - **Verify**: re-derive from the current inputs, compare to the embedded line.
///   No file content is read or hashed — pure input comparison.
///
/// # Arguments
///
/// * `sources_hash` — output of [`compute_sources_hash`] or
///   [`compute_crate_sources_hash`] for the crate being generated.
/// * `alef_toml_bytes` — raw bytes of the `alef.toml` config file. Pass an
///   empty slice when the config path is unavailable (e.g. in tests); the hash
///   will still change when `sources_hash` or `ALEF_REV` changes.
pub fn compute_inputs_hash(sources_hash: &str, alef_toml_bytes: &[u8]) -> String {
    let revision = crate::core::template_versions::precommit::ALEF_REV;

    let mut hasher = blake3::Hasher::new();
    hasher
        .update(b"alef:inputs\0") // domain-separation tag
        .update(revision.as_bytes())
        .update(b"\0")
        .update(sources_hash.as_bytes())
        .update(b"\0")
        .update(alef_toml_bytes);
    hasher.finalize().to_hex().to_string()
}

/// Compute the legacy, content-derived per-file hash.
///
/// Kept for internal use by tests that verify the old content-derived hash
/// semantics. New callers should use [`compute_inputs_hash`].
///
/// `sources_hash` comes from [`compute_sources_hash`]. `content` is the file
/// content; every `alef:hash:` line is removed with [`strip_hash_line`] before
/// hashing, so hashing a file with or without its hash line gives the same
/// digest.
#[doc(hidden)]
pub fn compute_file_hash(sources_hash: &str, content: &str) -> String {
    let body_without_hash = strip_hash_line(content);

    let mut hasher = blake3::Hasher::new();
    hasher
        .update(b"sources\0")
        .update(sources_hash.as_bytes())
        .update(b"\0content\0")
        .update(body_without_hash.as_bytes());
    hasher.finalize().to_hex().to_string()
}

/// Inject an `alef:hash:<hex>` line immediately after the first header marker
/// line found in the first 10 lines.
///
/// The comment syntax of the injected line is inferred from the marker line
/// (after trimming surrounding whitespace), checked in this order:
///
/// | marker line                         | injected line              |
/// |-------------------------------------|----------------------------|
/// | starts with `<!--`                  | `<!-- alef:hash:<hex> -->` |
/// | starts with `//`                    | `// alef:hash:<hex>`       |
/// | starts with `#`                     | `# alef:hash:<hex>`        |
/// | ends with `*/` (comment closes here)| `/* alef:hash:<hex> */`    |
/// | starts with `/*` or `*`             | ` * alef:hash:<hex>`       |
/// | anything else                       | `// alef:hash:<hex>`       |
///
/// A marker line that closes its block comment on the same line gets a
/// self-contained `/* … */` hash comment: a bare ` * …` continuation line would
/// land outside the already-closed comment and break the generated file.
/// [`extract_hash`] reads both forms.
///
/// If no marker line is found, the content is returned unchanged apart from
/// line endings: lines are re-joined with `\n`, and a missing trailing newline
/// stays missing. The function does not check for an existing hash line.
pub fn inject_hash_line(content: &str, hash: &str) -> String {
    // Room for the original text plus one hash line.
    let mut result = String::with_capacity(content.len() + 80);
    let mut injected = false;

    for (i, line) in content.lines().enumerate() {
        result.push_str(line);
        result.push('\n');

        let is_marker = i < 10 && (line.contains(HEADER_MARKER) || line.contains(ALT_HEADER_MARKER));
        if injected || !is_marker {
            continue;
        }

        let trimmed = line.trim();
        let hash_line = if trimmed.starts_with("<!--") {
            // XML comment: inject hash line as XML comment
            format!("<!-- {HASH_PREFIX}{hash} -->")
        } else if trimmed.starts_with("//") {
            format!("// {HASH_PREFIX}{hash}")
        } else if trimmed.starts_with('#') {
            format!("# {HASH_PREFIX}{hash}")
        } else if trimmed.ends_with("*/") {
            // The block comment ends on the marker line, so the hash needs its
            // own complete comment.
            format!("/* {HASH_PREFIX}{hash} */")
        } else if trimmed.starts_with("/*") || trimmed.starts_with('*') {
            // Still inside an open block comment: continue it.
            format!(" * {HASH_PREFIX}{hash}")
        } else {
            format!("// {HASH_PREFIX}{hash}")
        };
        result.push_str(&hash_line);
        result.push('\n');
        injected = true;
    }

    // Preserve original trailing-newline behavior.
    if !content.ends_with('\n') && result.ends_with('\n') {
        result.pop();
    }

    result
}

/// Extract the hash from an `alef:hash:<hex>` token in the first 10 lines.
pub fn extract_hash(content: &str) -> Option<String> {
    for (i, line) in content.lines().enumerate() {
        if i >= 10 {
            break;
        }
        if let Some(pos) = line.find(HASH_PREFIX) {
            let rest = &line[pos + HASH_PREFIX.len()..];
            // Trim trailing comment closers and whitespace.
            let hex = rest.trim().trim_end_matches("*/").trim_end_matches("-->").trim();
            if !hex.is_empty() {
                return Some(hex.to_string());
            }
        }
    }
    None
}

/// Strip the `alef:hash:` line from content (for fallback comparison).
///
/// Every line containing the `alef:hash:` token is dropped, wherever it occurs
/// in the content. Remaining lines are re-joined with `\n`; the result ends
/// with a newline only if the input did.
pub fn strip_hash_line(content: &str) -> String {
    let mut kept = String::with_capacity(content.len());
    for line in content.lines().filter(|l| !l.contains(HASH_PREFIX)) {
        kept.push_str(line);
        kept.push('\n');
    }
    // Preserve original trailing-newline behavior.
    if kept.ends_with('\n') && !content.ends_with('\n') {
        kept.pop();
    }
    kept
}

// Unit tests for this module; compiled only for test builds and kept in a
// separate `tests` module file rather than inline.
#[cfg(test)]
mod tests;