islands-build 0.1.3

Layout-agnostic build pipeline for islands.rs apps: WASM bundling, the V8 module-namespace patch, per-page CSS, and content-hash manifests. Composed by a thin xtask in any workspace.
Documentation
//! The content-hash post-build pass: SHA-256 every emitted asset, rewrite
//! cross-asset references inside JS, rename files to `<stem>.<hash>.<ext>`, and
//! write `manifest.json`. Skipped entirely when `plan.manifest` is false (the
//! logical-path / dev story).

use std::collections::{BTreeMap, BTreeSet, HashMap};
use std::fs;
use std::path::{Path, PathBuf};

use anyhow::{bail, Context, Result};
use sha2::{Digest, Sha256};
use walkdir::WalkDir;

use crate::config::BuildPlan;
use crate::manifest::{BuildManifest, ManifestBundleEntry};

/// Build the content-hashed form of `original` by splicing `.<hash>` in just
/// ahead of the first `.` of the name. A trailing `_bg.wasm` is kept together as
/// one unit, so the hash lands immediately before it. Names without any `.`
/// simply get `.<hash>` appended.
fn hashed_basename(original: &str, hash: &str) -> String {
    if let Some(stem) = original.strip_suffix("_bg.wasm") {
        format!("{stem}.{hash}_bg.wasm")
    } else if let Some((stem, rest)) = original.split_once('.') {
        // `rest` excludes the dot that was split on, so it is re-added here.
        format!("{stem}.{hash}.{rest}")
    } else {
        format!("{original}.{hash}")
    }
}

/// Hash `bytes` with SHA-256 and return the first four digest bytes rendered
/// as eight hex characters.
fn sha256_prefix(bytes: &[u8]) -> String {
    let digest = Sha256::digest(bytes);
    hex::encode(&digest[..4])
}

/// Whether a basename already carries an 8-hex-char content-hash segment (a
/// product of a previous pass that must be cleaned before the next one).
///
/// The name is split on `.` and every segment *after the first* is tested,
/// with a trailing `_bg` ignored so `<stem>.<hash>_bg.wasm` is recognised. The
/// first segment is the stem: [`hashed_basename`] always inserts the hash after
/// it, so a stem that merely looks like a hash (`deadbeef.js`) is not treated
/// as a stale product.
fn basename_is_already_hashed(basename: &str) -> bool {
    basename.split('.').skip(1).any(|part| {
        let bare = part.strip_suffix("_bg").unwrap_or(part);
        // Only lowercase hex digits count as a hash segment.
        bare.len() == 8 && bare.bytes().all(|byte| matches!(byte, b'0'..=b'9' | b'a'..=b'f'))
    })
}

/// Collect `.js`/`.wasm`/`.css` files under `static_dir`, skipping `manifest.json`
/// and any file under a `snippets/` directory (their import-module name is baked
/// unhashed into the importing WASM, so hashing them would break the import).
///
/// Returns `(full path, basename)` pairs. Entries the directory walk cannot
/// read and files whose name is not valid UTF-8 are skipped silently.
fn collect_static_assets(static_dir: &Path) -> Result<Vec<(PathBuf, String)>> {
    let mut assets = Vec::new();
    for entry in WalkDir::new(static_dir)
        .into_iter()
        .filter_map(Result::ok)
        .filter(|entry| entry.file_type().is_file())
    {
        let path = entry.path();
        let Some(file_name) = path.file_name().and_then(|name| name.to_str()) else {
            continue;
        };
        if file_name == "manifest.json" {
            continue;
        }
        // Only look for `snippets` *below* `static_dir`: an ancestor of the
        // output directory that happens to be called `snippets` must not cause
        // every asset to be skipped.
        let relative = path.strip_prefix(static_dir).unwrap_or(path);
        if relative.components().any(|component| component.as_os_str() == "snippets") {
            continue;
        }
        let extension = path.extension().and_then(|ext| ext.to_str()).unwrap_or("");
        if matches!(extension, "js" | "wasm" | "css") {
            assets.push((path.to_path_buf(), file_name.to_owned()));
        }
    }
    Ok(assets)
}

/// Remove stale already-hashed files from a previous pass so they don't pollute
/// the rename map.
///
/// Deletes every `.js`/`.wasm`/`.css` file under `static_dir` whose basename
/// passes [`basename_is_already_hashed`]. `manifest.json` and anything under a
/// `snippets/` directory are left alone: [`collect_static_assets`] never hashes
/// snippet files, so a hash-looking name there is not a product of this pass.
///
/// # Errors
/// Fails if a matching file cannot be removed.
fn remove_stale_hashed(static_dir: &Path) -> Result<()> {
    for entry in WalkDir::new(static_dir)
        .into_iter()
        .filter_map(Result::ok)
        .filter(|entry| entry.file_type().is_file())
    {
        let path = entry.path();
        let Some(file_name) = path.file_name().and_then(|name| name.to_str()) else {
            continue;
        };
        if file_name == "manifest.json" {
            continue;
        }
        // Mirror the collection rule, looking only at components below
        // `static_dir` so ancestors of the output directory are ignored.
        let relative = path.strip_prefix(static_dir).unwrap_or(path);
        if relative.components().any(|component| component.as_os_str() == "snippets") {
            continue;
        }
        let extension = path.extension().and_then(|ext| ext.to_str()).unwrap_or("");
        if matches!(extension, "js" | "wasm" | "css") && basename_is_already_hashed(file_name) {
            fs::remove_file(path)
                .with_context(|| format!("remove stale hashed asset {}", path.display()))?;
        }
    }
    Ok(())
}

/// Run the content-hash pass over `plan.out_dir` and write `manifest.json`.
///
/// Steps, in order: delete stale hashed files, collect the assets, compute the
/// original → hashed basename map, rewrite references inside JS files, rename
/// every asset, write the manifest, and finally check that no JS file still
/// mentions an unhashed basename.
///
/// Hashes are taken from the bytes on disk *before* JS references are
/// rewritten.
/// NOTE(review): this means a JS file's hash does not change when only an
/// asset it references changes — confirm this is acceptable for caching.
///
/// # Errors
/// Fails on any I/O error, on a hashed-name collision, when the manifest
/// cannot be assembled or serialized, or when the final sanity check fails.
pub fn post_build_hashing_pass(plan: &BuildPlan) -> Result<()> {
    let static_dir = &plan.out_dir;

    remove_stale_hashed(static_dir)?;
    let assets = collect_static_assets(static_dir)?;

    // Build the original → hashed basename rename map.
    let mut rename_map: HashMap<String, String> = HashMap::with_capacity(assets.len());
    for (path, basename) in &assets {
        let bytes = fs::read(path).with_context(|| format!("read {}", path.display()))?;
        let hashed = hashed_basename(basename, &sha256_prefix(&bytes));
        // The map is keyed by basename only, so same-named files in different
        // directories share one entry. Surface the case where their contents
        // differ instead of silently keeping whichever was read last.
        if let Some(previous) = rename_map.insert(basename.clone(), hashed.clone()) {
            if previous != hashed {
                eprintln!(
                    "[islands-build] warning: basename '{basename}' occurs more than once with \
                     different contents; every copy will be renamed to '{hashed}' (replacing \
                     '{previous}')"
                );
            }
        }
    }

    // No two distinct sources should hash to the same name.
    let mut seen: BTreeSet<&str> = BTreeSet::new();
    for hashed in rename_map.values() {
        if !seen.insert(hashed.as_str()) {
            bail!("hash collision: two assets produced the same hashed basename '{hashed}'");
        }
    }
    eprintln!("[islands-build] computed {} asset hashes; no collisions", rename_map.len());

    rewrite_js_references(&assets, &rename_map)?;
    rename_assets(&assets, &rename_map)?;

    let manifest = build_manifest(static_dir, plan)?;
    let manifest_json = serde_json::to_string_pretty(&manifest).context("serialize manifest")?;
    let manifest_path = static_dir.join("manifest.json");
    fs::write(&manifest_path, &manifest_json)
        .with_context(|| format!("write {}", manifest_path.display()))?;
    eprintln!("[islands-build] wrote {}", manifest_path.display());

    verify_no_unhashed_references(static_dir, &rename_map)?;
    eprintln!("[islands-build] hashing pass sanity checks passed");
    Ok(())
}

/// Rewrite original basenames to their hashed form inside every `.js` asset,
/// in place, by handing each file's text and the rename pairs to
/// [`crate::patch::rewrite_basenames_in_js`].
///
/// NOTE(review): restoring the runtime importObject key to its unhashed form is
/// presumably done inside that helper — confirm against `crate::patch`.
///
/// The pairs are passed longest-original-first (ties broken lexicographically)
/// so the order is the same on every run and a basename that is a substring of
/// another (`app.js` inside `my_app.js`) is never offered before the longer one.
///
/// # Errors
/// Fails if a JS file cannot be read as UTF-8 text or written back.
fn rewrite_js_references(
    assets: &[(PathBuf, String)],
    rename_map: &HashMap<String, String>,
) -> Result<()> {
    let mut rename_pairs: Vec<(String, String)> = rename_map
        .iter()
        .map(|(original, hashed)| (original.clone(), hashed.clone()))
        .collect();
    // `HashMap` iteration order is arbitrary; pin it down. Keys are unique, so
    // an unstable sort is sufficient.
    rename_pairs.sort_unstable_by(|left, right| {
        right.0.len().cmp(&left.0.len()).then_with(|| left.0.cmp(&right.0))
    });
    for (path, basename) in assets {
        if !basename.ends_with(".js") {
            continue;
        }
        let content =
            fs::read_to_string(path).with_context(|| format!("read JS file {}", path.display()))?;
        let rewritten = crate::patch::rewrite_basenames_in_js(&content, &rename_pairs);
        fs::write(path, &rewritten)
            .with_context(|| format!("write rewritten JS {}", path.display()))?;
    }
    eprintln!("[islands-build] rewrote cross-asset references in JS files");
    Ok(())
}

/// Move each collected asset to its hashed basename, keeping it in the same
/// directory. Stops at the first asset that has no rename entry, has no parent
/// directory, or cannot be renamed.
fn rename_assets(
    assets: &[(PathBuf, String)],
    rename_map: &HashMap<String, String>,
) -> Result<()> {
    assets.iter().try_for_each(|(source, original_name)| -> Result<()> {
        let new_name = rename_map
            .get(original_name)
            .with_context(|| format!("missing rename entry for {original_name}"))?;
        let directory = source
            .parent()
            .with_context(|| format!("no parent for {}", source.display()))?;
        let destination = directory.join(new_name);
        fs::rename(source, &destination)
            .with_context(|| format!("rename {} → {}", source.display(), destination.display()))
    })?;
    eprintln!("[islands-build] renamed all assets to hashed filenames");
    Ok(())
}

/// Sanity: after renaming, no JS file still references an original (unhashed)
/// basename — except the runtime importObject key, which is intentionally
/// preserved to match the WASM's compiled import-module name.
///
/// Every `.js` asset under `static_dir` is scanned line by line. Lines that
/// contain both [`crate::patch::PATCH_TOKEN`] and `islands_core.js` are exempt;
/// any other line mentioning a key of `rename_map` fails the check.
///
/// # Errors
/// Fails if a JS file cannot be read, or with a "sanity failure" message naming
/// the file and one leftover basename.
fn verify_no_unhashed_references(
    static_dir: &Path,
    rename_map: &HashMap<String, String>,
) -> Result<()> {
    let originals: Vec<&str> = rename_map.keys().map(String::as_str).collect();
    for (path, basename) in collect_static_assets(static_dir)? {
        if !basename.ends_with(".js") {
            continue;
        }
        let content =
            fs::read_to_string(&path).with_context(|| format!("read final JS {}", path.display()))?;
        // Single pass over the lines: skip the runtime importObject-key line(s)
        // and look for any original basename on the rest. Basenames never span
        // lines, so this matches a whole-file search with those lines removed,
        // without copying the file.
        let leftover = content
            .lines()
            .filter(|line| {
                !(line.contains(crate::patch::PATCH_TOKEN) && line.contains("islands_core.js"))
            })
            .find_map(|line| originals.iter().find(|original| line.contains(**original)));
        if let Some(original) = leftover {
            bail!(
                "sanity failure: {} still references unhashed basename '{}' after rewrite",
                path.display(),
                original
            );
        }
    }
    Ok(())
}

/// Walk `static_dir` after renaming and assemble the manifest, keyed by each
/// page's `bundle_key`.
///
/// Every `.js`/`.wasm`/`.css` file (except `manifest.json`) is grouped by its
/// directory relative to `static_dir`. Both the directory key and the stored
/// file path use `/` separators, so the manifest reads the same on every
/// platform. Each group is sorted, so the file chosen for a bundle does not
/// depend on directory-walk order.
///
/// # Errors
/// Fails on a non-UTF-8 path, or when `islands-core`, `css`, or any page's
/// bundle directory lacks a required file.
fn build_manifest(static_dir: &Path, plan: &BuildPlan) -> Result<BuildManifest> {
    let mut by_dir: BTreeMap<String, Vec<String>> = BTreeMap::new();
    for entry in WalkDir::new(static_dir)
        .min_depth(1)
        .into_iter()
        .filter_map(Result::ok)
        .filter(|entry| entry.file_type().is_file())
    {
        let path = entry.path();
        let Some(file_name) = path.file_name().and_then(|name| name.to_str()) else {
            continue;
        };
        if file_name == "manifest.json" {
            continue;
        }
        let extension = path.extension().and_then(|ext| ext.to_str()).unwrap_or("");
        if !matches!(extension, "js" | "wasm" | "css") {
            continue;
        }
        let relative = path
            .strip_prefix(static_dir)
            .with_context(|| format!("{} not under {}", path.display(), static_dir.display()))?;
        let parent_relative = relative
            .parent()
            .with_context(|| format!("no parent for {}", relative.display()))?;
        let bundle_key = parent_relative
            .to_str()
            .with_context(|| format!("non-UTF-8 directory: {}", parent_relative.display()))?
            .replace('\\', "/");
        // Normalize separators exactly like `bundle_key` above so the paths
        // written to the manifest are not platform-dependent.
        let relative_string = relative
            .to_str()
            .with_context(|| format!("non-UTF-8 path: {}", relative.display()))?
            .replace('\\', "/");
        by_dir.entry(bundle_key).or_default().push(relative_string);
    }

    // Directory-walk order is unspecified; sort so "first match" lookups below
    // are deterministic.
    for files in by_dir.values_mut() {
        files.sort_unstable();
    }

    let mut pages: BTreeMap<String, ManifestBundleEntry> = BTreeMap::new();
    for page in &plan.pages {
        let entry = extract_bundle_entry(&by_dir, &page.bundle_key, true)?;
        pages.insert(page.bundle_key.clone(), entry);
    }

    Ok(BuildManifest {
        islands_core: extract_bundle_entry(&by_dir, "islands-core", false)?,
        css_base: extract_css_base(&by_dir)?,
        pages,
    })
}

/// Look up the files recorded for bundle directory `dir_name` and pick the
/// first `.js` and first `.wasm` path, plus the first `.css` path when
/// `include_css` is set (otherwise `css` is `None`).
///
/// Fails if the directory has no entry in `by_dir` or a required file kind is
/// missing.
fn extract_bundle_entry(
    by_dir: &BTreeMap<String, Vec<String>>,
    dir_name: &str,
    include_css: bool,
) -> Result<ManifestBundleEntry> {
    let files = by_dir
        .get(dir_name)
        .with_context(|| format!("no files under static/{dir_name} after hashing"))?;

    // One lookup shared by all three file kinds; `suffix` includes the dot.
    let first_with_suffix = |suffix: &str| -> Result<String> {
        files
            .iter()
            .find(|candidate| candidate.ends_with(suffix))
            .cloned()
            .with_context(|| format!("no {suffix} in static/{dir_name}"))
    };

    let js = first_with_suffix(".js")?;
    let wasm = first_with_suffix(".wasm")?;
    let css = include_css.then(|| first_with_suffix(".css")).transpose()?;
    Ok(ManifestBundleEntry { js, wasm, css })
}

/// Extract the hashed `static/css/base.<hash>.css` path.
///
/// Among the files recorded under the `css` directory, a `.css` file whose
/// file name starts with `base.` is preferred. If none exists, the first `.css`
/// path that merely contains `base` is used, which keeps looser layouts
/// working while stopping names like `database.css` from shadowing the real
/// base stylesheet.
///
/// # Errors
/// Fails if there is no `css` entry in `by_dir` or no matching stylesheet.
fn extract_css_base(by_dir: &BTreeMap<String, Vec<String>>) -> Result<String> {
    /// Last path component, accepting either separator style.
    fn file_name_of(path: &str) -> &str {
        path.rsplit(|c| c == '/' || c == '\\').next().unwrap_or(path)
    }

    let files = by_dir
        .get("css")
        .with_context(|| "no files under static/css after hashing")?;
    files
        .iter()
        .find(|path| path.ends_with(".css") && file_name_of(path.as_str()).starts_with("base."))
        .or_else(|| {
            files
                .iter()
                .find(|path| path.contains("base") && path.ends_with(".css"))
        })
        .with_context(|| "no base.css under static/css after hashing")
        .cloned()
}