tower-serve-embedded-build 0.1.0

Build-time helper for tower-serve-embedded: walks an asset directory, content-hashes each file, and generates the embedded manifest and `asset!` macro. Use it from your build.rs.
Documentation
//! Build-time helper for [`tower-serve-embedded`](https://docs.rs/tower-serve-embedded).
//!
//! Call this from your crate's `build.rs`. It walks an asset directory, content-hashes every file
//! with BLAKE3, and writes a generated Rust file to `OUT_DIR` containing the embedded manifest
//! (`ASSETS`) and a compile-time `asset!` macro. Pull that into your crate with
//! [`tower_serve_embedded::embed!()`](https://docs.rs/tower-serve-embedded).
//!
//! ```ignore
//! // build.rs
//! fn main() {
//!     tower_serve_embedded_build::Builder::new("assets").emit().unwrap();
//! }
//! ```
//!
//! Asset paths are expressed **relative to the crate root**, so the embedded URL mirrors the
//! file's location in your project (`assets/css/style.css` → `/assets/css/style.<hash>.css`).
//! Hidden files and directories (names starting with `.`) and symlinks are ignored.

use std::env;
use std::fs;
use std::io;
use std::path::{Path, PathBuf};

/// Configures and runs asset embedding from a `build.rs`.
///
/// Create one with [`Builder::new`], optionally adjust it with [`Builder::ignore_dir`] and
/// [`Builder::hash_length`], then call [`Builder::emit`].
#[derive(Debug, Clone)]
pub struct Builder {
    /// Asset directory as passed to [`Builder::new`]; `emit` joins it onto `CARGO_MANIFEST_DIR`.
    dir: PathBuf,
    /// Number of hex characters of each content hash to keep (clamped to `1..=64` by
    /// [`Builder::hash_length`]).
    hash_len: usize,
    /// Directories to skip, as `/`-separated paths relative to `dir`, stored without leading or
    /// trailing slashes.
    ignore: Vec<String>,
}

impl Builder {
    /// Embed every file under `dir`, resolved relative to `CARGO_MANIFEST_DIR` (your crate root).
    ///
    /// Files keep their crate-root-relative paths, so embedding `assets/` makes
    /// `assets/css/style.css` available as `asset!("assets/css/style.css")`, served at
    /// `/assets/css/style.<hash>.css`. The cache-busting hash defaults to 16 hex chars.
    #[must_use]
    pub fn new(dir: impl Into<PathBuf>) -> Self {
        Self {
            dir: dir.into(),
            hash_len: 16,
            ignore: Vec::new(),
        }
    }

    /// Skip a directory (and everything under it), given as a path **relative to the embedded
    /// directory** passed to [`Builder::new`].
    ///
    /// Handy for vendored dependencies you don't want embedded, e.g. `.ignore_dir("lib")` to skip
    /// `assets/lib`. A leading or trailing slash is accepted (`"/lib"`, `"lib/"`); matching is an
    /// exact path, not a glob. Call it multiple times to ignore several directories. Ignored
    /// directories aren't watched for changes either.
    ///
    /// An argument that is empty after trimming slashes is dropped rather than stored.
    #[must_use]
    pub fn ignore_dir(mut self, dir: impl AsRef<str>) -> Self {
        let normalized = dir.as_ref().trim_matches('/').to_string();
        if !normalized.is_empty() {
            self.ignore.push(normalized);
        }
        self
    }

    /// Number of hex characters of the BLAKE3 hash to embed in filenames and ETags (default 16,
    /// i.e. 64 bits — clamped to 1..=64).
    #[must_use]
    pub fn hash_length(mut self, len: usize) -> Self {
        self.hash_len = len.clamp(1, 64);
        self
    }

    /// Walk the directory, hash the files, and write the generated code to `OUT_DIR`.
    ///
    /// Also emits `cargo:rerun-if-changed` lines so a content change refreshes the hashes and
    /// added/removed files are picked up — pair it with `cargo watch` for hot reload.
    ///
    /// A missing asset directory is not an error: a `cargo:warning` is printed, the missing path
    /// is still watched, and an empty manifest is generated.
    ///
    /// # Errors
    ///
    /// Returns an error if `CARGO_MANIFEST_DIR` or `OUT_DIR` cannot be read, if walking the
    /// asset directory or reading a file fails, or if the generated file cannot be written.
    pub fn emit(self) -> io::Result<()> {
        let manifest_dir = PathBuf::from(env_var("CARGO_MANIFEST_DIR")?);
        let root = manifest_dir.join(&self.dir);
        let out_dir = PathBuf::from(env_var("OUT_DIR")?);

        // Re-run if the build script itself changes (emitting any rerun-if-changed line opts out
        // of cargo's default "rerun if any package file changed" behaviour).
        println!("cargo:rerun-if-changed=build.rs");

        let mut files = Vec::new();
        let mut dirs = Vec::new();
        if root.is_dir() {
            // `root` here doubles as the base for `ignore_dir` matching (relative to the embedded
            // directory); file `logical_path`s are taken relative to the crate root below.
            collect(&root, &root, &self.ignore, &mut files, &mut dirs)?;
        } else {
            // Watch the missing path as well. Without this only `build.rs` is watched, so
            // creating the directory later would never re-run the script and the assets would
            // silently stay un-embedded.
            println!("cargo:rerun-if-changed={}", root.display());
            println!(
                "cargo:warning=tower-serve-embedded: asset directory {} not found",
                root.display()
            );
        }

        // rerun-if-changed for every directory (catches added/removed files) and every file
        // (catches content edits, which change the hash).
        // NOTE(review): Cargo documents that a watched directory is scanned recursively, which
        // would also cover ignored and hidden entries beneath it — confirm against the claim in
        // `ignore_dir`'s docs.
        for dir in &dirs {
            println!("cargo:rerun-if-changed={}", dir.display());
        }

        let mut assets: Vec<Asset> = Vec::with_capacity(files.len());
        for abs in &files {
            println!("cargo:rerun-if-changed={}", abs.display());
            let bytes = fs::read(abs)?;
            // Relative to the crate root, so the URL mirrors the project layout.
            let logical = logical_path(&manifest_dir, abs);
            let hash = hash_hex(&bytes, self.hash_len);
            let path = hashed_path(&logical, &hash);
            let content_type = mime_guess::from_path(abs)
                .first_or_octet_stream()
                .to_string();
            assets.push(Asset {
                abs: abs.clone(),
                logical,
                path,
                hash,
                content_type,
            });
        }

        // Sort by served path so `Assets::get` can binary-search.
        assets.sort_by(|a, b| a.path.cmp(&b.path));

        let code = generate(&assets);
        fs::write(out_dir.join("embed_assets.rs"), code)?;
        Ok(())
    }
}

/// One embedded file, as gathered by `Builder::emit` and rendered by `generate`.
struct Asset {
    /// Path of the file on disk; `generate` passes it to `include_bytes!`.
    abs: PathBuf,
    /// Crate-root-relative path with `/` separators; the key matched by the generated `asset!`
    /// macro.
    logical: String,
    /// Served path with the hash inserted (the output of `hashed_path`); `emit` sorts by it.
    path: String,
    /// Truncated hex content hash (the output of `hash_hex`); also used to build the ETag.
    hash: String,
    /// MIME type string guessed from the file path.
    content_type: String,
}

/// Render the Rust source written to `OUT_DIR`: the hidden `__TSE_FILES` table, the public
/// `ASSETS` static wrapping it, and the exported `asset!` macro.
///
/// Entries are emitted in the order of `assets`; every asset-derived value is passed through
/// `lit` so it lands in the output as a string literal.
fn generate(assets: &[Asset]) -> String {
    let mut code = String::from("// @generated by tower-serve-embedded-build. Do not edit.\n");

    // The table of embedded files, one struct literal per asset.
    code += "#[doc(hidden)]\n";
    code += "static __TSE_FILES: &[::tower_serve_embedded::EmbeddedFile] = &[\n";
    for asset in assets {
        let source = lit(&asset.abs.to_string_lossy());
        // The ETag value carries its own surrounding double quotes.
        let etag = lit(&format!("\"{}\"", asset.hash));
        let fields = [
            ("path", lit(&asset.path)),
            ("logical_path", lit(&asset.logical)),
            ("bytes", format!("::core::include_bytes!({source})")),
            ("content_type", lit(&asset.content_type)),
            ("etag", etag),
            ("hash", lit(&asset.hash)),
        ];
        code += "    ::tower_serve_embedded::EmbeddedFile {\n";
        for (name, value) in &fields {
            code += &format!("        {name}: {value},\n");
        }
        code += "    },\n";
    }
    code += "];\n\n";

    // The public handle over that table.
    code += "/// Assets embedded at build time by `tower-serve-embedded`.\n";
    code += "pub static ASSETS: ::tower_serve_embedded::Assets =\n";
    code += "    ::tower_serve_embedded::Assets::new(__TSE_FILES);\n\n";

    // One macro arm per asset maps its crate-root-relative path to the served URL; the final
    // catch-all arm turns any other literal into a compile error.
    code += "/// Resolve a crate-root-relative asset path to its cache-busted URL at compile time.\n";
    code += "#[macro_export]\n";
    code += "macro_rules! asset {\n";
    for asset in assets {
        code += &format!(
            "    ({key}) => {{ {url} }};\n",
            key = lit(&asset.logical),
            url = lit(&asset.path)
        );
    }
    code += "    ($other:literal) => {\n";
    code += "        ::core::compile_error!(::core::concat!(\"tower-serve-embedded: unknown asset `\", $other, \"`\"))\n";
    code += "    };\n";
    code += "}\n";

    code
}

/// Recursively collect files (into `files`) and directories (into `dirs`), skipping dotfiles,
/// symlinks, and any directory whose path relative to `base` is listed in `ignore`. Entries are
/// visited in sorted order for deterministic output.
///
/// `dir` itself is always recorded in `dirs`; an ignored directory is neither recorded nor
/// descended into. `ignore` entries are compared against the `/`-separated output of
/// `logical_path(base, ..)`.
///
/// # Errors
///
/// Propagates any I/O error from listing a directory or querying an entry's file type.
fn collect(
    base: &Path,
    dir: &Path,
    ignore: &[String],
    files: &mut Vec<PathBuf>,
    dirs: &mut Vec<PathBuf>,
) -> io::Result<()> {
    dirs.push(dir.to_path_buf());
    let mut entries = fs::read_dir(dir)?.collect::<io::Result<Vec<_>>>()?;
    // `file_name()` allocates, so compute each key once instead of once per comparison.
    entries.sort_by_cached_key(|entry| entry.file_name());
    for entry in entries {
        if entry.file_name().to_string_lossy().starts_with('.') {
            continue;
        }
        // `DirEntry::file_type` does not follow symlinks, so a symlink is neither a directory
        // nor a file here and falls through both branches.
        let file_type = entry.file_type()?;
        let path = entry.path();
        if file_type.is_dir() {
            // The relative path is built once per directory (not once per ignore entry), and not
            // at all when nothing is ignored.
            if !ignore.is_empty() && ignore.contains(&logical_path(base, &path)) {
                continue;
            }
            collect(base, &path, ignore, files, dirs)?;
        } else if file_type.is_file() {
            files.push(path);
        }
    }
    Ok(())
}

/// Express `file` relative to `base` as a `/`-separated string (e.g. `assets/css/style.css`).
///
/// When `file` does not start with `base`, the whole of `file` is rendered instead. Components
/// that are not valid Unicode are converted lossily.
fn logical_path(base: &Path, file: &Path) -> String {
    let relative = match file.strip_prefix(base) {
        Ok(rest) => rest,
        Err(_) => file,
    };
    let mut joined = String::new();
    for (index, component) in relative.components().enumerate() {
        if index > 0 {
            joined.push('/');
        }
        joined.push_str(&component.as_os_str().to_string_lossy());
    }
    joined
}

/// Build the served path for `logical`: a leading slash, the directory part unchanged, and the
/// file name with `hash` inserted before its last extension —
/// `assets/css/style.css` + `9f3a1c2b` → `/assets/css/style.9f3a1c2b.css`.
///
/// A file name without a `.`, or whose only `.` is its first character, gets `.{hash}` appended
/// instead.
fn hashed_path(logical: &str, hash: &str) -> String {
    // Split just after the last `/`, so `parent` is empty or ends with a slash.
    let name_start = logical.rfind('/').map_or(0, |slash| slash + 1);
    let (parent, name) = logical.split_at(name_start);

    let mut served = String::with_capacity(logical.len() + hash.len() + 2);
    served.push('/');
    served.push_str(parent);
    match name.rfind('.') {
        // A dot at index 0 means an empty stem, which is treated as "no extension".
        Some(dot) if dot > 0 => {
            served.push_str(&name[..dot]);
            served.push('.');
            served.push_str(hash);
            served.push_str(&name[dot..]);
        }
        _ => {
            served.push_str(name);
            served.push('.');
            served.push_str(hash);
        }
    }
    served
}

/// Hash `bytes` with BLAKE3 and return at most the first `len` characters of the hex digest.
///
/// If `len` exceeds the digest's length, the whole digest is returned.
fn hash_hex(bytes: &[u8], len: usize) -> String {
    let digest = blake3::hash(bytes);
    // Hex digits are single-byte characters, so taking `len` chars equals slicing `len` bytes.
    digest.to_hex().chars().take(len).collect()
}

/// Render `s` as a Rust string literal by way of its `Debug` formatting, which adds the
/// surrounding quotes and escapes quotes, backslashes and control characters.
fn lit(s: &str) -> String {
    use std::fmt::Write as _;

    // Two extra bytes for the surrounding quotes; escapes may grow it further.
    let mut literal = String::with_capacity(s.len() + 2);
    write!(literal, "{s:?}").expect("formatting into a String cannot fail");
    literal
}

/// Read the environment variable `key`, converting failures into an [`io::Error`].
///
/// # Errors
///
/// - [`io::ErrorKind::NotFound`] when the variable is not set.
/// - [`io::ErrorKind::InvalidData`] when it is set but its value is not valid Unicode; this
///   case is reported separately so the message does not wrongly claim the variable is missing.
fn env_var(key: &str) -> io::Result<String> {
    env::var(key).map_err(|err| match err {
        env::VarError::NotPresent => io::Error::new(
            io::ErrorKind::NotFound,
            format!("environment variable {key} is not set (is this running from build.rs?)"),
        ),
        env::VarError::NotUnicode(raw) => io::Error::new(
            io::ErrorKind::InvalidData,
            format!("environment variable {key} is not valid Unicode: {raw:?}"),
        ),
    })
}

/// Unit tests for the path/hash helpers, the builder's configuration methods, and the
/// directory walk.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn hashed_path_inserts_hash_before_extension() {
        assert_eq!(
            hashed_path("assets/css/style.css", "abcd"),
            "/assets/css/style.abcd.css"
        );
        assert_eq!(hashed_path("static/app.js", "abcd"), "/static/app.abcd.js");
        assert_eq!(hashed_path("a/b/c.png", "ff"), "/a/b/c.ff.png");
    }

    #[test]
    fn hashed_path_handles_no_extension_and_multi_dot() {
        assert_eq!(hashed_path("assets/LICENSE", "abcd"), "/assets/LICENSE.abcd");
        assert_eq!(hashed_path("a.tar.gz", "ff"), "/a.tar.ff.gz");
    }

    #[test]
    fn hashed_path_handles_bare_and_dot_leading_names() {
        assert_eq!(hashed_path("robots.txt", "ab"), "/robots.ab.txt");
        // A leading dot is not an extension separator: the hash is appended.
        assert_eq!(
            hashed_path("assets/.well-known", "ab"),
            "/assets/.well-known.ab"
        );
    }

    #[test]
    fn logical_path_is_relative_to_base() {
        let base = Path::new("crate");
        let file = base.join("assets").join("css").join("style.css");
        assert_eq!(logical_path(base, &file), "assets/css/style.css");
        // A path outside `base` is rendered whole.
        assert_eq!(
            logical_path(base, Path::new("elsewhere/file.txt")),
            "elsewhere/file.txt"
        );
    }

    #[test]
    fn hash_is_deterministic_and_truncated() {
        let a = hash_hex(b"hello world", 16);
        let b = hash_hex(b"hello world", 16);
        assert_eq!(a, b);
        assert_eq!(a.len(), 16);
        assert_ne!(hash_hex(b"hello world", 16), hash_hex(b"goodbye world", 16));
    }

    #[test]
    fn lit_escapes() {
        assert_eq!(lit("a\"b"), "\"a\\\"b\"");
    }

    #[test]
    fn ignore_dir_trims_slashes_and_drops_empty() {
        let builder = Builder::new("assets")
            .ignore_dir("/lib/")
            .ignore_dir("vendor/js")
            .ignore_dir("/");
        assert_eq!(builder.ignore, vec!["lib", "vendor/js"]);
    }

    #[test]
    fn hash_length_is_clamped() {
        assert_eq!(Builder::new("assets").hash_len, 16);
        assert_eq!(Builder::new("assets").hash_length(0).hash_len, 1);
        assert_eq!(Builder::new("assets").hash_length(8).hash_len, 8);
        assert_eq!(Builder::new("assets").hash_length(1000).hash_len, 64);
    }

    #[test]
    fn collect_skips_ignored_dirs() {
        let base = std::env::temp_dir().join(format!(
            "tse_ignore_{}_{}",
            std::process::id(),
            line!()
        ));
        let _ = fs::remove_dir_all(&base);
        fs::create_dir_all(base.join("css")).unwrap();
        fs::create_dir_all(base.join("lib/sub")).unwrap();
        fs::write(base.join("css/a.css"), "a").unwrap();
        fs::write(base.join("root.txt"), "r").unwrap();
        fs::write(base.join(".hidden"), "h").unwrap();
        fs::write(base.join("lib/b.js"), "b").unwrap();
        fs::write(base.join("lib/sub/c.js"), "c").unwrap();

        let ignore = vec!["lib".to_string()];
        let mut files = Vec::new();
        let mut dirs = Vec::new();
        let walked = collect(&base, &base, &ignore, &mut files, &mut dirs);

        // Clean up before asserting so a failing assertion cannot leak the fixture directory;
        // everything checked below is already held in memory.
        fs::remove_dir_all(&base).unwrap();
        walked.unwrap();

        let mut logicals: Vec<String> = files.iter().map(|f| logical_path(&base, f)).collect();
        logicals.sort();
        // Neither the ignored directory's files nor the dotfile are collected.
        assert_eq!(logicals, vec!["css/a.css", "root.txt"]);
        // The ignored directory is neither walked nor watched.
        assert!(!dirs.iter().any(|d| logical_path(&base, d) == "lib"));
        assert!(dirs.iter().any(|d| logical_path(&base, d) == "css"));
    }
}