tower-serve-embedded-build 0.2.0

Build-time helper for tower-serve-embedded: walks an asset directory, content-hashes each file, and generates the embedded manifest and `asset!` macro. Use it from your build.rs.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
//! Build-time helper for [`tower-serve-embedded`](https://docs.rs/tower-serve-embedded).
//!
//! Call this from your crate's `build.rs`. It walks an asset directory, content-hashes every file
//! with BLAKE3, and writes a generated Rust file to `OUT_DIR` containing the embedded manifest
//! (`ASSETS`) and a compile-time `asset!` macro. Pull that into your crate with
//! [`tower_serve_embedded::embed!()`](https://docs.rs/tower-serve-embedded).
//!
//! ```ignore
//! // build.rs
//! fn main() {
//!     tower_serve_embedded_build::Builder::new("assets").emit().unwrap();
//! }
//! ```
//!
//! Asset paths are expressed **relative to the crate root**, so the embedded URL mirrors the
//! file's location in your project. Ordinary files are served at a cache-busted URL
//! (`/assets/css/style.<hash>.css`). Files under [`Builder::immutable_dir`] are treated as already
//! versioned and served only at their plain URL (`/assets/lib/htmx-1.9.10.min.js`). Hidden files
//! and directories (names starting with `.`) and symlinks are ignored.

use std::env;
use std::fs;
use std::io;
use std::path::{Path, PathBuf};

/// Configures and runs asset embedding from a `build.rs`.
///
/// Create one with [`Builder::new`], optionally adjust it with [`Builder::immutable_dir`] and
/// [`Builder::hash_length`], then call [`Builder::emit`] to generate the code.
#[derive(Debug, Clone)]
pub struct Builder {
    /// Asset directory to embed, as given to [`Builder::new`]; joined onto `CARGO_MANIFEST_DIR`
    /// when the builder is emitted.
    dir: PathBuf,
    /// Number of hex characters of the content hash to keep (16 by default, kept within 1..=64
    /// by [`Builder::hash_length`]).
    hash_len: usize,
    /// Directories, relative to `dir` and stored without leading/trailing slashes, whose files
    /// are served at their plain URL instead of a hashed one.
    immutable: Vec<String>,
}

impl Builder {
    /// Embed every file under `dir`, resolved relative to `CARGO_MANIFEST_DIR` (your crate root).
    ///
    /// Files keep their crate-root-relative paths, so embedding `assets/` makes
    /// `assets/css/style.css` available as `asset!("assets/css/style.css")`. Ordinary files are
    /// served at a cache-busted URL (`/assets/css/style.<hash>.css`). Files in an
    /// [`immutable_dir`] are served only at their plain URL (`/assets/lib/htmx-1.9.10.min.js`).
    /// The hash defaults to 16 hex chars.
    ///
    /// [`immutable_dir`]: Builder::immutable_dir
    pub fn new(dir: impl Into<PathBuf>) -> Self {
        Self {
            dir: dir.into(),
            hash_len: 16,
            immutable: Vec::new(),
        }
    }

    /// Mark a directory (and everything under it) as **already immutable**, given as a path
    /// **relative to the embedded directory** passed to [`Builder::new`].
    ///
    /// Files under it are still embedded and served, but at their original, non-hashed URL with a
    /// one-year `immutable` cache — they are *not* given an extra content hash. Use this for assets
    /// whose names already encode a version (vendored libraries, CDN-style
    /// `lib/htmx-1.9.10.min.js`), so `asset!("assets/lib/htmx-1.9.10.min.js")` resolves to
    /// `/assets/lib/htmx-1.9.10.min.js`. A leading or trailing slash is accepted (`"/lib"`,
    /// `"lib/"`); matching is by path prefix, not a glob. Call it multiple times for several
    /// directories.
    pub fn immutable_dir(mut self, dir: impl AsRef<str>) -> Self {
        let normalized = dir.as_ref().trim_matches('/').to_string();
        if !normalized.is_empty() {
            self.immutable.push(normalized);
        }
        self
    }

    /// Number of hex characters of the BLAKE3 hash to embed in filenames and ETags (default 16,
    /// i.e. 64 bits — clamped to 1..=64).
    pub fn hash_length(mut self, len: usize) -> Self {
        self.hash_len = len.clamp(1, 64);
        self
    }

    /// Walk the directory, hash the files, and write the generated code to `OUT_DIR`.
    ///
    /// Also emits `cargo:rerun-if-changed` lines so a content change refreshes the hashes and
    /// added/removed files are picked up — pair it with `cargo watch` for hot reload.
    pub fn emit(self) -> io::Result<()> {
        let manifest_dir = PathBuf::from(env_var("CARGO_MANIFEST_DIR")?);
        let root = manifest_dir.join(&self.dir);
        let out_dir = PathBuf::from(env_var("OUT_DIR")?);

        // Re-run if the build script itself changes (emitting any rerun-if-changed line opts out
        // of cargo's default "rerun if any package file changed" behaviour).
        println!("cargo:rerun-if-changed=build.rs");

        let mut files = Vec::new();
        let mut dirs = Vec::new();
        if root.is_dir() {
            collect(&root, &mut files, &mut dirs)?;
        } else {
            println!(
                "cargo:warning=tower-serve-embedded: asset directory {} not found",
                root.display()
            );
        }

        // rerun-if-changed for every directory (catches added/removed files) and every file
        // (catches content edits, which change the hash).
        for dir in &dirs {
            println!("cargo:rerun-if-changed={}", dir.display());
        }

        let mut assets: Vec<Asset> = Vec::with_capacity(files.len());
        for abs in &files {
            println!("cargo:rerun-if-changed={}", abs.display());
            let bytes = fs::read(abs)?;
            // Relative to the crate root, so the URL mirrors the project layout.
            let logical = logical_path(&manifest_dir, abs);
            // Relative to the embedded dir, for matching against `immutable_dir` entries.
            let rel = logical_path(&root, abs);
            let immutable = is_immutable(&rel, &self.immutable);
            let hash = hash_hex(&bytes, self.hash_len);
            let url = format!("/{logical}");
            // Already-immutable assets keep their plain URL; everything else gets a hashed alias.
            let hashed_url = if immutable {
                None
            } else {
                Some(hashed_path(&logical, &hash))
            };
            let content_type = mime_guess::from_path(abs)
                .first_or_octet_stream()
                .to_string();
            assets.push(Asset {
                abs: abs.clone(),
                logical,
                url,
                hashed_url,
                hash,
                content_type,
            });
        }

        // Stable, deterministic codegen order.
        assets.sort_by(|a, b| a.logical.cmp(&b.logical));

        let code = generate(&assets);
        fs::write(out_dir.join("embed_assets.rs"), code)?;
        Ok(())
    }
}

/// One embedded file, as gathered by `Builder::emit` and rendered by `generate`.
struct Asset {
    /// Path of the file on disk as found by the directory walk; read for hashing and emitted as
    /// the `include_bytes!` argument in the generated code.
    abs: PathBuf,
    /// Path relative to the crate root with `/` separators, e.g. `assets/css/style.css`. Used as
    /// the `asset!` lookup key and as the sort key for codegen order.
    logical: String,
    /// The plain, non-hashed URL, e.g. `/assets/css/style.css`. This is served only for assets in
    /// an `immutable_dir`.
    url: String,
    /// The cache-busted served URL, e.g. `/assets/css/style.<hash>.css`. `None` for assets in an
    /// `immutable_dir`, which are served only at `url`.
    hashed_url: Option<String>,
    /// Truncated hex content hash of the file; also the basis of the generated ETag.
    hash: String,
    /// MIME type guessed from the file path, falling back to the octet-stream type.
    content_type: String,
}

/// Whether `rel` (a path relative to the embedded dir) is inside one of the `immutable` dirs —
/// either the dir itself or a descendant (prefix match on path components).
///
/// Both `rel` and the entries are expected to use `/` separators with no leading or trailing
/// slash. A textual prefix that does not end on a `/` boundary (`lib` vs `library/a.js`) does
/// not match.
fn is_immutable(rel: &str, immutable: &[String]) -> bool {
    immutable.iter().any(|dir| {
        // Strip the directory name, then require that what remains is either nothing (the dir
        // itself) or starts a new path component. This avoids building a `"{dir}/"` string for
        // every comparison.
        match rel.strip_prefix(dir.as_str()) {
            Some(rest) => rest.is_empty() || rest.starts_with('/'),
            None => false,
        }
    })
}

/// Render the Rust source that `Builder::emit` writes to `OUT_DIR`.
///
/// The output has four parts, in this order: the `__TSE_FILES` table (one `EmbeddedFile` per
/// asset, in the order given), the `__TSE_ROUTES` table (one `Route` per asset, sorted by URL),
/// the public `ASSETS` static tying the two together, and the `asset!` macro mapping each logical
/// path to the URL it should be referenced by.
fn generate(assets: &[Asset]) -> String {
    let mut code = String::from("// @generated by tower-serve-embedded-build. Do not edit.\n");

    // --- File table -------------------------------------------------------------------------
    code.push_str("#[doc(hidden)]\n");
    code.push_str("static __TSE_FILES: &[::tower_serve_embedded::EmbeddedFile] = &[\n");
    for asset in assets {
        // The ETag is the hash wrapped in double quotes.
        let etag = format!("\"{}\"", asset.hash);
        let include_path = asset.abs.to_string_lossy();
        let fields = [
            format!("        url: {},\n", lit(&asset.url)),
            format!("        hashed_url: {},\n", opt_lit(&asset.hashed_url)),
            format!("        logical_path: {},\n", lit(&asset.logical)),
            format!(
                "        bytes: ::core::include_bytes!({}),\n",
                lit(&include_path)
            ),
            format!("        content_type: {},\n", lit(&asset.content_type)),
            format!("        etag: {},\n", lit(&etag)),
            format!("        hash: {},\n", lit(&asset.hash)),
        ];
        code.push_str("    ::tower_serve_embedded::EmbeddedFile {\n");
        for field in &fields {
            code.push_str(field);
        }
        code.push_str("    },\n");
    }
    code.push_str("];\n\n");

    // --- Route table ------------------------------------------------------------------------
    // One route per asset: the cache-busted alias when the asset has one, otherwise (for assets
    // that are already immutable) the plain URL. Each entry carries the index into the file
    // table, and the list is sorted by URL so it can be binary-searched.
    let immutable = "::core::option::Option::Some(::tower_serve_embedded::IMMUTABLE_CACHE_CONTROL)";
    let mut routes: Vec<(String, usize, &str)> = assets
        .iter()
        .enumerate()
        .map(|(index, asset)| {
            let served = asset.hashed_url.as_deref().unwrap_or(&asset.url);
            (served.to_owned(), index, immutable)
        })
        .collect();
    routes.sort_by(|left, right| left.0.cmp(&right.0));

    code.push_str("#[doc(hidden)]\n");
    code.push_str("static __TSE_ROUTES: &[::tower_serve_embedded::Route] = &[\n");
    for (url, index, cache_control) in &routes {
        let fields = [
            format!("        url: {},\n", lit(url)),
            format!("        file: {index}usize,\n"),
            format!("        cache_control: {cache_control},\n"),
        ];
        code.push_str("    ::tower_serve_embedded::Route {\n");
        for field in &fields {
            code.push_str(field);
        }
        code.push_str("    },\n");
    }
    code.push_str("];\n\n");

    // --- Public manifest --------------------------------------------------------------------
    code.push_str(
        "/// Assets embedded at build time by `tower-serve-embedded`.\n\
         pub static ASSETS: ::tower_serve_embedded::Assets =\n    \
         ::tower_serve_embedded::Assets::new(__TSE_FILES, __TSE_ROUTES);\n\n",
    );

    // --- `asset!` macro ---------------------------------------------------------------------
    // One literal-matching arm per asset, expanding to the URL to reference it by (hashed alias
    // if there is one, plain URL otherwise). A final catch-all arm turns any other literal into
    // a compile error.
    code.push_str(
        "/// Resolve a crate-root-relative asset path to its served URL at compile time.\n",
    );
    code.push_str("#[doc(hidden)]\n");
    code.push_str("macro_rules! __tower_serve_embedded_asset {\n");
    code.extend(assets.iter().map(|asset| {
        let target = match &asset.hashed_url {
            Some(hashed) => hashed.as_str(),
            None => asset.url.as_str(),
        };
        format!("    ({}) => {{ {} }};\n", lit(&asset.logical), lit(target))
    }));
    code.push_str(
        "    ($other:literal) => {\n        \
         ::core::compile_error!(::core::concat!(\"tower-serve-embedded: unknown asset `\", $other, \"`\"))\n    \
         };\n",
    );
    code.push_str("}\n");
    code.push_str("#[doc(hidden)]\n");
    code.push_str("pub(crate) use __tower_serve_embedded_asset as asset;\n");

    code
}

/// Walk `dir` depth-first, appending every regular file to `files` and every directory visited
/// (including `dir` itself) to `dirs`.
///
/// Entries whose name starts with `.` are skipped, and so is anything that is neither a regular
/// file nor a directory (the entry's own file type is inspected, so symlinks are not followed).
/// Each directory's entries are processed in file-name order, which makes the output
/// deterministic.
fn collect(dir: &Path, files: &mut Vec<PathBuf>, dirs: &mut Vec<PathBuf>) -> io::Result<()> {
    dirs.push(dir.to_path_buf());

    // Read the full listing up front so it can be sorted; the first failing entry aborts.
    let mut listing = Vec::new();
    for item in fs::read_dir(dir)? {
        listing.push(item?);
    }
    listing.sort_by(|left, right| left.file_name().cmp(&right.file_name()));

    for item in &listing {
        let hidden = item.file_name().to_string_lossy().starts_with('.');
        if hidden {
            continue;
        }
        let kind = item.file_type()?;
        if kind.is_dir() {
            collect(&item.path(), files, dirs)?;
        } else if kind.is_file() {
            files.push(item.path());
        }
    }
    Ok(())
}

/// Express `file` relative to `base` as a `/`-separated string (e.g. `assets/css/style.css`).
///
/// If `file` does not start with `base`, the whole of `file` is used instead. Components that
/// are not valid UTF-8 are converted lossily.
fn logical_path(base: &Path, file: &Path) -> String {
    let relative = match file.strip_prefix(base) {
        Ok(rest) => rest,
        Err(_) => file,
    };

    let mut joined = String::new();
    for (position, part) in relative.components().enumerate() {
        if position > 0 {
            joined.push('/');
        }
        joined.push_str(&part.as_os_str().to_string_lossy());
    }
    joined
}

/// Build the cache-busted URL for `logical`: the hash goes in front of the final extension and a
/// leading slash is added, so `assets/css/style.css` + `9f3a1c2b` becomes
/// `/assets/css/style.9f3a1c2b.css`.
///
/// Only the last `.` of the file name counts as the extension separator (`a.tar.gz` becomes
/// `a.tar.<hash>.gz`). A name with no `.`, or whose only `.` is its first character, gets the
/// hash appended instead (`LICENSE` becomes `LICENSE.<hash>`).
fn hashed_path(logical: &str, hash: &str) -> String {
    // Split just after the last `/`, so `parent` is either empty or ends with the separator.
    let name_start = logical.rfind('/').map_or(0, |slash| slash + 1);
    let (parent, name) = logical.split_at(name_start);

    let renamed = match name.rfind('.') {
        // A dot at index 0 would leave an empty stem, so it is not treated as an extension.
        Some(dot) if dot > 0 => format!("{}.{hash}.{}", &name[..dot], &name[dot + 1..]),
        _ => format!("{name}.{hash}"),
    };

    format!("/{parent}{renamed}")
}

/// Hash `bytes` with BLAKE3 and return the first `len` characters of the hex digest.
///
/// If `len` exceeds the digest's length, the whole digest is returned.
fn hash_hex(bytes: &[u8], len: usize) -> String {
    let digest = blake3::hash(bytes).to_hex();
    let keep = len.min(digest.len());
    digest[..keep].to_owned()
}

/// Quote `s` as Rust string-literal source text, using the `Debug` formatting of `str` to add
/// the surrounding double quotes and escape quotes, backslashes and similar characters.
fn lit(s: &str) -> String {
    format!("{:?}", s)
}

/// Render an optional string as source text for an `Option<&'static str>` expression: a fully
/// qualified `Some(<literal>)` when a value is present, a fully qualified `None` otherwise.
fn opt_lit(s: &Option<String>) -> String {
    s.as_deref().map_or_else(
        || String::from("::core::option::Option::None"),
        |value| format!("::core::option::Option::Some({})", lit(value)),
    )
}

/// Read the environment variable `key`, converting failures into an [`io::Error`] so callers can
/// use `?` in functions returning [`io::Result`].
///
/// # Errors
///
/// - [`io::ErrorKind::NotFound`] if the variable is not set (typically because the code is not
///   running from a build script).
/// - [`io::ErrorKind::InvalidData`] if the variable is set but its value is not valid Unicode.
fn env_var(key: &str) -> io::Result<String> {
    env::var(key).map_err(|err| match err {
        env::VarError::NotPresent => io::Error::new(
            io::ErrorKind::NotFound,
            format!("environment variable {key} is not set (is this running from build.rs?)"),
        ),
        // The variable exists, so reporting it as "not set" would send the user looking in the
        // wrong place.
        env::VarError::NotUnicode(raw) => io::Error::new(
            io::ErrorKind::InvalidData,
            format!("environment variable {key} is set but is not valid Unicode: {raw:?}"),
        ),
    })
}

// Unit tests for the path/URL helpers, the directory walk, and the shape of the generated code.
#[cfg(test)]
mod tests {
    use super::*;

    // The hash is inserted between the file stem and its extension, and the URL gains a leading
    // slash.
    #[test]
    fn hashed_path_inserts_hash_before_extension() {
        assert_eq!(
            hashed_path("assets/css/style.css", "abcd"),
            "/assets/css/style.abcd.css"
        );
        assert_eq!(hashed_path("static/app.js", "abcd"), "/static/app.abcd.js");
        assert_eq!(hashed_path("a/b/c.png", "ff"), "/a/b/c.ff.png");
    }

    // Without an extension the hash is appended; with several dots only the last one counts as
    // the extension separator.
    #[test]
    fn hashed_path_handles_no_extension_and_multi_dot() {
        assert_eq!(
            hashed_path("assets/LICENSE", "abcd"),
            "/assets/LICENSE.abcd"
        );
        assert_eq!(hashed_path("a.tar.gz", "ff"), "/a.tar.ff.gz");
    }

    // Same input gives the same hash, the result has the requested length, and different inputs
    // give different hashes.
    #[test]
    fn hash_is_deterministic_and_truncated() {
        let a = hash_hex(b"hello world", 16);
        let b = hash_hex(b"hello world", 16);
        assert_eq!(a, b);
        assert_eq!(a.len(), 16);
        assert_ne!(hash_hex(b"hello world", 16), hash_hex(b"goodbye world", 16));
    }

    // An embedded double quote is backslash-escaped and the whole value is wrapped in quotes.
    #[test]
    fn lit_escapes() {
        assert_eq!(lit("a\"b"), "\"a\\\"b\"");
    }

    // Builds a small tree in the system temp directory and checks that the walk returns every
    // file and directory, including the `lib` subtree that a caller might mark as immutable.
    #[test]
    fn collect_walks_every_dir_including_what_will_be_immutable() {
        // The process id and line number keep the scratch directory name unique.
        let base =
            std::env::temp_dir().join(format!("tse_collect_{}_{}", std::process::id(), line!()));
        // Clear leftovers from an earlier run; it is fine if there are none.
        let _ = fs::remove_dir_all(&base);
        fs::create_dir_all(base.join("css")).unwrap();
        fs::create_dir_all(base.join("lib/sub")).unwrap();
        fs::write(base.join("css/a.css"), "a").unwrap();
        fs::write(base.join("root.txt"), "r").unwrap();
        fs::write(base.join("lib/b.js"), "b").unwrap();
        fs::write(base.join("lib/sub/c.js"), "c").unwrap();

        let mut files = Vec::new();
        let mut dirs = Vec::new();
        collect(&base, &mut files, &mut dirs).unwrap();

        // Every file is collected; `collect` knows nothing about immutable directories.
        let mut logicals: Vec<String> = files.iter().map(|f| logical_path(&base, f)).collect();
        logicals.sort();
        assert_eq!(
            logicals,
            vec!["css/a.css", "lib/b.js", "lib/sub/c.js", "root.txt"]
        );
        // The "immutable" directory is walked and watched like any other.
        assert!(dirs.iter().any(|d| logical_path(&base, d) == "lib"));
        assert!(dirs.iter().any(|d| logical_path(&base, d) == "lib/sub"));

        fs::remove_dir_all(&base).unwrap();
    }

    // Matching is by whole path components: files at any depth under a listed directory match,
    // siblings and look-alike names do not.
    #[test]
    fn is_immutable_matches_dir_and_descendants_only() {
        let immutable = vec!["lib".to_string(), "vendor/pkg".to_string()];
        assert!(is_immutable("lib/htmx.js", &immutable));
        assert!(is_immutable("lib/sub/a.js", &immutable));
        assert!(is_immutable("vendor/pkg/x.css", &immutable));
        // Not under an immutable dir.
        assert!(!is_immutable("css/a.css", &immutable));
        assert!(!is_immutable("vendor/other.js", &immutable));
        // A prefix that isn't a path boundary must not match.
        assert!(!is_immutable("library/a.js", &immutable));
    }

    // The route table lists the hashed URL for an ordinary asset (and not its plain URL), and
    // the plain URL for an asset without a hashed alias.
    #[test]
    fn generated_routes_only_include_plain_urls_for_immutable_assets() {
        let assets = vec![
            Asset {
                abs: PathBuf::from("/tmp/assets/css/style.css"),
                logical: "assets/css/style.css".to_string(),
                url: "/assets/css/style.css".to_string(),
                hashed_url: Some("/assets/css/style.abcd.css".to_string()),
                hash: "abcd".to_string(),
                content_type: "text/css".to_string(),
            },
            Asset {
                abs: PathBuf::from("/tmp/assets/lib/htmx-1.9.10.min.js"),
                logical: "assets/lib/htmx-1.9.10.min.js".to_string(),
                url: "/assets/lib/htmx-1.9.10.min.js".to_string(),
                hashed_url: None,
                hash: "beef".to_string(),
                content_type: "text/javascript".to_string(),
            },
        ];

        let code = generate(&assets);
        // Isolate the text between the start of the route table and its closing `];`.
        let routes = code
            .split("static __TSE_ROUTES")
            .nth(1)
            .unwrap()
            .split("];")
            .next()
            .unwrap();

        assert!(routes.contains("url: \"/assets/css/style.abcd.css\""));
        assert!(!routes.contains("url: \"/assets/css/style.css\""));
        assert!(routes.contains("url: \"/assets/lib/htmx-1.9.10.min.js\""));
    }

    // The generated macro is made available through a `pub(crate) use` alias rather than
    // `#[macro_export]`.
    #[test]
    fn generated_asset_macro_is_not_macro_exported() {
        let assets = vec![Asset {
            abs: PathBuf::from("/tmp/assets/css/style.css"),
            logical: "assets/css/style.css".to_string(),
            url: "/assets/css/style.css".to_string(),
            hashed_url: Some("/assets/css/style.abcd.css".to_string()),
            hash: "abcd".to_string(),
            content_type: "text/css".to_string(),
        }];

        let code = generate(&assets);

        assert!(!code.contains("#[macro_export]"));
        assert!(code.contains("macro_rules! __tower_serve_embedded_asset"));
        assert!(code.contains("pub(crate) use __tower_serve_embedded_asset as asset;"));
    }
}