toolkit_zero/dependency-graph/build.rs
1//! Build-time fingerprint generator for use in a downstream `build.rs`.
2//!
3//! Produces **`fingerprint.json`** in `$OUT_DIR`: a compact, normalised,
4//! deterministically sorted JSON document capturing a stable snapshot of the
5//! build environment.
6//!
7//! ## Functions
8//!
9//! | Function | Description |
10//! |---|---|
11//! | [`generate_fingerprint`] | Always call this. Writes compact `fingerprint.json` to `$OUT_DIR` and emits `cargo:rerun-if-changed` directives. Pass `true` to also export a pretty-printed copy alongside `Cargo.toml`. |
12//! | [`export`] | Optional standalone export. Writes a pretty-printed `fingerprint.json` alongside `Cargo.toml`. Note: incurs a second `cargo metadata` call if used after `generate_fingerprint(false)`. |
13//!
14//! ## Sections captured
15//!
16//! | Section | Contents |
17//! |---|---|
18//! | `package` | Crate name and version |
19//! | `build` | Profile, opt-level, target triple, rustc version, and active feature flags |
20//! | `deps` | Full normalised `cargo metadata` dependency graph (sorted, no absolute paths) |
21//! | `cargo_lock_sha256` | SHA-256 of `Cargo.lock` (comment lines stripped) |
22//! | `source` | SHA-256 of every `.rs` file under `src/` |
23//!
24//! ## Usage
25//!
26//! In the downstream crate's `build.rs`:
27//!
28//! ```rust,ignore
29//! fn main() {
30//! // Pass `true` to also write a pretty-printed copy alongside Cargo.toml.
31//! toolkit_zero::dependency_graph::build::generate_fingerprint(cfg!(debug_assertions))
32//! .expect("fingerprint generation failed");
33//! }
34//! ```
35//!
36//! Embed the fingerprint in the binary:
37//!
38//! ```rust,ignore
39//! const BUILD_TIME_FINGERPRINT: &str = include_str!(concat!(env!("OUT_DIR"), "/fingerprint.json"));
40//! ```
41//!
42//! ## Concerns
43//!
44//! * **Not tamper-proof** — the fingerprint resides as plain text in the binary's
45//! read-only data section. It is informational in nature; it does not constitute
46//! a security boundary.
47//! * **Export file** — `generate_fingerprint(true)` (or `export(true)`) writes
48//! `fingerprint.json` to the crate root. Add it to `.gitignore` to prevent
49//! unintentional commits.
//! * **Build-time overhead** — `cargo metadata` is executed every time the
//!   build script runs. The `cargo:rerun-if-changed` directives limit those runs
//!   to builds where `src/`, `Cargo.toml`, or `Cargo.lock` has changed.
53//! * **Path stripping** — absolute paths (`workspace_root`, `manifest_path`,
54//! `src_path`, `path`, and others) are removed from `cargo metadata` output
55//! to ensure the fingerprint is stable across machines and checkout locations.
56//! * **Feature scope** — `build.features` captures the active features of the
57//! crate being built, not toolkit-zero's own features.
58//! * **Compile-time only** — the snapshot does not update at runtime.
59//! * **Atomic writes** — both fingerprint files are written via a `.tmp` rename
60//! so a partially-written file is never observed by a parallel reader.
61
62use std::{collections::BTreeMap, env, fs, path::Path, process::Command};
63
64use serde_json::{json, Map, Value};
65use sha2::{Digest, Sha256};
66
67// ─── public error ────────────────────────────────────────────────────────────
68
/// Errors that can occur while generating `fingerprint.json`.
///
/// The `String` payloads hold a human-readable detail (an error message or a
/// path) that the `Display` implementation appends to a fixed prefix.
#[derive(Debug)]
pub enum BuildTimeFingerprintError {
    /// `cargo metadata` process failed or returned non-zero.
    ///
    /// The payload is either the spawn error text or the captured stderr.
    CargoMetadataFailed(String),
    /// `cargo metadata` stdout was not valid UTF-8.
    CargoMetadataNotUtf8,
    /// `cargo metadata` stdout could not be parsed as JSON.
    ///
    /// The payload is the parser's error message.
    CargoMetadataInvalidJson(String),
    /// `Cargo.lock` was not found at the expected path.
    ///
    /// The payload is the path that was checked.
    CargoLockNotFound(String),
    /// A filesystem operation failed.
    ///
    /// Produced automatically from `std::io::Error` via `?`.
    IoError(std::io::Error),
    /// The final JSON could not be serialised.
    ///
    /// The payload is the serialiser's error message.
    SerializationFailed(String),
}
85
86impl std::fmt::Display for BuildTimeFingerprintError {
87 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
88 match self {
89 Self::CargoMetadataFailed(e) => write!(f, "cargo metadata failed: {e}"),
90 Self::CargoMetadataNotUtf8 => write!(f, "cargo metadata output is not valid UTF-8"),
91 Self::CargoMetadataInvalidJson(e) => write!(f, "cargo metadata output is invalid JSON: {e}"),
92 Self::CargoLockNotFound(p) => write!(f, "Cargo.lock not found at: {p}"),
93 Self::IoError(e) => write!(f, "I/O error: {e}"),
94 Self::SerializationFailed(e) => write!(f, "serialisation failed: {e}"),
95 }
96 }
97}
98
// Marker impl with an empty body: every provided method of
// `std::error::Error` keeps its default implementation.
impl std::error::Error for BuildTimeFingerprintError {}
100
101impl From<std::io::Error> for BuildTimeFingerprintError {
102 fn from(e: std::io::Error) -> Self { Self::IoError(e) }
103}
104
105// ─── public entry point ──────────────────────────────────────────────────────
106
107/// Generate `fingerprint.json` in `$OUT_DIR`.
108///
109/// All inputs are read from the environment variables that Cargo sets for
110/// `build.rs` scripts. The necessary `cargo:rerun-if-changed` directives are
111/// emitted automatically; no additional boilerplate is required in the
112/// calling `build.rs`.
113///
114/// If `export` is `true`, a pretty-printed copy is also written alongside
115/// `Cargo.toml` for local inspection. Both writes are **atomic** (written to a
116/// `.tmp` file then renamed), so a partially-written file is never observed.
117/// Only a single `cargo metadata` call is made regardless of the `export` flag.
118///
119/// Pass `cfg!(debug_assertions)` to export only in debug builds:
120///
121/// ```rust,ignore
122/// fn main() {
123/// toolkit_zero::dependency_graph::build::generate_fingerprint(cfg!(debug_assertions))
124/// .expect("fingerprint generation failed");
125/// }
126/// ```
127pub fn generate_fingerprint(export: bool) -> Result<(), BuildTimeFingerprintError> {
128 // Emit rerun directives — cargo reads these from build script stdout
129 // regardless of which function in the call stack prints them.
130 println!("cargo:rerun-if-changed=src");
131 println!("cargo:rerun-if-changed=Cargo.toml");
132 println!("cargo:rerun-if-changed=Cargo.lock");
133
134 let out_dir = env::var("OUT_DIR").unwrap_or_default();
135 let manifest_dir = env::var("CARGO_MANIFEST_DIR").unwrap_or_default();
136
137 // Build the fingerprint once — shared by both the compact and pretty copies.
138 let fingerprint = build_fingerprint()?;
139
140 let compact = serde_json::to_string(&fingerprint)
141 .map_err(|e| BuildTimeFingerprintError::SerializationFailed(e.to_string()))?;
142
143 write_atomic(&format!("{out_dir}/fingerprint.json"), compact.as_bytes())?;
144
145 if export {
146 let pretty = serde_json::to_string_pretty(&fingerprint)
147 .map_err(|e| BuildTimeFingerprintError::SerializationFailed(e.to_string()))?;
148 write_atomic(&format!("{manifest_dir}/fingerprint.json"), pretty.as_bytes())?;
149 }
150
151 Ok(())
152}
153
154/// Write a pretty-printed `fingerprint.json` alongside the crate's `Cargo.toml`
155/// when `enabled` is `true`.
156///
157/// This file is intended for **local inspection only**. It is distinct from
158/// the compact `fingerprint.json` written to `$OUT_DIR`; the binary always
159/// embeds the `$OUT_DIR` copy.
160///
161/// > **Tip:** prefer passing `true` to [`generate_fingerprint`] instead of
162/// > calling both functions, as `generate_fingerprint(true)` only runs
163/// > `cargo metadata` once.
164///
165/// # Concerns
166///
167/// The exported file contains the full dependency graph, per-file source
168/// hashes, target triple, and compiler version. **Add `fingerprint.json` to
169/// `.gitignore`** to prevent unintentional commits. The write is atomic
170/// (written to a `.tmp` file then renamed).
171///
172/// ```rust,ignore
173/// fn main() {
174/// toolkit_zero::dependency_graph::build::generate_fingerprint(false)
175/// .expect("fingerprint generation failed");
176/// toolkit_zero::dependency_graph::build::export(cfg!(debug_assertions))
177/// .expect("fingerprint export failed");
178/// }
179/// ```
180pub fn export(enabled: bool) -> Result<(), BuildTimeFingerprintError> {
181 if !enabled { return Ok(()); }
182
183 let manifest_dir = env::var("CARGO_MANIFEST_DIR").unwrap_or_default();
184
185 let fingerprint = build_fingerprint()?;
186 let pretty = serde_json::to_string_pretty(&fingerprint)
187 .map_err(|e| BuildTimeFingerprintError::SerializationFailed(e.to_string()))?;
188
189 write_atomic(&format!("{manifest_dir}/fingerprint.json"), pretty.as_bytes())?;
190 Ok(())
191}
192
193// ─── atomic write helper ──────────────────────────────────────────────────────
194
195/// Write `data` to `path` atomically: write to `path.tmp` then rename.
196///
197/// On POSIX systems `rename(2)` is atomic within the same filesystem,
198/// so `path` is never observed in a partially-written state.
199fn write_atomic(path: &str, data: &[u8]) -> Result<(), BuildTimeFingerprintError> {
200 let tmp = format!("{path}.tmp");
201 fs::write(&tmp, data)?;
202 fs::rename(&tmp, path)?;
203 Ok(())
204}
205
206// ─── core fingerprint builder (shared by generate_fingerprint + export) ────────
207
208fn build_fingerprint() -> Result<Value, BuildTimeFingerprintError> {
209 let manifest_dir = env::var("CARGO_MANIFEST_DIR").unwrap_or_default();
210
211 // ── 1. Package identity ───────────────────────────────────────────────────
212 let pkg_name = env::var("CARGO_PKG_NAME").unwrap_or_default();
213 let pkg_version = env::var("CARGO_PKG_VERSION").unwrap_or_default();
214
215 // ── 2. Build environment ──────────────────────────────────────────────────
216 let profile = env::var("PROFILE").unwrap_or_default();
217 let opt_level = env::var("OPT_LEVEL").unwrap_or_default();
218 let target = env::var("TARGET").unwrap_or_default();
219
220 // Collect all active features (CARGO_FEATURE_<NAME> → "feature-name")
221 let mut features: Vec<String> = env::vars()
222 .filter_map(|(k, _)| {
223 k.strip_prefix("CARGO_FEATURE_")
224 .map(|feat| feat.to_lowercase().replace('_', "-"))
225 })
226 .collect();
227 features.sort_unstable();
228
229 // ── 3. rustc version ──────────────────────────────────────────────────────
230 let rustc_version = Command::new("rustc")
231 .arg("--version")
232 .output()
233 .ok()
234 .and_then(|o| String::from_utf8(o.stdout).ok())
235 .map(|s| s.trim().to_owned())
236 .unwrap_or_else(|| "unknown".to_owned());
237
238 // ── 4. Normalised cargo metadata ──────────────────────────────────────────
239 let cargo_bin = env::var("CARGO").unwrap_or_else(|_| "cargo".to_owned());
240
241 let meta_out = Command::new(&cargo_bin)
242 .args([
243 "metadata",
244 "--format-version=1",
245 "--manifest-path",
246 &format!("{manifest_dir}/Cargo.toml"),
247 ])
248 .output()
249 .map_err(|e| BuildTimeFingerprintError::CargoMetadataFailed(e.to_string()))?;
250
251 if !meta_out.status.success() {
252 let err = String::from_utf8_lossy(&meta_out.stderr).to_string();
253 return Err(BuildTimeFingerprintError::CargoMetadataFailed(err));
254 }
255
256 let meta_str = String::from_utf8(meta_out.stdout)
257 .map_err(|_| BuildTimeFingerprintError::CargoMetadataNotUtf8)?;
258
259 let meta_raw: Value = serde_json::from_str(&meta_str)
260 .map_err(|e| BuildTimeFingerprintError::CargoMetadataInvalidJson(e.to_string()))?;
261
262 let meta_clean = strip_absolute_paths(meta_raw);
263 let meta_normalised = normalise_json(meta_clean);
264
265 // ── 5. Cargo.lock SHA-256 ─────────────────────────────────────────────────
266 let lock_path = format!("{manifest_dir}/Cargo.lock");
267 if !Path::new(&lock_path).exists() {
268 return Err(BuildTimeFingerprintError::CargoLockNotFound(lock_path));
269 }
270 let lock_raw = fs::read(&lock_path)?;
271 let lock_stripped: Vec<u8> = lock_raw
272 .split(|&b| b == b'\n')
273 .filter(|line| !line.starts_with(b"#"))
274 .flat_map(|line| line.iter().chain(std::iter::once(&b'\n')))
275 .copied()
276 .collect();
277 let lock_sha256 = hex_sha256(&lock_stripped);
278
279 // ── 6. Source file hashes ─────────────────────────────────────────────────
280 let src_dir = format!("{manifest_dir}/src");
281 let source_hashes = hash_source_tree(&src_dir, &manifest_dir)?;
282
283 // ── 7. Assemble & normalise ───────────────────────────────────────────────
284 let fingerprint = json!({
285 "package": {
286 "name": pkg_name,
287 "version": pkg_version,
288 },
289 "build": {
290 "features": features,
291 "opt_level": opt_level,
292 "profile": profile,
293 "rustc_version": rustc_version,
294 "target": target,
295 },
296 "cargo_lock_sha256": lock_sha256,
297 "deps": meta_normalised,
298 "source": source_hashes,
299 });
300
301 Ok(normalise_json(fingerprint))
302}
303
304// ─── JSON normalisation ───────────────────────────────────────────────────────
305
306/// Recursively normalise a [`Value`]:
307///
308/// * **Objects** — keys are sorted alphabetically (serde_json's default `Map`
309/// is `BTreeMap`-backed, so collecting into it sorts automatically).
310/// * **Arrays** — items are recursively normalised *and* reordered by a stable
311/// derived key so that cargo-version-dependent ordering differences vanish.
312/// * **Primitives** — unchanged.
313fn normalise_json(value: Value) -> Value {
314 match value {
315 Value::Object(map) => {
316 // BTreeMap-backed Map: inserting via collect() automatically sorts keys.
317 let sorted: Map<String, Value> = map
318 .into_iter()
319 .map(|(k, v)| (k, normalise_json(v)))
320 .collect();
321 Value::Object(sorted)
322 }
323 Value::Array(arr) => {
324 let mut items: Vec<Value> = arr.into_iter().map(normalise_json).collect();
325 items.sort_by(|a, b| array_sort_key(a).cmp(&array_sort_key(b)));
326 Value::Array(items)
327 }
328 other => other,
329 }
330}
331
332/// Derive a stable sort key for an element inside a JSON array.
333///
334/// Preference order:
335/// 1. `"id"` field (cargo package IDs are globally unique and stable)
336/// 2. `"name"` + `"version"` concatenated
337/// 3. Compact JSON serialisation as a last resort
338fn array_sort_key(v: &Value) -> String {
339 if let Some(obj) = v.as_object() {
340 if let Some(id) = obj.get("id").and_then(|v| v.as_str()) {
341 return id.to_owned();
342 }
343 let name = obj.get("name").and_then(|v| v.as_str()).unwrap_or("");
344 let ver = obj.get("version").and_then(|v| v.as_str()).unwrap_or("");
345 if !name.is_empty() {
346 return format!("{name}@{ver}");
347 }
348 }
349 serde_json::to_string(v).unwrap_or_default()
350}
351
352/// Remove fields that carry absolute or machine-specific paths from the
353/// `cargo metadata` JSON so the digest is stable across different machines
354/// and checkout locations.
355///
356/// Removed fields (all carry machine-specific absolute paths):
357/// * `workspace_root` — absolute path to workspace checkout
358/// * `target_directory` / `build_directory` — absolute path to `target/`
359/// * `manifest_path` — per-package absolute `Cargo.toml` path
360/// * `src_path` — per-target absolute source file path
361/// * `workspace_members` / `workspace_default_members` — IDs with `file://` paths
362fn strip_absolute_paths(value: Value) -> Value {
363 match value {
364 Value::Object(mut map) => {
365 for key in &[
366 "workspace_root",
367 "workspace_members",
368 "workspace_default_members",
369 "target_directory",
370 "build_directory",
371 "manifest_path",
372 "src_path",
373 "path",
374 ] {
375 map.remove(*key);
376 }
377 Value::Object(
378 map.into_iter()
379 .map(|(k, v)| (k, strip_absolute_paths(v)))
380 .collect(),
381 )
382 }
383 Value::Array(arr) => {
384 Value::Array(arr.into_iter().map(strip_absolute_paths).collect())
385 }
386 other => other,
387 }
388}
389
390// ─── hashing helpers ─────────────────────────────────────────────────────────
391
392/// SHA-256 of `data`, returned as a lowercase hex string.
393fn hex_sha256(data: &[u8]) -> String {
394 let mut h = Sha256::new();
395 h.update(data);
396 format!("{:x}", h.finalize())
397}
398
399/// Walk `src_dir` recursively, hash every `.rs` file, and return a
400/// `BTreeMap<relative_path, "sha256:<hex>">`.
401///
402/// Paths are relative to `manifest_dir` and always use `/` as the separator.
403fn hash_source_tree(
404 src_dir: &str,
405 manifest_dir: &str,
406) -> Result<BTreeMap<String, String>, BuildTimeFingerprintError> {
407 let mut map = BTreeMap::new();
408 visit_rs_files(Path::new(src_dir), Path::new(manifest_dir), &mut map)?;
409 Ok(map)
410}
411
412fn visit_rs_files(
413 dir: &Path,
414 base: &Path,
415 map: &mut BTreeMap<String, String>,
416) -> Result<(), BuildTimeFingerprintError> {
417 if !dir.exists() {
418 return Ok(());
419 }
420 let mut entries: Vec<_> = fs::read_dir(dir)?.collect::<Result<_, _>>()?;
421 // Sort for determinism across file-systems that don't guarantee readdir order.
422 entries.sort_by_key(|e| e.path());
423
424 for entry in entries {
425 let path = entry.path();
426 if path.is_dir() {
427 visit_rs_files(&path, base, map)?;
428 } else if path.extension().and_then(|e| e.to_str()) == Some("rs") {
429 let rel = path
430 .strip_prefix(base)
431 .unwrap_or(&path)
432 .to_string_lossy()
433 .replace('\\', "/");
434 let contents = fs::read(&path)?;
435 map.insert(rel, format!("sha256:{}", hex_sha256(&contents)));
436 }
437 }
438 Ok(())
439}