Skip to main content

toolkit_zero/dependency-graph/
build.rs

1//! Build-time fingerprint generator for use in a downstream `build.rs`.
2//!
3//! Produces **`fingerprint.json`** in `$OUT_DIR`: a compact, normalised,
4//! deterministically sorted JSON document capturing a stable snapshot of the
5//! build environment.
6//!
7//! ## Functions
8//!
9//! | Function | Description |
10//! |---|---|
11//! | [`generate_fingerprint`] | Always call this. Writes compact `fingerprint.json` to `$OUT_DIR` and emits `cargo:rerun-if-changed` directives. Pass `true` to also export a pretty-printed copy alongside `Cargo.toml`. |
12//! | [`export`] | Optional standalone export. Writes a pretty-printed `fingerprint.json` alongside `Cargo.toml`. Note: incurs a second `cargo metadata` call if used after `generate_fingerprint(false)`. |
13//!
14//! ## Sections captured
15//!
16//! | Section | Contents |
17//! |---|---|
18//! | `package` | Crate name and version |
19//! | `build` | Profile, opt-level, target triple, rustc version, and active feature flags |
20//! | `deps` | Full normalised `cargo metadata` dependency graph (sorted, no absolute paths) |
21//! | `cargo_lock_sha256` | SHA-256 of `Cargo.lock` (comment lines stripped) |
22//! | `source` | SHA-256 of every `.rs` file under `src/` |
23//!
24//! ## Usage
25//!
26//! In the downstream crate's `build.rs`:
27//!
28//! ```rust,ignore
29//! fn main() {
30//!     // Pass `true` to also write a pretty-printed copy alongside Cargo.toml.
31//!     toolkit_zero::dependency_graph::build::generate_fingerprint(cfg!(debug_assertions))
32//!         .expect("fingerprint generation failed");
33//! }
34//! ```
35//!
36//! Embed the fingerprint in the binary:
37//!
38//! ```rust,ignore
39//! const BUILD_TIME_FINGERPRINT: &str = include_str!(concat!(env!("OUT_DIR"), "/fingerprint.json"));
40//! ```
41//!
42//! ## Concerns
43//!
44//! * **Not tamper-proof** — the fingerprint resides as plain text in the binary's
45//!   read-only data section. It is informational in nature; it does not constitute
46//!   a security boundary.
47//! * **Export file** — `generate_fingerprint(true)` (or `export(true)`) writes
48//!   `fingerprint.json` to the crate root. Add it to `.gitignore` to prevent
49//!   unintentional commits.
//! * **Build-time overhead** — `cargo metadata` is executed every time the
//!   build script runs. The `cargo:rerun-if-changed` directives limit those
//!   runs to builds in which `src/`, `Cargo.toml`, or `Cargo.lock` changed.
53//! * **Path stripping** — absolute paths (`workspace_root`, `manifest_path`,
54//!   `src_path`, `path`, and others) are removed from `cargo metadata` output
55//!   to ensure the fingerprint is stable across machines and checkout locations.
56//! * **Feature scope** — `build.features` captures the active features of the
57//!   crate being built, not toolkit-zero's own features.
58//! * **Compile-time only** — the snapshot does not update at runtime.
59//! * **Atomic writes** — both fingerprint files are written via a `.tmp` rename
60//!   so a partially-written file is never observed by a parallel reader.
61
62use std::{collections::BTreeMap, env, fs, path::Path, process::Command};
63
64use serde_json::{json, Map, Value};
65use sha2::{Digest, Sha256};
66
67// ─── public error ────────────────────────────────────────────────────────────
68
/// Errors that can occur while generating `fingerprint.json`.
///
/// The type implements [`std::fmt::Display`] with a short human-readable
/// message per variant and [`std::error::Error`]; for [`Self::IoError`] the
/// wrapped [`std::io::Error`] is also reachable through
/// [`std::error::Error::source`], so error-chain reporters can walk into it.
#[derive(Debug)]
pub enum BuildTimeFingerprintError {
    /// `cargo metadata` process failed or returned non-zero.
    CargoMetadataFailed(String),
    /// `cargo metadata` stdout was not valid UTF-8.
    CargoMetadataNotUtf8,
    /// `cargo metadata` stdout could not be parsed as JSON.
    CargoMetadataInvalidJson(String),
    /// `Cargo.lock` was not found at the expected path.
    CargoLockNotFound(String),
    /// A filesystem operation failed.
    IoError(std::io::Error),
    /// The final JSON could not be serialised.
    SerializationFailed(String),
}

impl std::fmt::Display for BuildTimeFingerprintError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::CargoMetadataFailed(e) => write!(f, "cargo metadata failed: {e}"),
            Self::CargoMetadataNotUtf8 => write!(f, "cargo metadata output is not valid UTF-8"),
            Self::CargoMetadataInvalidJson(e) => {
                write!(f, "cargo metadata output is invalid JSON: {e}")
            }
            Self::CargoLockNotFound(p) => write!(f, "Cargo.lock not found at: {p}"),
            Self::IoError(e) => write!(f, "I/O error: {e}"),
            Self::SerializationFailed(e) => write!(f, "serialisation failed: {e}"),
        }
    }
}

impl std::error::Error for BuildTimeFingerprintError {
    /// Returns the underlying [`std::io::Error`] for [`Self::IoError`];
    /// every other variant only carries a message and has no source.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        // Exhaustive on purpose: a new variant must decide whether it has a source.
        match self {
            Self::IoError(e) => Some(e),
            Self::CargoMetadataFailed(_)
            | Self::CargoMetadataNotUtf8
            | Self::CargoMetadataInvalidJson(_)
            | Self::CargoLockNotFound(_)
            | Self::SerializationFailed(_) => None,
        }
    }
}

impl From<std::io::Error> for BuildTimeFingerprintError {
    /// Wraps an I/O error so `?` can be used on filesystem calls.
    fn from(e: std::io::Error) -> Self {
        Self::IoError(e)
    }
}
104
105// ─── public entry point ──────────────────────────────────────────────────────
106
107/// Generate `fingerprint.json` in `$OUT_DIR`.
108///
109/// All inputs are read from the environment variables that Cargo sets for
110/// `build.rs` scripts. The necessary `cargo:rerun-if-changed` directives are
111/// emitted automatically; no additional boilerplate is required in the
112/// calling `build.rs`.
113///
114/// If `export` is `true`, a pretty-printed copy is also written alongside
115/// `Cargo.toml` for local inspection. Both writes are **atomic** (written to a
116/// `.tmp` file then renamed), so a partially-written file is never observed.
117/// Only a single `cargo metadata` call is made regardless of the `export` flag.
118///
119/// Pass `cfg!(debug_assertions)` to export only in debug builds:
120///
121/// ```rust,ignore
122/// fn main() {
123///     toolkit_zero::dependency_graph::build::generate_fingerprint(cfg!(debug_assertions))
124///         .expect("fingerprint generation failed");
125/// }
126/// ```
127pub fn generate_fingerprint(export: bool) -> Result<(), BuildTimeFingerprintError> {
128    // Emit rerun directives — cargo reads these from build script stdout
129    // regardless of which function in the call stack prints them.
130    println!("cargo:rerun-if-changed=src");
131    println!("cargo:rerun-if-changed=Cargo.toml");
132    println!("cargo:rerun-if-changed=Cargo.lock");
133
134    let out_dir      = env::var("OUT_DIR").unwrap_or_default();
135    let manifest_dir = env::var("CARGO_MANIFEST_DIR").unwrap_or_default();
136
137    // Build the fingerprint once — shared by both the compact and pretty copies.
138    let fingerprint = build_fingerprint()?;
139
140    let compact = serde_json::to_string(&fingerprint)
141        .map_err(|e| BuildTimeFingerprintError::SerializationFailed(e.to_string()))?;
142
143    write_atomic(&format!("{out_dir}/fingerprint.json"), compact.as_bytes())?;
144
145    if export {
146        let pretty = serde_json::to_string_pretty(&fingerprint)
147            .map_err(|e| BuildTimeFingerprintError::SerializationFailed(e.to_string()))?;
148        write_atomic(&format!("{manifest_dir}/fingerprint.json"), pretty.as_bytes())?;
149    }
150
151    Ok(())
152}
153
154/// Write a pretty-printed `fingerprint.json` alongside the crate's `Cargo.toml`
155/// when `enabled` is `true`.
156///
157/// This file is intended for **local inspection only**. It is distinct from
158/// the compact `fingerprint.json` written to `$OUT_DIR`; the binary always
159/// embeds the `$OUT_DIR` copy.
160///
161/// > **Tip:** prefer passing `true` to [`generate_fingerprint`] instead of
162/// > calling both functions, as `generate_fingerprint(true)` only runs
163/// > `cargo metadata` once.
164///
165/// # Concerns
166///
167/// The exported file contains the full dependency graph, per-file source
168/// hashes, target triple, and compiler version. **Add `fingerprint.json` to
169/// `.gitignore`** to prevent unintentional commits. The write is atomic
170/// (written to a `.tmp` file then renamed).
171///
172/// ```rust,ignore
173/// fn main() {
174///     toolkit_zero::dependency_graph::build::generate_fingerprint(false)
175///         .expect("fingerprint generation failed");
176///     toolkit_zero::dependency_graph::build::export(cfg!(debug_assertions))
177///         .expect("fingerprint export failed");
178/// }
179/// ```
180pub fn export(enabled: bool) -> Result<(), BuildTimeFingerprintError> {
181    if !enabled { return Ok(()); }
182
183    let manifest_dir = env::var("CARGO_MANIFEST_DIR").unwrap_or_default();
184
185    let fingerprint = build_fingerprint()?;
186    let pretty = serde_json::to_string_pretty(&fingerprint)
187        .map_err(|e| BuildTimeFingerprintError::SerializationFailed(e.to_string()))?;
188
189    write_atomic(&format!("{manifest_dir}/fingerprint.json"), pretty.as_bytes())?;
190    Ok(())
191}
192
193// ─── atomic write helper ──────────────────────────────────────────────────────
194
195/// Write `data` to `path` atomically: write to `path.tmp` then rename.
196///
197/// On POSIX systems `rename(2)` is atomic within the same filesystem,
198/// so `path` is never observed in a partially-written state.
199fn write_atomic(path: &str, data: &[u8]) -> Result<(), BuildTimeFingerprintError> {
200    let tmp = format!("{path}.tmp");
201    fs::write(&tmp, data)?;
202    fs::rename(&tmp, path)?;
203    Ok(())
204}
205
206// ─── core fingerprint builder (shared by generate_fingerprint + export) ────────
207
208fn build_fingerprint() -> Result<Value, BuildTimeFingerprintError> {
209    let manifest_dir = env::var("CARGO_MANIFEST_DIR").unwrap_or_default();
210
211    // ── 1. Package identity ───────────────────────────────────────────────────
212    let pkg_name    = env::var("CARGO_PKG_NAME").unwrap_or_default();
213    let pkg_version = env::var("CARGO_PKG_VERSION").unwrap_or_default();
214
215    // ── 2. Build environment ──────────────────────────────────────────────────
216    let profile   = env::var("PROFILE").unwrap_or_default();
217    let opt_level = env::var("OPT_LEVEL").unwrap_or_default();
218    let target    = env::var("TARGET").unwrap_or_default();
219
220    // Collect all active features (CARGO_FEATURE_<NAME> → "feature-name")
221    let mut features: Vec<String> = env::vars()
222        .filter_map(|(k, _)| {
223            k.strip_prefix("CARGO_FEATURE_")
224                .map(|feat| feat.to_lowercase().replace('_', "-"))
225        })
226        .collect();
227    features.sort_unstable();
228
229    // ── 3. rustc version ──────────────────────────────────────────────────────
230    let rustc_version = Command::new("rustc")
231        .arg("--version")
232        .output()
233        .ok()
234        .and_then(|o| String::from_utf8(o.stdout).ok())
235        .map(|s| s.trim().to_owned())
236        .unwrap_or_else(|| "unknown".to_owned());
237
238    // ── 4. Normalised cargo metadata ──────────────────────────────────────────
239    let cargo_bin = env::var("CARGO").unwrap_or_else(|_| "cargo".to_owned());
240
241    let meta_out = Command::new(&cargo_bin)
242        .args([
243            "metadata",
244            "--format-version=1",
245            "--manifest-path",
246            &format!("{manifest_dir}/Cargo.toml"),
247        ])
248        .output()
249        .map_err(|e| BuildTimeFingerprintError::CargoMetadataFailed(e.to_string()))?;
250
251    if !meta_out.status.success() {
252        let err = String::from_utf8_lossy(&meta_out.stderr).to_string();
253        return Err(BuildTimeFingerprintError::CargoMetadataFailed(err));
254    }
255
256    let meta_str = String::from_utf8(meta_out.stdout)
257        .map_err(|_| BuildTimeFingerprintError::CargoMetadataNotUtf8)?;
258
259    let meta_raw: Value = serde_json::from_str(&meta_str)
260        .map_err(|e| BuildTimeFingerprintError::CargoMetadataInvalidJson(e.to_string()))?;
261
262    let meta_clean      = strip_absolute_paths(meta_raw);
263    let meta_normalised = normalise_json(meta_clean);
264
265    // ── 5. Cargo.lock SHA-256 ─────────────────────────────────────────────────
266    let lock_path = format!("{manifest_dir}/Cargo.lock");
267    if !Path::new(&lock_path).exists() {
268        return Err(BuildTimeFingerprintError::CargoLockNotFound(lock_path));
269    }
270    let lock_raw = fs::read(&lock_path)?;
271    let lock_stripped: Vec<u8> = lock_raw
272        .split(|&b| b == b'\n')
273        .filter(|line| !line.starts_with(b"#"))
274        .flat_map(|line| line.iter().chain(std::iter::once(&b'\n')))
275        .copied()
276        .collect();
277    let lock_sha256 = hex_sha256(&lock_stripped);
278
279    // ── 6. Source file hashes ─────────────────────────────────────────────────
280    let src_dir = format!("{manifest_dir}/src");
281    let source_hashes = hash_source_tree(&src_dir, &manifest_dir)?;
282
283    // ── 7. Assemble & normalise ───────────────────────────────────────────────
284    let fingerprint = json!({
285        "package": {
286            "name":    pkg_name,
287            "version": pkg_version,
288        },
289        "build": {
290            "features":      features,
291            "opt_level":     opt_level,
292            "profile":       profile,
293            "rustc_version": rustc_version,
294            "target":        target,
295        },
296        "cargo_lock_sha256": lock_sha256,
297        "deps":   meta_normalised,
298        "source": source_hashes,
299    });
300
301    Ok(normalise_json(fingerprint))
302}
303
304// ─── JSON normalisation ───────────────────────────────────────────────────────
305
306/// Recursively normalise a [`Value`]:
307///
308/// * **Objects** — keys are sorted alphabetically (serde_json's default `Map`
309///   is `BTreeMap`-backed, so collecting into it sorts automatically).
310/// * **Arrays** — items are recursively normalised *and* reordered by a stable
311///   derived key so that cargo-version-dependent ordering differences vanish.
312/// * **Primitives** — unchanged.
313fn normalise_json(value: Value) -> Value {
314    match value {
315        Value::Object(map) => {
316            // BTreeMap-backed Map: inserting via collect() automatically sorts keys.
317            let sorted: Map<String, Value> = map
318                .into_iter()
319                .map(|(k, v)| (k, normalise_json(v)))
320                .collect();
321            Value::Object(sorted)
322        }
323        Value::Array(arr) => {
324            let mut items: Vec<Value> = arr.into_iter().map(normalise_json).collect();
325            items.sort_by(|a, b| array_sort_key(a).cmp(&array_sort_key(b)));
326            Value::Array(items)
327        }
328        other => other,
329    }
330}
331
332/// Derive a stable sort key for an element inside a JSON array.
333///
334/// Preference order:
335/// 1. `"id"` field (cargo package IDs are globally unique and stable)
336/// 2. `"name"` + `"version"` concatenated
337/// 3. Compact JSON serialisation as a last resort
338fn array_sort_key(v: &Value) -> String {
339    if let Some(obj) = v.as_object() {
340        if let Some(id) = obj.get("id").and_then(|v| v.as_str()) {
341            return id.to_owned();
342        }
343        let name = obj.get("name").and_then(|v| v.as_str()).unwrap_or("");
344        let ver  = obj.get("version").and_then(|v| v.as_str()).unwrap_or("");
345        if !name.is_empty() {
346            return format!("{name}@{ver}");
347        }
348    }
349    serde_json::to_string(v).unwrap_or_default()
350}
351
352/// Remove fields that carry absolute or machine-specific paths from the
353/// `cargo metadata` JSON so the digest is stable across different machines
354/// and checkout locations.
355///
356/// Removed fields (all carry machine-specific absolute paths):
357/// * `workspace_root` — absolute path to workspace checkout
358/// * `target_directory` / `build_directory` — absolute path to `target/`
359/// * `manifest_path` — per-package absolute `Cargo.toml` path
360/// * `src_path` — per-target absolute source file path
361/// * `workspace_members` / `workspace_default_members` — IDs with `file://` paths
362fn strip_absolute_paths(value: Value) -> Value {
363    match value {
364        Value::Object(mut map) => {
365            for key in &[
366                "workspace_root",
367                "workspace_members",
368                "workspace_default_members",
369                "target_directory",
370                "build_directory",
371                "manifest_path",
372                "src_path",
373                "path",
374            ] {
375                map.remove(*key);
376            }
377            Value::Object(
378                map.into_iter()
379                    .map(|(k, v)| (k, strip_absolute_paths(v)))
380                    .collect(),
381            )
382        }
383        Value::Array(arr) => {
384            Value::Array(arr.into_iter().map(strip_absolute_paths).collect())
385        }
386        other => other,
387    }
388}
389
390// ─── hashing helpers ─────────────────────────────────────────────────────────
391
392/// SHA-256 of `data`, returned as a lowercase hex string.
393fn hex_sha256(data: &[u8]) -> String {
394    let mut h = Sha256::new();
395    h.update(data);
396    format!("{:x}", h.finalize())
397}
398
399/// Walk `src_dir` recursively, hash every `.rs` file, and return a
400/// `BTreeMap<relative_path, "sha256:<hex>">`.
401///
402/// Paths are relative to `manifest_dir` and always use `/` as the separator.
403fn hash_source_tree(
404    src_dir:      &str,
405    manifest_dir: &str,
406) -> Result<BTreeMap<String, String>, BuildTimeFingerprintError> {
407    let mut map = BTreeMap::new();
408    visit_rs_files(Path::new(src_dir), Path::new(manifest_dir), &mut map)?;
409    Ok(map)
410}
411
412fn visit_rs_files(
413    dir:  &Path,
414    base: &Path,
415    map:  &mut BTreeMap<String, String>,
416) -> Result<(), BuildTimeFingerprintError> {
417    if !dir.exists() {
418        return Ok(());
419    }
420    let mut entries: Vec<_> = fs::read_dir(dir)?.collect::<Result<_, _>>()?;
421    // Sort for determinism across file-systems that don't guarantee readdir order.
422    entries.sort_by_key(|e| e.path());
423
424    for entry in entries {
425        let path = entry.path();
426        if path.is_dir() {
427            visit_rs_files(&path, base, map)?;
428        } else if path.extension().and_then(|e| e.to_str()) == Some("rs") {
429            let rel = path
430                .strip_prefix(base)
431                .unwrap_or(&path)
432                .to_string_lossy()
433                .replace('\\', "/");
434            let contents = fs::read(&path)?;
435            map.insert(rel, format!("sha256:{}", hex_sha256(&contents)));
436        }
437    }
438    Ok(())
439}