Skip to main content

toolkit_zero/dependency-graph/
build.rs

1//! Build-time fingerprint generator for use in a downstream `build.rs`.
2//!
3//! Produces **`fingerprint.json`** in `$OUT_DIR`: a compact, normalised,
4//! deterministically sorted JSON document capturing a stable snapshot of the
5//! build environment.
6//!
7//! ## Functions
8//!
9//! | Function | Description |
10//! |---|---|
11//! | [`generate_fingerprint`] | Always call this. Writes compact `fingerprint.json` to `$OUT_DIR` and emits `cargo:rerun-if-changed` directives. |
12//! | [`export`] | Optional. Writes a pretty-printed `fingerprint.json` alongside `Cargo.toml` for local inspection. Pass `false` or condition on `cfg!(debug_assertions)` to suppress in release builds. |
13//!
14//! ## Sections captured
15//!
16//! | Section | Contents |
17//! |---|---|
18//! | `package` | Crate name and version |
19//! | `build` | Profile, opt-level, target triple, rustc version, and active feature flags |
20//! | `deps` | Full normalised `cargo metadata` dependency graph (sorted, no absolute paths) |
21//! | `cargo_lock_sha256` | SHA-256 of `Cargo.lock` (comment lines stripped) |
22//! | `source` | SHA-256 of every `.rs` file under `src/` |
23//!
24//! ## Usage
25//!
26//! In the downstream crate's `build.rs`:
27//!
28//! ```rust,ignore
29//! fn main() {
30//!     toolkit_zero::dependency_graph::build::generate_fingerprint()
31//!         .expect("fingerprint generation failed");
32//!     // optional — pretty-print alongside Cargo.toml for local inspection
33//!     toolkit_zero::dependency_graph::build::export(cfg!(debug_assertions))
34//!         .expect("fingerprint export failed");
35//! }
36//! ```
37//!
38//! Embed the fingerprint in the binary:
39//!
40//! ```rust,ignore
41//! const BUILD_TIME_FINGERPRINT: &str = include_str!(concat!(env!("OUT_DIR"), "/fingerprint.json"));
42//! ```
43//!
44//! ## Concerns
45//!
46//! * **Not tamper-proof** — the fingerprint resides as plain text in the binary's
47//!   read-only data section. It is informational in nature; it does not constitute
48//!   a security boundary.
49//! * **Export file** — `export(true)` writes `fingerprint.json` to the crate root.
50//!   Add it to `.gitignore` to prevent unintentional commits.
51//! * **Build-time overhead** — `cargo metadata` is executed on every rebuild.
52//!   The `cargo:rerun-if-changed` directives restrict this to changes in `src/`,
53//!   `Cargo.toml`, or `Cargo.lock`.
54//! * **Path stripping** — absolute paths (`workspace_root`, `manifest_path`,
55//!   `src_path`, `path`, and others) are removed from `cargo metadata` output
56//!   to ensure the fingerprint is stable across machines and checkout locations.
57//! * **Feature scope** — `build.features` captures the active features of the
58//!   crate being built, not toolkit-zero's own features.
59//! * **Compile-time only** — the snapshot does not update at runtime.
60
61use std::{collections::BTreeMap, env, fs, path::Path, process::Command};
62
63use serde_json::{json, Map, Value};
64use sha2::{Digest, Sha256};
65
66// ─── public error ────────────────────────────────────────────────────────────
67
/// Errors that can occur while generating `fingerprint.json`.
#[derive(Debug)]
pub enum BuildTimeFingerprintError {
    /// `cargo metadata` could not be spawned or returned non-zero.
    /// Carries the spawn error text or the captured stderr.
    CargoMetadataFailed(String),
    /// `cargo metadata` stdout was not valid UTF-8.
    CargoMetadataNotUtf8,
    /// `cargo metadata` stdout could not be parsed as JSON.
    /// Carries the parser's error text.
    CargoMetadataInvalidJson(String),
    /// `Cargo.lock` was not found at the expected path (carried as text).
    CargoLockNotFound(String),
    /// A filesystem operation failed. The underlying error is also exposed
    /// through [`std::error::Error::source`].
    IoError(std::io::Error),
    /// The final JSON could not be serialised.
    SerializationFailed(String),
}

impl std::fmt::Display for BuildTimeFingerprintError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::CargoMetadataFailed(e)      => write!(f, "cargo metadata failed: {e}"),
            Self::CargoMetadataNotUtf8        => write!(f, "cargo metadata output is not valid UTF-8"),
            Self::CargoMetadataInvalidJson(e) => write!(f, "cargo metadata output is invalid JSON: {e}"),
            Self::CargoLockNotFound(p)        => write!(f, "Cargo.lock not found at: {p}"),
            Self::IoError(e)                  => write!(f, "I/O error: {e}"),
            Self::SerializationFailed(e)      => write!(f, "serialisation failed: {e}"),
        }
    }
}

impl std::error::Error for BuildTimeFingerprintError {
    /// Expose the wrapped `io::Error` so error-chain reporters can walk to the
    /// root cause. The other variants only carry text and have no source.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        // Exhaustive on purpose: a new variant must decide whether it has a source.
        match self {
            Self::IoError(e) => Some(e),
            Self::CargoMetadataFailed(_)
            | Self::CargoMetadataNotUtf8
            | Self::CargoMetadataInvalidJson(_)
            | Self::CargoLockNotFound(_)
            | Self::SerializationFailed(_) => None,
        }
    }
}

/// Lets `?` convert filesystem errors into [`BuildTimeFingerprintError::IoError`].
impl From<std::io::Error> for BuildTimeFingerprintError {
    fn from(e: std::io::Error) -> Self {
        Self::IoError(e)
    }
}
103
104// ─── public entry point ──────────────────────────────────────────────────────
105
106/// Generate `fingerprint.json` in `$OUT_DIR`.
107///
108/// All inputs are read from the environment variables that Cargo sets for
109/// `build.rs` scripts. The necessary `cargo:rerun-if-changed` directives are
110/// emitted automatically; no additional boilerplate is required in the
111/// calling `build.rs`.
112///
113/// To obtain a pretty-printed copy alongside `Cargo.toml` for local
114/// inspection, also call [`export`]`(true)`.
115///
116/// ```rust,ignore
117/// fn main() {
118///     toolkit_zero::dependency_graph::build::generate_fingerprint()
119///         .expect("fingerprint generation failed");
120/// }
121/// ```
122pub fn generate_fingerprint() -> Result<(), BuildTimeFingerprintError> {
123    // Emit rerun directives — cargo reads these from build script stdout
124    // regardless of which function in the call stack prints them.
125    println!("cargo:rerun-if-changed=src");
126    println!("cargo:rerun-if-changed=Cargo.toml");
127    println!("cargo:rerun-if-changed=Cargo.lock");
128
129    let out_dir = env::var("OUT_DIR").unwrap_or_default();
130
131    let fingerprint = build_fingerprint()?;
132    let compact = serde_json::to_string(&fingerprint)
133        .map_err(|e| BuildTimeFingerprintError::SerializationFailed(e.to_string()))?;
134
135    fs::write(format!("{out_dir}/fingerprint.json"), compact)?;
136    Ok(())
137}
138
139/// Write a pretty-printed `fingerprint.json` alongside the crate's `Cargo.toml`
140/// when `enabled` is `true`.
141///
142/// This file is intended for **local inspection only**. It is distinct from
143/// the compact `fingerprint.json` written to `$OUT_DIR`; the binary always
144/// embeds the `$OUT_DIR` copy. Pass `false`, or condition the call on
145/// `cfg!(debug_assertions)`, to suppress the file in release builds.
146///
147/// # Concerns
148///
149/// The exported file contains the full dependency graph, per-file source
150/// hashes, target triple, and compiler version. **Add `fingerprint.json` to
151/// `.gitignore`** to prevent unintentional commits. If an error occurs and
152/// `enabled` is `true`, the file may be partially written; the error is
153/// propagated to the caller.
154///
155/// ```rust,ignore
156/// fn main() {
157///     toolkit_zero::dependency_graph::build::generate_fingerprint()
158///         .expect("fingerprint generation failed");
159///     toolkit_zero::dependency_graph::build::export(cfg!(debug_assertions))
160///         .expect("fingerprint export failed");
161/// }
162/// ```
163pub fn export(enabled: bool) -> Result<(), BuildTimeFingerprintError> {
164    if !enabled { return Ok(()); }
165
166    let manifest_dir = env::var("CARGO_MANIFEST_DIR").unwrap_or_default();
167
168    let fingerprint = build_fingerprint()?;
169    let pretty = serde_json::to_string_pretty(&fingerprint)
170        .map_err(|e| BuildTimeFingerprintError::SerializationFailed(e.to_string()))?;
171
172    fs::write(format!("{manifest_dir}/fingerprint.json"), pretty)?;
173    Ok(())
174}
175
176// ─── core fingerprint builder (shared by generate_fingerprint + export) ────────
177
178fn build_fingerprint() -> Result<Value, BuildTimeFingerprintError> {
179    let manifest_dir = env::var("CARGO_MANIFEST_DIR").unwrap_or_default();
180
181    // ── 1. Package identity ───────────────────────────────────────────────────
182    let pkg_name    = env::var("CARGO_PKG_NAME").unwrap_or_default();
183    let pkg_version = env::var("CARGO_PKG_VERSION").unwrap_or_default();
184
185    // ── 2. Build environment ──────────────────────────────────────────────────
186    let profile   = env::var("PROFILE").unwrap_or_default();
187    let opt_level = env::var("OPT_LEVEL").unwrap_or_default();
188    let target    = env::var("TARGET").unwrap_or_default();
189
190    // Collect all active features (CARGO_FEATURE_<NAME> → "feature-name")
191    let mut features: Vec<String> = env::vars()
192        .filter_map(|(k, _)| {
193            k.strip_prefix("CARGO_FEATURE_")
194                .map(|feat| feat.to_lowercase().replace('_', "-"))
195        })
196        .collect();
197    features.sort_unstable();
198
199    // ── 3. rustc version ──────────────────────────────────────────────────────
200    let rustc_version = Command::new("rustc")
201        .arg("--version")
202        .output()
203        .ok()
204        .and_then(|o| String::from_utf8(o.stdout).ok())
205        .map(|s| s.trim().to_owned())
206        .unwrap_or_else(|| "unknown".to_owned());
207
208    // ── 4. Normalised cargo metadata ──────────────────────────────────────────
209    let cargo_bin = env::var("CARGO").unwrap_or_else(|_| "cargo".to_owned());
210
211    let meta_out = Command::new(&cargo_bin)
212        .args([
213            "metadata",
214            "--format-version=1",
215            "--manifest-path",
216            &format!("{manifest_dir}/Cargo.toml"),
217        ])
218        .output()
219        .map_err(|e| BuildTimeFingerprintError::CargoMetadataFailed(e.to_string()))?;
220
221    if !meta_out.status.success() {
222        let err = String::from_utf8_lossy(&meta_out.stderr).to_string();
223        return Err(BuildTimeFingerprintError::CargoMetadataFailed(err));
224    }
225
226    let meta_str = String::from_utf8(meta_out.stdout)
227        .map_err(|_| BuildTimeFingerprintError::CargoMetadataNotUtf8)?;
228
229    let meta_raw: Value = serde_json::from_str(&meta_str)
230        .map_err(|e| BuildTimeFingerprintError::CargoMetadataInvalidJson(e.to_string()))?;
231
232    let meta_clean      = strip_absolute_paths(meta_raw);
233    let meta_normalised = normalise_json(meta_clean);
234
235    // ── 5. Cargo.lock SHA-256 ─────────────────────────────────────────────────
236    let lock_path = format!("{manifest_dir}/Cargo.lock");
237    if !Path::new(&lock_path).exists() {
238        return Err(BuildTimeFingerprintError::CargoLockNotFound(lock_path));
239    }
240    let lock_raw = fs::read(&lock_path)?;
241    let lock_stripped: Vec<u8> = lock_raw
242        .split(|&b| b == b'\n')
243        .filter(|line| !line.starts_with(b"#"))
244        .flat_map(|line| line.iter().chain(std::iter::once(&b'\n')))
245        .copied()
246        .collect();
247    let lock_sha256 = hex_sha256(&lock_stripped);
248
249    // ── 6. Source file hashes ─────────────────────────────────────────────────
250    let src_dir = format!("{manifest_dir}/src");
251    let source_hashes = hash_source_tree(&src_dir, &manifest_dir)?;
252
253    // ── 7. Assemble & normalise ───────────────────────────────────────────────
254    let fingerprint = json!({
255        "package": {
256            "name":    pkg_name,
257            "version": pkg_version,
258        },
259        "build": {
260            "features":      features,
261            "opt_level":     opt_level,
262            "profile":       profile,
263            "rustc_version": rustc_version,
264            "target":        target,
265        },
266        "cargo_lock_sha256": lock_sha256,
267        "deps":   meta_normalised,
268        "source": source_hashes,
269    });
270
271    Ok(normalise_json(fingerprint))
272}
273
274// ─── JSON normalisation ───────────────────────────────────────────────────────
275
276/// Recursively normalise a [`Value`]:
277///
278/// * **Objects** — keys are sorted alphabetically (serde_json's default `Map`
279///   is `BTreeMap`-backed, so collecting into it sorts automatically).
280/// * **Arrays** — items are recursively normalised *and* reordered by a stable
281///   derived key so that cargo-version-dependent ordering differences vanish.
282/// * **Primitives** — unchanged.
283fn normalise_json(value: Value) -> Value {
284    match value {
285        Value::Object(map) => {
286            // BTreeMap-backed Map: inserting via collect() automatically sorts keys.
287            let sorted: Map<String, Value> = map
288                .into_iter()
289                .map(|(k, v)| (k, normalise_json(v)))
290                .collect();
291            Value::Object(sorted)
292        }
293        Value::Array(arr) => {
294            let mut items: Vec<Value> = arr.into_iter().map(normalise_json).collect();
295            items.sort_by(|a, b| array_sort_key(a).cmp(&array_sort_key(b)));
296            Value::Array(items)
297        }
298        other => other,
299    }
300}
301
302/// Derive a stable sort key for an element inside a JSON array.
303///
304/// Preference order:
305/// 1. `"id"` field (cargo package IDs are globally unique and stable)
306/// 2. `"name"` + `"version"` concatenated
307/// 3. Compact JSON serialisation as a last resort
308fn array_sort_key(v: &Value) -> String {
309    if let Some(obj) = v.as_object() {
310        if let Some(id) = obj.get("id").and_then(|v| v.as_str()) {
311            return id.to_owned();
312        }
313        let name = obj.get("name").and_then(|v| v.as_str()).unwrap_or("");
314        let ver  = obj.get("version").and_then(|v| v.as_str()).unwrap_or("");
315        if !name.is_empty() {
316            return format!("{name}@{ver}");
317        }
318    }
319    serde_json::to_string(v).unwrap_or_default()
320}
321
322/// Remove fields that carry absolute or machine-specific paths from the
323/// `cargo metadata` JSON so the digest is stable across different machines
324/// and checkout locations.
325///
326/// Removed fields (all carry machine-specific absolute paths):
327/// * `workspace_root` — absolute path to workspace checkout
328/// * `target_directory` / `build_directory` — absolute path to `target/`
329/// * `manifest_path` — per-package absolute `Cargo.toml` path
330/// * `src_path` — per-target absolute source file path
331/// * `workspace_members` / `workspace_default_members` — IDs with `file://` paths
332fn strip_absolute_paths(value: Value) -> Value {
333    match value {
334        Value::Object(mut map) => {
335            for key in &[
336                "workspace_root",
337                "workspace_members",
338                "workspace_default_members",
339                "target_directory",
340                "build_directory",
341                "manifest_path",
342                "src_path",
343                "path",
344            ] {
345                map.remove(*key);
346            }
347            Value::Object(
348                map.into_iter()
349                    .map(|(k, v)| (k, strip_absolute_paths(v)))
350                    .collect(),
351            )
352        }
353        Value::Array(arr) => {
354            Value::Array(arr.into_iter().map(strip_absolute_paths).collect())
355        }
356        other => other,
357    }
358}
359
360// ─── hashing helpers ─────────────────────────────────────────────────────────
361
362/// SHA-256 of `data`, returned as a lowercase hex string.
363fn hex_sha256(data: &[u8]) -> String {
364    let mut h = Sha256::new();
365    h.update(data);
366    format!("{:x}", h.finalize())
367}
368
369/// Walk `src_dir` recursively, hash every `.rs` file, and return a
370/// `BTreeMap<relative_path, "sha256:<hex>">`.
371///
372/// Paths are relative to `manifest_dir` and always use `/` as the separator.
373fn hash_source_tree(
374    src_dir:      &str,
375    manifest_dir: &str,
376) -> Result<BTreeMap<String, String>, BuildTimeFingerprintError> {
377    let mut map = BTreeMap::new();
378    visit_rs_files(Path::new(src_dir), Path::new(manifest_dir), &mut map)?;
379    Ok(map)
380}
381
382fn visit_rs_files(
383    dir:  &Path,
384    base: &Path,
385    map:  &mut BTreeMap<String, String>,
386) -> Result<(), BuildTimeFingerprintError> {
387    if !dir.exists() {
388        return Ok(());
389    }
390    let mut entries: Vec<_> = fs::read_dir(dir)?.collect::<Result<_, _>>()?;
391    // Sort for determinism across file-systems that don't guarantee readdir order.
392    entries.sort_by_key(|e| e.path());
393
394    for entry in entries {
395        let path = entry.path();
396        if path.is_dir() {
397            visit_rs_files(&path, base, map)?;
398        } else if path.extension().and_then(|e| e.to_str()) == Some("rs") {
399            let rel = path
400                .strip_prefix(base)
401                .unwrap_or(&path)
402                .to_string_lossy()
403                .replace('\\', "/");
404            let contents = fs::read(&path)?;
405            map.insert(rel, format!("sha256:{}", hex_sha256(&contents)));
406        }
407    }
408    Ok(())
409}