toolkit_zero/dependency-graph/build.rs
1//! Build-time fingerprint generator for use in a downstream `build.rs`.
2//!
3//! Produces **`fingerprint.json`** in `$OUT_DIR`: a compact, normalised,
4//! deterministically sorted JSON document capturing a stable snapshot of the
5//! build environment.
6//!
7//! ## Functions
8//!
9//! | Function | Description |
10//! |---|---|
11//! | [`generate_fingerprint`] | Always call this. Writes compact `fingerprint.json` to `$OUT_DIR` and emits `cargo:rerun-if-changed` directives. |
12//! | [`export`] | Optional. Writes a pretty-printed `fingerprint.json` alongside `Cargo.toml` for local inspection. Pass `false` or condition on `cfg!(debug_assertions)` to suppress in release builds. |
13//!
14//! ## Sections captured
15//!
16//! | Section | Contents |
17//! |---|---|
18//! | `package` | Crate name and version |
19//! | `build` | Profile, opt-level, target triple, rustc version, and active feature flags |
20//! | `deps` | Full normalised `cargo metadata` dependency graph (sorted, no absolute paths) |
21//! | `cargo_lock_sha256` | SHA-256 of `Cargo.lock` (comment lines stripped) |
22//! | `source` | SHA-256 of every `.rs` file under `src/` |
23//!
24//! ## Usage
25//!
26//! In the downstream crate's `build.rs`:
27//!
28//! ```rust,ignore
29//! fn main() {
30//! toolkit_zero::dependency_graph::build::generate_fingerprint()
31//! .expect("fingerprint generation failed");
32//! // optional — pretty-print alongside Cargo.toml for local inspection
33//! toolkit_zero::dependency_graph::build::export(cfg!(debug_assertions))
34//! .expect("fingerprint export failed");
35//! }
36//! ```
37//!
38//! Embed the fingerprint in the binary:
39//!
40//! ```rust,ignore
41//! const BUILD_TIME_FINGERPRINT: &str = include_str!(concat!(env!("OUT_DIR"), "/fingerprint.json"));
42//! ```
43//!
44//! ## Concerns
45//!
46//! * **Not tamper-proof** — the fingerprint resides as plain text in the binary's
47//! read-only data section. It is informational in nature; it does not constitute
48//! a security boundary.
49//! * **Export file** — `export(true)` writes `fingerprint.json` to the crate root.
50//! Add it to `.gitignore` to prevent unintentional commits.
//! * **Build-time overhead** — `cargo metadata` is executed every time the
//!   build script runs. The `cargo:rerun-if-changed` directives limit those runs
//!   to changes in `src/`, `Cargo.toml`, or `Cargo.lock`.
54//! * **Path stripping** — absolute paths (`workspace_root`, `manifest_path`,
55//! `src_path`, `path`, and others) are removed from `cargo metadata` output
56//! to ensure the fingerprint is stable across machines and checkout locations.
57//! * **Feature scope** — `build.features` captures the active features of the
58//! crate being built, not toolkit-zero's own features.
59//! * **Compile-time only** — the snapshot does not update at runtime.
60
61use std::{collections::BTreeMap, env, fs, path::Path, process::Command};
62
63use serde_json::{json, Map, Value};
64use sha2::{Digest, Sha256};
65
66// ─── public error ────────────────────────────────────────────────────────────
67
/// Errors that can occur while generating `fingerprint.json`.
///
/// String payloads carry a human-readable description (process stderr, a
/// parser message, or a path). The [`IoError`](Self::IoError) variant keeps the
/// original [`std::io::Error`] and exposes it through
/// [`std::error::Error::source`].
#[derive(Debug)]
pub enum BuildTimeFingerprintError {
    /// `cargo metadata` process failed or returned non-zero.
    CargoMetadataFailed(String),
    /// `cargo metadata` stdout was not valid UTF-8.
    CargoMetadataNotUtf8,
    /// `cargo metadata` stdout could not be parsed as JSON.
    CargoMetadataInvalidJson(String),
    /// `Cargo.lock` was not found at the expected path.
    CargoLockNotFound(String),
    /// A filesystem operation failed.
    IoError(std::io::Error),
    /// The final JSON could not be serialised.
    SerializationFailed(String),
}

impl std::fmt::Display for BuildTimeFingerprintError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::CargoMetadataFailed(e) => write!(f, "cargo metadata failed: {e}"),
            Self::CargoMetadataNotUtf8 => write!(f, "cargo metadata output is not valid UTF-8"),
            Self::CargoMetadataInvalidJson(e) => {
                write!(f, "cargo metadata output is invalid JSON: {e}")
            }
            Self::CargoLockNotFound(p) => write!(f, "Cargo.lock not found at: {p}"),
            Self::IoError(e) => write!(f, "I/O error: {e}"),
            Self::SerializationFailed(e) => write!(f, "serialisation failed: {e}"),
        }
    }
}

impl std::error::Error for BuildTimeFingerprintError {
    /// Expose the wrapped I/O error so callers can walk the error chain.
    ///
    /// The match is exhaustive on purpose: adding a variant forces a decision
    /// about whether it carries an underlying cause.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        match self {
            Self::IoError(e) => Some(e),
            Self::CargoMetadataFailed(_)
            | Self::CargoMetadataNotUtf8
            | Self::CargoMetadataInvalidJson(_)
            | Self::CargoLockNotFound(_)
            | Self::SerializationFailed(_) => None,
        }
    }
}

impl From<std::io::Error> for BuildTimeFingerprintError {
    /// Wrap an I/O error so `?` can be used on filesystem calls.
    fn from(e: std::io::Error) -> Self {
        Self::IoError(e)
    }
}
103
104// ─── public entry point ──────────────────────────────────────────────────────
105
106/// Generate `fingerprint.json` in `$OUT_DIR`.
107///
108/// All inputs are read from the environment variables that Cargo sets for
109/// `build.rs` scripts. The necessary `cargo:rerun-if-changed` directives are
110/// emitted automatically; no additional boilerplate is required in the
111/// calling `build.rs`.
112///
113/// To obtain a pretty-printed copy alongside `Cargo.toml` for local
114/// inspection, also call [`export`]`(true)`.
115///
116/// ```rust,ignore
117/// fn main() {
118/// toolkit_zero::dependency_graph::build::generate_fingerprint()
119/// .expect("fingerprint generation failed");
120/// }
121/// ```
122pub fn generate_fingerprint() -> Result<(), BuildTimeFingerprintError> {
123 // Emit rerun directives — cargo reads these from build script stdout
124 // regardless of which function in the call stack prints them.
125 println!("cargo:rerun-if-changed=src");
126 println!("cargo:rerun-if-changed=Cargo.toml");
127 println!("cargo:rerun-if-changed=Cargo.lock");
128
129 let out_dir = env::var("OUT_DIR").unwrap_or_default();
130
131 let fingerprint = build_fingerprint()?;
132 let compact = serde_json::to_string(&fingerprint)
133 .map_err(|e| BuildTimeFingerprintError::SerializationFailed(e.to_string()))?;
134
135 fs::write(format!("{out_dir}/fingerprint.json"), compact)?;
136 Ok(())
137}
138
139/// Write a pretty-printed `fingerprint.json` alongside the crate's `Cargo.toml`
140/// when `enabled` is `true`.
141///
142/// This file is intended for **local inspection only**. It is distinct from
143/// the compact `fingerprint.json` written to `$OUT_DIR`; the binary always
144/// embeds the `$OUT_DIR` copy. Pass `false`, or condition the call on
145/// `cfg!(debug_assertions)`, to suppress the file in release builds.
146///
147/// # Concerns
148///
149/// The exported file contains the full dependency graph, per-file source
150/// hashes, target triple, and compiler version. **Add `fingerprint.json` to
151/// `.gitignore`** to prevent unintentional commits. If an error occurs and
152/// `enabled` is `true`, the file may be partially written; the error is
153/// propagated to the caller.
154///
155/// ```rust,ignore
156/// fn main() {
157/// toolkit_zero::dependency_graph::build::generate_fingerprint()
158/// .expect("fingerprint generation failed");
159/// toolkit_zero::dependency_graph::build::export(cfg!(debug_assertions))
160/// .expect("fingerprint export failed");
161/// }
162/// ```
163pub fn export(enabled: bool) -> Result<(), BuildTimeFingerprintError> {
164 if !enabled { return Ok(()); }
165
166 let manifest_dir = env::var("CARGO_MANIFEST_DIR").unwrap_or_default();
167
168 let fingerprint = build_fingerprint()?;
169 let pretty = serde_json::to_string_pretty(&fingerprint)
170 .map_err(|e| BuildTimeFingerprintError::SerializationFailed(e.to_string()))?;
171
172 fs::write(format!("{manifest_dir}/fingerprint.json"), pretty)?;
173 Ok(())
174}
175
176// ─── core fingerprint builder (shared by generate_fingerprint + export) ────────
177
178fn build_fingerprint() -> Result<Value, BuildTimeFingerprintError> {
179 let manifest_dir = env::var("CARGO_MANIFEST_DIR").unwrap_or_default();
180
181 // ── 1. Package identity ───────────────────────────────────────────────────
182 let pkg_name = env::var("CARGO_PKG_NAME").unwrap_or_default();
183 let pkg_version = env::var("CARGO_PKG_VERSION").unwrap_or_default();
184
185 // ── 2. Build environment ──────────────────────────────────────────────────
186 let profile = env::var("PROFILE").unwrap_or_default();
187 let opt_level = env::var("OPT_LEVEL").unwrap_or_default();
188 let target = env::var("TARGET").unwrap_or_default();
189
190 // Collect all active features (CARGO_FEATURE_<NAME> → "feature-name")
191 let mut features: Vec<String> = env::vars()
192 .filter_map(|(k, _)| {
193 k.strip_prefix("CARGO_FEATURE_")
194 .map(|feat| feat.to_lowercase().replace('_', "-"))
195 })
196 .collect();
197 features.sort_unstable();
198
199 // ── 3. rustc version ──────────────────────────────────────────────────────
200 let rustc_version = Command::new("rustc")
201 .arg("--version")
202 .output()
203 .ok()
204 .and_then(|o| String::from_utf8(o.stdout).ok())
205 .map(|s| s.trim().to_owned())
206 .unwrap_or_else(|| "unknown".to_owned());
207
208 // ── 4. Normalised cargo metadata ──────────────────────────────────────────
209 let cargo_bin = env::var("CARGO").unwrap_or_else(|_| "cargo".to_owned());
210
211 let meta_out = Command::new(&cargo_bin)
212 .args([
213 "metadata",
214 "--format-version=1",
215 "--manifest-path",
216 &format!("{manifest_dir}/Cargo.toml"),
217 ])
218 .output()
219 .map_err(|e| BuildTimeFingerprintError::CargoMetadataFailed(e.to_string()))?;
220
221 if !meta_out.status.success() {
222 let err = String::from_utf8_lossy(&meta_out.stderr).to_string();
223 return Err(BuildTimeFingerprintError::CargoMetadataFailed(err));
224 }
225
226 let meta_str = String::from_utf8(meta_out.stdout)
227 .map_err(|_| BuildTimeFingerprintError::CargoMetadataNotUtf8)?;
228
229 let meta_raw: Value = serde_json::from_str(&meta_str)
230 .map_err(|e| BuildTimeFingerprintError::CargoMetadataInvalidJson(e.to_string()))?;
231
232 let meta_clean = strip_absolute_paths(meta_raw);
233 let meta_normalised = normalise_json(meta_clean);
234
235 // ── 5. Cargo.lock SHA-256 ─────────────────────────────────────────────────
236 let lock_path = format!("{manifest_dir}/Cargo.lock");
237 if !Path::new(&lock_path).exists() {
238 return Err(BuildTimeFingerprintError::CargoLockNotFound(lock_path));
239 }
240 let lock_raw = fs::read(&lock_path)?;
241 let lock_stripped: Vec<u8> = lock_raw
242 .split(|&b| b == b'\n')
243 .filter(|line| !line.starts_with(b"#"))
244 .flat_map(|line| line.iter().chain(std::iter::once(&b'\n')))
245 .copied()
246 .collect();
247 let lock_sha256 = hex_sha256(&lock_stripped);
248
249 // ── 6. Source file hashes ─────────────────────────────────────────────────
250 let src_dir = format!("{manifest_dir}/src");
251 let source_hashes = hash_source_tree(&src_dir, &manifest_dir)?;
252
253 // ── 7. Assemble & normalise ───────────────────────────────────────────────
254 let fingerprint = json!({
255 "package": {
256 "name": pkg_name,
257 "version": pkg_version,
258 },
259 "build": {
260 "features": features,
261 "opt_level": opt_level,
262 "profile": profile,
263 "rustc_version": rustc_version,
264 "target": target,
265 },
266 "cargo_lock_sha256": lock_sha256,
267 "deps": meta_normalised,
268 "source": source_hashes,
269 });
270
271 Ok(normalise_json(fingerprint))
272}
273
274// ─── JSON normalisation ───────────────────────────────────────────────────────
275
276/// Recursively normalise a [`Value`]:
277///
278/// * **Objects** — keys are sorted alphabetically (serde_json's default `Map`
279/// is `BTreeMap`-backed, so collecting into it sorts automatically).
280/// * **Arrays** — items are recursively normalised *and* reordered by a stable
281/// derived key so that cargo-version-dependent ordering differences vanish.
282/// * **Primitives** — unchanged.
283fn normalise_json(value: Value) -> Value {
284 match value {
285 Value::Object(map) => {
286 // BTreeMap-backed Map: inserting via collect() automatically sorts keys.
287 let sorted: Map<String, Value> = map
288 .into_iter()
289 .map(|(k, v)| (k, normalise_json(v)))
290 .collect();
291 Value::Object(sorted)
292 }
293 Value::Array(arr) => {
294 let mut items: Vec<Value> = arr.into_iter().map(normalise_json).collect();
295 items.sort_by(|a, b| array_sort_key(a).cmp(&array_sort_key(b)));
296 Value::Array(items)
297 }
298 other => other,
299 }
300}
301
302/// Derive a stable sort key for an element inside a JSON array.
303///
304/// Preference order:
305/// 1. `"id"` field (cargo package IDs are globally unique and stable)
306/// 2. `"name"` + `"version"` concatenated
307/// 3. Compact JSON serialisation as a last resort
308fn array_sort_key(v: &Value) -> String {
309 if let Some(obj) = v.as_object() {
310 if let Some(id) = obj.get("id").and_then(|v| v.as_str()) {
311 return id.to_owned();
312 }
313 let name = obj.get("name").and_then(|v| v.as_str()).unwrap_or("");
314 let ver = obj.get("version").and_then(|v| v.as_str()).unwrap_or("");
315 if !name.is_empty() {
316 return format!("{name}@{ver}");
317 }
318 }
319 serde_json::to_string(v).unwrap_or_default()
320}
321
322/// Remove fields that carry absolute or machine-specific paths from the
323/// `cargo metadata` JSON so the digest is stable across different machines
324/// and checkout locations.
325///
326/// Removed fields (all carry machine-specific absolute paths):
327/// * `workspace_root` — absolute path to workspace checkout
328/// * `target_directory` / `build_directory` — absolute path to `target/`
329/// * `manifest_path` — per-package absolute `Cargo.toml` path
330/// * `src_path` — per-target absolute source file path
331/// * `workspace_members` / `workspace_default_members` — IDs with `file://` paths
332fn strip_absolute_paths(value: Value) -> Value {
333 match value {
334 Value::Object(mut map) => {
335 for key in &[
336 "workspace_root",
337 "workspace_members",
338 "workspace_default_members",
339 "target_directory",
340 "build_directory",
341 "manifest_path",
342 "src_path",
343 "path",
344 ] {
345 map.remove(*key);
346 }
347 Value::Object(
348 map.into_iter()
349 .map(|(k, v)| (k, strip_absolute_paths(v)))
350 .collect(),
351 )
352 }
353 Value::Array(arr) => {
354 Value::Array(arr.into_iter().map(strip_absolute_paths).collect())
355 }
356 other => other,
357 }
358}
359
360// ─── hashing helpers ─────────────────────────────────────────────────────────
361
362/// SHA-256 of `data`, returned as a lowercase hex string.
363fn hex_sha256(data: &[u8]) -> String {
364 let mut h = Sha256::new();
365 h.update(data);
366 format!("{:x}", h.finalize())
367}
368
369/// Walk `src_dir` recursively, hash every `.rs` file, and return a
370/// `BTreeMap<relative_path, "sha256:<hex>">`.
371///
372/// Paths are relative to `manifest_dir` and always use `/` as the separator.
373fn hash_source_tree(
374 src_dir: &str,
375 manifest_dir: &str,
376) -> Result<BTreeMap<String, String>, BuildTimeFingerprintError> {
377 let mut map = BTreeMap::new();
378 visit_rs_files(Path::new(src_dir), Path::new(manifest_dir), &mut map)?;
379 Ok(map)
380}
381
382fn visit_rs_files(
383 dir: &Path,
384 base: &Path,
385 map: &mut BTreeMap<String, String>,
386) -> Result<(), BuildTimeFingerprintError> {
387 if !dir.exists() {
388 return Ok(());
389 }
390 let mut entries: Vec<_> = fs::read_dir(dir)?.collect::<Result<_, _>>()?;
391 // Sort for determinism across file-systems that don't guarantee readdir order.
392 entries.sort_by_key(|e| e.path());
393
394 for entry in entries {
395 let path = entry.path();
396 if path.is_dir() {
397 visit_rs_files(&path, base, map)?;
398 } else if path.extension().and_then(|e| e.to_str()) == Some("rs") {
399 let rel = path
400 .strip_prefix(base)
401 .unwrap_or(&path)
402 .to_string_lossy()
403 .replace('\\', "/");
404 let contents = fs::read(&path)?;
405 map.insert(rel, format!("sha256:{}", hex_sha256(&contents)));
406 }
407 }
408 Ok(())
409}