// ktstr 0.10.0 - Docs.rs

// Generates vmlinux.h from kernel BTF using libbpf's btf_dump API.
// Uses the shared kernel resolver (src/kernel_path.rs) to find the
// BTF source. See resolve_btf() for the full search order.

use std::env;
use std::path::PathBuf;
use std::process::{Command, Stdio};

use libbpf_cargo::SkeletonBuilder;

include!("src/kernel_path.rs");
include!("src/build_helpers.rs");

fn main() {
    let out_dir = PathBuf::from(env::var("OUT_DIR").unwrap());

    // Cache invalidation: track the env var that selects a kernel
    // and the build-script inputs (kernel_path resolver, C generator
    // source). Deliberately NOT emitting a `rerun-if-changed` on the
    // BTF source path itself:
    //
    //   1. `vmlinux` is consumed here only as the BTF source for
    //      `vmlinux.h` generation on the C side below, not as an
    //      input that the Rust compiler reads. BPF CO-RE (Compile
    //      Once Run Everywhere) relocates field offsets at LOAD
    //      time against the runtime kernel's BTF, so a field-layout
    //      drift between the compile-time `vmlinux.h` and the
    //      runtime kernel is resolved by libbpf on BPF object load
    //      — there is no compile-time correctness dependency on
    //      the exact byte content of the vmlinux used to generate
    //      `vmlinux.h`.
    //   2. `rerun-if-changed` on the BTF would force build.rs to
    //      re-run on every kernel rebuild. That runs the BPF
    //      skeleton generator unnecessarily when the drift (per
    //      (1)) has no compile-time correctness impact.
    //
    // However, WHEN build.rs does run (triggered by a watched
    // input — KTSTR_KERNEL change, kernel_path.rs edit, or a
    // previously-absent `vmlinux.h`), it SHOULD detect a BTF
    // content change and regenerate. The pre-hash design only
    // regenerated when `vmlinux.h` was absent entirely, which
    // meant a BTF-content change paired with an unrelated build-
    // script trigger would leave stale `vmlinux.h` in place. A
    // SipHasher13 hash of the BTF bytes is written alongside
    // `vmlinux.h` as `vmlinux.btf.hash`; regen fires when the
    // file is absent OR the stored hash differs from the current
    // BTF's hash. Operators who need to force regen unconditionally
    // still have `cargo clean` as the escape hatch. The algorithm
    // mirrors `src/test_support/sidecar.rs::sidecar_variant_hash`
    // so the project uses a single stable hash family.
    println!("cargo:rerun-if-env-changed=KTSTR_KERNEL");
    println!("cargo:rerun-if-changed=src/kernel_path.rs");
    println!("cargo:rerun-if-changed=src/bpf/vmlinux_gen.c");
    let ktstr_kernel = env::var("KTSTR_KERNEL").ok();

    // Generate vmlinux.h from kernel BTF.
    let vmlinux_h = out_dir.join("vmlinux.h");
    let hash_path = out_dir.join("vmlinux.btf.hash");
    // Resolve BTF + compute content hash eagerly. `resolve_btf`
    // returns `Option` to degrade cleanly when no BTF is reachable
    // (no KTSTR_KERNEL + no host BTF): if `vmlinux.h` is already in
    // place from an earlier build, we keep it rather than panicking
    // — matches the CO-RE design (runtime BTF fixes field drift
    // anyway), so a disappearing source is not a build-blocking
    // event. A MISSING `vmlinux.h` still panics below because we
    // have nothing to fall back on.
    let current_btf = resolve_btf(ktstr_kernel.as_deref());
    // Hash the BTF source for drift detection. Fault-tolerant: a
    // BTF path that resolved but whose bytes cannot be read (EACCES,
    // or a race where the file vanished between resolve and read)
    // downgrades to `None` instead of panicking, so we fall back to
    // the existence-only gate for `vmlinux.h`. The eventual regen
    // path below re-reads the bytes via `vmlinux_gen` and fails
    // loudly there if the source is truly unusable.
    let current_hash: Option<String> = current_btf.as_ref().and_then(|p| match std::fs::read(p) {
        Ok(bytes) => Some(format!("{:016x}", siphash_13(&bytes))),
        Err(e) => {
            println!(
                "cargo:warning=BTF source {} present but unreadable \
                     ({e}); skipping hash check, reusing existing vmlinux.h",
                p.display(),
            );
            None
        }
    });
    let stored_hash: Option<String> = std::fs::read_to_string(&hash_path)
        .ok()
        .map(|s| s.trim().to_string());
    // Regen fires on any of three conditions:
    //   - `vmlinux.h` is absent (first build or post-`cargo clean`);
    //   - the stored hash is absent but we have a current hash (the
    //     vmlinux.h was generated by an older build.rs that didn't
    //     track hashes — upgrade in place);
    //   - current and stored hashes differ (real drift).
    // An unreadable BTF with vmlinux.h already in place falls
    // through to "no regen" per `current_hash.is_none()`.
    let should_regen =
        !vmlinux_h.exists() || (current_hash.is_some() && current_hash != stored_hash);
    if should_regen {
        let btf_source = current_btf.unwrap_or_else(|| {
            panic!(
                "no BTF source found. Set KTSTR_KERNEL to a kernel build \
                 directory, or ensure /sys/kernel/btf/vmlinux exists."
            );
        });
        println!("generating vmlinux.h from {}", btf_source.display());

        // libbpf-sys (links = "bpf") emits installed headers at
        // DEP_BPF_INCLUDE with bpf/ prefix (bpf/btf.h, bpf/libbpf.h).
        let libbpf_include =
            PathBuf::from(env::var("DEP_BPF_INCLUDE").expect("DEP_BPF_INCLUDE not set"));

        // Compile the C vmlinux generator + driver into a standalone binary.
        let vmlinux_gen_bin = out_dir.join("vmlinux_gen");
        let driver_src = out_dir.join("vmlinux_gen_main.c");
        std::fs::write(
            &driver_src,
            format!(
                r#"
extern int generate_vmlinux_h(const char *, const char *);
int main(void) {{
    return generate_vmlinux_h("{btf}", "{out}") == 0 ? 0 : 1;
}}
"#,
                btf = btf_source.display(),
                out = vmlinux_h.display(),
            ),
        )
        .expect("write driver source");

        // libbpf-sys with vendored feature installs static libraries
        // (libbpf.a, libelf.a, libz.a) in the parent of DEP_BPF_INCLUDE.
        let libbpf_lib_dir = libbpf_include.parent().unwrap();

        let compiler = cc::Build::new().get_compiler();
        let status = Command::new(compiler.path())
            .args([
                "src/bpf/vmlinux_gen.c",
                driver_src.to_str().unwrap(),
                "-o",
                vmlinux_gen_bin.to_str().unwrap(),
                &format!("-I{}", libbpf_include.display()),
                &format!("-L{}", libbpf_lib_dir.display()),
                "-lbpf",
                "-lelf",
                "-lz",
            ])
            .status()
            .expect("compile vmlinux_gen");
        assert!(status.success(), "failed to compile vmlinux_gen");

        let status = Command::new(&vmlinux_gen_bin)
            .status()
            .expect("run vmlinux_gen");
        assert!(
            status.success(),
            "vmlinux_gen failed — check BTF source: {}",
            btf_source.display()
        );

        // Record the BTF content hash alongside `vmlinux.h`. A
        // future build.rs invocation reads this file and compares
        // against the freshly-hashed BTF; a mismatch triggers
        // regeneration above.
        //
        // Normally `current_hash` was populated at the top of
        // `main`. The one path that leaves it `None` while still
        // reaching this regen branch is: `!vmlinux_h.exists()` AND
        // `std::fs::read(&btf_source)` failed during the eager hash
        // attempt. In that case, the generator above successfully
        // invoked `vmlinux_gen` against `btf_source`, which means
        // libbpf could read it — the earlier read failure was
        // transient or the generator accessed the file via a path
        // libbpf handles differently (e.g. sysfs BTF). Re-read and
        // hash here so the sidecar is always populated alongside a
        // successful regen; on a second-read failure, skip the
        // sidecar (the generator already succeeded — the build is
        // in a good state; a missing sidecar forces the next
        // build.rs run to regenerate conservatively, which is
        // correct).
        let hash_opt: Option<String> = match current_hash.as_deref() {
            Some(h) => Some(h.to_string()),
            None => match std::fs::read(&btf_source) {
                Ok(bytes) => Some(format!("{:016x}", siphash_13(&bytes))),
                Err(e) => {
                    println!(
                        "cargo:warning=post-regen BTF re-read failed ({e}); \
                         skipping hash sidecar — next build.rs run will \
                         regenerate conservatively"
                    );
                    None
                }
            },
        };
        if let Some(hash) = hash_opt {
            // Trailing newline so `cat` / editor-open produces a
            // clean single-line display. The reader at the top of
            // main uses `.trim()` on the stored value, so the
            // newline round-trips.
            std::fs::write(&hash_path, format!("{hash}\n"))
                .unwrap_or_else(|e| panic!("write BTF hash sidecar {}: {e}", hash_path.display()));
        }
    }

    // arm64 bpf_tracing.h casts pt_regs through struct user_pt_regs,
    // a UAPI type that kernel BTF may omit. Append it if absent so
    // PT_REGS_PARMn_CORE compiles on arm64 hosts.
    if cfg!(target_arch = "aarch64") {
        let content = std::fs::read_to_string(&vmlinux_h).expect("read vmlinux.h");
        if !content.contains("struct user_pt_regs {") {
            use std::io::Write;
            let mut f = std::fs::OpenOptions::new()
                .append(true)
                .open(&vmlinux_h)
                .expect("open vmlinux.h for append");
            writeln!(
                f,
                "\n/* Added by build.rs: arm64 UAPI type needed by bpf_tracing.h */\n\
                 struct user_pt_regs {{\n\
                 \t__u64 regs[31];\n\
                 \t__u64 sp;\n\
                 \t__u64 pc;\n\
                 \t__u64 pstate;\n\
                 }};\n"
            )
            .expect("append user_pt_regs to vmlinux.h");
        }
    }

    let clang_args = [
        format!("-I{}", out_dir.display()),
        format!("-I{}", "src/bpf"),
    ];

    // Build the kprobe BPF skeleton.
    let skel_path = out_dir.join("probe_skel.rs");
    SkeletonBuilder::new()
        .source("src/bpf/probe.bpf.c")
        .obj(out_dir.join("probe.o"))
        .clang_args(clang_args.clone())
        .reference_obj(true)
        .build_and_generate(&skel_path)
        .expect("build probe BPF skeleton");

    // Build the fentry BPF skeleton (separate for independent loading).
    let fentry_skel_path = out_dir.join("fentry_probe_skel.rs");
    SkeletonBuilder::new()
        .source("src/bpf/fentry_probe.bpf.c")
        .obj(out_dir.join("fentry_probe.o"))
        .clang_args(clang_args)
        .reference_obj(true)
        .build_and_generate(&fentry_skel_path)
        .expect("build fentry probe BPF skeleton");

    println!("cargo::rerun-if-changed=src/bpf/probe.bpf.c");
    println!("cargo::rerun-if-changed=src/bpf/fentry_probe.bpf.c");
    println!("cargo::rerun-if-changed=src/bpf/intf.h");

    // Generate ALL_SHIFTS registry from src/budget.rs so the
    // budget-feature tests can assert exhaustive classification
    // coverage. Scans `const NAME_SHIFT: u32 = N;` declarations and
    // emits a `pub(crate) const ALL_SHIFTS: &[(u32, &str)]` slice
    // into OUT_DIR. The test in budget.rs takes the union of its
    // one-bit and multi-bit shift enumerations and asserts equality
    // with this slice — a new SHIFT constant added without updating
    // either enumeration fails the union check.
    generate_shift_registry(&out_dir);

    // Fingerprint the cast-analysis source so the on-disk cast cache
    // (src/vmm/cast_analysis_load/persist.rs) self-invalidates whenever
    // the analyzer changes — with no manual SCHEMA_VERSION bump. Without
    // this, an analyzer-behavior change reuses a stale cached result and
    // masks a just-fixed analyzer bug as a flake (the 2026-06-01
    // arena_confirmed-drop bug hid this way for hours). The fn emits
    // `rerun-if-changed` for the watched dirs so cargo recomputes the env
    // when the analyzer source changes.
    println!(
        "cargo:rustc-env=KTSTR_CAST_ANALYZER_FINGERPRINT={:016x}",
        cast_analyzer_fingerprint()
    );

    // Fingerprint the whole Cargo.lock so the cast-analysis cache
    // self-invalidates on any dependency bump: persist::cache_path folds
    // this into the cache key. A btf-rs (BTF parsing) or libbpf-rs /
    // libbpf-sys (BPF-opcode constants) version change can alter the cast
    // map with no ktstr source change, so the analyzer-source fingerprint
    // alone would serve a stale result. Only the cast cache folds this in;
    // kernels / models / disk_template are dependency-independent.
    println!(
        "cargo:rustc-env=KTSTR_CARGO_LOCK_FINGERPRINT={:016x}",
        cargo_lock_fingerprint()
    );

    // Build busybox from source for guest shell mode.
    //
    // Hermeticity contract:
    //
    //  - The tarball is fetched ONCE per OUT_DIR and cached at
    //    `$OUT_DIR/busybox`. `cargo clean` forces a re-fetch.
    //  - The fetched bytes are SHA-256-verified against
    //    [`BUSYBOX_TARBALL_SHA256`] before extraction. A mismatch
    //    panics with the actual vs expected hash so the operator
    //    can decide between "the upstream changed (regenerate the
    //    pin)" and "the download was tampered (investigate)".
    //  - `KTSTR_BUSYBOX_TARBALL=<path>` points the build at a
    //    pre-fetched local tarball — for air-gapped CI runners and
    //    hermetic CI caches. The SHA pin still applies; the local
    //    path is a transport substitute, not a verification bypass.
    //  - `KTSTR_SKIP_BUSYBOX_BUILD=1` writes a 0-byte placeholder at
    //    `$OUT_DIR/busybox` and skips the compile entirely. Shell
    //    mode is unavailable in the resulting binary;
    //    `cargo_ktstr::blobs::install_env` detects the empty blob
    //    and leaves `KTSTR_BUSYBOX_PATH` unset so consumers fail
    //    with a clear "shell mode unavailable" rather than an
    //    opaque "exec format error" on the 0-byte file. Mirrors
    //    the existing `KTSTR_SKIP_WPROF_BUILD` escape hatch below.
    //
    // The pre-pin git-clone fallback was removed alongside this
    // refactor: a clone bypasses the SHA gate (no tarball to
    // verify), and `KTSTR_BUSYBOX_TARBALL` covers the
    // tarball-fetch-failed case more cleanly.
    let busybox_bin = out_dir.join("busybox");
    println!("cargo:rerun-if-env-changed=KTSTR_SKIP_BUSYBOX_BUILD");
    println!("cargo:rerun-if-env-changed=KTSTR_BUSYBOX_TARBALL");
    let skip_busybox = std::env::var("KTSTR_SKIP_BUSYBOX_BUILD")
        .ok()
        .filter(|v| !v.is_empty())
        .is_some();
    if skip_busybox {
        println!(
            "cargo:warning=KTSTR_SKIP_BUSYBOX_BUILD set — writing 0-byte \
             $OUT_DIR/busybox placeholder; shell mode will be unavailable \
             in the resulting cargo-ktstr binary"
        );
        if !busybox_bin.exists() {
            std::fs::write(&busybox_bin, b"").unwrap_or_else(|e| {
                panic!(
                    "write 0-byte busybox placeholder {}: {e}",
                    busybox_bin.display()
                )
            });
        }
    } else if !busybox_bin.exists() {
        println!("cargo:warning=compiling busybox (first build only)...");

        // Check required tools before attempting build.
        if Command::new("make").arg("--version").output().is_err() {
            panic!(
                "busybox build requires 'make' — install build-essential \
                 (Debian/Ubuntu) or base-devel (Fedora/Arch)"
            );
        }
        if Command::new("gcc").arg("--version").output().is_err() {
            panic!(
                "busybox build requires 'gcc' — install build-essential \
                 (Debian/Ubuntu) or base-devel (Fedora/Arch)"
            );
        }

        let busybox_src = out_dir.join("busybox-src");

        // Recover from interrupted download: if the directory exists but
        // has no Makefile, the previous extraction was incomplete.
        if busybox_src.exists() && !busybox_src.join("Makefile").exists() {
            std::fs::remove_dir_all(&busybox_src).expect("remove incomplete busybox-src");
        }

        // Source the tarball: from a local path when
        // KTSTR_BUSYBOX_TARBALL is set, otherwise from the pinned
        // upstream URL with retry. Either path lands in
        // `tarball_bytes` which is then SHA-verified before any
        // extraction touches the filesystem.
        if !busybox_src.join("Makefile").exists() {
            const TARBALL_URL: &str =
                "https://github.com/mirror/busybox/archive/refs/tags/1_36_1.tar.gz";
            let tarball_bytes = match std::env::var("KTSTR_BUSYBOX_TARBALL")
                .ok()
                .filter(|v| !v.is_empty())
            {
                Some(local) => {
                    println!(
                        "cargo:warning=KTSTR_BUSYBOX_TARBALL set — reading {local} \
                         instead of fetching from {TARBALL_URL}"
                    );
                    std::fs::read(&local).unwrap_or_else(|e| {
                        panic!(
                            "read KTSTR_BUSYBOX_TARBALL={local}: {e} — the env \
                             var must point at a readable tarball matching the \
                             pinned SHA-256"
                        )
                    })
                }
                None => fetch_busybox_tarball(TARBALL_URL),
            };

            verify_busybox_tarball_sha256(&tarball_bytes);

            // Extract verified bytes into busybox-src/.
            let extract_dir = out_dir.join("busybox-extract");
            if extract_dir.exists() {
                let _ = std::fs::remove_dir_all(&extract_dir);
            }
            let gz = flate2::read::GzDecoder::new(std::io::Cursor::new(&tarball_bytes[..]));
            let mut archive = tar::Archive::new(gz);
            archive
                .unpack(&extract_dir)
                .unwrap_or_else(|e| panic!("extract busybox tarball: {e}"));
            let inner = extract_dir.join("busybox-1_36_1");
            std::fs::rename(&inner, &busybox_src).unwrap_or_else(|e| {
                panic!(
                    "expected extracted directory {} — tarball layout may have changed: {e}",
                    inner.display()
                )
            });
            std::fs::remove_dir_all(&extract_dir).ok();
        }

        // Configure busybox.
        let status = Command::new("make")
            .arg("defconfig")
            .current_dir(&busybox_src)
            .stdout(Stdio::inherit())
            .stderr(Stdio::inherit())
            .status()
            .expect("make defconfig");
        assert!(status.success(), "busybox make defconfig failed");

        // Enable static linking, disable CONFIG_TC (requires iproute2 headers).
        let config_path = busybox_src.join(".config");
        let config = std::fs::read_to_string(&config_path).expect("read busybox .config");
        let config = config
            .replace("# CONFIG_STATIC is not set", "CONFIG_STATIC=y")
            .replace("CONFIG_TC=y", "# CONFIG_TC is not set");
        std::fs::write(&config_path, config).expect("write patched busybox .config");

        // Resolve patched config non-interactively. Busybox's Kbuild
        // lacks olddefconfig; pipe empty input to oldconfig so every
        // NEW prompt accepts its default without blocking on stdin.
        let status = Command::new("make")
            .arg("oldconfig")
            .current_dir(&busybox_src)
            .stdin(Stdio::null())
            .stdout(Stdio::inherit())
            .stderr(Stdio::inherit())
            .status()
            .expect("make oldconfig");
        assert!(status.success(), "busybox make oldconfig failed");

        // Build busybox.  Single-threaded `-j1`: busybox is a pure-C
        // build dominated by gcc invocations that are already
        // parallelisable inside gcc's own job server when invoked
        // from a parallel parent; for a one-shot build out of a
        // build.rs the wall-time difference between `-j1` and
        // `-jN` is small (single-digit seconds on a developer box),
        // and `-j1` keeps the build deterministic + race-free
        // across hosts.
        let status = Command::new("make")
            .arg("-j1")
            .current_dir(&busybox_src)
            .stdout(Stdio::inherit())
            .stderr(Stdio::inherit())
            .status()
            .expect("busybox make");
        assert!(status.success(), "busybox build failed");

        // Copy binary to OUT_DIR.
        std::fs::copy(busybox_src.join("busybox"), &busybox_bin)
            .expect("copy busybox binary to OUT_DIR");
    }

    // wprof build: gated behind the `wprof` cargo feature (default
    // off). When disabled, a 0-byte placeholder at $OUT_DIR/wprof
    // satisfies the `include_bytes!` site in cargo_ktstr/blobs.rs.
    // The KTSTR_SKIP_WPROF_BUILD env var remains as a secondary
    // escape hatch for builds that enable the feature but want to
    // skip the clone/compile (CI caching, cross-compilation, etc.).
    let wprof_bin = out_dir.join("wprof");
    #[cfg(not(feature = "wprof"))]
    if !wprof_bin.exists() {
        std::fs::write(&wprof_bin, b"").unwrap_or_else(|e| {
            panic!(
                "write 0-byte wprof placeholder {}: {e}",
                wprof_bin.display()
            )
        });
    }
    #[cfg(feature = "wprof")]
    {
        println!("cargo:rerun-if-env-changed=KTSTR_SKIP_WPROF_BUILD");
        let skip_wprof = std::env::var("KTSTR_SKIP_WPROF_BUILD")
            .ok()
            .filter(|v| !v.is_empty())
            .is_some();

        if skip_wprof {
            println!(
                "cargo:warning=KTSTR_SKIP_WPROF_BUILD set — writing 0-byte \
             $OUT_DIR/wprof placeholder; do NOT use the resulting \
             cargo-ktstr binary for wprof capture"
            );
            if !wprof_bin.exists() {
                std::fs::write(&wprof_bin, b"").unwrap_or_else(|e| {
                    panic!(
                        "write 0-byte wprof placeholder {}: {e}",
                        wprof_bin.display()
                    )
                });
            }
        } else if !wprof_bin.exists() {
            println!("cargo:warning=cloning + compiling wprof (first build only)...");

            for tool in ["git", "make", "gcc", "clang"] {
                if Command::new(tool).arg("--version").output().is_err() {
                    panic!(
                        "wprof build requires '{tool}' on PATH — install via your \
                     distro's package manager (build-essential / base-devel for \
                     make+gcc; clang for BPF skeleton compile; git for \
                     submodule clone)"
                    );
                }
            }

            // Clone into OUT_DIR like busybox — re-clones on `cargo
            // clean` and stays per-workspace-isolated (matches the
            // shape of the other vendored binary).
            //
            // Cargo's workspace discovery walks UP from any nested
            // Cargo.toml. The wprof Makefile shells out to cargo at
            // exactly TWO sub-crates per `wprof-src/src/Makefile`:
            // L125 `cd $(LIBBLAZESYM_SRC) && $(CARGO) build` and L133
            // `cd $(LIBDEMANGLE_SRC) && $(CARGO) build`. blazesym IS
            // self-contained (its own `[workspace]` + `[workspace.lints]`
            // at the root of `wprof-src/blazesym/Cargo.toml`) so cargo's
            // walk terminates there naturally — no patch needed.
            // demangle (`wprof-src/src/demangle/Cargo.toml`) has neither
            // `[workspace]` nor lints inheritance, so the upward walk
            // would reach ktstr-root's `[workspace]` via target/ —
            // failing with "current package believes it's in a workspace
            // when it's not." The sentinel patch further below appends an
            // empty `[workspace]` table to demangle's manifest, which
            // breaks the walk without losing any inheritance (demangle
            // has no `[lints] workspace = true`).
            //
            // vmlinux.h/ also has a Cargo.toml but the Makefile
            // references vmlinux.h ONLY as a header source
            // (`VMLINUX := ../vmlinux.h/include/$(ARCH)/vmlinux.h` at
            // L41), never via `cd vmlinux.h && cargo X`. If a future
            // wprof Makefile change adds such an invocation, vmlinux.h's
            // Cargo.toml will need the same sentinel patch.
            //
            // Tradeoff acknowledged: `cargo clean && cargo build`
            // re-clones the FULL wprof tree (~590MB working tree of
            // which ~20MB is .git after `--depth=1 --shallow-submodules`)
            // — measured 60+ seconds wall time on slow CI links.
            // Within a single cargo invocation, build.rs runs ONCE per
            // (package, profile, feature-combo) thanks to cargo's
            // build-script dedup, so multi-target builds against the
            // same ktstr package amortise the clone. Across different
            // cargo invocations (e.g. dev iteration switching between
            // debug and release), each invocation does its own clone.
            // The cost is acceptable in exchange for: (1) per-workspace
            // isolation — different ktstr checkouts can't accidentally
            // share a stale wprof version (the prior cache tracked
            // upstream HEAD with no pin), (2) `cargo clean` consistency
            // — no out-of-band `~/.cache/ktstr/wprof-src` rm needed,
            // (3) drop of ~70 lines of flock + XDG-resolution
            // infrastructure. Operators who want incremental builds
            // should prefer `cargo build -p ktstr` over `cargo clean`.
            let wprof_src = out_dir.join("wprof-src");
            // .git/HEAD is the strongest single-file signal for clone
            // completeness vs the prior Makefile-only check. A partial
            // `git clone` that fails mid-checkout leaves the working
            // tree empty/incomplete (Makefile possibly absent) but
            // .git/HEAD is created EARLIER, during init. Require BOTH:
            // .git/HEAD (init reached) AND src/Makefile (working tree
            // populated). Failing either means the cache is half-baked
            // and needs to be wiped before re-clone. The
            // `is_wprof_clone_complete` predicate (build_helpers.rs)
            // encodes this rule + has unit-test coverage for each
            // failure case.
            let wprof_makefile = wprof_src.join("src").join("Makefile");
            if wprof_src.exists() && !is_wprof_clone_complete(&wprof_src) {
                std::fs::remove_dir_all(&wprof_src).expect("remove incomplete wprof-src");
            }

            if !wprof_makefile.exists() {
                let git_url = "https://github.com/anakryiko/wprof.git";
                // Recursive clone over flaky networks fails partway
                // through one of the submodules (libbpf, bpftool,
                // blazesym, vmlinux.h, usdt, strobelight-libs) — the
                // shallow `git clone --depth=1 --shallow-submodules`
                // is one-shot; if it errors, the dir is left in an
                // incomplete state. Retry with bounded attempts +
                // exponential backoff via the shared
                // `retry_with_backoff` helper (also used by the
                // busybox tarball download with `MAX_TARBALL_ATTEMPTS
                // = 4`). Both call sites share backoff timing,
                // attempt counting, and log wording.
                //
                // Per-attempt cleanup of partial wprof_src lives
                // INSIDE the closure (see the `i > 1` guard at its top).
                println!(
                    "cargo:warning=cloning {git_url} into {} (recursive — \
                 pulls libbpf, bpftool, blazesym, vmlinux.h, usdt, \
                 strobelight-libs)",
                    wprof_src.display()
                );
                const MAX_CLONE_ATTEMPTS: u32 = 4;
                let clone_attempt = |i: u32| -> Result<(), String> {
                    // A failed attempt can leave a partially populated
                    // wprof_src behind, and git will not clone into a
                    // non-empty directory — so every retry wipes it first.
                    // A cleanup failure is only logged: the retry proceeds
                    // regardless, and if the leftovers really do block the
                    // clone, git reports that through this attempt's exit
                    // status. The first attempt skips the wipe because the
                    // checks that run before the retry loop have already
                    // dealt with any stale wprof_src.
                    if i > 1
                        && let Err(e) = std::fs::remove_dir_all(&wprof_src)
                    {
                        println!(
                            "cargo:warning=wprof partial-clone cleanup before attempt {i} \
                         failed: {e}; continuing to next attempt anyway"
                        );
                    }

                    let mut git = Command::new("git");
                    // Environment:
                    //  - GIT_CONFIG_GLOBAL / GIT_CONFIG_SYSTEM = /dev/null
                    //    neutralise host-level `~/.gitconfig` and
                    //    `/etc/gitconfig` rewrites (e.g.
                    //    `url.<base>.insteadOf`) that would re-route the
                    //    public github.com URL through a private proxy.
                    //    The build must be reproducible on any host and
                    //    must never bake host-private endpoints into the
                    //    build graph; the repository URL stays the
                    //    upstream public one.
                    //  - GIT_TERMINAL_PROMPT=0 / GIT_ASKPASS=/bin/false
                    //    stop git from blocking on a stdin credential
                    //    prompt after an HTTP 401/403. A retry that hangs
                    //    on a prompt is worse than no retry — fail fast
                    //    and let the outer panic surface the error.
                    git.envs([
                        ("GIT_CONFIG_GLOBAL", "/dev/null"),
                        ("GIT_CONFIG_SYSTEM", "/dev/null"),
                        ("GIT_TERMINAL_PROMPT", "0"),
                        ("GIT_ASKPASS", "/bin/false"),
                    ])
                    // http.lowSpeedLimit=1000 + http.lowSpeedTime=60 put
                    // a bound on each attempt: git aborts the transfer
                    // when throughput stays under 1 KB/s for 60 s.
                    // Otherwise a half-open TCP connection (NAT timeout,
                    // blackholed route) stalls git until the OS TCP
                    // keepalive fires — typically minutes to hours per
                    // attempt. `-c key=value` (rather than env vars)
                    // scopes the setting to this single invocation.
                    .args([
                        "-c",
                        "http.lowSpeedLimit=1000",
                        "-c",
                        "http.lowSpeedTime=60",
                    ])
                    // Shallow, recursive clone of the upstream repo.
                    .args([
                        "clone",
                        "--recurse-submodules",
                        "--depth=1",
                        "--shallow-submodules",
                        git_url,
                    ])
                    .arg(&wprof_src)
                    .stdout(Stdio::inherit())
                    .stderr(Stdio::inherit());

                    // Failing to spawn git at all is fatal (panic); a
                    // non-zero exit is reported as Err so the caller can
                    // retry.
                    let status = git.status().expect("spawn git clone for wprof");
                    if !status.success() {
                        return Err(format!("git clone exited {status}"));
                    }
                    Ok(())
                };
                if let Err(err) =
                    retry_with_backoff("wprof git clone", MAX_CLONE_ATTEMPTS, clone_attempt)
                {
                    panic!(
                        "wprof git clone failed after {MAX_CLONE_ATTEMPTS} attempts \
                     (last error: {err}). Check network connectivity to \
                     {git_url}; if the cache directory is in an \
                     unrecoverable state, `rm -rf {}` and re-run `cargo build`.",
                        wprof_src.display()
                    );
                }
            }

            // Patch wprof-src/src/demangle/Cargo.toml with a sentinel
            // `[workspace]` table to break the upward workspace walk
            // before invoking make. The Makefile shells out to
            // `cd demangle && cargo build`; without the sentinel,
            // cargo walks UP from demangle and finds the ktstr
            // workspace at the repository root (because OUT_DIR is
            // under target/), failing with "current package believes
            // it's in a workspace when it's not." An empty `[workspace]`
            // table tells cargo to stop the walk at demangle — and
            // since demangle has no lints inheritance, no semantics
            // are affected.
            //
            // Idempotent: subsequent builds SHORT-CIRCUIT when the
            // exact-line `[workspace]` declaration is already present
            // (the `already_patched` gate below). The append path only fires
            // on the first build after a clean clone. The check matches lines
            // EXACTLY (not substring) to avoid false-positives on
            // `[workspace.lints]` or commented `# [workspace]` —
            // either would trick a substring check into skipping the
            // append even though the real sentinel table isn't there.
            let demangle_manifest = wprof_src.join("src").join("demangle").join("Cargo.toml");
            if demangle_manifest.exists() {
                let existing = std::fs::read_to_string(&demangle_manifest)
                    .unwrap_or_else(|e| panic!("read {}: {e}", demangle_manifest.display()));
                let already_patched = existing.lines().any(|l| l.trim() == "[workspace]");
                if !already_patched {
                    use std::io::Write;
                    let mut f = std::fs::OpenOptions::new()
                        .append(true)
                        .open(&demangle_manifest)
                        .unwrap_or_else(|e| {
                            panic!("open {} for append: {e}", demangle_manifest.display())
                        });
                    f.write_all(b"\n[workspace]\n").unwrap_or_else(|e| {
                        panic!("append [workspace] to {}: {e}", demangle_manifest.display())
                    });
                }
            }

            // Build wprof.  Single-threaded `-j1` instead of `-j{nproc}`:
            // the upstream wprof Makefile has a missing prerequisite
            // edge between the `libdemangle_c.a` build (a recursive
            // `cargo build` inside the demangle sub-crate) and the
            // sibling `cp` that copies the produced archive into
            // wprof's OUTPUT dir.  Under `-jN` the `cp` races the
            // cargo build and fires before the .a exists, surfacing
            // as `cp: cannot stat .../libdemangle_c.a` → `wprof build
            // failed`.  `-j1` serialises the recipe so the dependency
            // ordering the Makefile *intends* is the ordering it gets.
            // The wall-time cost is small in practice: the dominant
            // builds (blazesym, demangle) are individual `cargo build`
            // invocations that already parallelise internally per
            // CARGO_BUILD_JOBS / `--jobs`, so `make`'s outer
            // parallelism would only overlap distinct cargo
            // invocations against each other — which is exactly the
            // pattern that triggers the race.
            let status = Command::new("make")
                .arg("-j1")
                .current_dir(wprof_src.join("src"))
                .stdout(Stdio::inherit())
                .stderr(Stdio::inherit())
                .status()
                .expect("spawn make for wprof");
            assert!(status.success(), "wprof build failed");

            // The wprof Makefile emits the binary at src/wprof (the
            // submodule-init + libbpf-link pattern in
            // github.com/anakryiko/wprof/src/Makefile).
            let built_bin = wprof_src.join("src").join("wprof");
            assert!(
                built_bin.exists(),
                "wprof build succeeded but binary not found at expected path: {}",
                built_bin.display()
            );
            std::fs::copy(&built_bin, &wprof_bin).expect("copy wprof binary to OUT_DIR");
        }
    } // #[cfg(feature = "wprof")]
}

/// SHA-256 hex digest of the upstream busybox-1.36.1 release tarball
/// (`busybox-1_36_1.tar.gz` from the `mirror/busybox` github archive).
///
/// The digest covers the raw (still gzip-compressed) tarball bytes and
/// is compared against the computed value ignoring ASCII case, so the
/// pin may be pasted in either upper- or lowercase hex.
///
/// **Sentinel value**: `""` means the pin is not yet recorded for this
/// checkout. In that case [`verify_busybox_tarball_sha256`] emits the
/// computed digest as a `cargo:warning` and continues — first-build
/// integration. To activate the verification gate, replace the empty
/// string with the printed digest, then commit. Subsequent builds
/// fail on mismatch.
///
/// **Rotation**: bumping the busybox version requires updating BOTH
/// the URL in the `fetch_busybox_tarball` call site AND this pin in
/// lockstep — a partial edit produces a SHA mismatch on the next
/// fetch (fail-loud, not silent-pull-wrong-bytes).
///
/// **Why a custom pin instead of cargo's vendoring**: cargo's
/// vendoring covers crate sources, not arbitrary C-source tarballs
/// downloaded by a build script. The verification has to live in
/// `build.rs` itself.
const BUSYBOX_TARBALL_SHA256: &str = "";

/// Fetch the upstream busybox tarball with retry; return the raw
/// gzip-compressed bytes (NOT yet SHA-verified — caller passes the
/// returned buffer through [`verify_busybox_tarball_sha256`] before
/// extracting). Extracted from the prior in-line download so the
/// `KTSTR_BUSYBOX_TARBALL` operator override can read a local file
/// through the same downstream pipeline.
///
/// Environment inputs (an empty or whitespace-only value is treated
/// as unset for all of them):
///
///   - `GITHUB_TOKEN`: sent as a bearer token when present.
///   - `HTTPS_PROXY` / `https_proxy` / `HTTP_PROXY` / `http_proxy`:
///     first non-empty one, in that order, is used as the proxy for
///     the request.
///   - `NO_PROXY` / `no_proxy`: exclusion list applied to that proxy.
///
/// # Panics
///
/// Panics with a remediation message when every download attempt
/// fails.
fn fetch_busybox_tarball(url: &str) -> Vec<u8> {
    /// Read an env var, mapping "unset", "not unicode", and
    /// "empty / whitespace-only" all to `None`. `FOO=""` is the usual
    /// way to neutralise an inherited variable, so it must not be
    /// interpreted as a (malformed) value.
    fn non_empty_env(name: &str) -> Option<String> {
        std::env::var(name)
            .ok()
            .filter(|value| !value.trim().is_empty())
    }

    // Authenticated GitHub requests get 1000/hr per token vs the
    // 60/hr IP-based unauth limit. GitHub Actions auto-issues
    // GITHUB_TOKEN per job; outside CI the env var is typically
    // absent and the request goes unauth, which still works for
    // public repos at low rate. An empty token is treated as absent
    // so the request is not sent with a blank `Bearer` credential.
    let github_token = non_empty_env("GITHUB_TOKEN");

    // Resolved once: the environment does not change between retry
    // attempts. Precedence matches the documented order (HTTPS first,
    // uppercase before lowercase).
    let proxy_url = ["HTTPS_PROXY", "https_proxy", "HTTP_PROXY", "http_proxy"]
        .iter()
        .find_map(|name| non_empty_env(name));

    let attempt = |attempt_idx: u32| -> Result<Vec<u8>, String> {
        // `timeout()` bounds the whole request including the body
        // when read via `.bytes()` (which uses `wait::timeout`
        // internally per `reqwest::blocking::Response::bytes`),
        // but does NOT apply when reading the response via the
        // `Read` trait -- streaming bypasses reqwest's timeout
        // machinery so a slow-drip server can hang the build
        // indefinitely. Buffer the body so the timeout actually
        // fires.
        let mut client_builder = reqwest::blocking::Client::builder()
            .timeout(std::time::Duration::from_secs(120))
            .connect_timeout(std::time::Duration::from_secs(30))
            .user_agent(concat!("ktstr-build/", env!("CARGO_PKG_VERSION")));

        // Explicit proxy configuration so a malformed proxy URL is
        // reported with a clear message instead of surfacing as an
        // opaque connect error. Registering an explicit proxy turns
        // off reqwest's automatic system-proxy handling, and a proxy
        // built with `Proxy::all` carries no exclusion list of its
        // own, so the NO_PROXY / no_proxy list has to be attached by
        // hand — otherwise hosts the operator excluded would still be
        // routed through the proxy.
        if let Some(proxy_url) = proxy_url.as_deref() {
            let proxy = reqwest::Proxy::all(proxy_url)
                .map_err(|e| format!("invalid proxy URL {proxy_url}: {e}"))?
                .no_proxy(reqwest::NoProxy::from_env());
            client_builder = client_builder.proxy(proxy);
        }

        let client = client_builder
            .build()
            .map_err(|e| format!("http client: {e}"))?;
        let mut req = client.get(url);
        if let Some(ref token) = github_token {
            req = req.bearer_auth(token);
        }
        let resp = req
            .send()
            .and_then(|r| r.error_for_status())
            .map_err(|e| format!("attempt {attempt_idx} request: {e}"))?;
        let body = resp
            .bytes()
            .map_err(|e| format!("attempt {attempt_idx} body: {e}"))?;
        Ok(body.to_vec())
    };

    println!("cargo:warning=downloading busybox source tarball from {url}");
    const MAX_TARBALL_ATTEMPTS: u32 = 4;
    retry_with_backoff("busybox tarball download", MAX_TARBALL_ATTEMPTS, attempt).unwrap_or_else(
        |e| {
            panic!(
                "failed to obtain busybox source after {MAX_TARBALL_ATTEMPTS} attempts.\n\
             tarball ({url}): {e}\n\
             Remediation:\n\
               • Check network connectivity (the build script needs HTTPS\n\
                 access to github.com to fetch the upstream tarball).\n\
               • If behind a proxy, ensure HTTP_PROXY/HTTPS_PROXY environment\n\
                 variables are set (e.g., export HTTPS_PROXY=http://proxy:8080).\n\
               • Or set KTSTR_BUSYBOX_TARBALL=<path> to point at a\n\
                 pre-fetched local copy of {url} — useful for air-gapped\n\
                 CI runners and hermetic build environments.\n\
               • Or set KTSTR_SKIP_BUSYBOX_BUILD=1 to skip the busybox\n\
                 compile entirely (shell mode will be unavailable in the\n\
                 resulting cargo-ktstr binary).",
            )
        },
    )
}

/// Verify the downloaded busybox tarball against [`BUSYBOX_TARBALL_SHA256`].
///
/// Three outcomes:
///
///   - **Pin empty**: log the computed digest as `cargo:warning` lines
///     and continue. First-build bootstrap path — the operator pastes
///     the printed value into `BUSYBOX_TARBALL_SHA256` to lock the pin.
///   - **Pin matches** (hex compared ignoring ASCII case): silent pass.
///   - **Pin mismatches**: panic with both digests. The operator
///     investigates: a regenerated upstream archive (github does this
///     rarely; cf. the 2023 git-archive checksum change) requires a
///     pin refresh, whereas an unexplained mismatch on a fixed pin
///     indicates supply-chain tampering and warrants investigation
///     before the bytes hit the build.
///
/// # Panics
///
/// Panics when the pin is non-empty and differs from the computed
/// digest.
fn verify_busybox_tarball_sha256(tarball_bytes: &[u8]) {
    use sha2::{Digest, Sha256};
    let actual = {
        let mut hasher = Sha256::new();
        hasher.update(tarball_bytes);
        hex_encode_lowercase(&hasher.finalize())
    };
    if BUSYBOX_TARBALL_SHA256.is_empty() {
        // Cargo interprets build-script stdout one line at a time: only
        // a line that itself starts with `cargo:warning=` is shown as a
        // warning. A single directive with embedded newlines would
        // surface just its first line and silently drop the
        // instructions, so every line gets its own directive.
        println!(
            "cargo:warning=BUSYBOX_TARBALL_SHA256 is unset — first-build \
             bootstrap. Computed SHA-256: {actual}"
        );
        println!(
            "cargo:warning=To lock the pin: update BUSYBOX_TARBALL_SHA256 in build.rs to \
             this value and commit. Subsequent builds will fail on mismatch."
        );
        return;
    }
    if !BUSYBOX_TARBALL_SHA256.eq_ignore_ascii_case(&actual) {
        // A panic message goes to stderr verbatim, so multi-line text is
        // fine here (unlike the `cargo:warning` directive above).
        panic!(
            "busybox tarball SHA-256 mismatch.\n\
             expected: {BUSYBOX_TARBALL_SHA256}\n\
             actual:   {actual}\n\
             \n\
             Diagnose:\n\
               • If the upstream archive was regenerated (rare — github\n\
                 changed archive generation in early 2023, otherwise these\n\
                 tarballs are stable for years), update BUSYBOX_TARBALL_SHA256\n\
                 in build.rs to the new digest after independently verifying\n\
                 the source.\n\
               • Otherwise treat as a supply-chain alert: compare against\n\
                 the upstream SHA published by the busybox maintainers\n\
                 before continuing."
        );
    }
}

/// Render `bytes` as lowercase hexadecimal, two characters per byte and
/// no separators. Hand-rolled so `[build-dependencies]` does not need
/// the `hex` crate just to print one 32-byte digest.
fn hex_encode_lowercase(bytes: &[u8]) -> String {
    // Nibble value -> ASCII digit.
    const DIGITS: &[u8; 16] = b"0123456789abcdef";
    let mut encoded = String::with_capacity(bytes.len() * 2);
    for &byte in bytes {
        // High nibble first, then low nibble.
        encoded.push(char::from(DIGITS[usize::from(byte >> 4)]));
        encoded.push(char::from(DIGITS[usize::from(byte & 0x0f)]));
    }
    encoded
}

/// Build `OUT_DIR/shift_registry.rs` from the `const NAME_SHIFT: u32 = N;`
/// declarations found in `src/budget.rs`. The generated file defines
/// `pub(crate) const ALL_SHIFTS: &[(u32, &str)]`, ordered by shift value
/// so test output stays stable.
///
/// A line is picked up only when ALL of the following hold (otherwise
/// it is skipped):
///   1. after trimming, it begins with `const `;
///   2. it contains the literal `: u32 = `;
///   3. the text before that literal ends with `_SHIFT`.
/// A line that passes all three MUST have a right-hand side that
/// parses as `u32` once trailing `;` characters are stripped — if it
/// does not, the build panics.
///
/// This is intentionally a line-oriented text scan rather than a Rust
/// parser. Consequences:
/// - Full-line comments (`//`, `/* */`, `///`) begin with `/`, never
///   `const`, so they cannot match. A trailing comment on a matching
///   line (`const X_SHIFT: u32 = 5; // foo`) leaves text after the `;`
///   that `trim_end_matches(';')` does not remove, so the u32 parse
///   panics — loud failure instead of a silently missing entry.
/// - String literals mentioning `SHIFT:` sit on non-`const` lines and
///   cannot match. The exception is a multi-line raw string whose
///   content includes a const-shaped line (e.g.
///   `r#"\nconst FOO_SHIFT: u32 = 4;\n"#`): that line would be picked
///   up. budget.rs has no such literal today, and the outcome is loud
///   either way: the extra entry is referenced by no hand-classified
///   enumeration, so the test's `unclassified` check (which asserts
///   `ALL_SHIFTS.filter(!classified.contains(v))` is empty) fails.
/// - Constants produced by macros have no source text to scan and are
///   missed (false negative, covered by naming convention).
/// - `static FOO_SHIFT` items and lowercase-named constants are missed
///   too (false negative; they break Rust convention regardless).
/// - A non-literal right-hand side such as
///   `const X_SHIFT: u32 = OTHER + 1;` panics rather than being dropped.
/// - The `: u32 = ` anchor assumes rustfmt's canonical single-space
///   layout. If rustfmt ever changed that spacing the scan would match
///   nothing; the registry would shrink, every hand-classified value
///   would land in `phantom_one_bit`
///   (one_bit_values.difference(&registry)) or `phantom_multi_bit`
///   (multi_bit_values.difference(&registry)), and the phantom
///   assertion would fail on the very next build.
///
/// Consumer: the hand-classified enumerations in `src/budget.rs::tests`.
/// Its `all_shifts_classified_in_exactly_one_enumeration` test asserts
/// that the union of the two hand-written lists equals the generated
/// set.
fn generate_shift_registry(out_dir: &std::path::Path) {
    use std::fmt::Write;
    println!("cargo::rerun-if-changed=src/budget.rs");
    let source = std::fs::read_to_string("src/budget.rs")
        .expect("read src/budget.rs for shift-registry scan");

    let mut registry: Vec<(u32, String)> = source
        .lines()
        .filter_map(|raw| {
            // Conditions 1 and 2: `const ` prefix, then the typed-assignment anchor.
            let (ident, rhs) = raw
                .trim()
                .strip_prefix("const ")?
                .split_once(": u32 = ")?;
            // Condition 3: only `*_SHIFT` names belong in the registry.
            let ident = ident.trim();
            if !ident.ends_with("_SHIFT") {
                return None;
            }
            // From here on a malformed value is a hard error, not a skip.
            let literal = rhs.trim_end_matches(';').trim();
            match literal.parse::<u32>() {
                Ok(shift) => Some((shift, ident.to_string())),
                Err(e) => {
                    panic!("shift-registry scan: parse `{literal}` as u32 for {ident}: {e}")
                }
            }
        })
        .collect();
    // Stable sort: equal shift values keep their source order.
    registry.sort_by_key(|entry| entry.0);

    let mut generated = String::from(concat!(
        "// Generated by build.rs. Lists every `const *_SHIFT: u32 = N;`\n",
        "// declaration in src/budget.rs, sorted by shift value. The\n",
        "// budget tests assert their hand-classified one-bit and\n",
        "// multi-bit enumerations cover every entry so a new SHIFT\n",
        "// cannot land without being classified into the right test.\n",
        "pub(crate) const ALL_SHIFTS: &[(u32, &str)] = &[\n",
    ));
    for (shift, ident) in &registry {
        writeln!(generated, "    ({shift}, \"{ident}\"),").expect("write shift entry");
    }
    generated.push_str("];\n");

    let target = out_dir.join("shift_registry.rs");
    std::fs::write(target, generated).expect("write shift_registry.rs");
}

/// Hash `bytes` with SipHash-1-3 and return the 64-bit result. Serves as
/// the drift detector for BTF content across `vmlinux.h` regenerations.
///
/// Same recipe as `src/test_support/sidecar.rs::sidecar_variant_hash`:
/// a `SipHasher13` built with `new_with_keys(0, 0)`, one `write` of the
/// input, then `finish`. The all-zero key is intentional — the value
/// only needs to detect drift, not resist hash-flooding, and a fixed
/// key means a later build.rs run can compare against a sidecar a
/// previous run wrote without any key exchange. SipHasher13 trades some
/// cryptographic strength for speed relative to SipHasher24, which is
/// fine for a build-artifact sidecar that is not a signed manifest.
fn siphash_13(bytes: &[u8]) -> u64 {
    use std::hash::Hasher as _;
    let mut state = siphasher::sip::SipHasher13::new_with_keys(0, 0);
    state.write(bytes);
    state.finish()
}

/// Fingerprint (zero-keyed SipHasher13) over every non-test `.rs` file
/// in the cast-analysis source directories:
///
///   - `src/monitor/cast_analysis` — the analyzer itself;
///   - `src/vmm/cast_analysis_load` — its on-demand loader;
///   - `src/monitor/sdt_alloc` — `discover_payload_btf_id` and
///     `MAX_BTF_ID_PROBE` resolve the cached `alloc_size_types`;
///   - `src/monitor/btf_render` and `src/monitor/bpf_map` —
///     `peel_modifiers` / `type_size` / `resolve_to_struct_id` resolve
///     each cast's terminal type (20+ call sites in
///     cast_analysis/mod.rs).
///
/// The value feeds the disk-cache key (`persist.rs::cache_path`), so any
/// analyzer change invalidates the cache without a hand-edited
/// `SCHEMA_VERSION` bump. Files named `tests.rs` are left out, but
/// inline `#[cfg(test)]` modules inside watched files are still hashed:
/// a test-only edit there does invalidate the cache, which errs on the
/// conservative side (never a stale serve).
///
/// Every watched directory is announced via `rerun-if-changed` so cargo
/// re-runs build.rs when analyzer source changes. A watched directory
/// that does not exist aborts the build instead of being skipped.
/// Dependency-version drift (btf-rs / libbpf) is out of scope here;
/// [`cargo_lock_fingerprint`] covers it and is folded into the same
/// cache key.
fn cast_analyzer_fingerprint() -> u64 {
    use siphasher::sip::SipHasher13;
    use std::hash::Hasher;

    const WATCHED_DIRS: [&str; 5] = [
        "src/monitor/cast_analysis",
        "src/vmm/cast_analysis_load",
        // sdt_alloc supplies the cached output's `alloc_size_types`
        // through `discover_payload_btf_id` + `MAX_BTF_ID_PROBE` (see the
        // alloc-size resolution loop in
        // cast_analysis_load::build_cast_analysis_from_bytes). Editing it
        // changes the cached result, so it has to invalidate the cache.
        "src/monitor/sdt_alloc",
        // Terminal-type resolution for every cast goes through
        // btf_render::{peel_modifiers,peel_modifiers_with_id,type_size}
        // and bpf_map::resolve_to_struct_id (20+ call sites in
        // cast_analysis/mod.rs), so a change to either module's
        // modifier-peel / struct-resolve traversal changes the cast map
        // for an unchanged binary. Their callees stay inside btf-rs (a
        // crate dependency) and std, which is where the watched-source
        // closure stops; btf-rs / libbpf version drift is handled by the
        // whole-Cargo.lock fingerprint that is also folded into the cast
        // cache key (cargo_lock_fingerprint + persist::cache_path).
        // The whole subtree is hashed, not individual functions, because
        // picking out functions would need a parser. The extra
        // invalidations from unrelated edits are cheap (one BPF-object
        // re-analysis), and these modules already invalidate other
        // caches when they change.
        "src/monitor/btf_render",
        "src/monitor/bpf_map",
    ];

    let mut sources: Vec<PathBuf> = Vec::new();
    for watched in WATCHED_DIRS {
        println!("cargo:rerun-if-changed={watched}");
        // A typo or a moved directory must stop the build: silently
        // losing a directory's contribution would reintroduce the stale
        // cache problem this fingerprint guards against. The guard sits
        // here because collect_fingerprint_files itself tolerates a
        // missing directory (needed for its recursive calls).
        let root = std::path::Path::new(watched);
        assert!(
            root.is_dir(),
            "cast-analysis fingerprint dir missing: {watched} (layout moved? update build.rs)"
        );
        collect_fingerprint_files(root, &mut sources);
    }

    // readdir order is unspecified; sorting makes the hash reproducible.
    sources.sort();

    let mut state = SipHasher13::new_with_keys(0, 0);
    for source in &sources {
        // The path is part of the input so that a pure rename still
        // changes the fingerprint.
        state.write(source.to_string_lossy().as_bytes());
        let contents = match std::fs::read(source) {
            Ok(contents) => contents,
            Err(e) => panic!("read {} for analyzer fingerprint: {e}", source.display()),
        };
        state.write(&contents);
    }
    state.finish()
}

/// Fingerprint (zero-keyed SipHasher13) of the full contents of
/// `Cargo.lock`. build.rs exports it as `KTSTR_CARGO_LOCK_FINGERPRINT`
/// and it becomes part of the cast-analysis cache key (see
/// `vmm::cast_analysis_load::persist::cache_path`).
///
/// Rationale: a dependency bump — `btf-rs` (BTF parsing) or
/// `libbpf-rs` / `libbpf-sys` (BPF-opcode constants) — can change the
/// cast map while ktstr's own source stays untouched, so the
/// analyzer-source fingerprint by itself would let a stale result be
/// served. Only the cast cache uses this value; the kernels / models /
/// disk_template caches come from external tools and do not depend on
/// crate versions. Hashing the WHOLE lockfile means any dependency
/// bump, even of an unrelated crate, invalidates the cast cache. That is
/// the conservative choice (never a stale serve) and costs one cast
/// re-analysis per scheduler binary per lockfile change.
///
/// Emits `rerun-if-changed` for `Cargo.lock` so a lockfile bump re-runs
/// build.rs.
///
/// # Panics
///
/// Panics if `Cargo.lock` cannot be read as a UTF-8 string.
fn cargo_lock_fingerprint() -> u64 {
    use std::hash::Hasher as _;
    println!("cargo:rerun-if-changed=Cargo.lock");
    // An unreadable lockfile must abort the build. Falling back to an
    // empty string would hash to a constant and let machines with
    // different dependency sets share one cache entry — same
    // panic-on-read-failure stance as cast_analyzer_fingerprint.
    let lockfile = match std::fs::read_to_string("Cargo.lock") {
        Ok(text) => text,
        Err(e) => panic!("read Cargo.lock for dependency fingerprint: {e}"),
    };
    let mut state = siphasher::sip::SipHasher13::new_with_keys(0, 0);
    state.write(lockfile.as_bytes());
    state.finish()
}

/// Recursively collect non-test `.rs` files under `dir` into `out`.
///
/// A file qualifies when its extension is exactly `rs` and its file
/// name is not `tests.rs`. Results are appended in directory-listing
/// order; callers that need determinism sort `out` afterwards.
///
/// A missing dir returns no files (tolerant for the recursion case);
/// the caller asserts each top-level watched dir exists, so a typo'd or
/// moved analyzer dir fails the build loudly rather than silently
/// dropping its fingerprint contribution.
///
/// # Panics
///
/// Panics on any directory-listing failure other than "not found"
/// (e.g. permission denied, I/O error) and on a failure to read an
/// individual directory entry. Skipping such errors would silently
/// shrink the fingerprint input and could let a stale cache entry be
/// served.
fn collect_fingerprint_files(dir: &std::path::Path, out: &mut Vec<PathBuf>) {
    let entries = match std::fs::read_dir(dir) {
        Ok(entries) => entries,
        // Only "the directory is not there" is tolerated.
        Err(e) if e.kind() == std::io::ErrorKind::NotFound => return,
        Err(e) => panic!("read_dir {} for analyzer fingerprint: {e}", dir.display()),
    };
    for entry in entries {
        let entry = entry.unwrap_or_else(|e| {
            panic!(
                "read_dir entry under {} for analyzer fingerprint: {e}",
                dir.display()
            )
        });
        let path = entry.path();
        if path.is_dir() {
            collect_fingerprint_files(&path, out);
        } else if path.extension().and_then(|e| e.to_str()) == Some("rs")
            && path.file_name().and_then(|n| n.to_str()) != Some("tests.rs")
        {
            out.push(path);
        }
    }
}