ktstr 0.10.0

Test harness for Linux process schedulers
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
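// Build script for ktstr. Generates vmlinux.h from kernel BTF using
// libbpf's btf_dump API, then builds the probe / fentry_probe BPF
// skeletons, emits the ALL_SHIFTS registry and the cast-analysis cache
// fingerprints, and builds the vendored busybox binary (plus wprof when
// the `wprof` feature is enabled).
//
// Uses the shared kernel resolver (src/kernel_path.rs) to find the
// BTF source. See resolve_btf() for the full search order.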

use std::env;
use std::path::PathBuf;
use std::process::{Command, Stdio};

use libbpf_cargo::SkeletonBuilder;

include!("src/kernel_path.rs");
include!("src/build_helpers.rs");

fn main() {
    let out_dir = PathBuf::from(env::var("OUT_DIR").unwrap());

    // Cache invalidation: track the env var that selects a kernel
    // and the build-script inputs (kernel_path resolver, C generator
    // source). Deliberately NOT emitting a `rerun-if-changed` on the
    // BTF source path itself:
    //
    //   1. `vmlinux` is consumed here only as the BTF source for
    //      `vmlinux.h` generation on the C side below, not as an
    //      input that the Rust compiler reads. BPF CO-RE (Compile
    //      Once Run Everywhere) relocates field offsets at LOAD
    //      time against the runtime kernel's BTF, so a field-layout
    //      drift between the compile-time `vmlinux.h` and the
    //      runtime kernel is resolved by libbpf on BPF object load
    //      — there is no compile-time correctness dependency on
    //      the exact byte content of the vmlinux used to generate
    //      `vmlinux.h`.
    //   2. `rerun-if-changed` on the BTF would force build.rs to
    //      re-run on every kernel rebuild. That runs the BPF
    //      skeleton generator unnecessarily when the drift (per
    //      (1)) has no compile-time correctness impact.
    //
    // However, WHEN build.rs does run (triggered by a watched
    // input — KTSTR_KERNEL change, kernel_path.rs edit, or a
    // previously-absent `vmlinux.h`), it SHOULD detect a BTF
    // content change and regenerate. The pre-hash design only
    // regenerated when `vmlinux.h` was absent entirely, which
    // meant a BTF-content change paired with an unrelated build-
    // script trigger would leave stale `vmlinux.h` in place. A
    // SipHasher13 hash of the BTF bytes is written alongside
    // `vmlinux.h` as `vmlinux.btf.hash`; regen fires when the
    // file is absent OR the stored hash differs from the current
    // BTF's hash. Operators who need to force regen unconditionally
    // still have `cargo clean` as the escape hatch. The algorithm
    // mirrors `src/test_support/sidecar.rs::sidecar_variant_hash`
    // so the project uses a single stable hash family.
    println!("cargo:rerun-if-env-changed=KTSTR_KERNEL");
    println!("cargo:rerun-if-changed=src/kernel_path.rs");
    println!("cargo:rerun-if-changed=src/bpf/vmlinux_gen.c");
    let ktstr_kernel = env::var("KTSTR_KERNEL").ok();

    // Generate vmlinux.h from kernel BTF.
    let vmlinux_h = out_dir.join("vmlinux.h");
    let hash_path = out_dir.join("vmlinux.btf.hash");
    // Resolve BTF + compute content hash eagerly. `resolve_btf`
    // returns `Option` to degrade cleanly when no BTF is reachable
    // (no KTSTR_KERNEL + no host BTF): if `vmlinux.h` is already in
    // place from an earlier build, we keep it rather than panicking
    // — matches the CO-RE design (runtime BTF fixes field drift
    // anyway), so a disappearing source is not a build-blocking
    // event. A MISSING `vmlinux.h` still panics below because we
    // have nothing to fall back on.
    let current_btf = resolve_btf(ktstr_kernel.as_deref());
    // Hash the BTF source for drift detection. Fault-tolerant: a
    // BTF path that resolved but whose bytes cannot be read (EACCES,
    // or a race where the file vanished between resolve and read)
    // downgrades to `None` instead of panicking, so we fall back to
    // the existence-only gate for `vmlinux.h`. The eventual regen
    // path below re-reads the bytes via `vmlinux_gen` and fails
    // loudly there if the source is truly unusable.
    let current_hash: Option<String> = current_btf.as_ref().and_then(|p| match std::fs::read(p) {
        Ok(bytes) => Some(format!("{:016x}", siphash_13(&bytes))),
        Err(e) => {
            println!(
                "cargo:warning=BTF source {} present but unreadable \
                     ({e}); skipping hash check, reusing existing vmlinux.h",
                p.display(),
            );
            None
        }
    });
    let stored_hash: Option<String> = std::fs::read_to_string(&hash_path)
        .ok()
        .map(|s| s.trim().to_string());
    // Regen fires on any of three conditions:
    //   - `vmlinux.h` is absent (first build or post-`cargo clean`);
    //   - the stored hash is absent but we have a current hash (the
    //     vmlinux.h was generated by an older build.rs that didn't
    //     track hashes — upgrade in place);
    //   - current and stored hashes differ (real drift).
    // An unreadable BTF with vmlinux.h already in place falls
    // through to "no regen" per `current_hash.is_none()`.
    let should_regen =
        !vmlinux_h.exists() || (current_hash.is_some() && current_hash != stored_hash);
    if should_regen {
        let btf_source = current_btf.unwrap_or_else(|| {
            panic!(
                "no BTF source found. Set KTSTR_KERNEL to a kernel build \
                 directory, or ensure /sys/kernel/btf/vmlinux exists."
            );
        });
        println!("generating vmlinux.h from {}", btf_source.display());

        // libbpf-sys (links = "bpf") emits installed headers at
        // DEP_BPF_INCLUDE with bpf/ prefix (bpf/btf.h, bpf/libbpf.h).
        let libbpf_include =
            PathBuf::from(env::var("DEP_BPF_INCLUDE").expect("DEP_BPF_INCLUDE not set"));

        // Compile the C vmlinux generator + driver into a standalone binary.
        let vmlinux_gen_bin = out_dir.join("vmlinux_gen");
        let driver_src = out_dir.join("vmlinux_gen_main.c");
        std::fs::write(
            &driver_src,
            format!(
                r#"
extern int generate_vmlinux_h(const char *, const char *);
int main(void) {{
    return generate_vmlinux_h("{btf}", "{out}") == 0 ? 0 : 1;
}}
"#,
                btf = btf_source.display(),
                out = vmlinux_h.display(),
            ),
        )
        .expect("write driver source");

        // libbpf-sys with vendored feature installs static libraries
        // (libbpf.a, libelf.a, libz.a) in the parent of DEP_BPF_INCLUDE.
        let libbpf_lib_dir = libbpf_include.parent().unwrap();

        let compiler = cc::Build::new().get_compiler();
        let status = Command::new(compiler.path())
            .args([
                "src/bpf/vmlinux_gen.c",
                driver_src.to_str().unwrap(),
                "-o",
                vmlinux_gen_bin.to_str().unwrap(),
                &format!("-I{}", libbpf_include.display()),
                &format!("-L{}", libbpf_lib_dir.display()),
                "-lbpf",
                "-lelf",
                "-lz",
            ])
            .status()
            .expect("compile vmlinux_gen");
        assert!(status.success(), "failed to compile vmlinux_gen");

        let status = Command::new(&vmlinux_gen_bin)
            .status()
            .expect("run vmlinux_gen");
        assert!(
            status.success(),
            "vmlinux_gen failed — check BTF source: {}",
            btf_source.display()
        );

        // Record the BTF content hash alongside `vmlinux.h`. A
        // future build.rs invocation reads this file and compares
        // against the freshly-hashed BTF; a mismatch triggers
        // regeneration above.
        //
        // Normally `current_hash` was populated at the top of
        // `main`. The one path that leaves it `None` while still
        // reaching this regen branch is: `!vmlinux_h.exists()` AND
        // `std::fs::read(&btf_source)` failed during the eager hash
        // attempt. In that case, the generator above successfully
        // invoked `vmlinux_gen` against `btf_source`, which means
        // libbpf could read it — the earlier read failure was
        // transient or the generator accessed the file via a path
        // libbpf handles differently (e.g. sysfs BTF). Re-read and
        // hash here so the sidecar is always populated alongside a
        // successful regen; on a second-read failure, skip the
        // sidecar (the generator already succeeded — the build is
        // in a good state; a missing sidecar forces the next
        // build.rs run to regenerate conservatively, which is
        // correct).
        let hash_opt: Option<String> = match current_hash.as_deref() {
            Some(h) => Some(h.to_string()),
            None => match std::fs::read(&btf_source) {
                Ok(bytes) => Some(format!("{:016x}", siphash_13(&bytes))),
                Err(e) => {
                    println!(
                        "cargo:warning=post-regen BTF re-read failed ({e}); \
                         skipping hash sidecar — next build.rs run will \
                         regenerate conservatively"
                    );
                    None
                }
            },
        };
        if let Some(hash) = hash_opt {
            // Trailing newline so `cat` / editor-open produces a
            // clean single-line display. The reader at the top of
            // main uses `.trim()` on the stored value, so the
            // newline round-trips.
            std::fs::write(&hash_path, format!("{hash}\n"))
                .unwrap_or_else(|e| panic!("write BTF hash sidecar {}: {e}", hash_path.display()));
        }
    }

    // arm64 bpf_tracing.h casts pt_regs through struct user_pt_regs,
    // a UAPI type that kernel BTF may omit. Append it if absent so
    // PT_REGS_PARMn_CORE compiles on arm64 hosts.
    if cfg!(target_arch = "aarch64") {
        let content = std::fs::read_to_string(&vmlinux_h).expect("read vmlinux.h");
        if !content.contains("struct user_pt_regs {") {
            use std::io::Write;
            let mut f = std::fs::OpenOptions::new()
                .append(true)
                .open(&vmlinux_h)
                .expect("open vmlinux.h for append");
            writeln!(
                f,
                "\n/* Added by build.rs: arm64 UAPI type needed by bpf_tracing.h */\n\
                 struct user_pt_regs {{\n\
                 \t__u64 regs[31];\n\
                 \t__u64 sp;\n\
                 \t__u64 pc;\n\
                 \t__u64 pstate;\n\
                 }};\n"
            )
            .expect("append user_pt_regs to vmlinux.h");
        }
    }

    let clang_args = [
        format!("-I{}", out_dir.display()),
        format!("-I{}", "src/bpf"),
    ];

    // Build the kprobe BPF skeleton.
    let skel_path = out_dir.join("probe_skel.rs");
    SkeletonBuilder::new()
        .source("src/bpf/probe.bpf.c")
        .obj(out_dir.join("probe.o"))
        .clang_args(clang_args.clone())
        .reference_obj(true)
        .build_and_generate(&skel_path)
        .expect("build probe BPF skeleton");

    // Build the fentry BPF skeleton (separate for independent loading).
    let fentry_skel_path = out_dir.join("fentry_probe_skel.rs");
    SkeletonBuilder::new()
        .source("src/bpf/fentry_probe.bpf.c")
        .obj(out_dir.join("fentry_probe.o"))
        .clang_args(clang_args)
        .reference_obj(true)
        .build_and_generate(&fentry_skel_path)
        .expect("build fentry probe BPF skeleton");

    println!("cargo::rerun-if-changed=src/bpf/probe.bpf.c");
    println!("cargo::rerun-if-changed=src/bpf/fentry_probe.bpf.c");
    println!("cargo::rerun-if-changed=src/bpf/intf.h");

    // Generate ALL_SHIFTS registry from src/budget.rs so the
    // budget-feature tests can assert exhaustive classification
    // coverage. Scans `const NAME_SHIFT: u32 = N;` declarations and
    // emits a `pub(crate) const ALL_SHIFTS: &[(u32, &str)]` slice
    // into OUT_DIR. The test in budget.rs takes the union of its
    // one-bit and multi-bit shift enumerations and asserts equality
    // with this slice — a new SHIFT constant added without updating
    // either enumeration fails the union check.
    generate_shift_registry(&out_dir);

    // Fingerprint the cast-analysis source so the on-disk cast cache
    // (src/vmm/cast_analysis_load/persist.rs) self-invalidates whenever
    // the analyzer changes — with no manual SCHEMA_VERSION bump. Without
    // this, an analyzer-behavior change reuses a stale cached result and
    // masks a just-fixed analyzer bug as a flake (the 2026-06-01
    // arena_confirmed-drop bug hid this way for hours). The fn emits
    // `rerun-if-changed` for the watched dirs so cargo recomputes the env
    // when the analyzer source changes.
    println!(
        "cargo:rustc-env=KTSTR_CAST_ANALYZER_FINGERPRINT={:016x}",
        cast_analyzer_fingerprint()
    );

    // Fingerprint the whole Cargo.lock so the cast-analysis cache
    // self-invalidates on any dependency bump: persist::cache_path folds
    // this into the cache key. A btf-rs (BTF parsing) or libbpf-rs /
    // libbpf-sys (BPF-opcode constants) version change can alter the cast
    // map with no ktstr source change, so the analyzer-source fingerprint
    // alone would serve a stale result. Only the cast cache folds this in;
    // kernels / models / disk_template are dependency-independent.
    println!(
        "cargo:rustc-env=KTSTR_CARGO_LOCK_FINGERPRINT={:016x}",
        cargo_lock_fingerprint()
    );

    // Build busybox from source for guest shell mode.
    //
    // Hermeticity contract:
    //
    //  - The tarball is fetched ONCE per OUT_DIR and cached at
    //    `$OUT_DIR/busybox`. `cargo clean` forces a re-fetch.
    //  - The fetched bytes are SHA-256-verified against
    //    [`BUSYBOX_TARBALL_SHA256`] before extraction. A mismatch
    //    panics with the actual vs expected hash so the operator
    //    can decide between "the upstream changed (regenerate the
    //    pin)" and "the download was tampered (investigate)".
    //  - `KTSTR_BUSYBOX_TARBALL=<path>` points the build at a
    //    pre-fetched local tarball — for air-gapped CI runners and
    //    hermetic CI caches. The SHA pin still applies; the local
    //    path is a transport substitute, not a verification bypass.
    //  - `KTSTR_SKIP_BUSYBOX_BUILD=1` writes a 0-byte placeholder at
    //    `$OUT_DIR/busybox` and skips the compile entirely. Shell
    //    mode is unavailable in the resulting binary;
    //    `cargo_ktstr::blobs::install_env` detects the empty blob
    //    and leaves `KTSTR_BUSYBOX_PATH` unset so consumers fail
    //    with a clear "shell mode unavailable" rather than an
    //    opaque "exec format error" on the 0-byte file. Mirrors
    //    the existing `KTSTR_SKIP_WPROF_BUILD` escape hatch below.
    //
    // The pre-pin git-clone fallback was removed alongside this
    // refactor: a clone bypasses the SHA gate (no tarball to
    // verify), and `KTSTR_BUSYBOX_TARBALL` covers the
    // tarball-fetch-failed case more cleanly.
    let busybox_bin = out_dir.join("busybox");
    println!("cargo:rerun-if-env-changed=KTSTR_SKIP_BUSYBOX_BUILD");
    println!("cargo:rerun-if-env-changed=KTSTR_BUSYBOX_TARBALL");
    let skip_busybox = std::env::var("KTSTR_SKIP_BUSYBOX_BUILD")
        .ok()
        .filter(|v| !v.is_empty())
        .is_some();
    if skip_busybox {
        println!(
            "cargo:warning=KTSTR_SKIP_BUSYBOX_BUILD set — writing 0-byte \
             $OUT_DIR/busybox placeholder; shell mode will be unavailable \
             in the resulting cargo-ktstr binary"
        );
        if !busybox_bin.exists() {
            std::fs::write(&busybox_bin, b"").unwrap_or_else(|e| {
                panic!(
                    "write 0-byte busybox placeholder {}: {e}",
                    busybox_bin.display()
                )
            });
        }
    } else if !busybox_bin.exists() {
        println!("cargo:warning=compiling busybox (first build only)...");

        // Check required tools before attempting build.
        if Command::new("make").arg("--version").output().is_err() {
            panic!(
                "busybox build requires 'make' — install build-essential \
                 (Debian/Ubuntu) or base-devel (Fedora/Arch)"
            );
        }
        if Command::new("gcc").arg("--version").output().is_err() {
            panic!(
                "busybox build requires 'gcc' — install build-essential \
                 (Debian/Ubuntu) or base-devel (Fedora/Arch)"
            );
        }

        let busybox_src = out_dir.join("busybox-src");

        // Recover from interrupted download: if the directory exists but
        // has no Makefile, the previous extraction was incomplete.
        if busybox_src.exists() && !busybox_src.join("Makefile").exists() {
            std::fs::remove_dir_all(&busybox_src).expect("remove incomplete busybox-src");
        }

        // Source the tarball: from a local path when
        // KTSTR_BUSYBOX_TARBALL is set, otherwise from the pinned
        // upstream URL with retry. Either path lands in
        // `tarball_bytes` which is then SHA-verified before any
        // extraction touches the filesystem.
        if !busybox_src.join("Makefile").exists() {
            const TARBALL_URL: &str =
                "https://github.com/mirror/busybox/archive/refs/tags/1_36_1.tar.gz";
            let tarball_bytes = match std::env::var("KTSTR_BUSYBOX_TARBALL")
                .ok()
                .filter(|v| !v.is_empty())
            {
                Some(local) => {
                    println!(
                        "cargo:warning=KTSTR_BUSYBOX_TARBALL set — reading {local} \
                         instead of fetching from {TARBALL_URL}"
                    );
                    std::fs::read(&local).unwrap_or_else(|e| {
                        panic!(
                            "read KTSTR_BUSYBOX_TARBALL={local}: {e} — the env \
                             var must point at a readable tarball matching the \
                             pinned SHA-256"
                        )
                    })
                }
                None => fetch_busybox_tarball(TARBALL_URL),
            };

            verify_busybox_tarball_sha256(&tarball_bytes);

            // Extract verified bytes into busybox-src/.
            let extract_dir = out_dir.join("busybox-extract");
            if extract_dir.exists() {
                let _ = std::fs::remove_dir_all(&extract_dir);
            }
            let gz = flate2::read::GzDecoder::new(std::io::Cursor::new(&tarball_bytes[..]));
            let mut archive = tar::Archive::new(gz);
            archive
                .unpack(&extract_dir)
                .unwrap_or_else(|e| panic!("extract busybox tarball: {e}"));
            let inner = extract_dir.join("busybox-1_36_1");
            std::fs::rename(&inner, &busybox_src).unwrap_or_else(|e| {
                panic!(
                    "expected extracted directory {} — tarball layout may have changed: {e}",
                    inner.display()
                )
            });
            std::fs::remove_dir_all(&extract_dir).ok();
        }

        // Configure busybox.
        let status = Command::new("make")
            .arg("defconfig")
            .current_dir(&busybox_src)
            .stdout(Stdio::inherit())
            .stderr(Stdio::inherit())
            .status()
            .expect("make defconfig");
        assert!(status.success(), "busybox make defconfig failed");

        // Enable static linking, disable CONFIG_TC (requires iproute2 headers).
        let config_path = busybox_src.join(".config");
        let config = std::fs::read_to_string(&config_path).expect("read busybox .config");
        let config = config
            .replace("# CONFIG_STATIC is not set", "CONFIG_STATIC=y")
            .replace("CONFIG_TC=y", "# CONFIG_TC is not set");
        std::fs::write(&config_path, config).expect("write patched busybox .config");

        // Resolve patched config non-interactively. Busybox's Kbuild
        // lacks olddefconfig; pipe empty input to oldconfig so every
        // NEW prompt accepts its default without blocking on stdin.
        let status = Command::new("make")
            .arg("oldconfig")
            .current_dir(&busybox_src)
            .stdin(Stdio::null())
            .stdout(Stdio::inherit())
            .stderr(Stdio::inherit())
            .status()
            .expect("make oldconfig");
        assert!(status.success(), "busybox make oldconfig failed");

        // Build busybox.  Single-threaded `-j1`: busybox is a pure-C
        // build dominated by gcc invocations that are already
        // parallelisable inside gcc's own job server when invoked
        // from a parallel parent; for a one-shot build out of a
        // build.rs the wall-time difference between `-j1` and
        // `-jN` is small (single-digit seconds on a developer box),
        // and `-j1` keeps the build deterministic + race-free
        // across hosts.
        let status = Command::new("make")
            .arg("-j1")
            .current_dir(&busybox_src)
            .stdout(Stdio::inherit())
            .stderr(Stdio::inherit())
            .status()
            .expect("busybox make");
        assert!(status.success(), "busybox build failed");

        // Copy binary to OUT_DIR.
        std::fs::copy(busybox_src.join("busybox"), &busybox_bin)
            .expect("copy busybox binary to OUT_DIR");
    }

    // wprof build: gated behind the `wprof` cargo feature (default
    // off). When disabled, a 0-byte placeholder at $OUT_DIR/wprof
    // satisfies the `include_bytes!` site in cargo_ktstr/blobs.rs.
    // The KTSTR_SKIP_WPROF_BUILD env var remains as a secondary
    // escape hatch for builds that enable the feature but want to
    // skip the clone/compile (CI caching, cross-compilation, etc.).
    let wprof_bin = out_dir.join("wprof");
    #[cfg(not(feature = "wprof"))]
    if !wprof_bin.exists() {
        std::fs::write(&wprof_bin, b"").unwrap_or_else(|e| {
            panic!(
                "write 0-byte wprof placeholder {}: {e}",
                wprof_bin.display()
            )
        });
    }
    #[cfg(feature = "wprof")]
    {
        println!("cargo:rerun-if-env-changed=KTSTR_SKIP_WPROF_BUILD");
        let skip_wprof = std::env::var("KTSTR_SKIP_WPROF_BUILD")
            .ok()
            .filter(|v| !v.is_empty())
            .is_some();

        if skip_wprof {
            println!(
                "cargo:warning=KTSTR_SKIP_WPROF_BUILD set — writing 0-byte \
             $OUT_DIR/wprof placeholder; do NOT use the resulting \
             cargo-ktstr binary for wprof capture"
            );
            if !wprof_bin.exists() {
                std::fs::write(&wprof_bin, b"").unwrap_or_else(|e| {
                    panic!(
                        "write 0-byte wprof placeholder {}: {e}",
                        wprof_bin.display()
                    )
                });
            }
        } else if !wprof_bin.exists() {
            println!("cargo:warning=cloning + compiling wprof (first build only)...");

            for tool in ["git", "make", "gcc", "clang"] {
                if Command::new(tool).arg("--version").output().is_err() {
                    panic!(
                        "wprof build requires '{tool}' on PATH — install via your \
                     distro's package manager (build-essential / base-devel for \
                     make+gcc; clang for BPF skeleton compile; git for \
                     submodule clone)"
                    );
                }
            }

            // Clone into OUT_DIR like busybox — re-clones on `cargo
            // clean` and stays per-workspace-isolated (matches the
            // shape of the other vendored binary).
            //
            // Cargo's workspace discovery walks UP from any nested
            // Cargo.toml. The wprof Makefile shells out to cargo at
            // exactly TWO sub-crates per `wprof-src/src/Makefile`:
            // L125 `cd $(LIBBLAZESYM_SRC) && $(CARGO) build` and L133
            // `cd $(LIBDEMANGLE_SRC) && $(CARGO) build`. blazesym IS
            // self-contained (its own `[workspace]` + `[workspace.lints]`
            // at the root of `wprof-src/blazesym/Cargo.toml`) so cargo's
            // walk terminates there naturally — no patch needed.
            // demangle (`wprof-src/src/demangle/Cargo.toml`) has neither
            // `[workspace]` nor lints inheritance, so the upward walk
            // would reach ktstr-root's `[workspace]` via target/ —
            // failing with "current package believes it's in a workspace
            // when it's not." The sentinel patch at L656+ appends an
            // empty `[workspace]` table to demangle's manifest, which
            // breaks the walk without losing any inheritance (demangle
            // has no `[lints] workspace = true`).
            //
            // vmlinux.h/ also has a Cargo.toml but the Makefile
            // references vmlinux.h ONLY as a header source
            // (`VMLINUX := ../vmlinux.h/include/$(ARCH)/vmlinux.h` at
            // L41), never via `cd vmlinux.h && cargo X`. If a future
            // wprof Makefile change adds such an invocation, vmlinux.h's
            // Cargo.toml will need the same sentinel patch.
            //
            // Tradeoff acknowledged: `cargo clean && cargo build`
            // re-clones the FULL wprof tree (~590MB working tree of
            // which ~20MB is .git after `--depth=1 --shallow-submodules`)
            // — measured 60+ seconds wall time on slow CI links.
            // Within a single cargo invocation, build.rs runs ONCE per
            // (package, profile, feature-combo) thanks to cargo's
            // build-script dedup, so multi-target builds against the
            // same ktstr package amortise the clone. Across different
            // cargo invocations (e.g. dev iteration switching between
            // debug and release), each invocation does its own clone.
            // The cost is acceptable in exchange for: (1) per-workspace
            // isolation — different ktstr checkouts can't accidentally
            // share a stale wprof version (the prior cache tracked
            // upstream HEAD with no pin), (2) `cargo clean` consistency
            // — no out-of-band `~/.cache/ktstr/wprof-src` rm needed,
            // (3) drop of ~70 lines of flock + XDG-resolution
            // infrastructure. Operators who want incremental builds
            // should prefer `cargo build -p ktstr` over `cargo clean`.
            let wprof_src = out_dir.join("wprof-src");
            // .git/HEAD is the strongest single-file signal for clone
            // completeness vs the prior Makefile-only check. A partial
            // `git clone` that fails mid-checkout leaves the working
            // tree empty/incomplete (Makefile possibly absent) but
            // .git/HEAD is created EARLIER, during init. Require BOTH:
            // .git/HEAD (init reached) AND src/Makefile (working tree
            // populated). Failing either means the cache is half-baked
            // and needs to be wiped before re-clone. The
            // `is_wprof_clone_complete` predicate (build_helpers.rs)
            // encodes this rule + has unit-test coverage for each
            // failure case.
            let wprof_makefile = wprof_src.join("src").join("Makefile");
            if wprof_src.exists() && !is_wprof_clone_complete(&wprof_src) {
                std::fs::remove_dir_all(&wprof_src).expect("remove incomplete wprof-src");
            }

            if !wprof_makefile.exists() {
                let git_url = "https://github.com/anakryiko/wprof.git";
                // Recursive clone over flaky networks fails partway
                // through one of the submodules (libbpf, bpftool,
                // blazesym, vmlinux.h, usdt, strobelight-libs) — the
                // shallow `git clone --depth=1 --shallow-submodules`
                // is one-shot; if it errors, the dir is left in an
                // incomplete state. Retry with bounded attempts +
                // exponential backoff via the shared
                // `retry_with_backoff` helper (also used by the
                // busybox tarball download with `MAX_TARBALL_ATTEMPTS
                // = 4`). Both call sites share backoff timing,
                // attempt counting, and log wording.
                //
                // Per-attempt cleanup of partial wprof_src lives
                // INSIDE the closure (see the `i > 1` guard at the top
                // of `clone_attempt` below).
                println!(
                    "cargo:warning=cloning {git_url} into {} (recursive — \
                 pulls libbpf, bpftool, blazesym, vmlinux.h, usdt, \
                 strobelight-libs)",
                    wprof_src.display()
                );
                const MAX_CLONE_ATTEMPTS: u32 = 4;
                let clone_attempt = |i: u32| -> Result<(), String> {
                    // After a failed attempt, wprof_src may be in a
                    // partial-clone state — git refuses to clone into a
                    // non-empty dir. Wipe before retry; swallow cleanup
                    // errors with a log so the retry still proceeds (if
                    // the partial state genuinely blocks the next clone,
                    // git will surface the error in this iteration's
                    // status). First attempt skips because the outer
                    // !exists() check above guaranteed the dir is empty.
                    if i > 1
                        && let Err(e) = std::fs::remove_dir_all(&wprof_src)
                    {
                        println!(
                            "cargo:warning=wprof partial-clone cleanup before attempt {i} \
                         failed: {e}; continuing to next attempt anyway"
                        );
                    }
                    // GIT_CONFIG_GLOBAL=/dev/null +
                    // GIT_CONFIG_SYSTEM=/dev/null bypass any host-level
                    // `~/.gitconfig` / `/etc/gitconfig` rewriting
                    // (e.g. `url.<base>.insteadOf`) that would re-route
                    // the public github.com URL through a private proxy.
                    // Build.rs must work reproducibly on any host AND
                    // must never bake host-private endpoints into the
                    // build graph. Repository URL stays the upstream
                    // public one.
                    //
                    // GIT_TERMINAL_PROMPT=0 + GIT_ASKPASS=/bin/false
                    // prevent git from blocking the build on a stdin
                    // credential prompt when an HTTP 401/403 hits.
                    // A retry that hangs on a prompt
                    // is worse than no retry — fail fast and let the
                    // outer panic surface the error.
                    //
                    // http.lowSpeedLimit=1000 + http.lowSpeedTime=60
                    // bound each attempt: git aborts the transfer if
                    // throughput stays below 1 KB/s for 60 s. Without
                    // this, a half-open TCP connection (NAT timeout,
                    // blackholed route) hangs git until the OS TCP
                    // keepalive fires — typically minutes to hours
                    // per attempt. Passing via `-c key=value` rather
                    // than env vars keeps the setting scoped to this
                    // single invocation.
                    let status = Command::new("git")
                        .env("GIT_CONFIG_GLOBAL", "/dev/null")
                        .env("GIT_CONFIG_SYSTEM", "/dev/null")
                        .env("GIT_TERMINAL_PROMPT", "0")
                        .env("GIT_ASKPASS", "/bin/false")
                        .arg("-c")
                        .arg("http.lowSpeedLimit=1000")
                        .arg("-c")
                        .arg("http.lowSpeedTime=60")
                        .arg("clone")
                        .arg("--recurse-submodules")
                        .arg("--depth=1")
                        .arg("--shallow-submodules")
                        .arg(git_url)
                        .arg(&wprof_src)
                        .stdout(Stdio::inherit())
                        .stderr(Stdio::inherit())
                        .status()
                        .expect("spawn git clone for wprof");
                    if status.success() {
                        Ok(())
                    } else {
                        Err(format!("git clone exited {status}"))
                    }
                };
                if let Err(err) =
                    retry_with_backoff("wprof git clone", MAX_CLONE_ATTEMPTS, clone_attempt)
                {
                    panic!(
                        "wprof git clone failed after {MAX_CLONE_ATTEMPTS} attempts \
                     (last error: {err}). Check network connectivity to \
                     {git_url}; if the cache directory is in an \
                     unrecoverable state, `rm -rf {}` and re-run `cargo build`.",
                        wprof_src.display()
                    );
                }
            }

            // Patch wprof-src/src/demangle/Cargo.toml with a sentinel
            // `[workspace]` table to break the upward workspace walk
            // before invoking make. The Makefile shells out to
            // `cd demangle && cargo build`; without the sentinel,
            // cargo walks UP from demangle and finds the ktstr
            // workspace at the repository root (because OUT_DIR is
            // under target/), failing with "current package believes
            // it's in a workspace when it's not." An empty `[workspace]`
            // table tells cargo to stop the walk at demangle — and
            // since demangle has no lints inheritance, no semantics
            // are affected.
            //
            // Idempotent: subsequent builds SHORT-CIRCUIT when the
            // exact-line `[workspace]` declaration is already present
            // (the `already_patched` gate below). The append path only
            // fires on the first build after a clean clone. The check
            // matches whole lines after trimming whitespace (not
            // substrings) to avoid false-positives on
            // `[workspace.lints]` or commented `# [workspace]` —
            // either would trick a substring check into skipping the
            // append even though the real sentinel table isn't there.
            let demangle_manifest = wprof_src.join("src").join("demangle").join("Cargo.toml");
            if demangle_manifest.exists() {
                let existing = std::fs::read_to_string(&demangle_manifest)
                    .unwrap_or_else(|e| panic!("read {}: {e}", demangle_manifest.display()));
                let already_patched = existing.lines().any(|l| l.trim() == "[workspace]");
                if !already_patched {
                    use std::io::Write;
                    let mut f = std::fs::OpenOptions::new()
                        .append(true)
                        .open(&demangle_manifest)
                        .unwrap_or_else(|e| {
                            panic!("open {} for append: {e}", demangle_manifest.display())
                        });
                    f.write_all(b"\n[workspace]\n").unwrap_or_else(|e| {
                        panic!("append [workspace] to {}: {e}", demangle_manifest.display())
                    });
                }
            }

            // Build wprof.  Single-threaded `-j1` instead of `-j{nproc}`:
            // the upstream wprof Makefile has a missing prerequisite
            // edge between the `libdemangle_c.a` build (a recursive
            // `cargo build` inside the demangle sub-crate) and the
            // sibling `cp` that copies the produced archive into
            // wprof's OUTPUT dir.  Under `-jN` the `cp` races the
            // cargo build and fires before the .a exists, surfacing
            // as `cp: cannot stat .../libdemangle_c.a` → `wprof build
            // failed`.  `-j1` serialises the recipe so the dependency
            // ordering the Makefile *intends* is the ordering it gets.
            // The wall-time cost is small in practice: the dominant
            // builds (blazesym, demangle) are individual `cargo build`
            // invocations that already parallelise internally per
            // CARGO_BUILD_JOBS / `--jobs`, so `make`'s outer
            // parallelism would only overlap distinct cargo
            // invocations against each other — which is exactly the
            // pattern that triggers the race.
            let status = Command::new("make")
                .arg("-j1")
                .current_dir(wprof_src.join("src"))
                .stdout(Stdio::inherit())
                .stderr(Stdio::inherit())
                .status()
                .expect("spawn make for wprof");
            assert!(status.success(), "wprof build failed");

            // The wprof Makefile emits the binary at src/wprof (the
            // submodule-init + libbpf-link pattern in
            // github.com/anakryiko/wprof/src/Makefile).
            let built_bin = wprof_src.join("src").join("wprof");
            assert!(
                built_bin.exists(),
                "wprof build succeeded but binary not found at expected path: {}",
                built_bin.display()
            );
            std::fs::copy(&built_bin, &wprof_bin).expect("copy wprof binary to OUT_DIR");
        }
    } // #[cfg(feature = "wprof")]
}

/// SHA-256 hex digest of the upstream busybox-1.36.1 release tarball
/// (`busybox-1_36_1.tar.gz` from the `mirror/busybox` github archive).
///
/// **Sentinel value**: `""` means the pin is not yet recorded for this
/// checkout. In that case [`verify_busybox_tarball_sha256`] emits the
/// computed digest as a `cargo:warning` and continues — first-build
/// integration. To activate the verification gate, replace the empty
/// string with the printed digest, then commit. Subsequent builds
/// fail on mismatch.
///
/// **Format**: hex digits only. [`verify_busybox_tarball_sha256`]
/// compares with `eq_ignore_ascii_case`, so upper- and lowercase hex
/// are both accepted; the digest it prints is lowercase.
///
/// **Rotation**: bumping the busybox version requires updating BOTH
/// the URL in the `fetch_busybox_tarball` call site AND this pin in
/// lockstep — a partial edit produces a SHA mismatch on the next
/// fetch (fail-loud, not silent-pull-wrong-bytes).
///
/// **Why a custom pin instead of cargo's vendoring**: cargo's
/// vendoring covers crate sources, not arbitrary C-source tarballs
/// downloaded by a build script. The verification has to live in
/// `build.rs` itself.
const BUSYBOX_TARBALL_SHA256: &str = "";

/// Fetch the upstream busybox tarball with retry; return the raw
/// gzip-compressed bytes (NOT yet SHA-verified — caller passes the
/// returned buffer through [`verify_busybox_tarball_sha256`] before
/// extracting). Extracted from the prior in-line download so the
/// `KTSTR_BUSYBOX_TARBALL` operator override can read a local file
/// through the same downstream pipeline.
///
/// # Environment
///
/// - `GITHUB_TOKEN` (optional, ignored when empty): sent as a bearer
///   token on the request.
/// - `HTTPS_PROXY` / `https_proxy` / `HTTP_PROXY` / `http_proxy`: the
///   first one that is set to a non-blank value is used as the proxy
///   for the request.
/// - `NO_PROXY` / `no_proxy`: exclusions honoured for that proxy.
///
/// # Panics
///
/// Panics with a remediation message when every one of the
/// `MAX_TARBALL_ATTEMPTS` attempts fails.
fn fetch_busybox_tarball(url: &str) -> Vec<u8> {
    // Authenticated GitHub requests get 1000/hr per token vs the
    // 60/hr IP-based unauth limit. GitHub Actions auto-issues
    // GITHUB_TOKEN per job; outside CI the env var is typically
    // absent and the request goes unauth, which still works for
    // public repos at low rate. An empty value is treated as absent:
    // sending a blank `Authorization: Bearer` header would turn an
    // otherwise-working anonymous request into an auth failure.
    let github_token = std::env::var("GITHUB_TOKEN")
        .ok()
        .filter(|token| !token.trim().is_empty());

    // Resolve the proxy once, up front. Same precedence as before
    // (HTTPS_PROXY, https_proxy, HTTP_PROXY, http_proxy), but a
    // variable that is set to an empty/blank string is skipped rather
    // than handed to reqwest as a (necessarily invalid) proxy URL,
    // which would fail every attempt.
    let proxy_url = ["HTTPS_PROXY", "https_proxy", "HTTP_PROXY", "http_proxy"]
        .into_iter()
        .filter_map(|name| std::env::var(name).ok())
        .find(|value| !value.trim().is_empty());

    let attempt = |attempt_idx: u32| -> Result<Vec<u8>, String> {
        // `timeout()` bounds the whole request including the body
        // when read via `.bytes()` (which uses `wait::timeout`
        // internally per `reqwest::blocking::Response::bytes`),
        // but does NOT apply when reading the response via the
        // `Read` trait -- streaming bypasses reqwest's timeout
        // machinery so a slow-drip server can hang the build
        // indefinitely. Buffer the body so the timeout actually
        // fires.
        let mut client_builder = reqwest::blocking::Client::builder()
            .timeout(std::time::Duration::from_secs(120))
            .connect_timeout(std::time::Duration::from_secs(30))
            .user_agent(concat!("ktstr-build/", env!("CARGO_PKG_VERSION")));

        // Explicit proxy configuration gives a precise error message
        // for a malformed proxy URL. Registering an explicit proxy
        // turns off reqwest's automatic system-proxy detection — and
        // with it the automatic NO_PROXY handling — so the NO_PROXY
        // exclusions are re-attached by hand via `NoProxy::from_env`.
        if let Some(proxy_url) = proxy_url.as_deref() {
            let proxy = reqwest::Proxy::all(proxy_url)
                .map_err(|e| format!("invalid proxy URL {proxy_url}: {e}"))?
                .no_proxy(reqwest::NoProxy::from_env());
            client_builder = client_builder.proxy(proxy);
        }

        let client = client_builder
            .build()
            .map_err(|e| format!("http client: {e}"))?;
        let mut req = client.get(url);
        if let Some(ref token) = github_token {
            req = req.bearer_auth(token);
        }
        let resp = req
            .send()
            .and_then(|r| r.error_for_status())
            .map_err(|e| format!("attempt {attempt_idx} request: {e}"))?;
        let body = resp
            .bytes()
            .map_err(|e| format!("attempt {attempt_idx} body: {e}"))?;
        Ok(body.to_vec())
    };

    println!("cargo:warning=downloading busybox source tarball from {url}");
    const MAX_TARBALL_ATTEMPTS: u32 = 4;
    retry_with_backoff("busybox tarball download", MAX_TARBALL_ATTEMPTS, attempt).unwrap_or_else(
        |e| {
            panic!(
                "failed to obtain busybox source after {MAX_TARBALL_ATTEMPTS} attempts.\n\
             tarball ({url}): {e}\n\
             Remediation:\n\
               • Check network connectivity (the build script needs HTTPS\n\
                 access to github.com to fetch the upstream tarball).\n\
               • If behind a proxy, ensure HTTP_PROXY/HTTPS_PROXY environment\n\
                 variables are set (e.g., export HTTPS_PROXY=http://proxy:8080).\n\
               • Or set KTSTR_BUSYBOX_TARBALL=<path> to point at a\n\
                 pre-fetched local copy of {url} — useful for air-gapped\n\
                 CI runners and hermetic build environments.\n\
               • Or set KTSTR_SKIP_BUSYBOX_BUILD=1 to skip the busybox\n\
                 compile entirely (shell mode will be unavailable in the\n\
                 resulting cargo-ktstr binary).",
            )
        },
    )
}

/// Verify the downloaded busybox tarball against [`BUSYBOX_TARBALL_SHA256`].
///
/// Three outcomes:
///
///   - **Pin empty**: log the computed digest as `cargo:warning`
///     lines and continue. First-build bootstrap path — the operator
///     pastes the printed value into `BUSYBOX_TARBALL_SHA256` to lock
///     the pin.
///   - **Pin matches** (ASCII-case-insensitive hex compare): silent
///     pass.
///   - **Pin mismatches**: panic with both digests. The operator
///     investigates: a regenerated upstream archive (github does this
///     rarely; cf. the 2023 git-archive checksum change) requires a
///     pin refresh, whereas an unexplained mismatch on a fixed pin
///     indicates supply-chain tampering and warrants investigation
///     before the bytes hit the build.
///
/// # Panics
///
/// Panics when the pin is non-empty and differs from the SHA-256 of
/// `tarball_bytes`.
fn verify_busybox_tarball_sha256(tarball_bytes: &[u8]) {
    use sha2::{Digest, Sha256};
    let actual = {
        let mut hasher = Sha256::new();
        hasher.update(tarball_bytes);
        hex_encode_lowercase(&hasher.finalize())
    };
    if BUSYBOX_TARBALL_SHA256.is_empty() {
        // Cargo parses build-script directives one stdout line at a
        // time, so a single `cargo:warning=` message with embedded
        // newlines only surfaces its first line — the continuation
        // lines carry no directive prefix and are not shown as
        // warnings. Emit one directive per line so the lock-the-pin
        // instructions actually reach the operator.
        println!(
            "cargo:warning=BUSYBOX_TARBALL_SHA256 is unset — first-build \
             bootstrap. Computed SHA-256: {actual}"
        );
        println!(
            "cargo:warning=To lock the pin: update BUSYBOX_TARBALL_SHA256 in build.rs to \
             this value and commit."
        );
        println!("cargo:warning=Subsequent builds will fail on mismatch.");
        return;
    }
    if !BUSYBOX_TARBALL_SHA256.eq_ignore_ascii_case(&actual) {
        panic!(
            "busybox tarball SHA-256 mismatch.\n\
             expected: {BUSYBOX_TARBALL_SHA256}\n\
             actual:   {actual}\n\
             \n\
             Diagnose:\n\
               • If the upstream archive was regenerated (rare — github\n\
                 changed archive generation in early 2023, otherwise these\n\
                 tarballs are stable for years), update BUSYBOX_TARBALL_SHA256\n\
                 in build.rs to the new digest after independently verifying\n\
                 the source.\n\
               • Otherwise treat as a supply-chain alert: compare against\n\
                 the upstream SHA published by the busybox maintainers\n\
                 before continuing."
        );
    }
}

/// Render `bytes` as a lowercase hexadecimal string, two digits per
/// input byte (so the result is `2 * bytes.len()` characters long).
/// Hand-rolled so `[build-dependencies]` does not need the `hex`
/// crate for the sake of a single 32-byte digest.
fn hex_encode_lowercase(bytes: &[u8]) -> String {
    // Nibble value -> ASCII digit.
    const DIGITS: &[u8; 16] = b"0123456789abcdef";
    let mut encoded = String::with_capacity(bytes.len() * 2);
    for &byte in bytes {
        // High nibble first, then low nibble.
        encoded.push(char::from(DIGITS[usize::from(byte >> 4)]));
        encoded.push(char::from(DIGITS[usize::from(byte & 0x0f)]));
    }
    encoded
}

/// Build `OUT_DIR/shift_registry.rs` from the `const NAME_SHIFT: u32 = N;`
/// declarations found in `src/budget.rs`. The generated file defines
/// `pub(crate) const ALL_SHIFTS: &[(u32, &str)]`, ordered by shift
/// value so test output is stable.
///
/// A line is accepted only when ALL of the following hold; otherwise
/// it is skipped:
/// 1. after trimming, it starts with `const `;
/// 2. it contains the literal `: u32 = `;
/// 3. the name before that literal ends with `_SHIFT`;
/// 4. the text after it (trailing `;` removed) parses as `u32` — a
///    line that passes 1–3 but fails here panics rather than being
///    skipped.
///
/// This is intentionally a line-oriented text scan and not a Rust
/// parser. Known consequences:
/// - Comment-only lines (`//`, `///`, `/* */`) begin with `/`, so
///   they can never match. A trailing comment on a const line
///   (`const X_SHIFT: u32 = 5; // foo`) leaves text after the `;`,
///   the `u32` parse fails, and the build panics — loud, not a
///   silent drop.
/// - A const-shaped line inside a multi-line raw string literal
///   would be picked up. `budget.rs` has none today, and the extra
///   entry would be reported by the consuming test's `unclassified`
///   check instead of slipping through.
/// - Macro-generated constants have no source text and are invisible
///   to the scan (false negative, covered by naming convention).
/// - `static FOO_SHIFT` items and lowercase names are likewise
///   invisible (false negative; unconventional anyway).
/// - A non-literal right-hand side (`const X_SHIFT: u32 = OTHER + 1;`)
///   panics at the parse step.
/// - The `: u32 = ` anchor assumes rustfmt's single-space layout. If
///   formatting ever changed, every SHIFT would drop out of the
///   registry and the consuming test's phantom checks
///   (`phantom_one_bit` / `phantom_multi_bit`) would fail on the very
///   next build.
///
/// Consumer: the hand-classified enumerations in
/// `src/budget.rs::tests`, whose
/// `all_shifts_classified_in_exactly_one_enumeration` test asserts
/// that the union of the two hand-written lists equals this
/// generated set.
fn generate_shift_registry(out_dir: &std::path::Path) {
    println!("cargo::rerun-if-changed=src/budget.rs");
    let source = std::fs::read_to_string("src/budget.rs")
        .expect("read src/budget.rs for shift-registry scan");

    // One (value, name) pair per accepted declaration, in source order.
    let mut registry: Vec<(u32, String)> = source
        .lines()
        .filter_map(|raw| {
            let declaration = raw.trim().strip_prefix("const ")?;
            let (name_part, val_part) = declaration.split_once(": u32 = ")?;
            let name = name_part.trim();
            if !name.ends_with("_SHIFT") {
                return None;
            }
            let literal = val_part.trim_end_matches(';').trim();
            match literal.parse::<u32>() {
                Ok(value) => Some((value, name.to_string())),
                // Fail loud: a SHIFT const we cannot evaluate must not
                // silently vanish from the registry.
                Err(e) => {
                    panic!("shift-registry scan: parse `{literal}` as u32 for {name}: {e}")
                }
            }
        })
        .collect();
    // Stable sort: declarations sharing a value keep source order.
    registry.sort_by_key(|entry| entry.0);

    let mut rendered = String::from(concat!(
        "// Generated by build.rs. Lists every `const *_SHIFT: u32 = N;`\n",
        "// declaration in src/budget.rs, sorted by shift value. The\n",
        "// budget tests assert their hand-classified one-bit and\n",
        "// multi-bit enumerations cover every entry so a new SHIFT\n",
        "// cannot land without being classified into the right test.\n",
        "pub(crate) const ALL_SHIFTS: &[(u32, &str)] = &[\n",
    ));
    for (value, name) in &registry {
        rendered.push_str(&format!("    ({value}, \"{name}\"),\n"));
    }
    rendered.push_str("];\n");

    let destination = out_dir.join("shift_registry.rs");
    std::fs::write(destination, rendered).expect("write shift_registry.rs");
}

/// Compute the 64-bit SipHash-1-3 of `bytes`; the value is used to
/// notice BTF content drift between `vmlinux.h` regenerations.
///
/// Same recipe as `src/test_support/sidecar.rs::sidecar_variant_hash`:
/// a `SipHasher13` keyed with `(0, 0)`, one `write` of the input, then
/// `finish`. The all-zero key is intentional — this is a drift
/// detector, not DoS protection, and a key-less hash lets a later
/// build.rs run compare against a sidecar written by an earlier run
/// with nothing to coordinate. SipHasher13 trades some cryptographic
/// strength relative to SipHasher24 for speed, which is fine for a
/// build-artifact sidecar that is not a signed manifest.
fn siphash_13(bytes: &[u8]) -> u64 {
    use std::hash::Hasher as _;
    let mut state = siphasher::sip::SipHasher13::new_with_keys(0, 0);
    state.write(bytes);
    state.finish()
}

/// Fingerprint (SipHasher13, zero keys) of every non-test `.rs` file
/// beneath the cast-analysis source directories:
///
/// - `src/monitor/cast_analysis` — the analyzer itself;
/// - `src/vmm/cast_analysis_load` — its on-demand loader;
/// - `src/monitor/sdt_alloc` — `discover_payload_btf_id` and
///   `MAX_BTF_ID_PROBE` resolve the cached `alloc_size_types`;
/// - `src/monitor/btf_render` and `src/monitor/bpf_map` —
///   `peel_modifiers` / `type_size` / `resolve_to_struct_id` resolve
///   every cast's terminal type (20+ call sites in
///   cast_analysis/mod.rs).
///
/// The result is folded into the disk-cache key
/// (`persist.rs::cache_path`), so any analyzer edit invalidates the
/// cache without a manual `SCHEMA_VERSION` bump. Files named
/// `tests.rs` are left out, but inline `#[cfg(test)]` modules inside
/// watched files are hashed — a test-only edit there does invalidate
/// the cache, which errs on the safe side (never a stale serve).
///
/// Every watched dir is announced via `rerun-if-changed` so cargo
/// re-runs build.rs when analyzer source changes. A watched dir that
/// does not exist is a hard error, not a silent skip. Dependency
/// version drift (btf-rs / libbpf) is out of scope here; it is
/// covered by [`cargo_lock_fingerprint`], which is folded into the
/// same cache key.
fn cast_analyzer_fingerprint() -> u64 {
    use siphasher::sip::SipHasher13;
    use std::hash::Hasher as _;

    const WATCHED_DIRS: [&str; 5] = [
        "src/monitor/cast_analysis",
        "src/vmm/cast_analysis_load",
        // sdt_alloc supplies the cached output's `alloc_size_types`
        // through `discover_payload_btf_id` + `MAX_BTF_ID_PROBE` (see
        // the alloc-size resolution loop in
        // cast_analysis_load::build_cast_analysis_from_bytes). An edit
        // there changes the cached result, so it has to invalidate.
        "src/monitor/sdt_alloc",
        // cast_analysis resolves each cast's terminal type via
        // btf_render::{peel_modifiers,peel_modifiers_with_id,type_size}
        // and bpf_map::resolve_to_struct_id (20+ call sites in
        // cast_analysis/mod.rs); changing either traversal alters the
        // cached cast map for an unchanged binary. Their callees stay
        // inside btf-rs (a crate dep) + std, so the watched-source
        // closure stops here; crate-version drift is handled by the
        // whole-Cargo.lock fingerprint (cargo_lock_fingerprint +
        // persist::cache_path). Whole subtrees are watched because
        // per-fn extraction would need a parser; the extra
        // invalidations on unrelated edits are cheap (one BPF-object
        // re-analysis) and these modules already invalidate other
        // caches when they change.
        "src/monitor/btf_render",
        "src/monitor/bpf_map",
    ];

    let mut sources: Vec<PathBuf> = Vec::new();
    for dir in WATCHED_DIRS {
        println!("cargo:rerun-if-changed={dir}");
        // A typo or layout move must break the build: silently losing
        // a directory's contribution would bring back the stale-cache
        // problem this fingerprint exists to prevent.
        // collect_fingerprint_files itself tolerates a missing dir, so
        // the existence check for top-level dirs is done here.
        let root = std::path::Path::new(dir);
        assert!(
            root.is_dir(),
            "cast-analysis fingerprint dir missing: {dir} (layout moved? update build.rs)"
        );
        collect_fingerprint_files(root, &mut sources);
    }

    // readdir order is unspecified; sort so the hash is deterministic.
    sources.sort();

    let mut state = SipHasher13::new_with_keys(0, 0);
    for source in &sources {
        // The path is hashed as well, so a pure rename still changes
        // the fingerprint.
        state.write(source.to_string_lossy().as_bytes());
        let contents = match std::fs::read(source) {
            Ok(contents) => contents,
            Err(e) => panic!("read {} for analyzer fingerprint: {e}", source.display()),
        };
        state.write(&contents);
    }
    state.finish()
}

/// Fingerprint (SipHasher13, zero keys) of the complete `Cargo.lock`.
/// build.rs emits it as `KTSTR_CARGO_LOCK_FINGERPRINT`, and it is
/// folded into the cast-analysis cache key (see
/// `vmm::cast_analysis_load::persist::cache_path`).
///
/// Why it exists: a dependency bump — `btf-rs` (BTF parsing) or
/// `libbpf-rs` / `libbpf-sys` (BPF-opcode constants) — can change the
/// cast map with no ktstr source change, so the analyzer-source
/// fingerprint alone could serve a stale result. Only the cast cache
/// uses this value; the kernels / models / disk_template caches come
/// from external tools and do not depend on crate versions.
///
/// Hashing the WHOLE lockfile means any dependency bump, even of an
/// unrelated crate, invalidates the cast cache. That is deliberately
/// over-conservative (never a stale serve) and costs one cast
/// re-analysis per scheduler binary per lockfile change. The
/// `rerun-if-changed` directive makes cargo re-run build.rs when the
/// lockfile changes.
///
/// # Panics
///
/// Panics if `Cargo.lock` cannot be read as UTF-8 text.
fn cargo_lock_fingerprint() -> u64 {
    use std::hash::Hasher as _;
    println!("cargo:rerun-if-changed=Cargo.lock");
    // An unreadable lockfile must stop the build. Falling back to an
    // empty string would hash to a constant and let machines with
    // different dependency sets share one cache entry — same
    // panic-on-read-failure stance as cast_analyzer_fingerprint.
    let lockfile = match std::fs::read_to_string("Cargo.lock") {
        Ok(text) => text,
        Err(e) => panic!("read Cargo.lock for dependency fingerprint: {e}"),
    };
    let mut state = siphasher::sip::SipHasher13::new_with_keys(0, 0);
    state.write(lockfile.as_bytes());
    state.finish()
}

/// Walk `dir` depth-first and append every `.rs` file that is not
/// named `tests.rs` to `out`.
///
/// A directory that cannot be listed (e.g. it does not exist)
/// contributes nothing, and directory entries that fail to read are
/// skipped. That leniency is meant for the recursion case only:
/// the caller asserts that each top-level watched directory exists,
/// so a mistyped or relocated analyzer dir still breaks the build
/// instead of quietly dropping out of the fingerprint.
fn collect_fingerprint_files(dir: &std::path::Path, out: &mut Vec<PathBuf>) {
    /// True for `*.rs` files other than `tests.rs`.
    fn is_fingerprint_source(candidate: &std::path::Path) -> bool {
        let has_rs_extension = candidate.extension().and_then(|ext| ext.to_str()) == Some("rs");
        let is_tests_file = candidate.file_name().and_then(|name| name.to_str()) == Some("tests.rs");
        has_rs_extension && !is_tests_file
    }

    let listing = match std::fs::read_dir(dir) {
        Ok(listing) => listing,
        Err(_) => return,
    };
    for item in listing {
        // Unreadable entries are skipped, not fatal.
        let Ok(item) = item else {
            continue;
        };
        let candidate = item.path();
        if candidate.is_dir() {
            collect_fingerprint_files(&candidate, out);
            continue;
        }
        if is_fingerprint_source(&candidate) {
            out.push(candidate);
        }
    }
}