ktstr 0.6.0

Test harness for Linux process schedulers
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
//! Kernel source acquisition: tarball download, git clone, local tree.
//!
//! Three entry points — [`download_tarball`], [`git_clone`], and
//! [`local_source`] — each return an [`AcquiredSource`] carrying the
//! source directory, cache key, and metadata the caller needs to
//! proceed to configuration and build.

use std::io::Read;
use std::num::NonZeroU32;
use std::path::{Path, PathBuf};
use std::sync::OnceLock;
use std::time::{Duration, Instant};

use anyhow::{Context, Result, anyhow};
use reqwest::blocking::Client;
use sha2::{Digest, Sha256};

/// Process-wide [`reqwest::blocking::Client`] lazily initialized on
/// first access via [`shared_client`]. Keeping a single `Client`
/// instance across the fetch-family reuses its TCP connection pool
/// and TLS session cache across repeated calls to the same host
/// within a CLI run. Cross-host fetches in the same run still
/// re-handshake because reqwest's connection pool keys on host.
///
/// [`shared_client`] is the initializing accessor (`get_or_init`);
/// [`is_shared_client`] only peeks with `get`, so an identity check
/// never forces the client to be constructed.
static SHARED_CLIENT: OnceLock<Client> = OnceLock::new();

/// Connect-phase timeout (10 seconds) for [`shared_client`]: bounds
/// the time spent in the TCP + TLS handshake before reqwest gives up
/// on a peer. Bounds the dead-route case — a CDN edge that accepts
/// the SYN but stalls the handshake, or a route that blackholes
/// outright — without putting any ceiling on the response body's
/// streaming duration once the connection is up.
///
/// No total request `.timeout()` is set: the same client serves both
/// short requests (directory listings, releases.json) and large
/// tarball streams ([`download_stable_tarball`],
/// [`download_rc_tarball`]), where a 130–180 MiB compressed payload
/// over a slow uplink can take minutes of wall-clock to deliver.
/// Capping that with a per-request timeout would abort legitimate
/// downloads; bounding only the connect phase preserves the
/// dead-route guarantee while letting the body stream as long as the
/// upstream is making forward progress.
const SHARED_CLIENT_CONNECT_TIMEOUT: Duration = Duration::from_secs(10);

/// Return the process-wide shared [`reqwest::blocking::Client`]. First
/// call constructs it via `Client::builder()` with
/// `SHARED_CLIENT_CONNECT_TIMEOUT` applied; every subsequent call
/// returns a reference to the same instance. This helper is for
/// top-level CLI entries that want the default client.
///
/// Tests that need to verify a network round-trip (rather than a
/// cache hit) must NOT pass `shared_client()` to a cache-routed
/// helper (`cached_releases`, `cached_releases_with`,
/// [`fetch_latest_stable_version`], [`fetch_version_for_prefix`]) —
/// `RELEASES_CACHE` may already be populated by a peer test, in
/// which case the helper returns cached data and the network is
/// never touched. Construct a local `Client` and pass it to the
/// cache-routed helper to skip the cache; the pointer-equality gate
/// in `cached_releases_with` routes a non-singleton client to a
/// direct `fetch_releases` call against `RELEASES_URL` (the
/// production URL — the bypass skips the cache, NOT the URL). For
/// full URL injection (e.g. localhost mock server testing), call
/// either `fetch_releases` directly with the mock URL — see
/// `fetch_releases_against_localhost_mock_returns_parsed` — or use
/// the cache-aware seam `cached_releases_with_url`, which routes
/// the non-singleton bypass branch through the supplied URL while
/// preserving the singleton/cache routing identical to
/// `cached_releases_with`.
///
/// # Panics
///
/// Panics on the first call if `Client::builder().build()` fails to
/// construct a client. The documented failure modes are TLS backend
/// initialization (e.g. rustls/native-tls subsystem unreachable) and
/// are treated as setup bugs rather than runtime errors. The
/// `expect` here, rather than propagating the error, mirrors the
/// inherited behavior of `reqwest::blocking::Client::new()` (which
/// is itself an infallible wrapper around `builder().build().expect`).
pub fn shared_client() -> &'static Client {
    // Initializer for the singleton slot. `OnceLock::get_or_init`
    // only invokes it while the slot is still empty, so the builder
    // (and its potential panic) is confined to the first call.
    fn build_client() -> Client {
        let builder = Client::builder().connect_timeout(SHARED_CLIENT_CONNECT_TIMEOUT);
        builder.build().expect("build shared reqwest client")
    }

    SHARED_CLIENT.get_or_init(build_client)
}

/// Process-wide cache of the parsed `releases.json` payload.
/// Populated by [`cached_releases_with`] — through
/// [`cached_releases_with_url`], which performs the actual
/// `OnceLock::get` / `OnceLock::set` calls — on its first successful
/// singleton-path fetch; every subsequent singleton call returns a
/// clone of the cached vector without re-issuing the HTTP request.
/// Lifetime matches the process — `releases.json` does not change
/// underneath a single CLI invocation, so a per-process cache
/// cannot serve stale data in any way the user would notice.
///
/// Failures are NOT cached: a transient kernel.org outage that
/// errors the first call must allow a later caller to retry, since
/// the underlying network condition may have cleared. Storing
/// `Vec<Release>` rather than `Result<Vec<Release>>` enforces this
/// at the type level — there's no way to populate the cache with
/// a failure.
///
/// Companion to [`SHARED_CLIENT`]: both amortize per-invocation
/// network cost across the resolve pipeline. Without this cache,
/// `cargo ktstr test --kernel 6.10..6.12 --kernel 6.14..6.16`
/// fetches `releases.json` twice — once per Range spec — under
/// the rayon par_iter that drives `resolve_kernel_set`. With
/// the cache the first Range to reach `expand_kernel_range`
/// populates the slot; the second observes the populated slot
/// and skips the network entirely.
static RELEASES_CACHE: OnceLock<Vec<Release>> = OnceLock::new();

/// Cached `releases.json` lookup for callers that do not already
/// hold a `&Client`.
///
/// Hands the process-wide [`shared_client`] to
/// [`cached_releases_with`], so the request is routed through
/// [`RELEASES_CACHE`]. Meant for top-level CLI entries such as
/// [`crate::cli::expand_kernel_range`] (under the rayon-driven
/// `cargo ktstr` resolve pipeline) and
/// `crate::cli::fetch_active_prefixes` (the EOL-annotation pass).
/// Caching, race semantics, and fault-injection routing are all
/// documented on [`cached_releases_with`].
pub(crate) fn cached_releases() -> Result<Vec<Release>> {
    let singleton = shared_client();
    cached_releases_with(singleton)
}

/// Pointer-equality against the [`OnceLock`]-backed
/// [`shared_client`] singleton is the correct predicate because
/// `shared_client()` returns a stable `&'static Client` address.
/// The [`cached_releases_with`] gate uses this predicate to
/// decide whether to consult [`RELEASES_CACHE`]: the singleton
/// hits the cache, every other (test-constructed) `Client`
/// bypasses it and exercises the underlying [`fetch_releases`]
/// path.
///
/// Caveat: `shared_client().clone()` produces a distinct
/// `Client` at a different address even though it shares the
/// singleton's connection pool via the inner `Arc`, so the
/// clone bypasses the cache. Always pass `shared_client()`
/// directly — never a clone — when cache routing is desired.
///
/// Side-effect-free when [`SHARED_CLIENT`] is uninitialized:
/// no client can equal a not-yet-allocated singleton, so we
/// return `false` without triggering `get_or_init` — tests
/// that pass a local `Client` before any production code path
/// has touched the singleton skip the construction entirely.
fn is_shared_client(client: &Client) -> bool {
    // `get` (never `get_or_init`) keeps the check side-effect-free:
    // an uninitialized singleton yields `None`, which maps to `false`.
    SHARED_CLIENT
        .get()
        .is_some_and(|shared| std::ptr::eq(client, shared))
}

/// Unified cache-aware entry point for `releases.json`. Routes
/// the process-wide [`shared_client`] singleton through
/// [`RELEASES_CACHE`]; any other (test-constructed) `Client`
/// bypasses [`RELEASES_CACHE`] and calls [`fetch_releases`] with
/// [`RELEASES_URL`] directly — the cache is skipped but the
/// production URL is used.
///
/// Used by every in-file caller that already threads a `&Client`
/// — [`fetch_latest_stable_version`], [`fetch_version_for_prefix`],
/// [`latest_in_series`] — so production callers reuse
/// [`RELEASES_CACHE`] and tests still get cache-bypass via the
/// pointer-equality gate. [`cached_releases`] is the no-`Client`
/// wrapper for top-level CLI entries.
///
/// Tests that need URL injection on the bypass branch (e.g.
/// localhost mock server testing) call
/// [`cached_releases_with_url`] directly with their mock URL —
/// the URL-injectable form preserves identical routing
/// semantics. This wrapper is the production entry point and
/// pins the URL to [`RELEASES_URL`]; production code MUST go
/// through this wrapper. A singleton call with a non-RELEASES_URL
/// would otherwise populate [`RELEASES_CACHE`] with
/// non-production data and corrupt every later production
/// call — the singleton-path branch in
/// [`cached_releases_with_url`] guards against this in both
/// dev (`debug_assert!`) and release builds (fall back to
/// bypass), but routing every production call through this
/// wrapper makes the misuse impossible by construction.
/// Caching, race semantics, and the bypass-vs-cache routing
/// are fully documented on [`cached_releases_with_url`].
fn cached_releases_with(client: &Client) -> Result<Vec<Release>> {
    // This wrapper is what pins the URL to the production endpoint;
    // all cache/bypass routing happens in the URL-injectable form.
    let production_url = RELEASES_URL;
    cached_releases_with_url(client, production_url)
}

/// URL-injectable form of [`cached_releases_with`]. Production
/// always reaches this through the [`cached_releases_with`]
/// wrapper, which pins `url` to [`RELEASES_URL`]; the explicit
/// `url` parameter exists so the bypass-branch test can route
/// the non-singleton path through a localhost
/// [`std::net::TcpListener`]-backed mock instead of hitting real
/// kernel.org. Without this seam, the bypass test would either
/// (a) require a real network round-trip on every run, or
/// (b) accept a 5s timeout penalty on offline hosts to surface
/// `Err` as a bypass-confirmation signal — both costs the seam
/// eliminates.
///
/// Cache contract is identical to [`cached_releases_with`]:
/// non-singleton clients bypass [`RELEASES_CACHE`] and call
/// [`fetch_releases`] with `url`; the singleton routes through
/// the cache only when `url == RELEASES_URL` (consulting via
/// `OnceLock::get`, populating via `OnceLock::set` on miss). A
/// singleton call with a non-RELEASES_URL trips the
/// `debug_assert!` in dev builds and falls back to the bypass
/// behavior in release builds — fetches directly via `url`,
/// returns the result, never touches [`RELEASES_CACHE`]. The
/// cache only ever stores data fetched from the singleton +
/// RELEASES_URL combination, so a test that injects a mock URL
/// on either branch cannot pollute the production cache.
///
/// Failures are propagated without populating [`RELEASES_CACHE`],
/// so a transient kernel.org outage on the first call lets the
/// next caller retry. Storing `Vec<Release>` (not
/// `Result<Vec<Release>>`) enforces this at the type level.
///
/// Concurrent population on the singleton path is safe via the
/// `OnceLock::set` race: the loser's `set` returns `Err(clone)`
/// (the cloned vector that was passed in is moved back), the
/// returned `Err` is discarded via `let _ = …`, and the loser
/// returns its own original `fresh` vector. Both winner and
/// loser return content-equivalent data since both fetched the
/// same `releases.json`. Worst case under concurrent first
/// calls: both callers issue the network round-trip, only one
/// populates [`RELEASES_CACHE`]; every later call — from any
/// thread — observes the populated slot via the `get` fast-path
/// and skips the network.
fn cached_releases_with_url(client: &Client, url: &str) -> Result<Vec<Release>> {
    let via_singleton = is_shared_client(client);

    // Cache-poison guard (dev builds). Pairing the singleton with a
    // non-production URL would let the populate-on-miss path below
    // store non-production data that every later production caller
    // then reads through the `get` fast path. Production callers
    // arrive through `cached_releases_with`, which always supplies
    // RELEASES_URL, so the assertion can only trip for a test that
    // wires shared_client() to a mock URL. Non-singleton clients are
    // exempt: they never touch the cache.
    debug_assert!(
        !via_singleton || url == RELEASES_URL,
        "cached_releases_with_url: shared_client() must use RELEASES_URL \
         to avoid RELEASES_CACHE pollution — got url={url:?}, expected \
         RELEASES_URL ({RELEASES_URL:?}). Tests that need URL injection \
         must pass a non-singleton Client (which takes the bypass branch \
         above and never touches the cache).",
    );

    // Only the singleton + RELEASES_URL combination may consult or
    // populate the cache. Everything else fetches directly:
    //   * non-singleton clients — the test fault-injection bypass;
    //   * singleton + foreign URL — the release-build counterpart of
    //     the assertion above (`debug_assert!` is stripped in
    //     optimized builds), degrading the misuse to a slow per-call
    //     fetch instead of a permanently wrong cache.
    let cache_eligible = via_singleton && url == RELEASES_URL;
    if !cache_eligible {
        return fetch_releases(client, url);
    }

    match RELEASES_CACHE.get() {
        Some(hit) => Ok(hit.clone()),
        None => {
            let fetched = fetch_releases(client, url)?;
            // Losing a population race makes `set` hand the clone back
            // as `Err`; it is dropped and this caller still returns
            // its own `fetched` vector.
            let _ = RELEASES_CACHE.set(fetched.clone());
            Ok(fetched)
        }
    }
}

/// Kernel source tree ready for building, however it was obtained
/// (downloaded tarball, git clone, or local tree).
///
/// Marked `#[non_exhaustive]`: code outside this crate cannot build
/// the struct with a literal or destructure it exhaustively, so
/// fields can be added later without a breaking change.
#[non_exhaustive]
pub struct AcquiredSource {
    /// Path to the kernel source directory.
    pub source_dir: PathBuf,
    /// Cache key for this source (e.g. "6.14.2-tarball-x86_64-kc{kconfig_hash}").
    pub cache_key: String,
    /// Version string if known (e.g. "6.14.2", "6.15-rc3").
    pub version: Option<String>,
    /// How the source was acquired, with per-variant payload
    /// (git hash/ref for `Git`, source tree path and git hash for
    /// `Local`).
    pub kernel_source: crate::cache::KernelSource,
    /// Whether the source is a temporary directory that should be
    /// cleaned up after building.
    pub is_temp: bool,
    /// For local sources: whether the working tree is dirty.
    /// Dirty trees must not be cached.
    pub is_dirty: bool,
    /// For local sources: whether the source is an actual git
    /// repository. `true` when `gix::discover` succeeded and the
    /// crate could compute index + worktree dirty state; `false`
    /// for non-git source trees (tarball-extracted, rsync'd,
    /// hand-assembled) where dirty detection is impossible and
    /// the source is always cache-skipped pessimistically. Lets
    /// the cache-skip hint branch on whether `commit` / `stash`
    /// are actionable remediations (they aren't for non-git
    /// sources).
    ///
    /// For non-local sources (tarball, git clone) the field is
    /// set to `true` by convention — these paths are always
    /// `is_dirty = false`, so the cache-skip branch that reads
    /// `is_git` is never reached and the value is inert. Pinning
    /// to `true` (rather than leaving the field meaningless)
    /// keeps the invariant "is_git is meaningful only when
    /// is_dirty is true, but always set" so a future code path
    /// that reaches `is_git` outside the cache-skip context does
    /// not trip on an `is_git = false` under a known-good source.
    pub is_git: bool,
}

/// Name of the compile-time target architecture, paired with the
/// kernel boot image file name used for it.
///
/// Covers `x86_64` (`bzImage`) and `aarch64` (`Image`). On any other
/// target neither `INFO` definition is compiled in, so the function
/// fails to build rather than returning a wrong answer.
pub fn arch_info() -> (&'static str, &'static str) {
    #[cfg(target_arch = "x86_64")]
    const INFO: (&str, &str) = ("x86_64", "bzImage");
    #[cfg(target_arch = "aarch64")]
    const INFO: (&str, &str) = ("aarch64", "Image");

    INFO
}

/// Parse a version string into its major version for URL construction.
///
/// "6.14.2" -> 6, "6.15-rc3" -> 6.
fn major_version(version: &str) -> Result<u32> {
    let major_str = version
        .split('.')
        .next()
        .ok_or_else(|| anyhow!("invalid version: {version}"))?;
    major_str
        .parse::<u32>()
        .with_context(|| format!("invalid major version in {version}"))
}

/// Whether `version` names a release candidate, i.e. contains the
/// `-rc` marker (as in `"6.15-rc3"`). The match is case-sensitive.
///
/// RC releases use a different URL pattern and gzip compression
/// (vs xz for stable).
fn is_rc(version: &str) -> bool {
    const RC_MARKER: &str = "-rc";
    version.contains(RC_MARKER)
}

/// One (`moniker`, `version`) row from kernel.org's `releases.json`.
///
/// A named struct instead of a bare `(String, String)` tuple so every
/// call site reads its field by name (`r.moniker`, `r.version`) rather
/// than positional destructuring — the two strings are trivially
/// swappable at a tuple-destructure call site, and a silent swap
/// would mis-drive `is_skippable_release_moniker` while the
/// now-misnamed "moniker" string flows into `version_prefix`
/// downstream. Naming the fields removes that class of bug at the
/// type-checker level and shows up in IDE hints on every iteration
/// site.
///
/// Both fields are owned `String` (not `&str`) because the values are
/// parsed out of a `reqwest::Response` body whose lifetime ends when
/// `fetch_releases` returns; downstream callers iterate the vector
/// long after that borrow would dangle.
///
/// `Clone` is relied on by [`cached_releases_with_url`], which hands
/// out clones of the cached `Vec<Release>` rather than references
/// into [`RELEASES_CACHE`].
#[derive(Clone, Debug)]
pub(crate) struct Release {
    /// releases.json `moniker` field — stable / longterm / mainline /
    /// linux-next / etc. Consumed by
    /// [`is_skippable_release_moniker`] and by
    /// [`fetch_latest_stable_version`]'s stable/longterm filter.
    pub moniker: String,
    /// releases.json `version` field — e.g. `"6.14.2"`, `"6.15-rc3"`,
    /// `"6.16-rc2-next-20260420"`. Consumed by
    /// [`version_tuple`], [`patch_level`], and
    /// `cli::version_prefix`.
    pub version: String,
}

/// Whether the version-resolution pipeline should ignore releases
/// carrying this releases.json moniker.
///
/// Currently only `linux-next` is skipped. It is a rolling
/// integration branch whose version strings carry a date suffix
/// rather than a stable tag, so it does not fit the
/// major.minor.patch resolution model used by `latest_in_series`,
/// `fetch_version_for_prefix`, and `cli::fetch_active_prefixes`.
/// All three sites filter through this helper, making it the single
/// place to extend if another moniker ever needs skipping. The
/// comparison is exact and case-sensitive.
pub(crate) fn is_skippable_release_moniker(moniker: &str) -> bool {
    matches!(moniker, "linux-next")
}

/// Newest release in `version`'s `major.minor` series, according to
/// releases.json.
///
/// For `"6.14"` (or any `"6.14.x"`) this yields e.g.
/// `Some("6.14.10")` when releases.json lists that series. Yields
/// `None` when `version` has fewer than two dot-separated
/// components, when releases.json cannot be obtained, or when no
/// listed release belongs to the series (EOL or invalid).
///
/// Releases with a skippable moniker, and releases for which
/// `version_tuple` yields `None`, are ignored. Among equal tuples
/// the entry listed first wins.
fn latest_in_series(client: &Client, version: &str) -> Option<String> {
    // Series key: the first two dot-separated components.
    let mut components = version.split('.');
    let (Some(major), Some(minor)) = (components.next(), components.next()) else {
        return None;
    };
    let series = format!("{major}.{minor}");

    // Routes through [`RELEASES_CACHE`] for the singleton; see
    // [`cached_releases_with`] for the bypass gate.
    let releases = cached_releases_with(client).ok()?;

    let mut newest: Option<(&str, (u32, u32, u32))> = None;
    for release in &releases {
        if is_skippable_release_moniker(&release.moniker) {
            continue;
        }
        // Accept the bare series ("6.14") or a dotted continuation
        // ("6.14.3"); reject look-alikes such as "6.140" or "6.14-rc1".
        let Some(rest) = release.version.strip_prefix(series.as_str()) else {
            continue;
        };
        if !(rest.is_empty() || rest.starts_with('.')) {
            continue;
        }
        let Some(candidate) = version_tuple(&release.version) else {
            continue;
        };
        // Strictly-greater comparison: the first of equal tuples stays.
        let improves = match newest {
            Some((_, current)) => candidate > current,
            None => true,
        };
        if improves {
            newest = Some((release.version.as_str(), candidate));
        }
    }
    newest.map(|(found, _)| found.to_owned())
}

/// Compose the user-facing "version not found" message.
///
/// When releases.json lists a release in the same major.minor series
/// that differs from `version`, the message also names it as the
/// latest `{major.minor}.x`. Otherwise only the plain "not found"
/// text is produced.
fn version_not_found_msg(client: &Client, version: &str) -> String {
    let suggestion =
        latest_in_series(client, version).filter(|latest| latest.as_str() != version);
    let Some(latest) = suggestion else {
        return format!("version {version} not found");
    };

    // The series label is only needed when a suggestion is printed.
    let mut components = version.split('.');
    let prefix = match (components.next(), components.next()) {
        (Some(major), Some(minor)) => format!("{major}.{minor}"),
        _ => version.to_string(),
    };
    format!("version {version} not found. latest {prefix}.x: {latest}")
}

/// Reject responses where the server returned HTML instead of a binary
/// archive. Some CDN error pages return 200 with text/html.
///
/// The `Content-Type` value is matched case-insensitively, because
/// media type names are case-insensitive: `Text/HTML; charset=UTF-8`
/// is rejected just like `text/html`. A response with no
/// `Content-Type` header, or with a value that `HeaderValue::to_str`
/// cannot render as a string, is accepted.
///
/// # Errors
///
/// Returns an error naming `url` when the response declares an HTML
/// body.
fn reject_html_response(response: &reqwest::blocking::Response, url: &str) -> Result<()> {
    let declares_html = response
        .headers()
        .get(reqwest::header::CONTENT_TYPE)
        .and_then(|value| value.to_str().ok())
        // Lowercase before matching so the comparison is case-insensitive.
        .is_some_and(|value| value.to_ascii_lowercase().contains("text/html"));
    if declares_html {
        anyhow::bail!(
            "download {url}: server returned HTML instead of tarball (URL may be invalid)"
        );
    }
    Ok(())
}

/// Announce a download on stderr, including its size in MiB (one
/// decimal place) when the response reports a content length.
///
/// `cli_label` prefixes the diagnostic line so the message matches the
/// binary the user invoked (`"ktstr"` vs `"cargo ktstr"`).
fn print_download_size(response: &reqwest::blocking::Response, url: &str, cli_label: &str) {
    const BYTES_PER_MIB: f64 = 1024.0 * 1024.0;

    match response.content_length() {
        Some(len) => {
            let mib = len as f64 / BYTES_PER_MIB;
            eprintln!("{cli_label}: downloading {url} ({mib:.1} MiB)");
        }
        None => eprintln!("{cli_label}: downloading {url}"),
    }
}

/// Maximum tolerated stretch of "no body bytes received" before a
/// streaming download is declared stalled. Catches a TCP connection
/// that completed handshake (so connect_timeout doesn't fire) but
/// then silently stops delivering body data — a common CDN failure
/// mode where keepalive holds the socket open while the upstream
/// origin is unreachable. The 60s value is generous enough that a
/// real slow uplink delivering chunks every few seconds never
/// triggers it, but tight enough that a wedged connection surfaces
/// before the run's overall test timeout.
///
/// Per the `no_progress_timeout` field documentation on
/// [`DownloadStream`], the value is copied into each stream at
/// construction rather than read on every `read()`.
const DOWNLOAD_NO_PROGRESS_TIMEOUT: Duration = Duration::from_secs(60);

/// Streaming `Read` adapter for kernel tarball downloads.
///
/// Wraps the [`reqwest::blocking::Response`] body to do two things
/// the bare response cannot:
///
/// 1. **Body-progress watchdog.** Tracks `last_progress` (the
///    instant of the last successful read with `n > 0`) and errors
///    when more than [`DOWNLOAD_NO_PROGRESS_TIMEOUT`] elapses
///    between byte-producing reads. Without this, a CDN edge that
///    keepalives the socket but stops delivering body bytes would
///    leave the download blocked indefinitely (reqwest's per-read
///    timeout reset on every empty wakeup, and the connect-phase
///    timeout already passed during handshake). The check fires
///    BEFORE the inner `read()` so a stalled inner reader cannot
///    out-block the watchdog.
///
/// 2. **Streaming SHA-256.** Updates a [`Sha256`] hasher with every
///    byte that flows past, so the caller can verify the finalized
///    digest against an expected value (parsed out of
///    `sha256sums.asc`) without a second pass over the data. The
///    hasher only sees bytes that were actually consumed by the
///    decoder + tar extractor, which is the same set of bytes that
///    landed on disk — so a partial download that errored midway
///    produces a hash over only what we successfully streamed,
///    preventing false-positive verifications on truncated input.
///
/// Sits between [`reqwest::blocking::Response`] and the
/// decompression layer (`XzDecoder` / `GzDecoder`); both
/// decompressors expose `into_inner()` so the wrapper can be
/// recovered after extraction completes (see
/// [`Self::finalize`]).
///
/// Generic over the wrapped reader `R: Read` rather than hard-wired
/// to the reqwest response type.
/// NOTE(review): presumably this lets tests drive the wrapper with an
/// in-memory reader — confirm against the `impl` block and its tests.
struct DownloadStream<R: Read> {
    /// Underlying reqwest response body. Owned because `XzDecoder`
    /// and `GzDecoder` take ownership of their inner reader, so
    /// the wrapper must hold the response by value rather than by
    /// reference.
    inner: R,
    /// Running SHA-256 hasher updated on every byte-producing read.
    /// Consumed by [`DownloadStream::finalize`] (which takes `self`
    /// by value); the call site recovers the wrapper from inside
    /// the decoder + tar archive chain via `into_inner` before
    /// finalizing.
    hasher: Sha256,
    /// Total body bytes read so far. Surfaced in the watchdog
    /// error message so an operator triaging "no progress" can see
    /// how many bytes did arrive before the stall — distinguishing
    /// "connection dropped after a few bytes" from "connection
    /// dropped after most of the payload".
    bytes_total: u64,
    /// `Instant` of the last successful read with `n > 0`. Set at
    /// construction (not on first read) so a connection that wins
    /// the handshake but never delivers any body bytes still
    /// trips the watchdog after [`DOWNLOAD_NO_PROGRESS_TIMEOUT`]
    /// rather than waiting for an indeterminate pre-data window.
    last_progress: Instant,
    /// Tolerated stretch of zero-progress time. Pinned at
    /// construction from [`DOWNLOAD_NO_PROGRESS_TIMEOUT`]; held in
    /// the struct rather than read from the constant on every
    /// `read()` so a future per-call override (e.g. shorter
    /// timeouts in tests) lands without touching the watchdog
    /// logic.
    no_progress_timeout: Duration,
}

impl<R: Read> DownloadStream<R> {
    /// Wrap `inner` with an empty hasher, a zero byte count and the
    /// production no-progress budget
    /// ([`DOWNLOAD_NO_PROGRESS_TIMEOUT`]).
    ///
    /// The watchdog clock starts here, at construction, rather than
    /// on the first `read()`: a body that never produces a single
    /// byte must still time out. Every byte-producing read restarts
    /// the clock.
    fn new(inner: R) -> Self {
        let hasher = Sha256::new();
        let started = Instant::now();
        Self {
            inner,
            hasher,
            bytes_total: 0,
            last_progress: started,
            no_progress_timeout: DOWNLOAD_NO_PROGRESS_TIMEOUT,
        }
    }

    /// Tear the wrapper down into `(hex_digest, bytes_total)`.
    ///
    /// The digest is hex-encoded via `hex::encode` (lowercase), the
    /// same shape kernel.org uses in `sha256sums.asc`, so callers
    /// can compare it to a manifest entry directly (e.g. with
    /// `eq_ignore_ascii_case`) without re-encoding either side.
    fn finalize(self) -> (String, u64) {
        let Self {
            hasher,
            bytes_total,
            ..
        } = self;
        let digest = hasher.finalize();
        (hex::encode(digest), bytes_total)
    }
}

impl<R: Read> Read for DownloadStream<R> {
    /// Read from the wrapped body, enforcing the no-progress
    /// watchdog and feeding every returned byte to the hasher.
    ///
    /// Fails with [`std::io::ErrorKind::TimedOut`] — without
    /// touching the inner reader — once more than
    /// `no_progress_timeout` has passed since the last
    /// byte-producing read. Errors from the inner reader are
    /// returned unchanged.
    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
        // The gate runs before delegating so that an already-expired
        // window is never granted another inner read. A read that is
        // currently blocked in a syscall cannot be interrupted from
        // here; the per-request timeout configured via
        // `RequestBuilder::timeout` covers that case. This check
        // bounds the cumulative stall across successive calls.
        let stalled_for = self.last_progress.elapsed();
        if stalled_for > self.no_progress_timeout {
            let message = format!(
                "download stalled: no body bytes for {}s after {} bytes received",
                stalled_for.as_secs(),
                self.bytes_total,
            );
            return Err(std::io::Error::new(std::io::ErrorKind::TimedOut, message));
        }

        let n = self.inner.read(buf)?;
        // A zero-length read (EOF) is deliberately not counted as
        // progress: refreshing `last_progress` here would let a
        // decoder that keeps polling past EOF hold the watchdog off
        // forever.
        if n > 0 {
            self.hasher.update(&buf[..n]);
            self.bytes_total += n as u64;
            self.last_progress = Instant::now();
        }
        Ok(n)
    }
}

/// Per-request body-stream timeout passed to
/// [`reqwest::blocking::RequestBuilder::timeout`] for tarball
/// downloads. The blocking client treats this as a per-`read()`
/// deadline (reset on every successful read) — NOTE(review): this
/// describes the reqwest version in use; confirm on upgrade. It
/// complements the [`DownloadStream`] watchdog: reqwest's deadline
/// kills a single stalled syscall, and the watchdog observes the
/// cumulative no-progress window across multiple reads. Set
/// generously (5 minutes) because a slow but progressing connection
/// can legitimately take that long for a single read on a large CDN
/// chunk; the watchdog provides the tighter 60s no-progress bound.
const DOWNLOAD_REQUEST_READ_TIMEOUT: Duration = Duration::from_secs(300);

/// Total request timeout for [`fetch_stable_sha256sums`]: bounds
/// the wall-clock window for the single small-body GET that
/// retrieves the cleartext-signed checksum manifest.
///
/// The body is the `sha256sums.asc` cleartext block — typically a
/// few KiB of `<hash>  <filename>` lines plus a PGP signature
/// trailer — so a tight 30 s ceiling fits the realistic case
/// (sub-second on a healthy CDN edge) while still bounding the
/// failure mode this guards against: a stalled CDN that accepts the
/// connection but never delivers bytes.
///
/// Without a per-request timeout the shared client only carries
/// [`SHARED_CLIENT_CONNECT_TIMEOUT`] (handshake-only), so a stalled
/// body read would hang the build indefinitely. The caller treats
/// any error from [`fetch_stable_sha256sums`] as "no expected hash
/// available" and downgrades verification to a warning, so a 30 s
/// timeout that fires on a hung CDN surfaces as an
/// unverified-but-progressing download rather than a wedged build.
const SHA256SUMS_REQUEST_TIMEOUT: Duration = Duration::from_secs(30);

/// Download the `sha256sums.asc` manifest that kernel.org publishes
/// next to the stable tarballs of one major series, i.e.
/// `https://cdn.kernel.org/pub/linux/kernel/v{major}.x/sha256sums.asc`.
///
/// The request is bounded by [`SHA256SUMS_REQUEST_TIMEOUT`].
///
/// # Errors
///
/// Fails when the request cannot be sent, when the server answers
/// with a non-success status, or when the body cannot be read as
/// text. The caller treats every failure as "no expected hash
/// available" and downgrades verification to a warning.
fn fetch_stable_sha256sums(client: &Client, major: u32) -> Result<String> {
    let url = format!("https://cdn.kernel.org/pub/linux/kernel/v{major}.x/sha256sums.asc");
    tracing::info!(%url, "fetching kernel tarball sha256sums (requires network)");

    let request = client.get(&url).timeout(SHA256SUMS_REQUEST_TIMEOUT);
    let response = request.send().with_context(|| format!("fetch {url}"))?;

    let status = response.status();
    if !status.is_success() {
        anyhow::bail!("fetch {url}: HTTP {}", status);
    }

    let manifest = response
        .text()
        .with_context(|| format!("read body of {url}"))?;
    Ok(manifest)
}

/// Extract the SHA-256 hex digest for `target_filename` from the
/// cleartext-signed `sha256sums.asc` body.
///
/// kernel.org publishes `sha256sums.asc` as a PGP-cleartext-signed
/// document: a `-----BEGIN PGP SIGNED MESSAGE-----` header, an
/// optional `Hash:` line, a blank line, the cleartext body
/// (`<64-hex-chars>  <filename>` per line), then a
/// `-----BEGIN PGP SIGNATURE-----` block. We only need the
/// cleartext body — signature verification is a separate concern
/// (the user-facing instruction is "If no expected hash available,
/// log warning", not "require signature").
///
/// A line is accepted only when it consists of exactly two
/// whitespace-separated tokens: a 64-character hex digest followed
/// by a filename equal to `target_filename`. Lines with extra
/// trailing tokens are not well-formed `sha256sum` output and are
/// skipped, so an ambiguous line can never supply the digest used
/// to authenticate a download.
///
/// Returns `Some(lowercase_hex)` for the first accepted line.
/// Returns `None` if no well-formed entry for the target filename
/// appears in the manifest (e.g. the upstream rotated or removed
/// the entry).
fn parse_sha256_for_file(manifest: &str, target_filename: &str) -> Option<String> {
    // Strip the PGP signature trailer if present. Everything after
    // the signature marker is armored signature data that never
    // contains checksum lines.
    let body = manifest
        .split_once("-----BEGIN PGP SIGNATURE-----")
        .map_or(manifest, |(before, _)| before);

    body.lines().find_map(|line| {
        // `split_whitespace` already ignores leading and trailing
        // whitespace, so no separate trim is needed.
        let mut tokens = line.split_whitespace();
        let hash = tokens.next()?;
        let name = tokens.next()?;
        // Exactly two tokens: anything after the filename means the
        // line is not `<hash>  <filename>`.
        if tokens.next().is_some() || name != target_filename {
            return None;
        }
        let is_sha256_hex = hash.len() == 64 && hash.bytes().all(|b| b.is_ascii_hexdigit());
        is_sha256_hex.then(|| hash.to_ascii_lowercase())
    })
}

/// Compare a computed digest with the expected one, ignoring ASCII
/// case.
///
/// `url` is used only for the diagnostic. On mismatch the error
/// names the URL and prints both digests in lowercase hex so they
/// can be copy-pasted directly, and points at `--skip-sha256` as
/// the escape hatch for a tarball that was replaced upstream.
/// Keeping the comparison here gives it a single home instead of
/// being repeated at each call site.
fn verify_sha256(actual_hex: &str, expected_hex: &str, url: &str) -> Result<()> {
    if !actual_hex.eq_ignore_ascii_case(expected_hex) {
        let expected = expected_hex.to_ascii_lowercase();
        let actual = actual_hex.to_ascii_lowercase();
        anyhow::bail!(
            "sha256 mismatch for {url}: expected {}, got {}. \
             If cdn.kernel.org updated this tarball in-place, \
             retry with --skip-sha256 to bypass verification.",
            expected,
            actual,
        );
    }
    Ok(())
}

/// Look up the SHA-256 digest a stable tarball is expected to have,
/// using cdn.kernel.org's `sha256sums.asc` manifest.
///
/// Possible results:
/// - `Some(hex)`: the manifest was fetched and contained a
///   well-formed entry for `tarball_name`.
/// - `None` after a bypass warning: `skip_sha256` was set, so the
///   operator explicitly opted out; no network request is made.
/// - `None` after a cause-specific warning: the manifest could not
///   be fetched, or it had no entry for `tarball_name`. The
///   download is allowed to continue unverified so that a transient
///   cdn.kernel.org outage or a manifest format change does not
///   block every build.
///
/// This fallback is intentionally lenient and trades strict
/// authentication for availability. Someone on the network path who
/// can suppress `sha256sums.asc` while serving a tampered
/// `linux-{version}.tar.xz` could take advantage of it; operators
/// who need strict verification should pin the source with
/// `--source` or `--git` instead of using the download path. Each
/// `None` path logs a warning so the lost guarantee is visible
/// during triage.
///
/// Split out of [`download_stable_tarball`] so the gate can be unit
/// tested without mocking the network: `client` is only used when
/// `skip_sha256` is `false`.
fn resolve_expected_sha256(
    client: &Client,
    major: u32,
    tarball_name: &str,
    skip_sha256: bool,
) -> Option<String> {
    if skip_sha256 {
        tracing::warn!(
            tarball = %tarball_name,
            "--skip-sha256: bypassing checksum verification — the \
             downloaded tarball will not be authenticated against \
             cdn.kernel.org's sha256sums.asc manifest. Use only when \
             upstream has updated a tarball in-place and the manifest \
             is mismatched.",
        );
        return None;
    }

    // Best effort from here on: whatever goes wrong (transport,
    // status, parse, missing entry) is logged with its cause and
    // turned into `None`, never into a hard failure.
    let manifest = match fetch_stable_sha256sums(client, major) {
        Ok(text) => text,
        Err(err) => {
            tracing::warn!(
                error = %format!("{err:#}"),
                "failed to fetch sha256sums.asc; download will proceed \
                 without checksum verification. Pass --skip-sha256 to \
                 bypass the manifest fetch when the manifest is known \
                 to be unavailable.",
            );
            return None;
        }
    };

    let expected = parse_sha256_for_file(&manifest, tarball_name);
    if expected.is_none() {
        tracing::warn!(
            tarball = %tarball_name,
            "sha256sums.asc fetched but no entry for {tarball_name}; \
             download will proceed without checksum verification. \
             Pass --skip-sha256 to bypass the manifest fetch when \
             the entry is known to be absent.",
        );
    }
    expected
}

/// Download a stable kernel tarball (.tar.xz) from cdn.kernel.org.
///
/// Streams the body through a [`DownloadStream`] watchdog so a
/// stalled connection (no body bytes for
/// [`DOWNLOAD_NO_PROGRESS_TIMEOUT`]) surfaces as an error rather
/// than blocking indefinitely. Computes SHA-256 over the streamed
/// bytes and verifies against the digest in
/// `sha256sums.asc` for the matching `linux-{version}.tar.xz`
/// entry; if the manifest fetch / parse fails (transient outage,
/// schema drift, missing entry), logs a warning and continues
/// without verification rather than failing the whole download.
///
/// `skip_sha256 = true` bypasses the manifest fetch entirely and
/// emits a single bypass warning. Intended for the case where
/// cdn.kernel.org has updated a tarball in-place (a new point
/// release reusing the same URL) and the manifest is stale or
/// mismatched. Unverified downloads are a security-sensitive
/// fallback — the bypass warning surfaces the lost guarantee on
/// the operator's diagnostic stream.
fn download_stable_tarball(
    client: &Client,
    version: &str,
    dest_dir: &Path,
    cli_label: &str,
    skip_sha256: bool,
) -> Result<PathBuf> {
    let major = major_version(version)?;
    let tarball_name = format!("linux-{version}.tar.xz");
    let url = format!("https://cdn.kernel.org/pub/linux/kernel/v{major}.x/{tarball_name}");

    let expected_sha256 = resolve_expected_sha256(client, major, &tarball_name, skip_sha256);

    tracing::info!(%url, "downloading stable kernel tarball (requires network)");
    let response = client
        .get(&url)
        .timeout(DOWNLOAD_REQUEST_READ_TIMEOUT)
        .send()
        .with_context(|| format!("download {url}"))?;
    if !response.status().is_success() {
        if response.status() == reqwest::StatusCode::NOT_FOUND {
            anyhow::bail!("{}", version_not_found_msg(client, version));
        }
        anyhow::bail!("download {url}: HTTP {}", response.status());
    }
    reject_html_response(&response, &url)?;
    print_download_size(&response, &url, cli_label);

    eprintln!("{cli_label}: extracting tarball (xz)");
    // Stage extraction inside `dest_dir` (same filesystem) so the
    // final `fs::rename` into place is atomic and a verification
    // failure leaves `dest_dir` untouched. A bad mirror that serves
    // a wrong-version archive — or sneaks stray top-level entries
    // alongside `linux-{version}/` — gets caught after extraction
    // but before anything lands in `dest_dir`. The TempDir's Drop
    // sweeps every entry the malicious archive deposited.
    let staging =
        tempfile::TempDir::new_in(dest_dir).with_context(|| "create extraction staging dir")?;
    let stream = DownloadStream::new(response);
    let decoder = xz2::read::XzDecoder::new(stream);
    let mut archive = tar::Archive::new(decoder);
    archive
        .unpack(staging.path())
        .with_context(|| "extract tarball")?;

    // Recover the watchdog wrapper from inside the decoder/archive
    // chain to read the streaming digest. `into_inner` on tar +
    // xz2 each peel one layer of the chain. Done after a successful
    // unpack so we don't compute over a partial stream.
    let stream = archive.into_inner().into_inner();
    let (actual_hex, bytes_total) = stream.finalize();
    if let Some(expected) = expected_sha256.as_deref() {
        verify_sha256(&actual_hex, expected, &url)?;
        eprintln!("{cli_label}: sha256 verified ({bytes_total} bytes, hash {actual_hex})");
    } else if !skip_sha256 {
        // Skip path already emitted its bespoke bypass warning
        // before the download; firing again here under "no
        // expected sha256 available" would mislead — that wording
        // implies a fallback, not an explicit operator opt-out.
        tracing::warn!(
            url = %url,
            bytes = bytes_total,
            sha256 = %actual_hex,
            "no expected sha256 available for {url}; computed digest \
             {actual_hex} over {bytes_total} bytes is unverified",
        );
    }

    let source_dir = promote_staged_kernel_tree(&staging, dest_dir, version)?;
    Ok(source_dir)
}

/// Verify a kernel tarball's staged extraction contains exactly one
/// top-level entry — a real directory named `linux-{version}` — and
/// atomically rename it into `dest_dir/linux-{version}`.
///
/// Returns the final path of the promoted source tree.
///
/// # Errors
///
/// Bails — leaving `dest_dir` untouched — when the staging dir
/// holds a stray entry, when the `linux-{version}` entry is not a
/// directory (a regular file or a symlink with that name is
/// rejected; the type check does not follow symlinks), when the
/// expected directory is missing, or when the rename fails.
///
/// The caller's `TempDir` outlives this helper, so its Drop sweeps
/// any residual staging contents whether this returns Ok or Err.
fn promote_staged_kernel_tree(
    staging: &tempfile::TempDir,
    dest_dir: &Path,
    version: &str,
) -> Result<PathBuf> {
    let expected_name = format!("linux-{version}");
    let mut found_inner = false;
    for entry in std::fs::read_dir(staging.path()).with_context(|| "read staging dir entries")? {
        let entry = entry.with_context(|| "iterate staging dir entry")?;
        let name = entry.file_name();
        if name != std::ffi::OsStr::new(&expected_name) {
            anyhow::bail!(
                "tarball contains unexpected top-level entry {name:?}; \
                 expected only {expected_name}/"
            );
        }
        // A matching name is not enough: an archive could carry a
        // symlink or plain file called `linux-{version}`, and
        // renaming that into `dest_dir` would hand callers something
        // other than an extracted source tree. `DirEntry::file_type`
        // reports the entry itself rather than a symlink's target.
        let file_type = entry
            .file_type()
            .with_context(|| format!("stat staged entry {expected_name}"))?;
        if !file_type.is_dir() {
            anyhow::bail!(
                "tarball top-level entry {expected_name} is not a directory; \
                 expected only {expected_name}/"
            );
        }
        found_inner = true;
    }
    if !found_inner {
        anyhow::bail!("expected directory {expected_name} after extraction");
    }
    let inner = staging.path().join(&expected_name);
    let source_dir = dest_dir.join(&expected_name);
    std::fs::rename(&inner, &source_dir)
        .with_context(|| format!("rename {} -> {}", inner.display(), source_dir.display()))?;
    Ok(source_dir)
}

/// Download an RC kernel tarball (.tar.gz) from git.kernel.org.
///
/// Streams the body through a [`DownloadStream`] watchdog so a
/// stalled connection surfaces as an error rather than blocking
/// indefinitely. RC tarballs are dynamically generated by gitweb
/// at request time and have no published `sha256sums` manifest, so
/// this path always logs a warning that the digest is unverified —
/// it is computed and surfaced for diagnostic value (operators can
/// pin it manually) but never compared to an authoritative source.
fn download_rc_tarball(
    client: &Client,
    version: &str,
    dest_dir: &Path,
    cli_label: &str,
) -> Result<PathBuf> {
    let url = format!("https://git.kernel.org/torvalds/t/linux-{version}.tar.gz");
    tracing::info!(%url, "downloading RC kernel tarball (requires network)");

    let response = client
        .get(&url)
        .timeout(DOWNLOAD_REQUEST_READ_TIMEOUT)
        .send()
        .with_context(|| format!("download {url}"))?;
    if response.status() == reqwest::StatusCode::NOT_FOUND {
        anyhow::bail!(
            "RC tarball not found: {url}\n  \
             RC releases are removed from git.kernel.org after the stable version ships."
        );
    }
    if !response.status().is_success() {
        anyhow::bail!("download {url}: HTTP {}", response.status());
    }
    reject_html_response(&response, &url)?;
    print_download_size(&response, &url, cli_label);

    eprintln!("{cli_label}: extracting tarball (gzip)");
    // Stage extraction inside `dest_dir` (same filesystem) so the
    // final atomic rename keeps `dest_dir` clean when a bad mirror
    // serves a wrong-version archive or sneaks stray top-level
    // entries past the archive boundary. RC tarballs have no
    // upstream sha256 manifest, so structural verification is the
    // only defence against a hostile gitweb response.
    let staging =
        tempfile::TempDir::new_in(dest_dir).with_context(|| "create extraction staging dir")?;
    let stream = DownloadStream::new(response);
    let decoder = flate2::read::GzDecoder::new(stream);
    let mut archive = tar::Archive::new(decoder);
    archive
        .unpack(staging.path())
        .with_context(|| "extract tarball")?;

    // Surface the streamed digest as a warning. RC tarballs have
    // no upstream manifest, so verification is impossible — but
    // emitting the hash gives an operator a value they can
    // capture for offline pinning if they want to detect drift on
    // re-fetch.
    let stream = archive.into_inner().into_inner();
    let (actual_hex, bytes_total) = stream.finalize();
    tracing::warn!(
        url = %url,
        bytes = bytes_total,
        sha256 = %actual_hex,
        "no expected sha256 available for {url} (RC tarballs are \
         dynamically generated by git.kernel.org and have no \
         published manifest); computed digest {actual_hex} over \
         {bytes_total} bytes is unverified",
    );

    let source_dir = promote_staged_kernel_tree(&staging, dest_dir, version)?;
    Ok(source_dir)
}

/// Fetch and extract a kernel tarball, picking the RC or the stable
/// download path from the shape of `version`.
///
/// `cli_label` is the prefix for status lines (e.g. `"ktstr"` or
/// `"cargo ktstr"`).
///
/// `skip_sha256` only matters for stable versions, where it
/// bypasses the `sha256sums.asc` check performed by
/// `download_stable_tarball`. RC tarballs (`download_rc_tarball`)
/// have no published manifest, always run unverified and emit their
/// own warning, so the flag has no effect there. `--source` and
/// `--git` callers never reach this function.
///
/// On success the returned [`AcquiredSource`] points at the
/// extracted tree and carries a cache key built from the version,
/// the architecture and `crate::cache_key_suffix()`.
pub fn download_tarball(
    client: &Client,
    version: &str,
    dest_dir: &Path,
    cli_label: &str,
    skip_sha256: bool,
) -> Result<AcquiredSource> {
    let (arch, _) = arch_info();

    let downloaded = if is_rc(version) {
        download_rc_tarball(client, version, dest_dir, cli_label)
    } else {
        download_stable_tarball(client, version, dest_dir, cli_label, skip_sha256)
    };
    let source_dir = downloaded?;

    let cache_key = format!("{version}-tarball-{arch}-kc{}", crate::cache_key_suffix());
    Ok(AcquiredSource {
        source_dir,
        cache_key,
        version: Some(version.to_string()),
        kernel_source: crate::cache::KernelSource::Tarball,
        is_temp: true,
        is_dirty: false,
        // NOTE(review): `is_git: true` alongside
        // `KernelSource::Tarball` looks surprising — an extracted
        // tarball is presumably not a git checkout. Confirm against
        // the `AcquiredSource` field definition and its consumers.
        is_git: true,
    })
}

/// Extract the patch component of a dotted kernel version.
///
/// - Three segments: the third one parsed as `u32`
///   (`"6.12.8"` → `Some(8)`); `None` if it is not a number.
/// - Two segments: always `Some(0)` (`"7.0"` → `Some(0)`); the
///   segments themselves are not validated.
/// - Any other segment count: `None` (`"abc"` → `None`).
fn patch_level(version: &str) -> Option<u32> {
    let mut segments = version.split('.');
    // Major and minor must both be present; their content is not
    // inspected.
    segments.next()?;
    segments.next()?;
    match (segments.next(), segments.next()) {
        (None, _) => Some(0),
        (Some(patch), None) => patch.parse().ok(),
        (Some(_), Some(_)) => None,
    }
}

/// Production URL for `releases.json`. Tests call
/// [`fetch_releases`] directly with a localhost mock URL.
pub(crate) const RELEASES_URL: &str = "https://www.kernel.org/releases.json";

/// GET `releases.json` from `url` and parse it into [`Release`]
/// records. Always goes to the network; no cache is consulted.
///
/// In production this is reached through [`cached_releases_with`]
/// (or [`cached_releases`]) with [`RELEASES_URL`]: the cache helper
/// calls it on a cache miss for the singleton client, or on the
/// bypass branch for any other client. Tests that want to exercise
/// the raw GET without the cache layer call it directly with their
/// own `Client` and a localhost URL served by a TcpListener-backed
/// mock returning canned `releases.json` content.
///
/// # Errors
///
/// Fails when the request cannot be sent, on a non-success status,
/// when the body cannot be read, or when [`parse_releases_body`]
/// rejects the body.
pub(crate) fn fetch_releases(client: &Client, url: &str) -> Result<Vec<Release>> {
    tracing::info!(%url, "fetching kernel.org releases index (requires network)");
    let request = client.get(url);
    let response = request.send().with_context(|| format!("fetch {url}"))?;

    let status = response.status();
    if !status.is_success() {
        anyhow::bail!("fetch {url}: HTTP {}", status);
    }

    let text = response.text().with_context(|| "read response body")?;
    parse_releases_body(&text)
}

/// Parse the JSON text of `releases.json` into [`Release`] records.
///
/// The document must be valid JSON with a top-level `releases`
/// array; otherwise an error is returned. Within the array, a row
/// is kept only when both `moniker` and `version` are present and
/// are strings. Rows failing that test are dropped rather than
/// failing the whole parse, and the number of dropped rows is
/// reported with a warning.
fn parse_releases_body(body: &str) -> Result<Vec<Release>> {
    let json: serde_json::Value =
        serde_json::from_str(body).with_context(|| "parse releases.json")?;
    let Some(rows) = json.get("releases").and_then(|r| r.as_array()) else {
        return Err(anyhow!("releases.json: missing releases array"));
    };

    let input_rows = rows.len();
    let mut parsed: Vec<Release> = Vec::with_capacity(input_rows);
    for row in rows {
        let moniker = row.get("moniker").and_then(|v| v.as_str());
        let version = row.get("version").and_then(|v| v.as_str());
        if let (Some(moniker), Some(version)) = (moniker, version) {
            parsed.push(Release {
                moniker: moniker.to_string(),
                version: version.to_string(),
            });
        }
    }

    // Dropping a bad row keeps one corrupt entry from aborting the
    // whole fetch (see `fetch_releases_row_missing_moniker_drops_row`
    // and siblings), but it has a cost: on the singleton path the
    // shortened vector is stored in [`RELEASES_CACHE`] for the rest
    // of the process, so a momentarily malformed row turns into a
    // partial snapshot for every later cache hit. Report the count
    // so that someone reading the logs can tell releases.json
    // arrived incomplete; otherwise the only symptom would be a
    // "version not found" somewhere far from the cause.
    let dropped = input_rows - parsed.len();
    if dropped > 0 {
        tracing::warn!(
            input_rows,
            parsed_rows = parsed.len(),
            dropped,
            "releases.json: dropped {dropped} of {input_rows} row(s) \
             missing moniker/version (or non-string values); cached \
             snapshot will reflect this for the process lifetime"
        );
    }
    Ok(parsed)
}

/// Determine the newest stable kernel version listed by kernel.org.
///
/// Walks the `releases` list in order and returns the first entry
/// whose moniker is `"stable"` or `"longterm"` and whose patch
/// level is at least 8. The patch floor keeps brand-new major
/// versions, which may still have build issues on CI runners, from
/// being selected.
///
/// When `client` is the process-wide [`shared_client`] singleton
/// the list comes from `RELEASES_CACHE`; any other client bypasses
/// the cache (pointer-equality check) and goes through
/// `fetch_releases` directly — see `cached_releases_with`.
///
/// `cli_label` is the prefix for status lines (e.g. `"ktstr"` or
/// `"cargo ktstr"`).
///
/// # Errors
///
/// Fails when the release list cannot be obtained or when no entry
/// satisfies the criteria above.
pub fn fetch_latest_stable_version(client: &Client, cli_label: &str) -> Result<String> {
    eprintln!("{cli_label}: fetching latest kernel version");
    let releases = cached_releases_with(client)?;

    // releases.json is ordered newest first, so the first entry that
    // passes the filter is the one we want.
    let newest = releases.iter().find(|r| {
        let tracked = r.moniker == "stable" || r.moniker == "longterm";
        tracked && patch_level(&r.version).unwrap_or(0) >= 8
    });

    let Some(release) = newest else {
        return Err(anyhow!(
            "no stable kernel with patch >= 8 found in releases.json"
        ));
    };
    let version = release.version.as_str();
    eprintln!("{cli_label}: latest stable kernel: {version}");
    Ok(version.to_string())
}

/// Turn a dotted version into a `(major, minor, patch)` triple that
/// can be compared numerically.
///
/// Accepts two or three dot-separated numeric segments; a missing
/// patch counts as 0. Examples: `"6.14.2"` → `Some((6, 14, 2))`,
/// `"6.14"` → `Some((6, 14, 0))`, `"7.0"` → `Some((7, 0, 0))`.
/// Anything else (wrong segment count, non-numeric segment such as
/// an `-rc` suffix) yields `None`.
fn version_tuple(version: &str) -> Option<(u32, u32, u32)> {
    let mut fields = version.split('.');
    let major = fields.next()?.parse().ok()?;
    let minor = fields.next()?.parse().ok()?;
    let patch = match fields.next() {
        Some(raw) => raw.parse().ok()?,
        None => 0,
    };
    // A fourth segment makes the version unparseable.
    if fields.next().is_some() {
        return None;
    }
    Some((major, minor, patch))
}

/// Tell whether `s` looks like a kernel series prefix such as
/// `"6.14"` rather than a full patch version (`"6.14.2"`) or an rc
/// tag (`"6.15-rc3"`). Callers use the answer to decide whether the
/// input must first be resolved via [`fetch_version_for_prefix`].
///
/// The test is purely textual: fewer than two dots and no `-rc`
/// substring. Consequently a bare `"7"` and even `""` count as
/// prefixes, which matches the historical inline check used by
/// kernel-build dispatchers.
pub fn is_major_minor_prefix(s: &str) -> bool {
    let dots = s.bytes().filter(|&b| b == b'.').count();
    let is_rc_tag = s.contains("-rc");
    !is_rc_tag && dots < 2
}

/// Find the highest released version that belongs to a series
/// prefix, e.g. `"6.12"` → `"6.12.81"` or `"6"` → `"6.19.12"`
/// (highest 6.x.y).
///
/// Every entry of releases.json is considered except those whose
/// moniker `is_skippable_release_moniker` rejects. A version
/// belongs to the series when it equals the prefix or continues it
/// with a `.`. If nothing matches (an EOL series), the lookup falls
/// back to `probe_latest_patch`, which reads the cdn.kernel.org
/// directory listing.
///
/// When `client` is the process-wide [`shared_client`] singleton
/// the release list comes from `RELEASES_CACHE`; any other client
/// bypasses the cache (pointer-equality check) and goes through
/// `fetch_releases` directly — see `cached_releases_with`. Only
/// releases.json is cached; the directory-listing fallback always
/// hits the network.
///
/// `cli_label` is the prefix for status lines (e.g. `"ktstr"` or
/// `"cargo ktstr"`).
pub fn fetch_version_for_prefix(client: &Client, prefix: &str, cli_label: &str) -> Result<String> {
    eprintln!("{cli_label}: fetching latest {prefix}.x kernel version");
    let releases = cached_releases_with(client)?;

    // Versions in the requested series: the text after the prefix
    // must be empty or start a new dotted segment, so "6.1" does not
    // pick up "6.12.3".
    let in_series = releases
        .iter()
        .filter(|r| !is_skippable_release_moniker(&r.moniker))
        .filter_map(|r| {
            let rest = r.version.strip_prefix(prefix)?;
            (rest.is_empty() || rest.starts_with('.')).then_some(r.version.as_str())
        });

    // Keep the first candidate with the greatest tuple; a later
    // candidate only wins when it is strictly greater.
    let mut best: Option<(&str, (u32, u32, u32))> = None;
    for candidate in in_series {
        let Some(tuple) = version_tuple(candidate) else {
            continue;
        };
        if best.is_none_or(|(_, current)| tuple > current) {
            best = Some((candidate, tuple));
        }
    }

    match best {
        Some((version, _)) => {
            eprintln!("{cli_label}: latest {prefix}.x kernel: {version}");
            Ok(version.to_string())
        }
        None => {
            eprintln!(
                "{cli_label}: {prefix}.x not in releases.json (EOL series), probing cdn.kernel.org"
            );
            probe_latest_patch(client, prefix, cli_label)
        }
    }
}

/// Find the latest patch version for an EOL series by fetching the
/// CDN directory listing.
///
/// GETs the `v{major}.x/` directory index from cdn.kernel.org and
/// extracts `linux-{prefix}.{patch}.tar.xz` filenames to find the
/// highest patch. One GET replaces the former parallel-HEAD probe
/// which failed in CI environments that block or mishandle HEAD
/// requests to the CDN.
///
/// Every occurrence of `linux-{prefix}.` in the response body is
/// examined, independent of how the listing is broken into lines, so
/// an index served on a single line (or with several entries per
/// line) still yields all candidates. A candidate counts only when
/// the text right after the needle is a run of ASCII digits followed
/// immediately by `.tar.xz`; signatures, other compression formats,
/// and patch numbers that overflow `u32` are ignored.
///
/// `cli_label` prefixes diagnostic status output.
///
/// # Errors
///
/// Fails when `major_version(prefix)` fails, when the GET fails or
/// returns a non-success status, when the body cannot be read, or
/// when the listing contains no matching tarball name.
fn probe_latest_patch(client: &Client, prefix: &str, cli_label: &str) -> Result<String> {
    let major = major_version(prefix)?;
    let url = format!("https://cdn.kernel.org/pub/linux/kernel/v{major}.x/");
    eprintln!("{cli_label}: fetching directory listing from {url}");
    let body = client
        .get(&url)
        .send()
        .with_context(|| format!("GET {url}"))?
        .error_for_status()
        .with_context(|| format!("GET {url}"))?
        .text()
        .with_context(|| format!("reading body from {url}"))?;

    let needle = format!("linux-{prefix}.");
    let best_patch = body
        .match_indices(needle.as_str())
        .filter_map(|(start, matched)| {
            let rest = &body[start + matched.len()..];
            // ASCII digits are single bytes, so the count is a valid
            // char boundary for `split_at`.
            let digit_count = rest.bytes().take_while(|b| b.is_ascii_digit()).count();
            let (digits, tail) = rest.split_at(digit_count);
            if tail.starts_with(".tar.xz") {
                // Empty or overflowing digit runs fail to parse and are skipped.
                digits.parse::<u32>().ok()
            } else {
                None
            }
        })
        .max();

    match best_patch {
        Some(patch) => {
            let version = format!("{prefix}.{patch}");
            eprintln!("{cli_label}: latest {prefix}.x kernel (from cdn listing): {version}");
            Ok(version)
        }
        None => {
            anyhow::bail!(
                "no tarball matching {prefix}.x found in cdn.kernel.org \
                 directory listing at {url}"
            );
        }
    }
}

/// Shallow-clone `git_ref` of the repository at `url` into
/// `dest_dir/linux`.
///
/// The clone is fetched at depth 1 and checked out into its main
/// worktree. The repository is then reopened to read HEAD, whose
/// first 7 hex characters become the short hash embedded in the
/// cache key (`{git_ref}-git-{short_hash}-{arch}-kc{suffix}`) and in
/// the returned kernel-source descriptor.
///
/// `cli_label` prefixes diagnostic status output (e.g. `"ktstr"` or
/// `"cargo ktstr"`).
///
/// # Errors
///
/// Returns an error, with context naming the failed step, when
/// preparing the clone, setting the ref name, fetching, checking
/// out, reopening the clone, or reading HEAD fails.
pub fn git_clone(
    url: &str,
    git_ref: &str,
    dest_dir: &Path,
    cli_label: &str,
) -> Result<AcquiredSource> {
    let (arch, _) = arch_info();
    eprintln!("{cli_label}: cloning {url} (ref: {git_ref}, depth: 1)");

    let clone_dir = dest_dir.join("linux");
    let depth = NonZeroU32::new(1).expect("1 is nonzero");
    // gix wants an interrupt flag for long-running operations; this
    // one is never set, so neither step is ever cancelled.
    let should_interrupt = std::sync::atomic::AtomicBool::new(false);

    let mut prepared = gix::prepare_clone(url, &clone_dir)
        .context("prepare clone")?
        .with_shallow(gix::remote::fetch::Shallow::DepthAtRemote(depth))
        .with_ref_name(Some(git_ref))
        .context("set ref name")?;

    let (mut checkout, _fetch_outcome) = prepared
        .fetch_then_checkout(gix::progress::Discard, &should_interrupt)
        .context("clone fetch")?;

    let (_repo, _checkout_outcome) = checkout
        .main_worktree(gix::progress::Discard, &should_interrupt)
        .context("checkout")?;

    let repo = gix::open(&clone_dir).context("open cloned repo")?;
    let head = repo.head_id().context("read HEAD")?;
    // The id renders as ASCII hex, so a byte truncation at 7 is the
    // same as taking the first 7 chars.
    let mut short_hash = head.to_string();
    short_hash.truncate(7);

    let cache_key = format!(
        "{git_ref}-git-{short_hash}-{arch}-kc{}",
        crate::cache_key_suffix()
    );
    let kernel_source = crate::cache::KernelSource::git(short_hash, git_ref);

    Ok(AcquiredSource {
        source_dir: clone_dir,
        cache_key,
        version: None,
        kernel_source,
        is_temp: true,
        is_dirty: false,
        is_git: true,
    })
}

/// Describe an existing kernel source tree on disk as an
/// [`AcquiredSource`].
///
/// The path must be a directory. It is canonicalized, then handed to
/// [`inspect_local_source_state`] for the git hash / dirty / is-git
/// classification. Per that helper, tracked-file changes make the
/// tree dirty, submodule checks are skipped, and a dirty or non-git
/// tree carries no `git_hash`, because no commit identifies the source
/// actually being built. `is_dirty` is passed through unchanged so
/// callers can decide whether to bypass the kernel cache.
///
/// The pre-build `.config` hash (see [`config_hash_for_key`]) is read
/// here and folded into the cache key, so the key is fixed at acquire
/// time and two builds of the same HEAD with different `.config`
/// files do not share a slot.
///
/// This function prints nothing, which is why it takes no
/// `cli_label`, unlike [`git_clone`].
///
/// # Errors
///
/// Fails when `source_path` is not a directory, cannot be
/// canonicalized, or when the source-state inspection fails.
pub fn local_source(source_path: &Path) -> Result<AcquiredSource> {
    let (arch, _) = arch_info();

    anyhow::ensure!(
        source_path.is_dir(),
        "{}: not a directory",
        source_path.display()
    );

    let canonical = source_path
        .canonicalize()
        .with_context(|| format!("canonicalize {}", source_path.display()))?;

    let state = inspect_local_source_state(&canonical)?;

    // Hash the user's current `.config` now so cache lookup and cache
    // store agree on the key.
    let cfg_hash = config_hash_for_key(&canonical);
    let cache_key =
        compose_local_cache_key(arch, &state.short_hash, &canonical, cfg_hash.as_deref());

    let kernel_source = crate::cache::KernelSource::Local {
        source_tree_path: Some(canonical.clone()),
        git_hash: state.short_hash,
    };

    Ok(AcquiredSource {
        source_dir: canonical,
        cache_key,
        version: None,
        kernel_source,
        is_temp: false,
        is_dirty: state.is_dirty,
        is_git: state.is_git,
    })
}

/// Result of [`inspect_local_source_state`]: the git hash and the
/// dirty / is-git classification of a canonical source-tree path.
///
/// Split out of [`local_source`] so the post-build dirty re-check (a
/// second call from [`crate::cli::kernel_build_pipeline`]) goes
/// through exactly the same gix code path as the acquire-time probe.
#[derive(Debug, Clone)]
pub struct LocalSourceState {
    /// First 7 characters of the HEAD commit id. `None` when the tree
    /// is dirty (HEAD doesn't describe the actual source) or is not a
    /// git repository (no HEAD at all). [`local_source`] copies this
    /// value into the `git_hash` field of
    /// [`crate::cache::KernelSource::Local`].
    pub short_hash: Option<String>,
    /// Tracked-file dirt: HEAD-vs-index disagreement OR
    /// index-vs-worktree disagreement. Always `true` for non-git
    /// trees (dirty detection is impossible without git, so the
    /// pessimistic stance is dirty).
    pub is_dirty: bool,
    /// `true` when `gix::discover` succeeded (the tree is a git
    /// repo); `false` otherwise. Lets the cache-skip hint branch
    /// on whether `commit` / `stash` is actionable.
    pub is_git: bool,
}

/// Inspect a canonical source-tree path for git hash + dirty state.
///
/// Submodule checks are skipped (false positives on kernel trees
/// with uninitialized submodules). The non-git arm returns
/// `(None, true, false)` so the caller's cache-skip hint can
/// distinguish "dirty git repo" from "not a git repo at all".
///
/// Called twice per build by [`crate::cli::kernel_build_pipeline`]:
/// once at acquire time (via [`local_source`]) and again after
/// `make` returns to detect mid-build worktree edits, branch flips,
/// or commits that would otherwise let a racing-write build land in
/// the cache under a stale identity. Both calls share the same gix
/// path so the post-build comparison is apples-to-apples.
///
/// Non-atomic against concurrent git operations: the probe runs
/// six sequential gix calls (`discover` → `head_id` → `head_tree`
/// → `index_or_empty` → `tree_index_status` → `status`), each a
/// separate filesystem read with no transactional bracket. A
/// concurrent `git commit`, `git add`, or worktree write between
/// any two calls can produce internally-inconsistent results —
/// e.g. `head_id` reads commit C0, a peer commit lands C1, then
/// `head_tree` reads C1's root tree and the diff against the
/// post-add index reports unexpected dirt. Git itself serializes
/// its own writes via per-resource lockfiles under `.git/`
/// (`index.lock` for staging operations, `HEAD.lock` and
/// `refs/heads/<branch>.lock` for ref updates), so peer `git`
/// processes wait on whichever lockfile their operation touches;
/// the genuinely-unsynchronized class is worktree-only writes
/// (autoformatter, IDE-on-save) which the index-worktree status
/// step catches regardless of timing.
///
/// The disposition is intentionally pessimistic so inconsistency is
/// safe: an `Err` from reading HEAD, the HEAD tree, the index, or
/// from setting up the status platform propagates to the caller,
/// which treats it as a rebuild signal (`MidWaitState::ProbeFailed`
/// in the mid-wait caller); any spurious dirty signal falls into
/// DirtyEdit / HashAdvanced, both forcing a rebuild. A failure of
/// either dirt probe itself (the HEAD-vs-index diff or the
/// index-vs-worktree iteration) is reported as dirt rather than
/// swallowed as "clean": a probe that could not run proves nothing
/// about cleanliness. The cost of a false-positive rebuild is one
/// extra `make`; the cost of a false-negative would be a cache slot
/// keyed on a HEAD that no longer describes the source — the
/// asymmetry is the reason for the pessimistic disposition. Callers
/// should treat the returned state as a best-effort approximation
/// of probe-time, not an instantaneous snapshot.
pub fn inspect_local_source_state(canonical: &Path) -> Result<LocalSourceState> {
    let (short_hash, is_dirty, is_git) = match gix::discover(canonical) {
        Ok(repo) => {
            let head = repo.head_id().with_context(|| "read HEAD")?;
            let short_hash = format!("{}", head).chars().take(7).collect::<String>();

            // tree_index_status compares a TREE id against the index;
            // the HEAD commit id is not itself a tree, so peel HEAD
            // to its root tree before diffing.
            let head_tree = repo.head_tree().with_context(|| "read HEAD tree")?;
            let head_tree_id = head_tree.id;

            // Check HEAD-vs-index for tracked file changes. The
            // callback stops the diff at the first reported change.
            let mut change_reported = false;
            let index = repo.index_or_empty().with_context(|| "open index")?;
            let diff_outcome = repo.tree_index_status(
                &head_tree_id,
                &index,
                None,
                gix::status::tree_index::TrackRenames::Disabled,
                |_, _, _| {
                    change_reported = true;
                    Ok::<_, std::convert::Infallible>(std::ops::ControlFlow::Break(()))
                },
            );
            // A diff that errored out cannot vouch for a clean index,
            // so count the failure as dirt instead of discarding it.
            let index_dirty = change_reported || diff_outcome.is_err();

            // Check index-vs-worktree for modified tracked files,
            // skipping submodules entirely (Ignore::All). Skipped when
            // the index is already known dirty.
            let worktree_dirty = if !index_dirty {
                repo.status(gix::progress::Discard)
                    .with_context(|| "status")?
                    .index_worktree_rewrites(None)
                    .index_worktree_submodules(gix::status::Submodule::Given {
                        ignore: gix::submodule::config::Ignore::All,
                        check_dirty: false,
                    })
                    .index_worktree_options_mut(|opts| {
                        opts.dirwalk_options = None;
                    })
                    .into_index_worktree_iter(Vec::new())
                    .map(|mut iter| iter.next().is_some())
                    // If the iterator cannot be created the worktree
                    // state is unknown; assume dirty, never clean.
                    .unwrap_or(true)
            } else {
                false
            };

            let is_dirty = index_dirty || worktree_dirty;
            // Drop the HEAD hash when dirty — the commit does not
            // describe the actual source being built, so publishing
            // it via git_hash / cache_key would misidentify the
            // build input.
            let hash = if is_dirty { None } else { Some(short_hash) };
            (hash, is_dirty, true)
        }
        Err(_) => {
            // The downstream kernel_build_pipeline (cli::kernel_build_pipeline)
            // emits `NON_GIT_TREE_CACHE_SKIP_HINT` — a single
            // informational line that names both the cause and the
            // remediation paths — once the is_dirty=true branch
            // decides to skip the cache. Emitting a second
            // "not a git repository" warning here duplicated that
            // content for every non-git `--source` run. The
            // `(None, true, false)` tuple silently communicates
            // the non-git state to the cache-skip decision site;
            // no separate stderr line is needed on this path.
            (None, true, false)
        }
    };
    Ok(LocalSourceState {
        short_hash,
        is_dirty,
        is_git,
    })
}

/// Build the cache key for a local source tree from its arch, its
/// optional HEAD short hash, its canonical path, and the optional
/// hash of the user's `.config`.
///
/// The key takes one of three shapes (`{suffix}` is
/// `crate::cache_key_suffix()`):
/// - `local-{hash}-{arch}-kc{suffix}` — `short_hash` is present and
///   `user_config_hash` is `None`
/// - `local-{hash}-{arch}-cfg{user_config}-kc{suffix}` — both
///   `short_hash` and `user_config_hash` are present
/// - `local-unknown-{path_hash}-{arch}-kc{suffix}` — `short_hash` is
///   `None` (dirty or non-git tree, where HEAD does not describe the
///   source). `user_config_hash` is not consulted in this shape.
///
/// `path_hash` comes from [`canonical_path_hash`]: the 8-hex-char
/// CRC32 of the canonical path bytes. It keeps two distinct
/// hash-less trees from landing on the same `local-unknown-...`
/// slot.
pub fn compose_local_cache_key(
    arch: &str,
    short_hash: &Option<String>,
    canonical: &Path,
    user_config_hash: Option<&str>,
) -> String {
    let suffix = crate::cache_key_suffix();

    // Without a commit hash the tree is identified by its path only.
    let Some(hash) = short_hash else {
        let path_hash = canonical_path_hash(canonical);
        return format!("local-unknown-{path_hash}-{arch}-kc{suffix}");
    };

    if let Some(cfg) = user_config_hash {
        format!("local-{hash}-{arch}-cfg{cfg}-kc{suffix}")
    } else {
        format!("local-{hash}-{arch}-kc{suffix}")
    }
}

/// CRC32 of the canonical source-path bytes, rendered as zero-padded
/// lowercase hex at the full 8-character width (the entire 32-bit
/// value).
///
/// Used by [`compose_local_cache_key`] to tell apart
/// `local-unknown-...` cache keys for distinct dirty / non-git source
/// trees, so that e.g. `--kernel ./linux-a` and `--kernel ./linux-b`
/// do not resolve to the same slot.
///
/// All 32 bits are kept: truncating to 6 hex chars (24 bits) would
/// shrink the value space 256-fold for no gain beyond brevity. CRC32
/// is a slot disambiguator here, not a cryptographic identity. The
/// bytes come from `OsStr::as_encoded_bytes`, so a non-UTF-8 path
/// component is hashed as-is rather than through a lossy UTF-8
/// conversion.
pub(crate) fn canonical_path_hash(canonical: &Path) -> String {
    let checksum = crc32fast::hash(canonical.as_os_str().as_encoded_bytes());
    format!("{checksum:08x}")
}

/// Hash `<canonical>/.config` for use in the cache key.
///
/// Returns the CRC32 of the file's bytes as an 8-character,
/// zero-padded lowercase hex string. Returns `None` whenever the
/// file cannot be read, most commonly because a fresh tree has no
/// `.config` yet; any other read error is treated the same way.
///
/// This is the PRE-BUILD `.config`, i.e. whatever is on disk when
/// the source is acquired, so identical `.config` content always
/// produces the same key segment. It is distinct from the
/// `config_hash` stored in [`crate::cache::KernelMetadata`], which
/// records the final `.config` after configuration has run.
fn config_hash_for_key(canonical: &Path) -> Option<String> {
    std::fs::read(canonical.join(".config"))
        .ok()
        .map(|contents| format!("{:08x}", crc32fast::hash(&contents)))
}

#[cfg(test)]
#[path = "fetch_tests.rs"]
mod tests;