cargo-affected 0.2.0

Run only the tests affected by git changes, using LLVM coverage.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
//! Coverage collection pipeline.
//!
//! Delegates build and test execution to `cargo nextest run`. We insert a
//! small runner shim (`cargo-affected runner-shim`) via
//! `--config target.<triple>.runner=[…]` that points `LLVM_PROFILE_FILE` at
//! a per-test subdirectory before `exec`ing the real test binary. After
//! nextest finishes we walk those subdirectories, merge profraws, export
//! coverage, and write per-(test, file) line ranges to SQLite.
//!
//! We use the `--config` array form rather than the
//! `CARGO_TARGET_<TRIPLE>_RUNNER` env var because cargo only
//! whitespace-splits the env-var form — a path like
//! `C:\Users\Joe Smith\…\cargo-affected.exe` (Windows) or
//! `/Users/joe/Library/Application Support/…` (macOS) would be
//! mis-tokenised. The TOML array preserves the path as one argv slot.
//!
//! Approach:
//! 1. Read crate roots from `cargo metadata`, scoped per nextest target
//!    (`binary_id`): each test's sentinel set covers its own crate root,
//!    its package's lib (for non-lib targets), and lib roots of workspace
//!    packages this target transitively depends on. Stored as sentinel-range
//!    rows via [`HitRange::sentinel`] (line 1 through
//!    `CRATE_ROOT_SENTINEL_END`) so any hunk in one of those files
//!    overlaps and re-selects the test.
//! 2. `cargo nextest list --message-format json` to enumerate every binary
//!    and every testcase.
//! 3. `cargo nextest run` with `-C instrument-coverage` in RUSTFLAGS and the
//!    runner wired in via `--config`. The preceding `list` step built the
//!    binaries, so `run` is a cache hit. nextest handles parallelism and
//!    progress. Each test invocation gets its `binary_id` straight from
//!    `NEXTEST_BINARY_ID` — the runner shim doesn't need to map paths.
//! 4. Capture HEAD sha (anchor for future diffs). In the code this happens
//!    up front, before the list/build steps, so any git error surfaces
//!    before we spend time on builds or coverage parsing.
//! 5. Post-run: for each subdir of profraw_base, read the `meta` sidecar,
//!    merge with `llvm-profdata`, export with `llvm-cov`, parse hit ranges.
//!    Parallelized across workers.
//! 6. Store mappings + collect_sha in the DB keyed by (binary_id, test_name).

use std::collections::{BTreeMap, BTreeSet, VecDeque};
use std::path::{Path, PathBuf};
use std::process::{Command, Stdio};
use std::sync::Mutex;
use std::time::Instant;

use anyhow::{bail, Context, Result};

use crate::coverage::{self, HitRange};
use crate::db::{affected_dir, Db, TestId, FINGERPRINT_KEEP};
use crate::fingerprint;
use crate::project::{
    canonicalize_no_verbatim, find_project_root, git_head_sha, git_working_tree_dirty,
};
use crate::selection;

/// Entry point for `cargo affected collect`.
///
/// Runs the whole pipeline in one pass: dirty-tree check, tool discovery,
/// `cargo nextest list`, an instrumented `cargo nextest run` through the
/// runner shim, parallel per-test coverage extraction, and finally the DB
/// write plus fingerprint GC.
///
/// `diff = true` runs an incremental collect: only tests affected by changes
/// since one of the stored `collect_sha`s (or new tests added to the project)
/// are rerun under instrumentation, and their rows are re-anchored at the
/// new HEAD. Other tests' rows stay put.
///
/// `nextest_args` are forwarded verbatim to `cargo nextest run`; the subset
/// returned by `cargo_build_args` is also passed to the listing step.
///
/// # Returns
///
/// nextest's exit code on the normal path (`1` if the process reported no
/// exit code). Returns `0` when diff mode finds nothing to recollect, and
/// whatever `handle_no_profraw_dirs` decides when the run produced no
/// per-test profraw directories.
///
/// # Errors
///
/// - The working tree is dirty and `allow_dirty` is false.
/// - nextest, `llvm-profdata`, `llvm-cov`, or HEAD cannot be resolved.
/// - In diff mode: there is no prior collect for the current environment,
///   or none of the stored shas is reachable from HEAD. (Individually
///   unreachable shas only produce a notice; they do not fail the collect.)
/// - A collected test's `binary_id` is not a known workspace target.
/// - Any filesystem, process-spawn, or DB operation along the way fails.
pub fn collect(
    diff: bool,
    verbose: bool,
    allow_dirty: bool,
    nextest_args: &[String],
) -> Result<i32> {
    let total_start = Instant::now();
    let project = find_project_root()?;
    let project_root = &project.workspace_root;
    if verbose {
        eprintln!("project root: {}", project_root.display());
    }
    let canonical_root = canonicalize_no_verbatim(project_root)?;

    // Refuse to collect on a dirty tree by default: ranges would be filed
    // under HEAD but reflect working-tree line numbers, knocking the DB out
    // of phase with every later `git diff <collect_sha>` query.
    if git_working_tree_dirty(project_root)? {
        if allow_dirty {
            eprintln!(
                "warning: collecting on a dirty working tree (--allow-dirty); \
                 stored ranges may not align with future `affected run` queries"
            );
        } else {
            bail!(
                "working tree has uncommitted changes; commit or stash them \
                 before `cargo affected collect`, or pass --allow-dirty for a \
                 throwaway run (selection will be unreliable)"
            );
        }
    }

    require_nextest(project_root)?;
    let self_path = std::env::current_exe().context("failed to resolve current executable")?;
    let target_triple = current_target();
    let runner_config = format_runner_config(&target_triple, &self_path);

    let llvm_profdata = find_llvm_tool("llvm-profdata")?;
    let llvm_cov = find_llvm_tool("llvm-cov")?;
    if verbose {
        eprintln!(
            "llvm-profdata: {}\nllvm-cov: {}",
            llvm_profdata.display(),
            llvm_cov.display()
        );
    }

    // Anchor for future `run`/`status` diffs. Captured up front so a missing
    // HEAD (e.g., empty repo) errors before we spend time on builds.
    let collect_sha = git_head_sha(project_root)?;
    eprintln!("collect sha: {collect_sha}");

    // Build artifacts live under target/affected/build/ rather than the
    // project's default target/. Without isolation, cargo's main build phase
    // compiles every workspace package — including helper binaries pulled in
    // by `default-members` — into target/debug/ with the
    // `-C instrument-coverage` we set below. Those instrumented binaries
    // then linger after `collect` exits, and any later non-coverage
    // `cargo test` that spawns them writes `default_*.profraw` files to its
    // CWD. Routing the build into target/affected/build/ keeps the
    // instrumented copies out of target/debug/, where downstream tooling
    // (cargo-dist, IDEs, plain `cargo run`) expects clean artifacts.
    //
    // The matching `cargo affected run` flow leaves --target-dir unset so
    // it reuses target/debug/ — the user's normal cache — since `run`
    // doesn't enable instrumentation.
    let build_dir = affected_dir(project_root).join("build");
    std::fs::create_dir_all(&build_dir).context("failed to create build dir")?;
    // Sweep stale build-script profraws from prior collects; they accumulate
    // every time a build.rs reruns under instrumentation and aren't useful
    // between collects (build scripts don't show up in the test coverage we
    // care about — they ran during compile, not under the runner shim).
    // Removal is best-effort: a file that can't be deleted is ignored.
    for entry in std::fs::read_dir(&build_dir).context("scanning build dir")? {
        let entry = entry?;
        if entry.path().extension().is_some_and(|e| e == "profraw") {
            let _ = std::fs::remove_file(entry.path());
        }
    }

    // Profraw files live under target/affected/ alongside the DB. PID suffix
    // so concurrent `collect` invocations don't wipe each other's files.
    let profraw_dir = affected_dir(project_root).join(format!("profraw-{}", std::process::id()));
    if profraw_dir.exists() {
        std::fs::remove_dir_all(&profraw_dir).context("failed to clean profraw dir")?;
    }
    std::fs::create_dir_all(&profraw_dir).context("failed to create profraw dir")?;

    // Per-target sentinel set keyed by nextest's `binary_id`. Each test's
    // sentinel ranges cover its own crate root, its package's lib (if it's
    // not the lib itself), and the libs of any workspace packages it
    // transitively depends on. See `crate_root_sentinels_by_binary_id` for
    // the reasoning.
    let crate_root_sentinels = project.crate_root_sentinels_by_binary_id()?;
    if verbose {
        for (binary_id, paths) in &crate_root_sentinels {
            eprintln!(
                "crate-root sentinels for {binary_id}: {}",
                paths
                    .iter()
                    .map(|p| p.as_str())
                    .collect::<Vec<_>>()
                    .join(", ")
            );
        }
    }
    // Convert each sentinel path into a sentinel `HitRange` once, so the
    // extraction workers below only need a map lookup per test.
    let crate_root_ranges_by_binary_id: BTreeMap<String, BTreeSet<HitRange>> =
        crate_root_sentinels
            .into_iter()
            .map(|(binary_id, paths)| {
                let ranges = paths.into_iter().map(HitRange::sentinel).collect();
                (binary_id, ranges)
            })
            .collect();

    // Append to (rather than replace) any RUSTFLAGS the user already set.
    let mut rustflags = std::env::var("RUSTFLAGS").unwrap_or_default();
    if !rustflags.is_empty() {
        rustflags.push(' ');
    }
    rustflags.push_str("-C instrument-coverage");

    // List first. Gives us the stable (package, target, kind) → binary_id map
    // we'll use to disambiguate same-basename binaries (e.g. two crates with
    // their own `tests/builds.rs`). The list step builds with the same
    // RUSTFLAGS and build flags as the run below, so the subsequent run is a
    // cache hit. Fingerprint is taken now so Cargo.lock is in its final
    // state — status/run will compare against that same state.
    eprintln!("listing tests with cargo nextest list...");
    let listing = nextest_list(
        project_root,
        Some(&rustflags),
        Some(&build_dir),
        &cargo_build_args(nextest_args),
    )?;
    eprintln!(
        "found {} tests across {} binaries",
        listing.tests.len(),
        listing.binaries.len()
    );
    let fingerprint = fingerprint::compute(&project)?;
    let env_fingerprint = &fingerprint.hex;

    // Open the DB once and thread it through. Eager open lets a busy/locked
    // database error out before we spend time on extraction.
    let mut db = Db::open(project_root)?;

    // Diff mode: validate prior collect, run selection, build the
    // nextest filter expression for the rerun set. Done after the listing
    // step so we use the same fingerprint for read and write — the list
    // step can update Cargo.lock, which would otherwise leave us reading
    // under one fingerprint and writing under another.
    //
    // The planner is read-only against the DB; any prune or row replacement
    // happens later in this function so the DB write surface stays in one
    // place.
    let diff_plan = if diff {
        match plan_diff_collect(project_root, &db, env_fingerprint, &listing)? {
            DiffOutcome::Plan(plan) => Some(plan),
            DiffOutcome::NothingToRecollect { listed } => {
                // Nothing to rerun, but rows for tests that vanished from the
                // listing still need to go before we short-circuit.
                let pruned = db.prune_missing_tests(env_fingerprint, &listed)?;
                if pruned > 0 {
                    let s = if pruned == 1 { "" } else { "s" };
                    eprintln!("pruned {pruned} test{s} no longer present in nextest list");
                }
                eprintln!(
                    "done. nothing to recollect — no affected tests and no new tests \
                     ({:.1}s total)",
                    total_start.elapsed().as_secs_f64(),
                );
                return Ok(0);
            }
        }
    } else {
        None
    };

    // Build (or cache-hit) and run, with the runner shim wired up so each
    // test writes to its own per-test profraw directory.
    //
    // `--target-dir` routes build artifacts into target/affected/build/. The
    // build-script LLVM_PROFILE_FILE pattern lives at the target-dir root
    // so consumers can recover the target-dir via dirname(LLVM_PROFILE_FILE)
    // — same convention cargo-llvm-cov uses, which lets nextest setup-scripts
    // that build helper binaries match the runner's target-dir without
    // having to know cargo-affected's specific layout.
    eprintln!("running tests with cargo nextest run...");
    let mut cmd = Command::new("cargo");
    cmd.arg("nextest")
        .arg("run")
        .arg("--config")
        .arg(&runner_config)
        .arg("--target-dir")
        .arg(&build_dir)
        // `--no-tests=warn` so a filter that matches nothing real (every
        // selected test absent from the listing — common in `--diff` after
        // renames/deletions) doesn't make nextest exit non-zero. We
        // discriminate the legitimate "all phantoms" case from a build
        // failure in `handle_no_profraw_dirs` using the diff plan's
        // live-vs-phantom split, not nextest's exit code.
        .arg("--no-tests=warn")
        .env("RUSTFLAGS", &rustflags)
        .env("CARGO_AFFECTED_PROFRAW_BASE", &profraw_dir)
        // Catches build-script profraw before the runner shim kicks in for tests.
        .env("LLVM_PROFILE_FILE", build_dir.join("build-%p-%m.profraw"))
        .current_dir(project_root);
    // In diff mode the rerun set is handed to nextest through a generated
    // config file; remember its path so it can be removed after the run.
    let filter_config = match &diff_plan {
        Some(plan) => {
            let config = write_nextest_config(project_root, &plan.filter_expr())?;
            cmd.arg("--config-file").arg(&config);
            Some(config)
        }
        None => None,
    };
    // User-supplied arguments go last, after everything we set ourselves.
    for a in nextest_args {
        cmd.arg(a);
    }
    let status = cmd
        .status()
        .context("failed to run cargo nextest run")?;
    // `code()` is `None` when the process ended without an exit code (e.g.
    // terminated by a signal on Unix); report that as a generic failure.
    let nextest_exit = status.code().unwrap_or(1);
    if let Some(config) = &filter_config {
        // Best-effort cleanup; a stale file in gitignored target/ is harmless.
        let _ = std::fs::remove_file(config);
    }

    let test_dirs = list_test_dirs(&profraw_dir)?;
    let total = test_dirs.len();
    if total == 0 {
        // No per-test directories at all: let the recovery helper decide
        // whether that is a failure, an all-phantom diff, or an empty suite.
        let exit = handle_no_profraw_dirs(
            &mut db,
            env_fingerprint,
            diff_plan.as_ref(),
            nextest_exit,
            &profraw_dir,
        )?;
        remove_profraw_dir(&profraw_dir)?;
        return Ok(exit);
    }

    let num_workers = std::thread::available_parallelism()
        .map(|n| n.get())
        .unwrap_or(4);
    eprintln!("extracting coverage for {total} tests with {num_workers} workers...");

    // Shared state for the extraction workers: a progress counter, the work
    // queue of per-test directories, the collected mappings, and the list of
    // errors that should fail the whole collect.
    let progress: Mutex<usize> = Mutex::new(0);
    let work: Mutex<VecDeque<(usize, PathBuf)>> =
        Mutex::new(test_dirs.into_iter().enumerate().collect());
    let mappings: Mutex<Vec<(TestId, BTreeSet<HitRange>)>> = Mutex::new(Vec::new());
    let extract_errors: Mutex<Vec<String>> = Mutex::new(Vec::new());

    std::thread::scope(|s| {
        for _ in 0..num_workers {
            s.spawn(|| loop {
                // Pull the next directory; a worker exits once the queue is
                // drained.
                let Some((_idx, dir)) = work.lock().unwrap().pop_front() else {
                    break;
                };
                let t0 = Instant::now();
                let outcome = extract_one(&dir, &llvm_profdata, &llvm_cov, &canonical_root);
                let elapsed = t0.elapsed().as_secs_f64();
                // The progress lock stays held across the log line below, so
                // the counter bump and its `[n/total]` message are emitted
                // together.
                let mut guard = progress.lock().unwrap();
                *guard += 1;
                let n = *guard;
                match outcome {
                    Ok(ExtractOutcome::Collected { test_id, mut ranges }) => {
                        let Some(pkg_ranges) =
                            crate_root_ranges_by_binary_id.get(&test_id.binary_id)
                        else {
                            eprintln!(
                                "[{n}/{total}] {}::{}: ERROR (binary_id is not a known \
                                 workspace target)",
                                test_id.binary_id, test_id.test_name
                            );
                            // Release the progress lock before taking the
                            // error-list lock.
                            drop(guard);
                            extract_errors.lock().unwrap().push(format!(
                                "binary_id {:?} is not a known workspace target",
                                test_id.binary_id
                            ));
                            continue;
                        };
                        // Add the crate-root sentinel ranges on top of the
                        // measured coverage for this test.
                        ranges.extend(pkg_ranges.iter().cloned());
                        eprintln!(
                            "[{n}/{total}] {}::{}: {} ranges ({elapsed:.1}s)",
                            test_id.binary_id,
                            test_id.test_name,
                            ranges.len()
                        );
                        drop(guard);
                        mappings.lock().unwrap().push((test_id, ranges));
                    }
                    Ok(ExtractOutcome::Skipped { test_id, reason }) => {
                        eprintln!(
                            "[{n}/{total}] {}::{}: SKIP ({reason})",
                            test_id.binary_id, test_id.test_name
                        );
                    }
                    Err(e) => {
                        // NOTE(review): this is logged only. Unlike the
                        // unknown-binary_id case above, it is not pushed to
                        // `extract_errors`, so it does not fail the collect
                        // and the test gets no mapping — confirm intended.
                        eprintln!(
                            "[{n}/{total}] {}: ERROR ({e:#})",
                            dir.display()
                        );
                    }
                }
            });
        }
    });

    // All workers have joined at this point, so the mutexes can be unwrapped.
    let extract_errors = extract_errors.into_inner().unwrap();
    if !extract_errors.is_empty() {
        bail!("coverage extraction failed:\n  {}", extract_errors.join("\n  "));
    }

    let mappings = mappings.into_inner().unwrap();

    let total_elapsed = total_start.elapsed();
    let region_count: usize = mappings.iter().map(|(_, r)| r.len()).sum();

    if let Some(plan) = diff_plan {
        // Incremental collect: update rows for the rerun tests, then prune
        // rows for tests that are no longer in the listing.
        eprintln!(
            "updating coverage for {} tests ({region_count} ranges)...",
            mappings.len()
        );
        db.update_coverage_for_tests(
            env_fingerprint,
            &fingerprint.components,
            &collect_sha,
            &mappings,
        )?;
        let pruned = db.prune_missing_tests(env_fingerprint, &plan.listed)?;
        if pruned > 0 {
            let s = if pruned == 1 { "" } else { "s" };
            eprintln!("pruned {pruned} test{s} no longer present in nextest list");
        }
    } else {
        // Full collect: store the complete set of mappings.
        eprintln!(
            "storing coverage for {} tests ({region_count} ranges)...",
            mappings.len()
        );
        db.store_coverage(
            env_fingerprint,
            &fingerprint.components,
            &collect_sha,
            &mappings,
        )?;
    }

    let evicted = db.gc(env_fingerprint, FINGERPRINT_KEEP)?;
    if evicted > 0 {
        let kept = db.fingerprint_count()?;
        let s = if evicted == 1 { "" } else { "s" };
        eprintln!("evicted {evicted} stale fingerprint{s} (kept {kept} of {FINGERPRINT_KEEP})");
    }

    eprintln!(
        "done. {} tests, {} ranges stored in target/affected/coverage.db ({:.1}s total)",
        mappings.len(),
        region_count,
        total_elapsed.as_secs_f64(),
    );
    remove_profraw_dir(&profraw_dir)?;
    Ok(nextest_exit)
}

/// Drop the per-collect profraw directory. The raw profile bundles inside
/// (~10 GB on a workspace this size) feed only the in-process `llvm-profdata`
/// → `llvm-cov` extraction; once the resulting hit-ranges land in
/// `coverage.db` they have no further use, and leaving them on disk wastes
/// space locally and overflows the GitHub Actions repo cache cap in CI.
/// Only called from the success paths — failed collects keep their bundles
/// for debugging.
fn remove_profraw_dir(profraw_dir: &Path) -> Result<()> {
    if !profraw_dir.exists() {
        return Ok(());
    }
    std::fs::remove_dir_all(profraw_dir)
        .with_context(|| format!("failed to remove profraw dir {}", profraw_dir.display()))
}

/// Plan for the rerun side of `collect --diff`: which tests to invoke
/// nextest with, and the full listing so a post-run prune can drop tests
/// that disappeared since the last collect.
///
/// Built by `plan_diff_collect`; consumed by `collect` (filter expression
/// and prune) and by `handle_no_profraw_dirs` (live-vs-phantom check).
struct DiffPlan {
    /// Tests selected for rerun — affected + new. Includes "phantoms":
    /// tests in the DB whose stored ranges overlap diff hunks but that no
    /// longer appear in the current nextest listing (renamed/deleted
    /// between collects). nextest filters those out at runtime; the
    /// no-profraw-dirs recovery path (`handle_no_profraw_dirs`) uses the
    /// live/phantom split to tell "filter matched nothing real" apart from
    /// "runner shim failed".
    selected: BTreeSet<TestId>,
    /// Every test currently in `nextest list`. Drives prune (rows for
    /// renamed/deleted tests) and the live-vs-phantom check: a selected
    /// test is "live" exactly when it is also a member of this set.
    listed: BTreeSet<TestId>,
}

impl DiffPlan {
    /// Build the nextest filterset expression covering the whole selection.
    ///
    /// Phantoms are included on purpose: nextest simply matches nothing
    /// for a test that no longer exists.
    fn filter_expr(&self) -> String {
        let ids = self.selected.iter().cloned().collect::<Vec<TestId>>();
        nextest_filter_expr(&ids)
    }

    /// Number of selected tests that are also in the current listing, i.e.
    /// the ones nextest can really execute.
    ///
    /// When extraction finds no profraw dirs, a zero here means the
    /// selection was all phantoms; a non-zero count points at a runner-shim
    /// failure instead.
    fn live_selected_count(&self) -> usize {
        self.selected.intersection(&self.listed).count()
    }
}

/// Result of the `collect --diff` preflight (`plan_diff_collect`). The
/// variant tells `collect` whether to invoke nextest or short-circuit, and
/// either way the listing (carried as `listed`) drives the post-step prune.
enum DiffOutcome {
    /// Selection picked at least one test — invoke nextest with the filter
    /// from `DiffPlan::filter_expr`, then write rows and prune.
    Plan(DiffPlan),
    /// Nothing to recollect — no affected tests, no new tests. Caller still
    /// runs prune so renamed/deleted tests' rows go away.
    NothingToRecollect {
        /// Every test in the current nextest listing; the prune keeps rows
        /// for these and drops the rest.
        listed: BTreeSet<TestId>,
    },
}

/// Diff-mode preflight and test selection for `collect --diff`.
///
/// Never writes to `db`: row replacement and pruning are left to the
/// `collect` call site so all DB writes live in one place.
///
/// Steps, in order:
/// 1. Load the `collect_sha`s stored for `env_fingerprint`; none at all is
///    an error (no prior collect for this environment).
/// 2. Check which of them are reachable. Unreachable ones produce a notice
///    on stderr; having no reachable sha at all is an error.
/// 3. Run selection against `listing` and wrap the result.
///
/// Returns [`DiffOutcome::NothingToRecollect`] when the selection is empty,
/// otherwise [`DiffOutcome::Plan`] after printing a summary of the rerun set.
fn plan_diff_collect(
    project_root: &Path,
    db: &Db,
    env_fingerprint: &str,
    listing: &Listing,
) -> Result<DiffOutcome> {
    let stored_shas = db.collect_shas(env_fingerprint)?;
    if stored_shas.is_empty() {
        bail!(
            "--diff requires a prior `cargo affected collect` for the \
             current environment (Cargo.lock / rustc / build flags); \
             no stored coverage matches"
        );
    }

    let reachability = selection::check_shas_reachable(project_root, &stored_shas)?;
    if !reachability.missing.is_empty() {
        let notice = selection::missing_shas_notice(
            &reachability.missing,
            "will be rerun and re-anchored at the new HEAD",
        );
        eprintln!("{}", notice);
    }
    if reachability.reachable.is_empty() {
        bail!(
            "no reachable collect_sha for the current environment (every \
             stored sha is rebased away or otherwise unreachable from HEAD); \
             run `cargo affected collect` to re-anchor"
        );
    }
    if reachability.max_commits_ahead > 0 {
        eprintln!(
            "note: {} commit(s) since prior collect — \
             re-anchoring affected tests at the new HEAD",
            reachability.max_commits_ahead,
        );
    }

    let picked = selection::select_with_reach(
        project_root,
        db,
        env_fingerprint,
        listing,
        &reachability,
        selection::DiagnosticDetail::Summary,
    )?;
    let rerun_set = picked.selected();

    let outcome = if rerun_set.is_empty() {
        DiffOutcome::NothingToRecollect {
            listed: picked.listed,
        }
    } else {
        // Print the summary while `picked` is still whole; `listed` is moved
        // out of it right after.
        let summary = selection::format_summary(&picked, "to recollect", false);
        eprintln!("\n{}\n", summary);
        DiffOutcome::Plan(DiffPlan {
            selected: rerun_set,
            listed: picked.listed,
        })
    };
    Ok(outcome)
}

/// Recovery for the case where nextest run produced no per-test profraw
/// directories. Discriminates the following buckets so the user gets an
/// actionable message instead of a generic "no profraws" line:
///
/// - **Build or test failure** (nextest exited non-zero) — bail and let
///   nextest's own output explain. We pass `--no-tests=warn` to nextest so
///   "filter matched nothing" doesn't fall in here.
/// - **Runner shim didn't fire** (`--diff` mode, live tests should have run
///   but no per-test dirs appeared) — bail with a diagnostic pointing at
///   the shim. This is the case where nextest claims success but our
///   instrumentation never engaged.
/// - **All-phantom selection** (`--diff` mode, every selected test absent
///   from the current listing) — expected when tests were renamed/deleted
///   between collects. Prune the stale rows and exit 0.
/// - **Full collect with nothing collected** (no diff plan, nextest exited
///   0) — either an empty test suite or a shim failure; we can't tell which
///   from here, so print a hint covering both and exit 0.
///
/// # Returns
///
/// `Ok(0)` for the all-phantom and full-collect cases.
///
/// # Errors
///
/// Fails for the build/test-failure and shim-didn't-fire cases, or if the
/// prune in the all-phantom case fails.
fn handle_no_profraw_dirs(
    db: &mut Db,
    env_fingerprint: &str,
    diff_plan: Option<&DiffPlan>,
    nextest_exit: i32,
    profraw_dir: &Path,
) -> Result<i32> {
    if nextest_exit != 0 {
        bail!(
            "nextest exited with code {nextest_exit} and produced no per-test \
             profraw directories under {} — build or test failure (see nextest \
             output above)",
            profraw_dir.display(),
        );
    }

    if let Some(plan) = diff_plan {
        let live = plan.live_selected_count();
        if live > 0 {
            // nextest exited 0 with live tests in the filter, but no
            // per-test dirs appeared — those should each have one. The
            // runner shim must have failed to fire.
            bail!(
                "nextest exited 0 but {live} of {} selected tests should have \
                 been instrumented — no per-test profraw directories appeared \
                 under {}; the runner shim may have failed to fire",
                plan.selected.len(),
                profraw_dir.display(),
            );
        }
        eprintln!(
            "no tests rerun: every selected test is absent from the current \
             nextest listing (renamed or deleted between collects)"
        );
        let pruned = db.prune_missing_tests(env_fingerprint, &plan.listed)?;
        if pruned > 0 {
            let s = if pruned == 1 { "" } else { "s" };
            eprintln!("pruned {pruned} test{s} no longer present in nextest list");
        }
        return Ok(0);
    }

    // Full collect with no profraws and no diff plan: either the project
    // has no tests at all (nextest's `--no-tests=warn` lets us distinguish
    // this from a hard failure) or the shim never fired. We can't tell
    // apart from here without re-listing, so default to the more likely
    // explanation in this codepath — empty suite — and surface a hint.
    //
    // The `; ` after the path placeholder matters: a trailing `\` in a
    // string literal swallows the newline AND the next line's indentation,
    // so without an explicit separator the path would run straight into
    // the word "project".
    eprintln!(
        "no per-test profraw directories under {}; \
         project may have no tests, or the runner shim may have failed to fire",
        profraw_dir.display(),
    );
    Ok(0)
}

/// Outcome of coverage extraction for a single per-test directory.
///
/// Both variants carry the `TestId` read from the directory's `meta`
/// sidecar, so the caller can attribute either result to a specific test.
enum ExtractOutcome {
    /// Coverage was exported and parsed; `ranges` is the set returned by
    /// `coverage::extract_hit_ranges` for this test.
    Collected {
        test_id: TestId,
        ranges: BTreeSet<HitRange>,
    },
    /// No coverage was produced for this test. `reason` is a human-readable
    /// explanation: no profraw files, a failed `llvm-profdata` / `llvm-cov`
    /// invocation (with its stderr), or a parse error.
    Skipped {
        test_id: TestId,
        reason: String,
    },
}

/// Merge profraws in `dir` and export coverage.
///
/// Reads the `meta` sidecar the shim wrote (test name + binary path +
/// binary_id) so we know exactly which binary to pass to `llvm-cov export`
/// and how to store the result in the DB.
fn extract_one(
    dir: &Path,
    llvm_profdata: &Path,
    llvm_cov: &Path,
    canonical_root: &Path,
) -> Result<ExtractOutcome> {
    let meta = std::fs::read_to_string(dir.join("meta"))
        .with_context(|| format!("reading sidecar {}/meta", dir.display()))?;
    let mut lines = meta.lines();
    let test_name = lines
        .next()
        .context("empty meta sidecar")?
        .to_string();
    let binary = lines
        .next()
        .context("meta sidecar missing binary path")?
        .to_string();
    let binary_id = lines
        .next()
        .context("meta sidecar missing binary_id")?
        .to_string();
    let test_id = TestId::new(binary_id, test_name);
    let binary = PathBuf::from(binary);

    let profraw_files = list_profraw_files(dir)?;
    if profraw_files.is_empty() {
        return Ok(ExtractOutcome::Skipped {
            test_id,
            reason: "no profraw generated".into(),
        });
    }

    let profdata_path = dir.join("coverage.profdata");
    let mut merge_cmd = Command::new(llvm_profdata);
    merge_cmd.arg("merge").arg("--sparse");
    for f in &profraw_files {
        merge_cmd.arg(f);
    }
    merge_cmd.arg("-o").arg(&profdata_path);
    let merge_output = merge_cmd
        .output()
        .context("failed to run llvm-profdata merge")?;
    if !merge_output.status.success() {
        return Ok(ExtractOutcome::Skipped {
            test_id,
            reason: format!(
                "llvm-profdata merge failed: {}",
                String::from_utf8_lossy(&merge_output.stderr).trim()
            ),
        });
    }

    // POSIX ERE — no negative lookahead, so we enumerate prefixes to drop.
    // The filter shrinks `files[]` (1234 → 113 on a worktrunk-scale test) but
    // doesn't shrink `functions[]`, which is the bulk of the JSON. We still
    // re-filter in coverage.rs via `strip_prefix(project_root)` — this regex
    // is the cheap pre-filter, project-root strip is the authoritative gate.
    let export_output = Command::new(llvm_cov)
        .arg("export")
        .arg("--format=text")
        .arg(format!("--instr-profile={}", profdata_path.display()))
        .arg("--ignore-filename-regex=/rustc/|/\\.cargo/|/target/")
        .arg(&binary)
        .output()
        .context("failed to run llvm-cov export")?;
    if !export_output.status.success() {
        return Ok(ExtractOutcome::Skipped {
            test_id,
            reason: format!(
                "llvm-cov export failed: {}",
                String::from_utf8_lossy(&export_output.stderr).trim()
            ),
        });
    }

    let json = String::from_utf8_lossy(&export_output.stdout);
    match coverage::extract_hit_ranges(&json, canonical_root) {
        Ok(ranges) => Ok(ExtractOutcome::Collected { test_id, ranges }),
        Err(e) => Ok(ExtractOutcome::Skipped {
            test_id,
            reason: format!("parse error: {e}"),
        }),
    }
}

/// Render `tests` as one nextest filterset expression that selects exactly
/// those tests.
///
/// Tests are grouped per `binary_id`, and groups are emitted in sorted
/// `binary_id` order, giving
/// `(binary_id(=X) & (test(=a) | test(=b))) | (binary_id(=Y) & (test(=c)))`.
/// Within a group the test predicates keep the order of `tests`. An empty
/// slice renders as `none()`, a valid filterset that matches nothing.
///
/// The predicate is `binary_id()` rather than `binary()`: `binary()` matches
/// the short binary name (e.g. `builds`), which cannot tell apart same-named
/// binaries from different workspace crates.
///
/// There is no length cap on the result. It is delivered to nextest as a
/// `default-filter` in a config file (see [`write_nextest_config`]), never as
/// an inline command-line argument, so no OS argv-length limit applies.
pub(crate) fn nextest_filter_expr(tests: &[TestId]) -> String {
    if tests.is_empty() {
        return "none()".to_string();
    }

    // binary_id -> already-rendered `test(=…)` predicates for that binary.
    let mut grouped: BTreeMap<&str, Vec<String>> = BTreeMap::new();
    for test in tests {
        grouped
            .entry(test.binary_id.as_str())
            .or_default()
            .push(format!("test(={})", test.test_name.as_str()));
    }

    let mut clauses = Vec::with_capacity(grouped.len());
    for (binary_id, predicates) in &grouped {
        let any_of = predicates.join(" | ");
        clauses.push(format!("(binary_id(={binary_id}) & ({any_of}))"));
    }
    clauses.join(" | ")
}

/// Write a nextest config file that pins the run to `filter_expr` and return
/// its path, to be handed to nextest via `--config-file`.
///
/// Why a file and not `-E`: a large affected set used to produce an `-E`
/// argument list megabytes long, which overflowed Windows' ~32 KB
/// `CreateProcess` command-line limit (`os error 206`). With a config file
/// the command line stays a fixed `--config-file <path>` regardless of how
/// many tests are selected.
///
/// Why the project's config is merged in: `--config-file` takes over
/// nextest's repo-config slot (`<workspace>/.config/nextest.toml`). The
/// project's file, if present, is therefore used as the starting document and
/// only `[profile.default].default-filter` is overwritten — profiles,
/// setup-scripts, timeouts and JUnit settings survive. If the project already
/// defines a `default-filter`, the selection is AND-ed with it so the
/// effective set matches the old inline-`-E` behavior (`-E` was likewise
/// intersected with the default filter).
///
/// # Errors
///
/// Fails when the project config exists but cannot be read or parsed, or when
/// the output directory or file cannot be written. A missing project config
/// is not an error; it is treated as an empty document.
pub(crate) fn write_nextest_config(project_root: &Path, filter_expr: &str) -> Result<PathBuf> {
    let project_config = project_root.join(".config").join("nextest.toml");
    let existing = match std::fs::read_to_string(&project_config) {
        Ok(text) => text,
        Err(err) => {
            if err.kind() != std::io::ErrorKind::NotFound {
                return Err(err)
                    .with_context(|| format!("failed to read {}", project_config.display()));
            }
            String::new()
        }
    };
    let mut doc = existing
        .parse::<toml_edit::DocumentMut>()
        .with_context(|| format!("failed to parse {}", project_config.display()))?;

    // Look up the project's own default filter, if it set one as a string.
    let project_filter = doc
        .get("profile")
        .and_then(|profile| profile.get("default"))
        .and_then(|default| default.get("default-filter"))
        .and_then(|value| value.as_str());
    let effective = if let Some(own) = project_filter {
        format!("({filter_expr}) & ({own})")
    } else {
        filter_expr.to_string()
    };
    doc["profile"]["default"]["default-filter"] = toml_edit::value(effective);

    let dir = affected_dir(project_root);
    std::fs::create_dir_all(&dir).context("failed to create target/affected dir")?;
    // The file name carries our PID so that two concurrent `cargo affected`
    // runs cannot clobber each other's selection between the write here and
    // nextest reading it — same reasoning as the `profraw-<pid>` staging dir.
    // Removing the file after nextest exits is the caller's job.
    let pid = std::process::id();
    let path = dir.join(format!("nextest-config-{pid}.toml"));
    std::fs::write(&path, doc.to_string())
        .with_context(|| format!("failed to write {}", path.display()))?;
    Ok(path)
}

/// Boolean cargo build flags accepted by both `cargo nextest list` and
/// `cargo nextest run` — no value token follows.
const BUILD_FLAGS_BARE: &[&str] = &[
    "--workspace",
    "--all",
    "--lib",
    "--bins",
    "--examples",
    "--tests",
    "--benches",
    "--all-targets",
    "--all-features",
    "--no-default-features",
    "--release",
    "-r",
    "--frozen",
    "--locked",
    "--offline",
    "--ignore-rust-version",
    "--future-incompat-report",
    "--unit-graph",
];

/// Long cargo build flags that consume a value — `--flag value` or the
/// joined `--flag=value`.
///
/// `--target-dir` is deliberately absent: it changes only where artifacts
/// land, not which tests exist, and `collect` already passes its own
/// `--target-dir` to `nextest_list` — forwarding a second one would make
/// `cargo nextest list` reject the duplicate.
const BUILD_FLAGS_VALUED: &[&str] = &[
    "--package",
    "--exclude",
    "--bin",
    "--example",
    "--test",
    "--bench",
    "--features",
    "--cargo-profile",
    "--target",
    "--manifest-path",
    "--build-jobs",
    "--config",
];

/// Short cargo build flags that consume a value — `-p mycrate` or the
/// joined `-pmycrate`.
const BUILD_FLAGS_SHORT_VALUED: &[&str] = &["-p", "-F", "-Z"];

/// Extract the cargo *build* flags from the post-`--` passthrough so the
/// `cargo nextest list` used for new-test detection builds the same test set
/// as the eventual `cargo nextest run`.
///
/// `list` and `run` share cargo's build options (`--features`, `-p`,
/// `--release`, …) but `run` adds runner/reporter options (`--retries`,
/// `--no-fail-fast`, `--no-tests`, …) that `list` rejects outright.
/// Forwarding the whole passthrough to `list` would break on any of those;
/// forwarding nothing lists a feature-less build while `run` builds with the
/// user's features, so "listed minus DB = new" compares two different test
/// sets. Hence an allowlist of the build flags — anything else (run-only
/// flags, test-name filters, positionals) is dropped: it either doesn't
/// affect which test binaries get built or `list` wouldn't accept it.
///
/// Scanning stops at a bare `--` inside the passthrough. Everything after it
/// is addressed to the test binaries, not to cargo, so a token there that
/// merely looks like a build flag (libtest's `-Z unstable-options`, `--test`,
/// `--bench`) must not be forwarded to `list`.
///
/// Returns the kept tokens in their original order. A valued flag given in
/// its separate form keeps the token that follows it as its value; if the
/// flag is the last token, it is forwarded alone.
pub(crate) fn cargo_build_args(nextest_args: &[String]) -> Vec<String> {
    let mut out = Vec::new();
    let mut iter = nextest_args.iter();
    while let Some(arg) = iter.next() {
        // End of nextest/cargo options: the remainder belongs to the test
        // binaries and can never be a build flag.
        if arg.as_str() == "--" {
            break;
        }

        // `--flag=value` carries its value inline; `--flag value` carries it
        // in the next token.
        let (name, has_inline_value) = match arg.split_once('=') {
            Some((flag, _)) => (flag, true),
            None => (arg.as_str(), false),
        };

        if BUILD_FLAGS_BARE.contains(&name) {
            out.push(arg.clone());
        } else if BUILD_FLAGS_VALUED.contains(&name) {
            out.push(arg.clone());
            if !has_inline_value {
                if let Some(value) = iter.next() {
                    out.push(value.clone());
                }
            }
        } else if BUILD_FLAGS_SHORT_VALUED.contains(&arg.as_str()) {
            // Separate short form: `-p mycrate`.
            out.push(arg.clone());
            if let Some(value) = iter.next() {
                out.push(value.clone());
            }
        } else if BUILD_FLAGS_SHORT_VALUED
            .iter()
            .any(|short| arg.starts_with(*short))
        {
            // Joined short form: `-pmycrate`, `-Ffeature`.
            out.push(arg.clone());
        }
    }
    out
}

/// Result of `cargo nextest list`: every testcase as a (binary_id, test_name)
/// pair, the subset that is ignored, plus per-binary metadata.
pub(crate) struct Listing {
    /// Every testcase nextest enumerated, ignored or not. The complete set —
    /// `collect --diff` prunes DB rows against it, so a merely-ignored test
    /// must stay in here or its rows would be dropped. `nextest_list` fills
    /// this from a `BTreeSet`, so it arrives sorted and without duplicates.
    pub(crate) tests: Vec<TestId>,
    /// Subset of `tests` that nextest reports as `#[ignore]`d on this
    /// platform (covers conditional `#[cfg_attr(.., ignore)]` too). These
    /// are skipped by `cargo nextest run`, so they never gain coverage;
    /// new-test detection must exclude them or they read as "new" forever.
    pub(crate) ignored: BTreeSet<TestId>,
    /// One entry per suite in the listing, in the order `nextest_list`
    /// visited them. A suite is recorded even when it has no `testcases`.
    pub(crate) binaries: Vec<BinaryEntry>,
}

/// One binary in nextest's listing. Carried for the suite-level count
/// surfaced in collect's progress output; the runner shim sources binary_id
/// directly from `NEXTEST_BINARY_ID` at test time.
#[derive(Debug, Clone)]
pub(crate) struct BinaryEntry {
    /// The suite's `binary-id` string as reported by `cargo nextest list`.
    // NOTE(review): the `dead_code` allow suggests nothing reads this field
    // directly (only the number of entries is used) — confirm before
    // removing either the field or the attribute.
    #[allow(dead_code)]
    pub(crate) binary_id: String,
}

/// Enumerate all tests via `cargo nextest list --message-format json`.
///
/// `rustflags_override` sets RUSTFLAGS in the child env (collect passes
/// `-C instrument-coverage`; run/status leave it `None` to inherit the user's
/// environment). `build_dir`, when set, routes build artifacts into that
/// directory (via `--target-dir`) and points LLVM_PROFILE_FILE at the same
/// directory so build-script profraws land alongside cargo's debug/ tree
/// rather than in the project root. Only collect passes this — run/status
/// reuse the user's default target/.
///
/// `build_args` are the cargo build flags (`--features`, `-p`, …) extracted
/// from the post-`--` passthrough by [`cargo_build_args`]. They must match
/// the build config of the subsequent `cargo nextest run`, or the listing
/// enumerates a different test set than the run builds and new-test
/// detection ("listed minus DB") becomes unsound.
pub(crate) fn nextest_list(
    project_root: &Path,
    rustflags_override: Option<&str>,
    build_dir: Option<&Path>,
    build_args: &[String],
) -> Result<Listing> {
    let mut cmd = Command::new("cargo");
    cmd.arg("nextest")
        .arg("list")
        .arg("--message-format")
        .arg("json")
        .stdout(Stdio::piped())
        .stderr(Stdio::inherit())
        .current_dir(project_root);
    if let Some(rf) = rustflags_override {
        cmd.env("RUSTFLAGS", rf);
    }
    if let Some(dir) = build_dir {
        cmd.arg("--target-dir").arg(dir);
        cmd.env("LLVM_PROFILE_FILE", dir.join("build-%p-%m.profraw"));
    }
    for a in build_args {
        cmd.arg(a);
    }
    let output = cmd
        .spawn()
        .context("failed to spawn cargo nextest list")?
        .wait_with_output()
        .context("failed to wait for cargo nextest list")?;
    if !output.status.success() {
        bail!("cargo nextest list failed (exit {:?})", output.status.code());
    }

    let stdout = std::str::from_utf8(&output.stdout)
        .context("cargo nextest list stdout was not valid UTF-8")?;
    let json: serde_json::Value =
        serde_json::from_str(stdout).context("failed to parse nextest list JSON")?;

    let mut tests = BTreeSet::new();
    let mut ignored = BTreeSet::new();
    let mut binaries = Vec::new();
    if let Some(suites) = json.get("rust-suites").and_then(|v| v.as_object()) {
        for suite in suites.values() {
            let binary_id = suite
                .get("binary-id")
                .and_then(|v| v.as_str())
                .context("nextest list entry missing binary-id")?
                .to_string();
            binaries.push(BinaryEntry {
                binary_id: binary_id.clone(),
            });
            let Some(cases) = suite.get("testcases").and_then(|v| v.as_object()) else {
                continue;
            };
            for (name, case) in cases {
                let test_id = TestId::new(binary_id.clone(), name.clone());
                let is_ignored = case
                    .get("ignored")
                    .and_then(|v| v.as_bool())
                    .context("nextest list testcase missing `ignored` flag")?;
                if is_ignored {
                    ignored.insert(test_id.clone());
                }
                tests.insert(test_id);
            }
        }
    }
    Ok(Listing {
        tests: tests.into_iter().collect(),
        ignored,
        binaries,
    })
}

/// Collect the per-test output directories under `profraw_dir`, sorted.
///
/// A directory qualifies when it sits exactly two levels down
/// (`profraw_dir/<binary_id>/<test_name>/`, the layout the shim writes) and
/// contains a `meta` sidecar. Keeping binary_id and test_name as separate
/// path components avoids the collision case where sanitization collapses
/// `::` into `_` and two genuinely-distinct (binary_id, test_name) pairs
/// would otherwise map to the same single-level name.
///
/// Non-directory entries at either level are ignored. Any failure to read a
/// directory or one of its entries is returned as an error.
fn list_test_dirs(profraw_dir: &Path) -> Result<Vec<PathBuf>> {
    let mut found: Vec<PathBuf> = Vec::new();
    for outer in std::fs::read_dir(profraw_dir)? {
        let binary_dir = outer?.path();
        if binary_dir.is_dir() {
            for inner in std::fs::read_dir(&binary_dir)? {
                let candidate = inner?.path();
                if !candidate.is_dir() {
                    continue;
                }
                if candidate.join("meta").exists() {
                    found.push(candidate);
                }
            }
        }
    }
    found.sort();
    Ok(found)
}

/// Return the path of every direct entry of `dir` whose extension is
/// `profraw`.
///
/// The result is in directory-iteration order (not sorted) and the walk does
/// not descend into subdirectories. Failure to read the directory or one of
/// its entries is returned as an error.
fn list_profraw_files(dir: &Path) -> Result<Vec<PathBuf>> {
    let mut profraws = Vec::new();
    for item in std::fs::read_dir(dir)? {
        let candidate = item?.path();
        match candidate.extension() {
            Some(ext) if ext == "profraw" => profraws.push(candidate),
            _ => {}
        }
    }
    Ok(profraws)
}

/// Verify that `cargo nextest` can be run from `project_root` and is at
/// least [`MIN_NEXTEST_VERSION`].
///
/// The version floor exists because the runner shim relies on nextest
/// setting `NEXTEST_BINARY_ID` per test invocation to attribute coverage.
///
/// # Errors
///
/// Returns an error carrying an install/upgrade hint when the command cannot
/// be run, exits unsuccessfully, or reports a version that is too old or
/// cannot be parsed.
pub(crate) fn require_nextest(project_root: &Path) -> Result<()> {
    let probe = Command::new("cargo")
        .args(["nextest", "--version"])
        .current_dir(project_root)
        .stderr(std::process::Stdio::null())
        .output();
    let stdout = match probe {
        Ok(out) if out.status.success() => out.stdout,
        _ => bail!(
            "cargo-affected requires cargo-nextest >= {MIN_NEXTEST_VERSION}. \
             Install it with `cargo install cargo-nextest --locked`."
        ),
    };

    // The first line looks like `cargo-nextest 0.9.132 (...)`; the version
    // is its second whitespace-separated field. Anything unexpected
    // degrades to "" and is then rejected by the version check.
    let text = std::str::from_utf8(&stdout).unwrap_or_default();
    let first_line = text.lines().next().unwrap_or("");
    let version = first_line.split_whitespace().nth(1).unwrap_or("");

    if nextest_version_at_least(version, MIN_NEXTEST_VERSION) {
        return Ok(());
    }
    bail!(
        "cargo-affected requires cargo-nextest >= {MIN_NEXTEST_VERSION} \
         (NEXTEST_BINARY_ID env support); found {:?}. \
         Upgrade with `cargo install cargo-nextest --locked`.",
        version,
    )
}

/// First nextest release that sets `NEXTEST_BINARY_ID` (and `NEXTEST_TEST_NAME`).
///
/// `require_nextest` rejects any installed version that does not compare
/// at least this high under `nextest_version_at_least`, and quotes this
/// value in its install/upgrade hint.
const MIN_NEXTEST_VERSION: &str = "0.9.116";

/// Compare `actual` >= `required` using semver-ish dotted-number ordering.
///
/// * Trailing pre-release/build metadata after `-` or `+` is ignored, so
///   `0.9.116-rc.1` compares like `0.9.116`.
/// * Missing trailing components count as zero, so `1.0` equals `1.0.0`.
///   (A plain `Vec` comparison would rank the shorter version lower.)
/// * Conservative: if either version has a component that is not an unsigned
///   integer — including the empty string — the result is `false`, i.e. the
///   version is treated as too old.
fn nextest_version_at_least(actual: &str, required: &str) -> bool {
    /// Numeric components of `v`, or `None` if any of them fails to parse.
    fn parts(v: &str) -> Option<Vec<u64>> {
        v.split(['-', '+'])
            .next()?
            .split('.')
            .map(|p| p.parse().ok())
            .collect()
    }

    let (Some(mut have), Some(mut want)) = (parts(actual), parts(required)) else {
        return false;
    };
    // Zero-pad to a common length so the lexicographic comparison below
    // sees `1.0` and `1.0.0` as the same version.
    let width = have.len().max(want.len());
    have.resize(width, 0);
    want.resize(width, 0);
    have >= want
}

/// Locate the LLVM tool `name` (e.g. `llvm-cov`, `llvm-profdata`).
///
/// Probes, in order, and returns the first hit:
/// 1. the active toolchain's sysroot, at
///    `<sysroot>/lib/rustlib/<host triple>/bin/<name><exe suffix>` — the file
///    must exist;
/// 2. on macOS only, whatever `xcrun --find <name>` prints;
/// 3. `PATH`, via `which` (unix) or `where` (Windows).
///
/// # Errors
///
/// Fails with an install hint when none of the probes yields a path.
fn find_llvm_tool(name: &str) -> Result<PathBuf> {
    // `EXE_SUFFIX` is `.exe` on Windows and empty elsewhere. `llvm-tools`
    // ships `llvm-cov.exe` / `llvm-profdata.exe` on `*-windows-msvc`, so the
    // bare name would never be found there.
    let exe_name = format!("{name}{}", std::env::consts::EXE_SUFFIX);

    match Command::new("rustc").args(["--print", "sysroot"]).output() {
        Ok(probe) if probe.status.success() => {
            let sysroot = String::from_utf8_lossy(&probe.stdout);
            let mut candidate = PathBuf::from(sysroot.trim());
            candidate.push("lib");
            candidate.push("rustlib");
            candidate.push(current_target());
            candidate.push("bin");
            candidate.push(&exe_name);
            if candidate.exists() {
                return Ok(candidate);
            }
        }
        _ => {}
    }

    #[cfg(target_os = "macos")]
    match Command::new("xcrun").args(["--find", name]).output() {
        Ok(probe) if probe.status.success() => {
            let printed = String::from_utf8_lossy(&probe.stdout);
            let located = printed.trim();
            if !located.is_empty() {
                return Ok(PathBuf::from(located));
            }
        }
        _ => {}
    }

    // PATH lookup. Both `which` and `where` print the resolved path on
    // stdout; `where` may print several matches, one per line, so only the
    // first line is used.
    let locator = if cfg!(windows) { "where" } else { "which" };
    match Command::new(locator).arg(&exe_name).output() {
        Ok(probe) if probe.status.success() => {
            let printed = String::from_utf8_lossy(&probe.stdout);
            let first = printed.lines().next().unwrap_or("").trim();
            if !first.is_empty() {
                return Ok(PathBuf::from(first));
            }
        }
        _ => {}
    }

    bail!(
        "could not find {name}. Install `llvm-tools` via `rustup component add llvm-tools` \
         or ensure {name} is on PATH"
    )
}

/// Render the `--config` value that installs `self_path runner-shim` as the
/// runner for `target_triple`:
/// `target.<triple>.runner=["<self_path>", "runner-shim"]`.
///
/// The value is a TOML array literal on purpose. Cargo only
/// whitespace-splits the legacy env-var form
/// (`CARGO_TARGET_<TRIPLE>_RUNNER`), which silently breaks any binary path
/// containing a space — common on Windows (`C:\Users\Joe Smith\…`) and macOS
/// (`~/Library/Application Support/…`). Each array element is a TOML basic
/// string, so `\` and `"` in the path are backslash-escaped; nothing else
/// realistic in a filesystem path needs it. A non-UTF-8 path is rendered
/// lossily.
fn format_runner_config(target_triple: &str, self_path: &Path) -> String {
    let raw = self_path.to_string_lossy();
    let mut escaped = String::with_capacity(raw.len());
    for ch in raw.chars() {
        if matches!(ch, '\\' | '"') {
            escaped.push('\\');
        }
        escaped.push(ch);
    }
    format!("target.{target_triple}.runner=[\"{escaped}\", \"runner-shim\"]")
}

/// Return the host triple reported by `rustc -vV`.
///
/// The value is taken from the first output line starting with `host:`,
/// with that prefix and surrounding whitespace removed. If `rustc` cannot be
/// run or prints no such line, the literal `"unknown"` is returned instead of
/// an error.
fn current_target() -> String {
    let Ok(output) = Command::new("rustc").arg("-vV").output() else {
        return "unknown".to_string();
    };
    let text = String::from_utf8_lossy(&output.stdout);
    for line in text.lines() {
        if line.starts_with("host:") {
            return line.trim_start_matches("host:").trim().to_string();
        }
    }
    "unknown".to_string()
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Turn a list of string literals into the owned `Vec<String>` that the
    /// `cargo_build_args` tests pass in.
    fn to_args(raw: &[&str]) -> Vec<String> {
        raw.iter().map(|arg| String::from(*arg)).collect()
    }

    /// Create a temporary project directory whose `.config/nextest.toml`
    /// holds `contents`. The returned guard owns the directory.
    fn project_with_config(contents: &str) -> tempfile::TempDir {
        let project = tempfile::tempdir().unwrap();
        let config_dir = project.path().join(".config");
        std::fs::create_dir_all(&config_dir).unwrap();
        std::fs::write(config_dir.join("nextest.toml"), contents).unwrap();
        project
    }

    /// Read the file at `path` and parse it as an editable TOML document.
    fn read_doc(path: impl AsRef<std::path::Path>) -> toml_edit::DocumentMut {
        let text = std::fs::read_to_string(path).unwrap();
        text.parse().unwrap()
    }

    /// An empty selection renders as the `none()` filterset.
    #[test]
    fn filter_expr_empty_matches_nothing() {
        let expr = nextest_filter_expr(&[]);
        assert_eq!(expr, "none()");
    }

    /// Tests sharing a binary id are OR-ed together inside one
    /// `binary_id(…) & (…)` group, and the groups come out ordered by
    /// binary id rather than by input order.
    #[test]
    fn filter_expr_groups_by_binary() {
        let selection = vec![
            TestId::new("mock-stub::builds", "builds"),
            TestId::new("wt-perf::builds", "builds"),
            TestId::new("worktrunk", "utils::tests::test_x"),
            TestId::new("worktrunk", "utils::tests::test_y"),
        ];
        let expected = "(binary_id(=mock-stub::builds) & (test(=builds))) | \
             (binary_id(=worktrunk) & (test(=utils::tests::test_x) | test(=utils::tests::test_y))) | \
             (binary_id(=wt-perf::builds) & (test(=builds)))";
        assert_eq!(nextest_filter_expr(&selection), expected);
    }

    /// `--features <value>` and `--release` are forwarded; the run-only
    /// flags — including the separate value token of `--retries` — are not.
    #[test]
    fn cargo_build_args_keeps_build_flags_drops_run_only() {
        let args = to_args(&[
            "--features",
            "shell-integration-tests",
            "--no-fail-fast",
            "--retries",
            "2",
            "--release",
            "--no-tests=warn",
        ]);
        let kept = cargo_build_args(&args);
        assert_eq!(
            kept,
            vec!["--features", "shell-integration-tests", "--release"],
        );
    }

    /// `--flag=value`, `-p <value>` and the short `-r` flag count as build
    /// args. `--max-fail=3` is run-only and the bare positional filter is
    /// neither, so both are left out.
    #[test]
    fn cargo_build_args_handles_joined_and_short_forms() {
        let args = to_args(&[
            "--features=a,b",
            "-p",
            "mycrate",
            "-r",
            "--max-fail=3",
            "some_test_filter",
        ]);
        let kept = cargo_build_args(&args);
        assert_eq!(kept, vec!["--features=a,b", "-p", "mycrate", "-r"]);
    }

    /// No input arguments means no build arguments.
    #[test]
    fn cargo_build_args_empty() {
        let no_args: Vec<String> = Vec::new();
        assert!(cargo_build_args(&no_args).is_empty());
    }

    /// Regression for the Windows command-line overflow: a large affected
    /// set used to produce an `-E` argument list exceeding Windows' ~32 KB
    /// `CreateProcess` limit (`os error 206`). The selection is now written
    /// to a config file, so the filterset may grow without bound while the
    /// command line remains a fixed `--config-file <path>`.
    #[test]
    fn large_selection_travels_in_config_file_not_argv() {
        let mut names = Vec::with_capacity(3000);
        let mut tests = Vec::with_capacity(3000);
        for i in 0..3000 {
            let name = format!("really_long_test_name_for_overflow_check_{i}");
            tests.push(TestId::new("worktrunk", &name));
            names.push(name);
        }

        // On its own the filterset is far larger than the 32 KB limit...
        let expr = nextest_filter_expr(&tests);
        assert!(
            expr.len() > 32 * 1024,
            "expected a large filterset, got {} bytes",
            expr.len()
        );

        // ...yet nextest receives it via a file. Only `--config-file <path>`
        // ends up on the command line, and its length depends on the path,
        // not on how many tests were selected.
        let dir = tempfile::tempdir().unwrap();
        let config = write_nextest_config(dir.path(), &expr).unwrap();
        assert!(config.starts_with(dir.path()));

        let written = std::fs::read_to_string(&config).unwrap();
        let absent = names
            .iter()
            .find(|n| !written.contains(&format!("test(={n})")));
        if let Some(n) = absent {
            panic!("missing {n}");
        }
    }

    /// `write_nextest_config` replaces only `default-filter`; everything
    /// else in the project's `.config/nextest.toml` is copied across so its
    /// profiles, setup-scripts and timeouts keep working. The fixture has
    /// the shape of a real consumer's config — a top-level `experimental`
    /// key, setup scripts, and an array-of-tables script binding — because
    /// `--config-file` taking over the repo config slot would otherwise
    /// silently discard all of them.
    #[test]
    fn write_nextest_config_preserves_project_settings() {
        let dir = project_with_config(
            "experimental = [\"setup-scripts\"]\n\
             \n\
             [profile.default]\n\
             slow-timeout = \"60s\"\n\
             \n\
             [profile.ci]\n\
             retries = 2\n\
             \n\
             [scripts.setup.build-bins]\n\
             command = [\"bash\", \"-c\", \"true\"]\n\
             \n\
             [[profile.default.scripts]]\n\
             filter = \"binary(integration)\"\n\
             setup = \"build-bins\"\n",
        );
        let config = write_nextest_config(dir.path(), "binary_id(=x) & test(=y)").unwrap();
        let doc = read_doc(&config);
        let default_profile = &doc["profile"]["default"];

        // The selection was stored under [profile.default].
        assert_eq!(
            default_profile["default-filter"].as_str(),
            Some("binary_id(=x) & test(=y)"),
        );

        // Everything the project configured is still there.
        assert_eq!(doc["experimental"].as_array().map(|a| a.len()), Some(1));
        assert_eq!(default_profile["slow-timeout"].as_str(), Some("60s"));
        assert_eq!(doc["profile"]["ci"]["retries"].as_integer(), Some(2));
        assert!(doc["scripts"]["setup"]["build-bins"]["command"].is_array());
        assert_eq!(
            default_profile["scripts"]
                .as_array_of_tables()
                .map(|tables| tables.len()),
            Some(1),
        );
    }

    /// A `default-filter` already set by the project is kept: the selection
    /// is AND-ed with it, which matches how the earlier inline `-E` was
    /// intersected with the default filter.
    #[test]
    fn write_nextest_config_intersects_existing_default_filter() {
        let dir = project_with_config("[profile.default]\ndefault-filter = \"not test(slow)\"\n");
        let config = write_nextest_config(dir.path(), "test(=y)").unwrap();
        let doc = read_doc(&config);
        assert_eq!(
            doc["profile"]["default"]["default-filter"].as_str(),
            Some("(test(=y)) & (not test(slow))"),
        );
    }

    /// Without a project config a new file is produced that contains only
    /// the selection. Its name embeds the process id so that concurrent
    /// invocations do not write to the same file.
    #[test]
    fn write_nextest_config_without_project_config() {
        let dir = tempfile::tempdir().unwrap();
        let config = write_nextest_config(dir.path(), "test(=solo)").unwrap();

        let file_name = config.file_name().unwrap().to_str().unwrap();
        let pid = std::process::id().to_string();
        assert!(file_name.contains(&pid));

        let doc = read_doc(&config);
        assert_eq!(
            doc["profile"]["default"]["default-filter"].as_str(),
            Some("test(=solo)"),
        );
    }

    /// A plain path is rendered as a two-element TOML array.
    #[test]
    fn runner_config_uses_toml_array_form() {
        let path = PathBuf::from("/usr/local/bin/cargo-affected");
        let rendered = format_runner_config("aarch64-apple-darwin", &path);
        assert_eq!(
            rendered,
            r#"target.aarch64-apple-darwin.runner=["/usr/local/bin/cargo-affected", "runner-shim"]"#,
        );
    }

    /// Paths with spaces are the reason for the array form: cargo's
    /// `CARGO_TARGET_<TRIPLE>_RUNNER` env var is only whitespace-split, so
    /// such a path would be tokenised incorrectly.
    #[test]
    fn runner_config_preserves_spaces_in_path() {
        let path = PathBuf::from("/Users/Joe Smith/.cargo/bin/cargo-affected");
        let rendered = format_runner_config("aarch64-apple-darwin", &path);
        assert_eq!(
            rendered,
            r#"target.aarch64-apple-darwin.runner=["/Users/Joe Smith/.cargo/bin/cargo-affected", "runner-shim"]"#,
        );
    }

    /// Windows paths contain backslashes and, in pathological cases,
    /// double quotes; inside the array both must be escaped as required for
    /// a TOML basic string.
    #[test]
    fn runner_config_escapes_backslashes_and_quotes() {
        let path = PathBuf::from(r#"C:\Users\Joe "Q" Smith\cargo-affected.exe"#);
        let rendered = format_runner_config("x86_64-pc-windows-msvc", &path);
        assert_eq!(
            rendered,
            r#"target.x86_64-pc-windows-msvc.runner=["C:\\Users\\Joe \"Q\" Smith\\cargo-affected.exe", "runner-shim"]"#,
        );
    }

    /// Versions are compared component-wise as numbers, not as strings.
    #[test]
    fn nextest_version_compares_dotted_numbers() {
        let at_least = |version: &str| nextest_version_at_least(version, "0.9.116");

        assert!(at_least("0.9.116"));
        assert!(at_least("0.9.132"));
        assert!(at_least("0.10.0"));
        assert!(at_least("1.0.0"));
        assert!(!at_least("0.9.115"));
        assert!(!at_least("0.9.99"));
        assert!(!at_least("0.8.999"));
        // Anything following `-` (pre-release) or `+` (build metadata) is
        // disregarded.
        assert!(at_least("0.9.132-dev"));
        assert!(at_least("0.9.116+sha.abc"));
        // Input that cannot be parsed is conservatively treated as too old.
        assert!(!at_least("garbage"));
        assert!(!at_least(""));
    }
}