ktstr 0.4.16

Test harness for Linux process schedulers
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
//! Generalized test payload — scheduler or binary workload.
//!
//! [`Payload`] is the primitive that `#[ktstr_test]` consumes for both
//! the scheduler slot and the optional binary/workload slots. A
//! payload's [`PayloadKind`] determines how it's launched: a
//! [`Scheduler`](crate::test_support::Scheduler) reference invokes the
//! existing scheduler-spawn path; a bare binary name spawns the binary
//! via the runtime [`PayloadRun`](crate::scenario::payload_run::PayloadRun)
//! builder.
//!
//! The constants this module exposes — particularly
//! [`Payload::KERNEL_DEFAULT`] — are used as the default scheduler
//! slot when no `scheduler = ...` attribute is supplied on a
//! `#[ktstr_test]`. `KERNEL_DEFAULT` wraps whatever scheduler the
//! running kernel selects when no sched_ext scheduler is attached
//! (EEVDF on Linux 6.6+) and surfaces on the wire as
//! `"kernel_default"`.
//!
//! [`KtstrTestEntry`](crate::test_support::KtstrTestEntry) carries
//! `payload` and `workloads` fields populated by the `#[ktstr_test]`
//! macro's `payload = ...` and `workloads = [...]` attributes.

use crate::test_support::Scheduler;

// ---------------------------------------------------------------------------
// Payload + PayloadKind
// ---------------------------------------------------------------------------

/// A test payload — either a scheduler or a userspace binary to run
/// inside the guest VM.
///
/// `Payload` unifies the two launch modes under one `#[ktstr_test]`
/// attribute surface: tests declare `scheduler = SOME_SCHED` for
/// scheduler-centric runs, `payload = SOME_BIN` for binary runs, or
/// both with `workloads = [...]` to compose binaries under a
/// scheduler. See [`PayloadKind`] for the two variants.
///
/// Use [`Payload::KERNEL_DEFAULT`] as the default scheduler
/// placeholder when a test doesn't attach a sched_ext scheduler —
/// it wraps the kernel's default scheduler (EEVDF on Linux 6.6+)
/// via [`Scheduler::EEVDF`].
///
/// `Payload` intentionally does NOT implement [`serde::Serialize`] /
/// [`serde::Deserialize`]. It is a compile-time-static definition that
/// references `&'static Scheduler` and `&'static [&'static str]`
/// slices — lifetimes that serialization cannot round-trip. Runtime
/// telemetry (per-payload metrics, exit codes, names) is serialized
/// via [`PayloadMetrics`] and [`Metric`] instead; those own their
/// data.
///
/// `Debug` is implemented by hand because the wrapped `Scheduler`
/// is not `Debug`. The rendering is a summary, not a full dump: it
/// prints `name`, `kind`, `output`, and the lengths of
/// `default_args`, `default_checks`, and `metrics`; the remaining
/// fields do not appear in the output.
///
/// `#[non_exhaustive]` reserves the right to add fields without
/// breaking downstream code. Out-of-crate callers cannot construct
/// `Payload` via struct literal — use the const-fn constructors
/// ([`Payload::new`], [`Payload::from_scheduler`], [`Payload::binary`])
/// or the derive macros (`#[derive(Scheduler)]`, `#[derive(Payload)]`),
/// which route through [`Payload::new`] under the hood.
#[derive(Clone, Copy)]
#[non_exhaustive]
pub struct Payload {
    /// Short, stable name used in logs and sidecar records.
    pub name: &'static str,
    /// Launch kind — scheduler reference or binary name.
    pub kind: PayloadKind,
    /// How the framework extracts metrics from the payload's
    /// stdout, with stderr fallback when stdout yields no metrics.
    /// See [`OutputFormat`] for the per-variant contract and
    /// `scenario::payload_run` for the fallback mechanics.
    pub output: OutputFormat,
    /// Default CLI args appended when this payload runs. Test bodies
    /// can extend via `.arg(...)` or replace via `.clear_args()` +
    /// `.arg(...)` on the runtime builder.
    pub default_args: &'static [&'static str],
    /// Author-declared default checks evaluated against extracted
    /// [`PayloadMetrics`]. Payloads that need exit-code gating
    /// should include [`MetricCheck::ExitCodeEq(0)`](MetricCheck::ExitCodeEq)
    /// here; the runtime evaluates `ExitCodeEq` as a pre-pass
    /// before metric checks.
    pub default_checks: &'static [MetricCheck],
    /// Declared metric hints — polarity, unit. Unhinted metrics
    /// extracted from output land as [`Polarity::Unknown`].
    pub metrics: &'static [MetricHint],
    /// Host-side file specs resolved at runtime. Each entry is
    /// resolved through the framework's include-file pipeline — the
    /// same resolver used by CLI `-i` / `--include-files` arguments:
    /// bare names are searched in the host's `PATH`, explicit paths
    /// (absolute, relative, or containing `/`) must exist on the
    /// host, and directories are walked recursively. The entry's
    /// scheduler / payload / workloads / extra_include_files are
    /// aggregated at test time via
    /// [`KtstrTestEntry::all_include_files`](crate::test_support::KtstrTestEntry::all_include_files)
    /// and resolved through the same pipeline the `ktstr shell -i`
    /// path uses. Populate via the
    /// `#[include_files("helper", ...)]` attribute on
    /// `#[derive(Payload)]` or by spelling the array in the struct
    /// literal (struct literals are available in-crate only — the
    /// type is `#[non_exhaustive]`).
    pub include_files: &'static [&'static str],
    /// When `true`, the payload's spawn path does NOT place the
    /// child into its own process group via
    /// `CommandExt::process_group(0)`. The child inherits the
    /// parent ktstr process's pgid. Default (`false`) keeps the
    /// existing "fresh pgrp → killpg-reaches-descendants" model
    /// — see `src/scenario/payload_run.rs::build_command`.
    ///
    /// Opt-in for tty-dependent binaries: a shell-like tool that
    /// uses the controlling terminal's foreground process group
    /// for signal delivery (job-control signals, SIGHUP on tty
    /// close) reads a fresh pgrp as "no job control", which
    /// breaks interactive shells and `less`-style readers.
    /// Payloads that need tty job-control semantics set this
    /// true so they stay in the parent's pgrp and keep the
    /// inherited controlling-terminal association.
    ///
    /// Trade-off on the `true` branch: multi-process payloads
    /// can no longer be killed via `killpg(child_pid, SIGKILL)`
    /// because the child is not a pgrp leader; the kill path
    /// falls back to single-pid `kill(pid, SIGKILL)` and any
    /// descendants that the payload forks must either react to
    /// SIGHUP / pipe close or run the risk of orphaning. Most
    /// payloads should leave this `false`.
    pub uses_parent_pgrp: bool,
    /// When `Some`, the listed flag names form an allowlist that
    /// `Op::RunPayload` validation checks against at scenario-
    /// execution time (inside `apply_ops`, before the payload
    /// spawn) — any user-supplied `--flag` whose name is not in
    /// the allowlist produces an error reported through the step
    /// executor, so typos fail loudly instead of becoming silent
    /// no-ops that only manifest as "feature didn't activate" in
    /// the test output.
    ///
    /// `None` (default) disables validation — the payload accepts
    /// arbitrary flag sets. Use `None` for payloads that wrap
    /// binaries with open-ended flag surfaces (stress-ng, fio,
    /// schbench) where enumerating every accepted flag is either
    /// impossible or high-churn.
    ///
    /// `Some(&[])` is legal but rarely intended: it rejects EVERY
    /// long flag, including ones the wrapped binary legitimately
    /// accepts. Use `None` for "no validation" and a non-empty
    /// slice for "validate against this allowlist" — an empty
    /// slice means "only positional args and short flags are
    /// acceptable", which is almost never what a Payload author
    /// wants.
    ///
    /// Flag names in the slice are bare (no leading `--`) and
    /// match the syntax of `Op::RunPayload`'s per-flag slot.
    pub known_flags: Option<&'static [&'static str]>,

    /// Per-payload validation bounds applied host-side to
    /// extracted metrics (`OutputFormat::LlmExtract` only). When
    /// `None` (the default), only the framework's universal
    /// invariants apply. When `Some(&MetricBounds { … })`, each
    /// declared bound is enforced after extraction in
    /// [`crate::test_support::eval::host_side_llm_extract`] and
    /// any violation surfaces as an [`crate::assert::AssertDetail`].
    /// See [`MetricBounds`] for the per-bound contract and the
    /// rationale for declaring this per-payload rather than
    /// globally.
    ///
    /// `Json` and `ExitCode` payloads ignore this field — their
    /// extraction runs guest-side and goes through `default_checks`
    /// for assertion. The bound-checking pass is host-only because
    /// LlmExtract metrics are extracted host-side post-VM-exit.
    pub metric_bounds: Option<&'static MetricBounds>,
}

impl std::fmt::Debug for Payload {
    /// Writes a compact identity summary of the payload.
    ///
    /// `name`, `kind`, and `output` are printed as-is; the
    /// `default_args`, `default_checks`, and `metrics` slices are
    /// reduced to their lengths. All other fields are left out of
    /// the rendering.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // `Scheduler` (reachable through `kind`) has no `Debug`
        // impl, so this impl is written by hand. Only the fields
        // that identify the payload are pulled out, which keeps
        // panic messages and trace logs short.
        let Self {
            name,
            kind,
            output,
            default_args,
            default_checks,
            metrics,
            ..
        } = self;

        let mut summary = f.debug_struct("Payload");
        summary.field("name", name);
        summary.field("kind", kind);
        summary.field("output", output);
        summary.field("default_args_len", &default_args.len());
        summary.field("default_checks_len", &default_checks.len());
        summary.field("metrics_len", &metrics.len());
        summary.finish()
    }
}

/// How a payload is launched inside the guest.
///
/// Two variants — scheduler and binary — map to the two launch paths
/// in the runtime. "Kernel default" (EEVDF) is represented as
/// `Scheduler(&Scheduler::EEVDF)` rather than a dedicated variant
/// because [`Scheduler`] already carries the no-userspace-binary
/// taxonomy via its own `binary: SchedulerSpec` field.
#[derive(Clone, Copy)]
pub enum PayloadKind {
    /// Wraps an existing [`Scheduler`] definition. The scheduler's
    /// own `binary: SchedulerSpec` carries the Eevdf/Discover/Path/
    /// KernelBuiltin taxonomy — no duplication at the Payload level.
    Scheduler(&'static Scheduler),
    /// Bare userspace binary looked up by name in the guest. Not a
    /// scheduler — runs as a workload under whatever scheduler the
    /// test declares.
    ///
    /// # How the binary reaches the guest
    ///
    /// The stored `&'static str` is the executable name passed to
    /// `std::process::Command::new` inside the guest (see
    /// [`PayloadRun::run`](crate::scenario::payload_run::PayloadRun::run)),
    /// which resolves it against the guest's `PATH`. The framework
    /// resolves binaries through the include-file pipeline — for
    /// `#[ktstr_test]` entries via declarative `include_files` /
    /// `extra_include_files`, or via `-i` on `ktstr shell`.
    ///
    /// On the command line, the pipeline is wired up to the `shell`
    /// subcommand of both `ktstr` and `cargo ktstr` through the
    /// repeatable `-i` / `--include-files` flag. Each `-i` argument
    /// accepts:
    ///
    /// - an explicit path (absolute, relative, or containing `/`) —
    ///   must exist on the host;
    /// - a bare name — searched in `PATH` on the host;
    /// - a directory — walked recursively, preserving structure under
    ///   `/include-files/<dirname>/...` in the guest.
    ///
    /// Every regular file ends up at `/include-files/<name>` (or
    /// deeper for directory walks). Dynamically-linked ELFs pull in
    /// their `DT_NEEDED` shared libraries automatically; the guest
    /// init prepends every `/include-files/*` subdirectory containing
    /// an executable to `PATH`, so a binary packaged with `-i` is
    /// runnable by bare name from a test body.
    ///
    /// Example — launch a shell VM with `fio` available by bare name:
    ///
    /// ```sh
    /// cargo ktstr shell -i fio --exec "fio --version"
    /// ```
    ///
    /// The `fio` binary is resolved against the host's `PATH`, copied
    /// to `/include-files/fio` in the guest, exposed on the guest
    /// `PATH`, and spawnable as `fio` from any guest-side process.
    ///
    /// # `#[ktstr_test]` entries
    ///
    /// Declarative `include_files` on `#[derive(Payload)]` and
    /// `extra_include_files` on `#[ktstr_test]` handle binary
    /// packaging automatically — no CLI `-i` and no bespoke harness
    /// needed.
    ///
    /// # Scheduler config files
    ///
    /// Scheduler-kind payloads that set
    /// [`Scheduler`](crate::test_support::Scheduler)'s `config_file`
    /// field get automatic packaging: the config file is placed at
    /// `/include-files/{filename}` without a `-i` flag — the field
    /// is the source the harness reads.
    ///
    /// # Binary-kind packaging
    ///
    /// Payloads built via `#[derive(Payload)]` get automatic binary
    /// packaging: the derive macro prepends the `binary = "..."`
    /// spec to the emitted `include_files` slice, so the spawn
    /// target is packaged into the guest without requiring a
    /// separate `#[include_files("...")]` entry. Auxiliary files
    /// the payload needs (helpers, config files, fixtures) still
    /// go on `#[include_files(...)]` — the derive only injects the
    /// primary binary.
    ///
    /// Payloads constructed manually via struct literal (rather
    /// than the derive) do not get this auto-injection: the
    /// harness does not derive `include_files` from the
    /// `PayloadKind::Binary(name)` at aggregation time. Manual
    /// constructions must list the binary in
    /// [`Payload::include_files`](Payload::include_files)
    /// themselves, or declare it on
    /// [`extra_include_files`](crate::test_support::KtstrTestEntry::extra_include_files)
    /// at the `#[ktstr_test]` level. A binary referenced at spawn
    /// time but neither auto-injected nor listed as an include is
    /// expected to already be present in the guest filesystem
    /// (e.g. a standard `busybox` applet on the base image);
    /// otherwise the omission surfaces as `ENOENT` at `exec` time
    /// inside the guest.
    ///
    /// # Fork / kill semantics
    ///
    /// By default a binary-kind payload is spawned in its own
    /// process group via `CommandExt::process_group(0)` in
    /// [`build_command`](crate::scenario::payload_run) so the
    /// framework can reach every descendant the binary forks.
    /// (Setting [`Payload::uses_parent_pgrp`] opts out of the fresh
    /// process group; see that field for the trade-offs.) Direct
    /// consequences for test authors:
    ///
    /// - `std::process::Child::kill()` only targets the direct child
    ///   — a `fork()`ed descendant (stress-ng worker, fio `--numjobs`,
    ///   schbench worker mode, pipeline subshells under `sh -c`)
    ///   survives. Never call `child.kill()` directly on a payload
    ///   `Child`; the handle's `kill()` wrapper fans out SIGKILL to
    ///   the whole process group via `killpg`.
    /// - [`PayloadHandle::kill`](crate::scenario::payload_run::PayloadHandle::kill),
    ///   [`PayloadHandle::wait`](crate::scenario::payload_run::PayloadHandle::wait)
    ///   cleanup, and the panic-safety Drop arm all route through
    ///   `kill_payload_process_group`, which issues `killpg(pgid,
    ///   SIGKILL)` followed by a single-pid SIGKILL fallback so
    ///   descendants and the leader both exit. This is the only kill
    ///   path test authors need.
    /// - Pipe drainers (stdout / stderr reader threads) block on EOF,
    ///   which only arrives after every descendant holding the
    ///   write ends closes them. A bare `child.kill()` leaves the
    ///   descendants holding the pipes open and `wait_and_capture`
    ///   hangs forever — motivating the `killpg` requirement.
    Binary(&'static str),
}

impl std::fmt::Debug for PayloadKind {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Scheduler does not implement Debug; render variant +
        // identity summary.
        match self {
            PayloadKind::Scheduler(s) => f.debug_tuple("Scheduler").field(&s.name).finish(),
            PayloadKind::Binary(name) => f.debug_tuple("Binary").field(name).finish(),
        }
    }
}

impl Payload {
    /// Placeholder payload that wraps the current kernel-default
    /// scheduler — [`Scheduler::EEVDF`] on Linux 6.6+ (the "no scx
    /// scheduler attached" case). Used as the default value of the
    /// `scheduler` slot on
    /// [`KtstrTestEntry`](crate::test_support::KtstrTestEntry) so
    /// tests without an explicit `scheduler = ...` attribute still
    /// get a valid, non-optional reference. Wire name is
    /// `"kernel_default"` — the Rust const and the serialized form
    /// agree, so the const describes what it selects for (the
    /// kernel's default) rather than naming a specific scheduler that
    /// a future kernel release could replace.
    ///
    /// ## `kernel_default` vs `eevdf` in sidecars
    ///
    /// `KERNEL_DEFAULT.name` is `"kernel_default"` (the intent-level
    /// label), while `KERNEL_DEFAULT.scheduler_name()` returns
    /// `"eevdf"` (the inner [`Scheduler::EEVDF`]'s `.name`). The two
    /// names answer different questions:
    ///
    /// - `"kernel_default"` answers "what did the test author select?"
    ///   — a future kernel release replacing EEVDF keeps this label
    ///   stable, so an in-memory match on author intent survives
    ///   kernel upgrades.
    /// - `"eevdf"` answers "what scheduler actually ran?" — the
    ///   concrete scheduling class in effect.
    ///
    /// **From the scheduler slot, only `scheduler_name()` reaches
    /// the sidecar.** The `SidecarResult.scheduler` field
    /// (src/test_support/sidecar.rs) is populated via
    /// `entry.scheduler.scheduler_name()` — the method is called on
    /// the payload in the scheduler slot, not on payload / workload
    /// slots, which route through separate serialization paths — and
    /// emits `"eevdf"` when the scheduler slot holds `KERNEL_DEFAULT`.
    /// The outer `Payload.name` (`"kernel_default"`) is NOT written
    /// to the sidecar — it stays in-memory only, used by logs,
    /// `#[ktstr_test]`-declaration lookups, and
    /// `Payload::display_name()`. Cross-kernel-version comparisons
    /// via sidecar `scheduler` therefore see `"eevdf"` today and
    /// whatever future scheduling class replaces EEVDF tomorrow;
    /// author-intent filtering on `"kernel_default"` requires
    /// consulting the in-memory `Payload::name` directly, not the
    /// sidecar.
    pub const KERNEL_DEFAULT: Payload = Payload::new(
        // name — the intent-level label, deliberately not the inner
        // scheduler's own name.
        "kernel_default",
        // kind
        PayloadKind::Scheduler(&Scheduler::EEVDF),
        // output
        OutputFormat::ExitCode,
        // default_args
        &[],
        // default_checks
        &[],
        // metrics
        &[],
        // include_files
        &[],
        // uses_parent_pgrp
        false,
        // known_flags
        None,
        // metric_bounds
        None,
    );

    /// Short, human-readable name for logging and sidecar output.
    ///
    /// Returns the payload's own `name` field unchanged. For a
    /// scheduler-kind payload this is not necessarily the same
    /// string as [`Payload::scheduler_name`], which reads the inner
    /// scheduler's `name` instead: `KERNEL_DEFAULT.display_name()`
    /// is `"kernel_default"`, while `KERNEL_DEFAULT.scheduler_name()`
    /// is the wrapped [`Scheduler::EEVDF`]'s name.
    pub const fn display_name(&self) -> &'static str {
        self.name
    }

    /// Borrow the wrapped [`Scheduler`], if this payload has one.
    ///
    /// `Some` for [`PayloadKind::Scheduler`] payloads, `None` for
    /// [`PayloadKind::Binary`] payloads.
    pub const fn as_scheduler(&self) -> Option<&'static Scheduler> {
        match &self.kind {
            PayloadKind::Scheduler(sched) => Some(*sched),
            PayloadKind::Binary(_) => None,
        }
    }

    /// True when this payload wraps a [`Scheduler`] (scheduler
    /// slot). False for binary payloads.
    pub const fn is_scheduler(&self) -> bool {
        matches!(self.kind, PayloadKind::Scheduler(_))
    }

    /// Primary const constructor for a [`Payload`].
    ///
    /// Takes every field by position so the two derive macros
    /// (`#[derive(Scheduler)]` / `#[derive(Payload)]`) can emit a
    /// single call instead of a struct-literal. `#[non_exhaustive]`
    /// on the struct prevents out-of-crate struct-literal
    /// construction; this constructor — defined in the same crate
    /// as `Payload` — is not subject to that restriction, so the
    /// macro-expanded tokens that reach downstream crates compile
    /// cleanly.
    ///
    /// For one-field constructions prefer [`Payload::from_scheduler`]
    /// or [`Payload::binary`] — both call into this helper and pin
    /// the non-identity fields to the exit-code-only defaults.
    #[allow(clippy::too_many_arguments)]
    pub const fn new(
        name: &'static str,
        kind: PayloadKind,
        output: OutputFormat,
        default_args: &'static [&'static str],
        default_checks: &'static [MetricCheck],
        metrics: &'static [MetricHint],
        include_files: &'static [&'static str],
        uses_parent_pgrp: bool,
        known_flags: Option<&'static [&'static str]>,
        metric_bounds: Option<&'static MetricBounds>,
    ) -> Payload {
        Payload {
            name,
            kind,
            output,
            default_args,
            default_checks,
            metrics,
            include_files,
            uses_parent_pgrp,
            known_flags,
            metric_bounds,
        }
    }

    /// Wrap an existing `&'static Scheduler` in a scheduler-kind
    /// `Payload`.
    ///
    /// The payload's `name` is copied from the scheduler's `name` so
    /// the two surfaces render with matching identity. Every other
    /// field takes the exit-code-only defaults:
    /// [`OutputFormat::ExitCode`], empty slices, `false`, and `None`.
    ///
    /// Used by unit tests and by the `#[derive(Scheduler)]` wrapper
    /// emission, which produces the `{CONST}_PAYLOAD` const alongside
    /// the Scheduler const.
    pub const fn from_scheduler(sched: &'static Scheduler) -> Payload {
        let kind = PayloadKind::Scheduler(sched);
        let output = OutputFormat::ExitCode;
        Payload::new(sched.name, kind, output, &[], &[], &[], &[], false, None, None)
    }

    /// Minimal const constructor for a binary-kind [`Payload`]. Fills
    /// the non-identity fields with the exit-code-only defaults — no
    /// CLI args, no author-declared checks, no metric hints, and
    /// [`OutputFormat::ExitCode`] — so a `#[ktstr_test]` entry or a
    /// direct unit test can declare a runnable binary with one line
    /// instead of spelling out the full struct literal.
    ///
    /// The `binary` string is the executable name passed to
    /// `std::process::Command::new` inside the guest. Supply it to
    /// the guest via `-i` / `--include-files` for CLI invocations or
    /// pre-install it in the initramfs for `#[ktstr_test]` entries —
    /// see [`PayloadKind::Binary`] for the full packaging contract.
    ///
    /// Pair with [`Payload::from_scheduler`] for the scheduler side
    /// of the same constructor surface.
    pub const fn binary(name: &'static str, binary: &'static str) -> Payload {
        // `name` labels the payload; `binary` is the executable the
        // binary-kind payload wraps. Everything after `output` is the
        // empty / `false` / `None` default.
        let kind = PayloadKind::Binary(binary);
        let output = OutputFormat::ExitCode;
        Payload::new(name, kind, output, &[], &[], &[], &[], false, None, None)
    }

    // -----------------------------------------------------------------
    // Scheduler-slot forwarding accessors
    //
    // These methods let every site that consumed `entry.scheduler:
    // &Scheduler` read the equivalent field off `entry.scheduler:
    // &Payload` without the caller having to unwrap
    // `as_scheduler()`. For a scheduler-kind payload the accessor
    // forwards to the inner `Scheduler`. For a binary-kind payload
    // the accessor returns a sensible default — usually the empty
    // slice or the no-op value — matching the semantics a binary
    // payload in the scheduler slot should carry (no sysctls, no
    // kargs, no scheduler-specific CLI flags).
    //
    // The binary-kind branch is not "best effort": a binary payload
    // in the scheduler slot is a valid configuration (pure userspace
    // test under the kernel default scheduler), and every accessor
    // below returns exactly what that scenario should see.
    // -----------------------------------------------------------------

    /// The scheduler's display name.
    ///
    /// Returns a compile-time-fixed LABEL, not a runtime reflection
    /// of the scheduling class the live kernel is actually running.
    /// A sidecar written on a kernel whose default is a successor
    /// scheduling class still records whatever string this method
    /// returns — the label comes from the `Payload` / inner
    /// `Scheduler` definition, nothing queries `/proc` or the live
    /// policy. Consumers that need to know the running kernel's
    /// scheduling class must cross-reference the sidecar's
    /// `host.kernel_release` with kernel-version-to-scheduler
    /// knowledge maintained outside the sidecar.
    ///
    /// Branch behavior:
    /// - `PayloadKind::Scheduler(s)` → `s.name` — the label attached
    ///   to that specific scheduler, e.g. `"eevdf"` for
    ///   [`Scheduler::EEVDF`] or `"scx_rusty"` for a scx_*
    ///   scheduler. This is what scheduler-kind payloads (including
    ///   `Payload::KERNEL_DEFAULT`, which wraps [`Scheduler::EEVDF`])
    ///   surface.
    /// - `PayloadKind::Binary(_)` → `"kernel_default"` — a binary
    ///   payload runs under whatever scheduler the test declares
    ///   elsewhere (or the kernel default if it declares none), so
    ///   the binary-kind payload carries no scheduler identity of
    ///   its own. The returned string is a LABEL ("test author did
    ///   not pin a scheduler here"), NOT a statement about which
    ///   scheduling class the VM actually ran under — the live
    ///   kernel may be running EEVDF, a successor class, or an scx
    ///   scheduler the binary's test harness attached separately;
    ///   `scheduler_name()` does not observe any of that. Only a
    ///   scheduler-kind payload explicitly wrapping
    ///   [`Scheduler::EEVDF`] returns the `"eevdf"` label; every
    ///   binary-kind payload returns `"kernel_default"` regardless
    ///   of what class is running.
    pub const fn scheduler_name(&self) -> &'static str {
        match self.kind {
            PayloadKind::Scheduler(s) => s.name,
            PayloadKind::Binary(_) => "kernel_default",
        }
    }

    /// Borrow the wrapped scheduler's `SchedulerSpec`; `None` when
    /// the payload is binary-kind.
    ///
    /// Intended for consumers that dispatch on the `SchedulerSpec`
    /// variant (e.g. `KernelBuiltin { enable, disable }` hook
    /// invocation) and therefore need more than the
    /// `scheduler_name` shortcut provides.
    pub const fn scheduler_binary(&self) -> Option<&'static crate::test_support::SchedulerSpec> {
        match self.as_scheduler() {
            Some(sched) => Some(&sched.binary),
            None => None,
        }
    }

    /// True when this payload drives an active scheduling policy
    /// (anything other than the kernel default EEVDF). Forwards to
    /// `SchedulerSpec::has_active_scheduling` for scheduler-kind
    /// payloads; binary-kind payloads always return `false` — a
    /// binary runs under whatever scheduler the test declares, and
    /// does not itself impose one.
    pub const fn has_active_scheduling(&self) -> bool {
        match self.kind {
            PayloadKind::Scheduler(s) => s.binary.has_active_scheduling(),
            PayloadKind::Binary(_) => false,
        }
    }

    /// Flag declarations of the wrapped scheduler. Binaries have no
    /// scheduler flags, so binary-kind payloads yield an empty slice.
    pub const fn flags(&self) -> &'static [&'static crate::scenario::flags::FlagDecl] {
        match self.as_scheduler() {
            Some(sched) => sched.flags,
            None => &[],
        }
    }

    /// Sysctls applied in the guest before the scheduler starts, as
    /// declared on the wrapped scheduler. Binary-kind payloads yield
    /// an empty slice.
    pub const fn sysctls(&self) -> &'static [crate::test_support::Sysctl] {
        match self.as_scheduler() {
            Some(sched) => sched.sysctls,
            None => &[],
        }
    }

    /// Extra guest kernel command-line arguments appended when
    /// booting the VM. Empty slice for binary-kind payloads.
    pub const fn kargs(&self) -> &'static [&'static str] {
        match self.kind {
            PayloadKind::Scheduler(s) => s.kargs,
            PayloadKind::Binary(_) => &[],
        }
    }

    /// Scheduler CLI args prepended before per-test `extra_sched_args`.
    /// Empty slice for binary-kind payloads.
    pub const fn sched_args(&self) -> &'static [&'static str] {
        match self.kind {
            PayloadKind::Scheduler(s) => s.sched_args,
            PayloadKind::Binary(_) => &[],
        }
    }

    /// Cgroup parent path of the wrapped scheduler. `None` when the
    /// payload is binary-kind, and also when the scheduler did not
    /// set one.
    pub const fn cgroup_parent(&self) -> Option<crate::test_support::CgroupPath> {
        match self.as_scheduler() {
            Some(sched) => sched.cgroup_parent,
            None => None,
        }
    }

    /// Host-side path to the scheduler config file. `None` for
    /// binary-kind payloads and for scheduler-kind payloads that
    /// did not set one.
    pub const fn config_file(&self) -> Option<&'static str> {
        match self.kind {
            PayloadKind::Scheduler(s) => s.config_file,
            PayloadKind::Binary(_) => None,
        }
    }

    /// Scheduler-wide assertion overrides of the wrapped scheduler.
    ///
    /// Binary-kind payloads return `Assert::NO_OVERRIDES`, the
    /// identity value for the merge: it leaves per-entry assertions
    /// untouched.
    pub const fn assert(&self) -> &'static crate::assert::Assert {
        match self.as_scheduler() {
            Some(sched) => &sched.assert,
            None => &crate::assert::Assert::NO_OVERRIDES,
        }
    }

    /// Names of all scheduler flags the scheduler-kind payload
    /// supports. Empty for binary-kind.
    pub fn supported_flag_names(&self) -> Vec<&'static str> {
        match self.kind {
            PayloadKind::Scheduler(s) => s.supported_flag_names(),
            PayloadKind::Binary(_) => Vec::new(),
        }
    }

    /// Extra CLI args associated with a scheduler flag. Always
    /// `None` for binary-kind.
    pub fn flag_args(&self, name: &str) -> Option<&'static [&'static str]> {
        match self.kind {
            PayloadKind::Scheduler(s) => s.flag_args(name),
            PayloadKind::Binary(_) => None,
        }
    }

    /// Default VM topology for this payload.
    ///
    /// Scheduler-kind payloads expose the topology declared on the
    /// inner `Scheduler`, so tests that inherit from the scheduler
    /// slot stay consistent with the rest of that scheduler's test
    /// surface. Binary-kind payloads return the minimal placeholder
    /// [`Topology::DEFAULT_FOR_PAYLOAD`](crate::test_support::Topology::DEFAULT_FOR_PAYLOAD):
    /// a pure binary workload has no scheduler-level topology
    /// opinion, so per-entry `#[ktstr_test(...)]` overrides are what
    /// actually drive the VM shape.
    pub const fn topology(&self) -> crate::test_support::Topology {
        match self.as_scheduler() {
            Some(sched) => sched.topology,
            None => crate::test_support::Topology::DEFAULT_FOR_PAYLOAD,
        }
    }

    /// Gauntlet topology constraints. Scheduler-kind payloads report
    /// the inner `Scheduler::constraints`; binary-kind payloads
    /// report [`TopologyConstraints::DEFAULT`].
    pub const fn constraints(&self) -> crate::test_support::TopologyConstraints {
        match self.as_scheduler() {
            Some(sched) => sched.constraints,
            None => crate::test_support::TopologyConstraints::DEFAULT,
        }
    }

    /// Produce the scheduler-flag profiles used for gauntlet
    /// expansion.
    ///
    /// Scheduler-kind payloads delegate to
    /// [`Scheduler::generate_profiles`] with the same `required` and
    /// `excluded` lists. Binary-kind payloads ignore both lists and
    /// return exactly one profile with no flags.
    pub fn generate_profiles(
        &self,
        required: &[&'static str],
        excluded: &[&'static str],
    ) -> Vec<crate::scenario::FlagProfile> {
        let Some(sched) = self.as_scheduler() else {
            // A binary has no scheduler flags, but the gauntlet
            // expander still wants one profile to run the test under.
            return vec![crate::scenario::FlagProfile { flags: Vec::new() }];
        };
        sched.generate_profiles(required, excluded)
    }
}

// ---------------------------------------------------------------------------
// OutputFormat
// ---------------------------------------------------------------------------

/// How the framework extracts metrics from a payload's output.
///
/// `ExitCode` records only the exit code; no text parsing. `Json`
/// finds a JSON document region and walks numeric leaves into
/// [`Metric`] values. `LlmExtract` routes the same text through a
/// local small-model prompt that produces JSON, then runs the same
/// JSON walker — one extraction pipeline, two acquisition paths.
/// Unlike `Json`, the `LlmExtract` step runs host-side after the VM
/// exits; see the variant docs for what that means for test bodies.
///
/// For `Json` and `LlmExtract`, extraction is stdout-primary with a
/// stderr fallback: the extractor runs first against stdout, and
/// only when that yields an empty metric set AND stderr is
/// non-empty does it retry against stderr. Well-behaved binaries
/// keep stdout canonical; payloads that emit structured output only
/// on stderr (schbench's `show_latencies` → `fprintf(stderr, ...)`)
/// still parse. The streams are never merged. `ExitCode` produces
/// no metrics from either stream — `extract_metrics` is invoked
/// (the control flow is variant-agnostic for simplicity) but the
/// `ExitCode` arm returns `Ok(vec![])` immediately, so the stderr
/// fallback runs and also returns empty. Observable behavior:
/// exit code only, no metrics.
#[derive(Debug, Clone, Copy)]
pub enum OutputFormat {
    /// Pass/fail from exit code alone. Stdout is archived for
    /// debugging but not parsed. `extract_metrics` is still invoked
    /// in the evaluate pipeline (variant-agnostic control flow) but
    /// returns `Ok(vec![])` immediately for this variant; the
    /// stderr fallback runs too and also returns empty. Observable
    /// behavior: no metrics extracted regardless of stream content.
    ExitCode,
    /// Parse the primary stream (stdout, or stderr on fallback) as
    /// JSON: find the JSON region within mixed output, extract
    /// numeric leaves as metrics keyed by dotted path (e.g.
    /// `jobs.0.read.iops`).
    Json,
    /// Feed the primary stream (stdout, or stderr on fallback) to a
    /// local small model; model emits JSON; walk that JSON as in
    /// [`OutputFormat::Json`] but tag each metric with
    /// [`MetricSource::LlmExtract`]. The optional `&'static str` is
    /// a user-provided focus hint appended to the default prompt
    /// (exact placement is described in the last paragraph below).
    ///
    /// **Host-only, deferred extraction.** Unlike `Json`, the LLM
    /// extraction does NOT run inside the guest VM — the model
    /// (~2.4 GiB) does not fit in guest RAM and the cache lives on
    /// the host. Inside the guest, `ctx.payload(P).run()` returns
    /// empty metrics; the captured raw stdout/stderr ship across
    /// the SHM ring as a `MSG_TYPE_RAW_PAYLOAD_OUTPUT` entry, and
    /// the host's post-VM-exit pipeline runs `extract_via_llm` on
    /// the captured text. Test bodies for LlmExtract payloads must
    /// return `Ok(assert_result)` without inspecting `metrics.metrics`
    /// directly. Runtime `.check()` on LlmExtract payloads accepts
    /// only `MetricCheck::ExitCodeEq`; metric-level variants
    /// (`Min`/`Max`/`Range`/`Exists`) panic at runtime. Declare
    /// metric checks via `default_checks` on the `Payload` so the
    /// host can apply them.
    ///
    /// Same stdout-primary / stderr-fallback contract as `Json`,
    /// applied host-side rather than guest-side.
    ///
    /// When present, the hint is emitted on its own line as
    /// `Focus: <hint>\n\n` between the default prompt template and
    /// the `STDOUT:` section (see `compose_prompt` in
    /// `test_support::model`). An empty or whitespace-only hint is
    /// dropped — the line is not emitted — so a caller passing
    /// `Some("")` or `Some("   ")` sees the same prompt as `None`.
    LlmExtract(Option<&'static str>),
}

// ---------------------------------------------------------------------------
// Polarity, MetricCheck, Metric, MetricSource
// ---------------------------------------------------------------------------

/// Regression direction for a metric.
///
/// Used by `cargo ktstr test-stats` to classify deltas between runs.
/// Declared explicitly on [`MetricHint`]; unhinted metrics default to
/// [`Polarity::Unknown`] and are recorded without regression
/// classification.
#[derive(Debug, Clone, Copy, PartialEq, serde::Serialize, serde::Deserialize)]
pub enum Polarity {
    /// Bigger is better (throughput, IOPS, bogo_ops/sec). Regression
    /// = decrease from baseline.
    HigherBetter,
    /// Smaller is better (latency percentiles, error rates).
    /// Regression = increase from baseline.
    LowerBetter,
    /// A target value that the metric should hover near. Regression
    /// = absolute distance exceeds a threshold, symmetric in either
    /// direction. The inner `f64` MUST be finite (not NaN/inf);
    /// construct via [`Polarity::target`], which enforces this at
    /// runtime in both debug and release.
    ///
    /// The variant itself is public and the enum derives
    /// `Deserialize`, so writing `Polarity::TargetValue(x)` directly
    /// or deserializing a value does not go through the
    /// `Polarity::target` finiteness check.
    TargetValue(f64),
    /// Direction not declared; the metric is recorded but not
    /// classified as regression-relevant.
    Unknown,
}

impl Polarity {
    /// Map the legacy `higher_is_worse: bool` used by
    /// [`MetricDef`](crate::stats::MetricDef) to a `Polarity`.
    ///
    /// The sense is INVERSE: `true` (bigger values are regressions)
    /// maps to [`Polarity::LowerBetter`] (we want the metric to go
    /// down); `false` maps to [`Polarity::HigherBetter`].
    pub const fn from_higher_is_worse(higher_is_worse: bool) -> Polarity {
        if higher_is_worse {
            Polarity::LowerBetter
        } else {
            Polarity::HigherBetter
        }
    }

    /// Construct a [`Polarity::TargetValue`] after asserting that
    /// `target` is finite. Non-finite `target` (`NaN`, `±inf`)
    /// produces incorrect regression verdicts in the comparison
    /// pipeline, so the check runs in release builds too.
    pub fn target(target: f64) -> Polarity {
        assert!(
            target.is_finite(),
            "Polarity::TargetValue target must be finite, got {target}"
        );
        Polarity::TargetValue(target)
    }
}

/// Payload-author metric declaration: polarity + display unit.
///
/// Attached to a [`Payload`] via the `metrics` field, which holds a
/// `&'static [MetricHint]` table. Metrics extracted from output are
/// looked up against this table by name to set their [`Polarity`]
/// and [`Metric::unit`]. Unmatched metrics land with
/// `Polarity::Unknown` and an empty unit string.
#[derive(Debug, Clone, Copy)]
pub struct MetricHint {
    /// Dotted-path metric name (e.g. `jobs.0.read.iops`). This is
    /// the key extracted metrics are looked up by.
    pub name: &'static str,
    /// Regression direction for this metric; see [`Polarity`] for
    /// what each variant means.
    pub polarity: Polarity,
    /// Human-readable unit for display (e.g. `iops`, `ns`). Empty
    /// string means "no unit"; matches the sentinel used by
    /// [`MetricDef`](crate::stats::MetricDef).
    pub unit: &'static str,
}

/// Per-payload validation bounds applied host-side to extracted
/// metrics from `OutputFormat::LlmExtract` payloads.
///
/// The framework's universal invariants in
/// [`crate::test_support::eval::validate_llm_extraction`] (unique
/// names, finite values, `MetricSource::LlmExtract`) are workload-
/// agnostic and apply to every LlmExtract payload. Workload-
/// specific bounds — minimum metric count, sign, magnitude — vary
/// per payload (schbench's five-or-more latency rows vs a
/// single-throughput benchmark; non-negative microseconds vs
/// delta-emitting payloads that legitimately report negatives) and
/// cannot be globalized.
///
/// `MetricBounds` lets the payload author declare these bounds
/// declaratively on the `Payload` struct via the `metric_bounds`
/// field. The host applies them after extraction in
/// [`crate::test_support::eval::host_side_llm_extract`]; each
/// violation surfaces as its own [`crate::assert::AssertDetail`]
/// with `DetailKind::Other`.
///
/// Every bound is `Option`-wrapped so a payload can declare any
/// subset: a payload that only cares about metric count leaves
/// `value_min` / `value_max` as `None`; a payload that only cares
/// about value magnitude leaves `min_count` as `None`.
///
/// `#[non_exhaustive]` so future bound classes (per-metric ranges,
/// required-name lists) can land without breaking downstream
/// crates. The attribute forbids struct expressions outside this
/// crate entirely — including functional-update (`..base`) syntax —
/// so out-of-crate call sites go through [`MetricBounds::new`] or
/// [`MetricBounds::NONE`] rather than a `MetricBounds { ... }`
/// literal.
///
/// Wire-format note: `MetricBounds` rides through the SHM ring on
/// `RawPayloadOutput::metric_bounds` (owned form). The struct's
/// fields (`Option<usize>` / `Option<f64>`) are serde-trivial; no
/// separate `WireMetricBounds` type is needed because `&'static
/// MetricBounds` carries no string slices that would defeat
/// serialization.
///
/// # Example
///
/// Schbench-style bounds: at least five percentile rows, every
/// reported value non-negative (latencies cannot be negative
/// microseconds), no upper magnitude cap because the value range
/// scales with workload duration. The struct is `#[non_exhaustive]`
/// to reserve room for future bound classes; out-of-crate callers
/// construct values through the [`MetricBounds::new`] const
/// constructor (or [`MetricBounds::NONE`] for the all-disabled
/// baseline).
///
/// ```
/// use ktstr::test_support::MetricBounds;
///
/// const SCHBENCH_BOUNDS: MetricBounds =
///     MetricBounds::new(Some(5), Some(0.0), None);
/// ```
///
/// Attach to a payload by passing `Some(&SCHBENCH_BOUNDS)` as the
/// `metric_bounds` argument of [`Payload::new`] — `Payload` is
/// `#[non_exhaustive]` as well, so only in-crate code can set
/// `metric_bounds: Some(&SCHBENCH_BOUNDS)` in a struct literal —
/// or via `#[derive(Payload)]` once the macro grows the
/// `metric_bounds` attribute hook. The host's
/// `host_side_llm_extract` reads the value off the
/// `RawPayloadOutput` after extraction and surfaces one
/// `AssertDetail` per bound violation.
#[derive(Debug, Clone, Copy, serde::Serialize, serde::Deserialize)]
#[non_exhaustive]
pub struct MetricBounds {
    /// Minimum total metric count. When `Some(n)`, an extracted
    /// metric set with fewer than `n` entries surfaces an
    /// `AssertDetail` naming the shortfall. `None` disables the
    /// count check (the universal "did we extract anything"
    /// invariant in `validate_llm_extraction` does NOT enforce
    /// a lower bound — payloads that genuinely produce zero
    /// metrics on a clean run are allowed under the universal
    /// rules).
    pub min_count: Option<usize>,

    /// Universal lower bound applied to every metric value. When
    /// `Some(v)`, any metric with `value < v` surfaces an
    /// `AssertDetail` naming the metric and the bound. `None`
    /// disables the lower-bound check — payloads that emit
    /// signed delta metrics (improvement = negative latency
    /// delta) leave this `None`.
    pub value_min: Option<f64>,

    /// Universal upper bound applied to every metric value. When
    /// `Some(v)`, any metric with `value > v` surfaces an
    /// `AssertDetail` naming the metric and the bound. `None`
    /// disables the upper-bound check — payloads that emit
    /// large-but-legitimate metrics (memory bytes in a multi-TB
    /// container, RSS in petabyte ranges) leave this `None`.
    pub value_max: Option<f64>,
}

impl MetricBounds {
    /// Const constructor for the all-disabled defaults — every
    /// bound is `None`, so applying these to a metric set produces
    /// zero violations regardless of input shape. Useful as a
    /// const seed in tests and as the documented "no extra checks"
    /// baseline.
    pub const NONE: MetricBounds = MetricBounds {
        min_count: None,
        value_min: None,
        value_max: None,
    };

    /// Const constructor that takes all three bounds at once. The
    /// only out-of-crate construction path (the struct itself is
    /// `#[non_exhaustive]` so external callers cannot use struct-
    /// literal syntax). Pass `None` to disable any individual
    /// bound; pass `Some(value)` to enable it.
    pub const fn new(
        min_count: Option<usize>,
        value_min: Option<f64>,
        value_max: Option<f64>,
    ) -> MetricBounds {
        MetricBounds {
            min_count,
            value_min,
            value_max,
        }
    }
}

/// A single pass/fail rule for a payload run: either a condition on
/// one named metric in the extracted [`PayloadMetrics`], or — for
/// [`MetricCheck::ExitCodeEq`](MetricCheck::ExitCodeEq) — a
/// condition on the payload's exit code.
#[derive(Debug, Clone, Copy)]
pub enum MetricCheck {
    /// Lower bound: fails when the named metric is below `value`.
    Min { metric: &'static str, value: f64 },
    /// Upper bound: fails when the named metric exceeds `value`.
    Max { metric: &'static str, value: f64 },
    /// Interval: fails when the named metric is outside `[lo, hi]`.
    Range {
        metric: &'static str,
        lo: f64,
        hi: f64,
    },
    /// Presence: fails when the named metric is missing from the
    /// extracted set.
    Exists(&'static str),
    /// Exit status: fails when the payload's exit code is not equal
    /// to `expected`.
    ExitCodeEq(i32),
}

impl MetricCheck {
    /// Build a [`MetricCheck::Min`]: the check fails when the named
    /// metric is below `value`. A missing metric fails loudly, per
    /// the evaluation pipeline's missing-metric contract.
    pub const fn min(metric: &'static str, value: f64) -> MetricCheck {
        Self::Min { metric, value }
    }

    /// Build a [`MetricCheck::Max`]: the check fails when the named
    /// metric exceeds `value`. A missing metric fails loudly.
    pub const fn max(metric: &'static str, value: f64) -> MetricCheck {
        Self::Max { metric, value }
    }

    /// Build a [`MetricCheck::Range`]: the check fails when the named
    /// metric falls outside `[lo, hi]`, both ends inclusive. A
    /// missing metric fails loudly.
    ///
    /// # Panics
    ///
    /// Panics at construction when `lo > hi`. Reversed bounds
    /// describe an empty interval that no finite metric can satisfy,
    /// which is almost certainly a typo rather than an intentional
    /// always-fails check; rejecting it here points at the call site
    /// instead of letting the evaluator run an unsatisfiable check
    /// against every probe value. A NaN bound panics too, because
    /// `lo <= hi` is false whenever either side is NaN.
    pub const fn range(metric: &'static str, lo: f64, hi: f64) -> MetricCheck {
        let bounds_ordered = lo <= hi;
        assert!(
            bounds_ordered,
            "MetricCheck::range: lo must be <= hi (reversed bounds are an empty interval)"
        );
        Self::Range { metric, lo, hi }
    }

    /// Build a [`MetricCheck::Exists`]: the check fails when the
    /// named metric is absent from the extracted set. Only presence
    /// matters — the value can be any finite number, including zero
    /// or negative.
    pub const fn exists(metric: &'static str) -> MetricCheck {
        Self::Exists(metric)
    }

    /// Build a [`MetricCheck::ExitCodeEq`]: the check fails when the
    /// payload's exit code differs from `expected`. It is evaluated
    /// before metric-path checks, so a mis-exited binary reports the
    /// exit-code mismatch rather than chained missing-metric
    /// failures.
    pub const fn exit_code_eq(expected: i32) -> MetricCheck {
        Self::ExitCodeEq(expected)
    }
}

/// Provenance of a [`Metric`] — tells downstream tooling whether the
/// value came from a structured-output parse or from LLM-derived
/// extraction.
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub enum MetricSource {
    /// Extracted directly from JSON output via
    /// [`OutputFormat::Json`].
    Json,
    /// Extracted by feeding the captured output (stdout, or stderr
    /// on fallback) through the local model
    /// (`OutputFormat::LlmExtract` path). Values depend on the model's
    /// prompt-driven parse rather than the payload's own structured
    /// output; downstream tooling that compares runs should surface
    /// the source so users can filter out LLM-derived metrics when
    /// reproducibility matters.
    LlmExtract,
}

/// Which of the payload's output streams a [`Metric`] was extracted
/// from.
///
/// Orthogonal to [`MetricSource`]: `source` captures HOW the metric
/// was produced (structured JSON parse vs LLM-driven extraction);
/// `stream` captures WHERE the bytes came from (payload stdout vs
/// stderr). Both axes matter for diagnosing "surprise metrics" in
/// post-run analysis: a metric tagged [`Self::Stderr`] signals a
/// payload whose structured output landed on the diagnostic stream
/// — well-behaved payloads keep stdout canonical per the
/// [`OutputFormat`] doc contract, so a stderr tag is a review hint
/// ("is this payload misconfigured, or did the fallback
/// intentionally pick it up?") even when `source` says the parse
/// itself succeeded.
///
/// Populated by the extraction pipeline in
/// [`crate::scenario::payload_run`]: the stdout-primary branch
/// stamps [`Stdout`](Self::Stdout), the stderr-fallback branch
/// stamps [`Stderr`](Self::Stderr). The streams are never merged;
/// one or the other produces the metric set, and that identity
/// propagates through [`Metric::stream`].
///
/// A third tag, [`Synthesized`](Self::Synthesized), marks metrics
/// authored by a host-side probe rather than parsed from either of
/// the payload's own streams; see the variant docs for details.
///
/// Status: persisted on the sidecar for future review-tooling
/// (CI dashboards, `cargo ktstr stats`-style filters); not yet
/// consumed by `stats compare` or any automated pipeline. The
/// field is wired end-to-end from the payload-pipeline to the
/// sidecar JSON today so that downstream review tools can start
/// filtering on it without a schema change — but no production
/// consumer reads it yet. A follow-up task wires filtering into
/// `stats compare` output.
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
#[non_exhaustive]
pub enum MetricStream {
    /// Extracted from the payload's stdout (the happy path for
    /// fio / stress-ng / most benchmark tools).
    Stdout,
    /// Extracted from the payload's stderr via the stderr-fallback
    /// contract (for payloads that emit structured summaries to
    /// stderr — e.g. schbench's `show_latencies` →
    /// `fprintf(stderr, ...)`).
    Stderr,
    /// Synthesized by a host-side probe rather than parsed from a
    /// child process's output streams. Used by payloads whose
    /// "metrics" are derived from external observation — currently
    /// the `ktstr-jemalloc-probe` family, which emits JSON
    /// describing TID-keyed jemalloc counter values read via
    /// `process_vm_readv` on the target process's address space,
    /// not by the target process's own stdout/stderr.
    ///
    /// This variant is orthogonal to [`Stdout`](Self::Stdout) and
    /// [`Stderr`](Self::Stderr): it does NOT mean "probe wrote to
    /// stdout/stderr" (which would be stamped `Stdout` via the
    /// usual extraction pipeline). It means the metric's ultimate
    /// SOURCE is external introspection rather than a channel
    /// emission by the measured process. Downstream review
    /// tooling that filters on `MetricStream` can use `Synthesized`
    /// to identify probe-authored metrics where the "keep stdout
    /// canonical" convention does not apply — a probe's output
    /// channel is an implementation detail of the probe binary,
    /// not a claim about the subject process's channel hygiene.
    ///
    /// # `#[non_exhaustive]` migration note
    ///
    /// `MetricStream` gained this variant after `Stdout` / `Stderr`
    /// were already serialized in on-disk sidecars; the enum is
    /// `#[non_exhaustive]` so downstream pattern matches must
    /// include a wildcard `_ =>` arm, and future probe-authored
    /// stream sources (e.g. a BPF-map reader) can land without
    /// a wire-format migration.
    Synthesized,
}

/// A single extracted metric from a payload's output.
///
/// Populated by the extraction pipeline after the payload exits.
/// Sidecar serialization carries these alongside the pass/fail
/// verdict so test-stats can classify regressions across runs.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct Metric {
    /// Dotted-path name matching the JSON leaf or the LLM-emitted key.
    pub name: String,
    /// Numeric value.
    pub value: f64,
    /// Regression direction, copied from the matching
    /// [`MetricHint`] or left as [`Polarity::Unknown`] when no hint
    /// matches.
    pub polarity: Polarity,
    /// Display unit string; empty when no unit was declared.
    pub unit: String,
    /// Where this metric came from — JSON parse or LLM extraction.
    pub source: MetricSource,
    /// Which of the payload's output streams the metric was read
    /// from — stdout on the happy path, stderr under the
    /// stderr-fallback contract, or
    /// [`MetricStream::Synthesized`] when a host-side probe authored
    /// the metric. See [`MetricStream`] for the orthogonality with
    /// `source` and the "well-behaved payloads keep stdout
    /// canonical" review hint.
    pub stream: MetricStream,
}

/// All metrics extracted from a single payload run plus the process
/// exit code.
///
/// Each concurrent payload (primary or workload, foreground or
/// background) produces one `PayloadMetrics` value. Sidecar stores
/// these as a `Vec<PayloadMetrics>` so per-payload provenance is
/// preserved across composed tests. Payload identity (name and
/// cgroup placement) is carried by the enclosing sidecar record —
/// not by `PayloadMetrics` itself, which holds only the extracted
/// metrics and exit code.
///
/// The `payload_index` field stamps every per-invocation emission
/// (one `PayloadMetrics` value per `.run()` / `.wait()` / `.kill()` /
/// `.try_wait()` call) with a monotonically increasing per-process
/// counter — assigned at emit time inside the guest VM. Hosts use
/// the index to pair an [`OutputFormat::LlmExtract`] payload's
/// empty-metrics `PayloadMetrics` slot with its companion
/// [`crate::test_support::RawPayloadOutput`] without relying on
/// emission order, which would conflate a `Json` payload that
/// legitimately produced zero metrics (no numeric leaves) with an
/// `LlmExtract` placeholder.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct PayloadMetrics {
    /// Per-invocation index assigned at emit time. Monotonically
    /// increasing within a single guest VM process, starting at 0.
    /// Pairs with [`crate::test_support::RawPayloadOutput::payload_index`]
    /// for `OutputFormat::LlmExtract` payloads — the host matches
    /// raw output to its empty-metrics slot by equal index.
    pub payload_index: usize,
    /// Extracted metrics. Empty when [`OutputFormat::ExitCode`] is
    /// used, when JSON parsing found no numeric leaves, or when this
    /// value is the empty-metrics placeholder emitted for an
    /// [`OutputFormat::LlmExtract`] payload (see the type-level docs).
    pub metrics: Vec<Metric>,
    /// Process exit code (0 = success). Used by
    /// [`MetricCheck::ExitCodeEq`] in the check evaluation pre-pass.
    pub exit_code: i32,
}

impl PayloadMetrics {
    /// Return the value of the metric whose name equals `name`
    /// exactly, or `None` when no such metric was extracted.
    ///
    /// The scan runs in storage order, so if several entries share
    /// a name the earliest one is reported.
    pub fn get(&self, name: &str) -> Option<f64> {
        self.metrics
            .iter()
            .find_map(|metric| (metric.name == name).then_some(metric.value))
    }
}

/// Owned-strings counterpart to [`MetricHint`] used to ship the
/// payload's polarity / unit declarations across the guest-to-host
/// SHM ring.
///
/// `MetricHint` carries `&'static str` references that cannot
/// round-trip through serde; the guest builds a `Vec<WireMetricHint>`
/// from `payload.metrics` at LlmExtract emit time and the host
/// consumes it in [`crate::scenario::payload_run::resolve_polarities_owned`]
/// to stamp the host-extracted [`Metric`] set.
///
/// Wire-only — never constructed in test bodies. Crate-visible
/// (`pub(crate)`) so `eval.rs` can decode and consume it. Build one
/// from a borrowed hint via the `From<&MetricHint>` impl.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub(crate) struct WireMetricHint {
    /// Dotted-path metric name, mirrors [`MetricHint::name`].
    pub name: String,
    /// Regression direction, mirrors [`MetricHint::polarity`].
    pub polarity: Polarity,
    /// Display unit, mirrors [`MetricHint::unit`].
    pub unit: String,
}

/// Copies a borrowed [`MetricHint`] into its owned wire form: the
/// two string fields are cloned into fresh `String`s and the
/// polarity is carried over unchanged.
impl From<&MetricHint> for WireMetricHint {
    fn from(hint: &MetricHint) -> Self {
        let name = hint.name.to_owned();
        let unit = hint.unit.to_owned();
        Self {
            name,
            polarity: hint.polarity,
            unit,
        }
    }
}

/// Raw stdout/stderr captured from a payload that declared
/// [`OutputFormat::LlmExtract`].
///
/// Emitted by the guest alongside an empty [`PayloadMetrics`] so the
/// host can run the LLM-backed extraction post-VM-exit. LLM
/// extraction never runs in the guest: the model (~2.4 GiB) does not
/// fit in guest VM RAM, and the cache lives on the host. Each
/// `RawPayloadOutput` carries a `payload_index` matching the
/// empty-metrics `PayloadMetrics` slot's `payload_index`. The host
/// pairs them by equal index, not emission order — see
/// [`payload_index`](Self::payload_index).
///
/// `hint` is the focus directive declared on the payload's
/// `OutputFormat::LlmExtract(Some(hint))`. Stored as `Option<String>`
/// (rather than `Option<&'static str>`) because the SHM transport is
/// owned-bytes only.
///
/// `metric_hints` carries an owned-strings copy of the payload's
/// `metrics: &[MetricHint]` slice — required to apply
/// [`Polarity`] and unit classification on the host after extraction.
/// The guest's static `&'static [MetricHint]` cannot round-trip
/// across SHM, so [`WireMetricHint`] mirrors the fields with owned
/// `String`s. Empty slice when the payload declared no hints.
///
/// `metric_bounds` carries the payload's optional [`MetricBounds`]
/// declaration by value; see the field docs.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub(crate) struct RawPayloadOutput {
    /// Per-invocation index — equals the
    /// [`PayloadMetrics::payload_index`] of the empty-metrics slot
    /// emitted by the same `OutputFormat::LlmExtract` invocation.
    /// The host pairs raw output to its empty-metrics slot by equal
    /// index rather than emission order, which would conflate a
    /// `Json` payload that legitimately produced zero metrics with
    /// an `LlmExtract` placeholder.
    pub payload_index: usize,
    /// Stdout captured from the payload's child process. Non-UTF-8
    /// bytes are replaced with U+FFFD per the framework's
    /// stream-capture contract.
    pub stdout: String,
    /// Stderr captured from the payload's child process. The host's
    /// `extract_metrics` runs stdout-primary with stderr-fallback,
    /// matching the legacy guest-side dispatch contract. Non-UTF-8
    /// bytes are replaced with U+FFFD per the framework's
    /// stream-capture contract.
    pub stderr: String,
    /// Optional focus directive declared on
    /// `OutputFormat::LlmExtract(Some(hint))`. `None` when the
    /// payload declared `LlmExtract(None)` or `LlmExtract`. The
    /// host's `extract_via_llm` threads this into the prompt.
    pub hint: Option<String>,
    /// Owned-strings copy of the payload's `metrics` slice. Consumed
    /// by [`crate::scenario::payload_run::resolve_polarities_owned`]
    /// on the host to apply [`Polarity`] + unit to the host-extracted
    /// metric set. Empty when the payload declared no hints.
    pub metric_hints: Vec<WireMetricHint>,
    /// Owned copy of the payload's [`MetricBounds`] declaration —
    /// `Some(bounds)` when the payload set
    /// [`Payload::metric_bounds`], `None` when it didn't. Consumed
    /// by the host's `host_side_llm_extract` after extraction to
    /// apply per-payload validation (minimum metric count, value
    /// magnitude bounds). `MetricBounds` is `Copy` + serde, so no
    /// owned-strings conversion is needed — the value rides through
    /// SHM by value.
    pub metric_bounds: Option<MetricBounds>,
}

#[cfg(test)]
mod tests {
    use super::*;

    /// `Payload::KERNEL_DEFAULT` is a scheduler-kind payload that
    /// displays as "kernel_default", reports through its exit code
    /// only, and declares no default args, checks, or metric hints.
    #[test]
    fn payload_kernel_default_const_is_scheduler_kind() {
        assert!(matches!(
            Payload::KERNEL_DEFAULT.kind,
            PayloadKind::Scheduler(_)
        ));
        assert_eq!(Payload::KERNEL_DEFAULT.display_name(), "kernel_default");
        assert!(matches!(
            Payload::KERNEL_DEFAULT.output,
            OutputFormat::ExitCode
        ));
        assert!(Payload::KERNEL_DEFAULT.default_args.is_empty());
        assert!(Payload::KERNEL_DEFAULT.default_checks.is_empty());
        assert!(Payload::KERNEL_DEFAULT.metrics.is_empty());
    }

    /// The scheduler wrapped by `Payload::KERNEL_DEFAULT` carries the
    /// same name as `Scheduler::EEVDF`. The match has no wildcard
    /// arm, so adding a `PayloadKind` variant forces this test to be
    /// revisited.
    #[test]
    fn payload_kernel_default_wraps_scheduler_eevdf() {
        match Payload::KERNEL_DEFAULT.kind {
            PayloadKind::Scheduler(s) => {
                assert_eq!(s.name, Scheduler::EEVDF.name);
            }
            PayloadKind::Binary(_) => panic!("EEVDF should be Scheduler-kind, got Binary"),
        }
    }

    /// [`Payload::binary`] fills a binary-kind [`Payload`] with the
    /// exit-code-only defaults — empty `default_args`,
    /// `default_checks`, `metrics`, and `OutputFormat::ExitCode`.
    /// Evaluated in a `const` block so any future drift that makes
    /// the constructor non-const surfaces here at compile time; the
    /// runtime assertions pin the field-level defaults so a
    /// drive-by change (e.g. flipping `output` to `Json`) reshapes
    /// every `Payload::binary(…)` call site visibly.
    #[test]
    fn payload_binary_const_constructor_shape() {
        // `const` binding: stops compiling if `Payload::binary` is
        // no longer a `const fn`.
        const P: Payload = Payload::binary("fio_payload", "fio");
        // First argument is the payload name, second the binary.
        assert_eq!(P.name, "fio_payload");
        assert!(matches!(P.kind, PayloadKind::Binary("fio")));
        // Exit-code-only defaults.
        assert!(matches!(P.output, OutputFormat::ExitCode));
        assert!(P.default_args.is_empty());
        assert!(P.default_checks.is_empty());
        assert!(P.metrics.is_empty());
        // A binary payload never reports itself as a scheduler.
        assert!(!P.is_scheduler());
        assert!(P.as_scheduler().is_none());
    }

    /// Each `MetricCheck` constructor yields the variant its name
    /// promises. `exists` and `exit_code_eq` are additionally matched
    /// on their payload so the argument is shown to be stored as
    /// given.
    #[test]
    fn check_constructors() {
        assert!(matches!(
            MetricCheck::min("x", 1.0),
            MetricCheck::Min { .. }
        ));
        assert!(matches!(
            MetricCheck::max("x", 1.0),
            MetricCheck::Max { .. }
        ));
        assert!(matches!(
            MetricCheck::range("x", 1.0, 2.0),
            MetricCheck::Range { .. }
        ));
        assert!(matches!(MetricCheck::exists("x"), MetricCheck::Exists("x")));
        assert!(matches!(
            MetricCheck::exit_code_eq(0),
            MetricCheck::ExitCodeEq(0)
        ));
    }

    /// [`PayloadMetrics::get`] resolves an exact metric name to its
    /// value and yields `None` for a name that is not in the set.
    #[test]
    fn metric_set_get_returns_value() {
        let iops = Metric {
            name: String::from("iops"),
            value: 1000.0,
            polarity: Polarity::HigherBetter,
            unit: String::from("iops"),
            source: MetricSource::Json,
            stream: MetricStream::Stdout,
        };
        let set = PayloadMetrics {
            payload_index: 0,
            metrics: vec![iops],
            exit_code: 0,
        };

        assert_eq!(set.get("iops"), Some(1000.0));
        assert_eq!(set.get("missing"), None);
    }

    /// The `TargetValue` variant hands back the exact float it was
    /// built with.
    #[test]
    fn polarity_target_value_carries_data() {
        let p = Polarity::TargetValue(42.0);
        let Polarity::TargetValue(v) = p else {
            panic!("expected TargetValue variant");
        };
        assert_eq!(v, 42.0);
    }

    /// Compile-time smoke test: every `OutputFormat` shape used by
    /// payload declarations can be constructed, including
    /// `LlmExtract` with and without a focus hint.
    #[test]
    fn output_format_variants() {
        let _all: [OutputFormat; 4] = [
            OutputFormat::ExitCode,
            OutputFormat::Json,
            OutputFormat::LlmExtract(None),
            OutputFormat::LlmExtract(Some("focus on iops")),
        ];
    }

    /// Wire-format pin for every [`MetricSource`] variant.
    ///
    /// A bare round-trip would still pass after a variant rename
    /// (serializer and deserializer change together), so each
    /// variant is also compared against its literal JSON encoding —
    /// the variant name as a JSON string, which is serde's default
    /// for a unit variant with no rename attribute.
    #[test]
    fn metric_source_serde_round_trip() {
        for (source, wire) in [
            (MetricSource::Json, "\"Json\""),
            (MetricSource::LlmExtract, "\"LlmExtract\""),
        ] {
            let js = serde_json::to_string(&source).unwrap();
            assert_eq!(
                js, wire,
                "MetricSource::{source:?} must keep its on-disk encoding"
            );
            let de: MetricSource = serde_json::from_str(&js).unwrap();
            assert_eq!(de, source);
        }
    }

    /// Wire-format round-trip for every [`MetricStream`] variant,
    /// including [`MetricStream::Synthesized`].
    ///
    /// Pins the serde representation so a sidecar written by one
    /// version of ktstr deserializes under another — a silent wire
    /// change (rename, internal tag, numeric encoding) would
    /// surface here, not as a missing-field error on every
    /// existing sidecar. Because a bare round-trip still passes
    /// after a rename, each variant is also compared against its
    /// literal JSON encoding (the variant name as a JSON string).
    /// Covers the same round-trip property as
    /// [`metric_source_serde_round_trip`] does for `MetricSource`.
    #[test]
    fn metric_stream_serde_round_trip() {
        for (s, wire) in [
            (MetricStream::Stdout, "\"Stdout\""),
            (MetricStream::Stderr, "\"Stderr\""),
            (MetricStream::Synthesized, "\"Synthesized\""),
        ] {
            let js = serde_json::to_string(&s).expect("serialize");
            assert_eq!(
                js, wire,
                "MetricStream::{s:?} must keep its on-disk encoding"
            );
            let de: MetricStream = serde_json::from_str(&js).expect("deserialize");
            assert_eq!(
                de, s,
                "MetricStream::{s:?} wire format must round-trip \
                 identically; serialized as {js}, deserialized to \
                 {de:?}",
            );
        }
    }

    /// Every `Polarity` variant — including the data-carrying
    /// `TargetValue` — survives a JSON encode/decode cycle unchanged.
    #[test]
    fn polarity_serde_round_trip() {
        let variants = [
            Polarity::HigherBetter,
            Polarity::LowerBetter,
            Polarity::TargetValue(2.78),
            Polarity::Unknown,
        ];
        for original in variants {
            let encoded = serde_json::to_string(&original).unwrap();
            let decoded: Polarity = serde_json::from_str(&encoded).unwrap();
            assert_eq!(decoded, original);
        }
    }

    // A `Payload` built by struct literal with `PayloadKind::Binary`
    // exposes the binary name through the variant and is not
    // reported as a scheduler.
    #[test]
    fn payload_kind_binary_construction_and_match() {
        const FIO: Payload = Payload {
            name: "fio",
            kind: PayloadKind::Binary("fio"),
            output: OutputFormat::Json,
            default_args: &[],
            default_checks: &[],
            metrics: &[],
            include_files: &[],
            uses_parent_pgrp: false,
            known_flags: None,
            metric_bounds: None,
        };

        let PayloadKind::Binary(name) = FIO.kind else {
            panic!("expected Binary, got Scheduler");
        };
        assert_eq!(name, "fio");

        assert!(!FIO.is_scheduler());
        assert!(FIO.as_scheduler().is_none());
    }

    // Const bindings verify const-fn actually works in const context.
    // Every initializer below is evaluated at compile time, so a
    // constructor or accessor that loses its `const` qualifier (or a
    // constructor whose assertion fires for these arguments) breaks
    // the build of this module. `const_bindings_are_usable` reads the
    // values back at runtime.
    const _MIN: MetricCheck = MetricCheck::min("x", 1.0);
    const _MAX: MetricCheck = MetricCheck::max("x", 2.0);
    const _RANGE: MetricCheck = MetricCheck::range("x", 1.0, 2.0);
    const _EXISTS: MetricCheck = MetricCheck::exists("x");
    const _EXIT: MetricCheck = MetricCheck::exit_code_eq(0);
    const _KERNEL_DEFAULT_REF: &Payload = &Payload::KERNEL_DEFAULT;
    const _KERNEL_DEFAULT_IS_SCHED: bool = Payload::KERNEL_DEFAULT.is_scheduler();
    const _KERNEL_DEFAULT_DISPLAY: &str = Payload::KERNEL_DEFAULT.display_name();

    // Proves an arbitrary `Payload` (not just `Payload::KERNEL_DEFAULT`) is
    // const-constructible via struct literal — the #[derive(Payload)]
    // proc-macro emits exactly this shape. Non-empty `default_args`,
    // `default_checks`, and `metrics` slices are used so the nested
    // `MetricCheck` / `MetricHint` values are const-evaluated too.
    const _PAYLOAD_CONST_BUILD: Payload = Payload {
        name: "fio",
        kind: PayloadKind::Binary("fio"),
        output: OutputFormat::Json,
        default_args: &["--output-format=json"],
        default_checks: &[MetricCheck::exit_code_eq(0)],
        metrics: &[MetricHint {
            name: "jobs.0.read.iops",
            polarity: Polarity::HigherBetter,
            unit: "iops",
        }],
        include_files: &[],
        uses_parent_pgrp: false,
        known_flags: None,
        metric_bounds: None,
    };

    /// Reads back the module-level `const` bindings so they are
    /// exercised at runtime as well as at compile time.
    #[test]
    fn const_bindings_are_usable() {
        assert!(matches!(_MIN, MetricCheck::Min { .. }));
        assert!(matches!(_MAX, MetricCheck::Max { .. }));
        assert!(matches!(_RANGE, MetricCheck::Range { .. }));
        assert!(matches!(_EXISTS, MetricCheck::Exists("x")));
        assert!(matches!(_EXIT, MetricCheck::ExitCodeEq(0)));
        assert_eq!(_KERNEL_DEFAULT_REF.name, "kernel_default");
        // Inline `const` block: the flag is itself a constant, so the
        // assertion is checked during compilation.
        const { assert!(_KERNEL_DEFAULT_IS_SCHED) };
        assert_eq!(_KERNEL_DEFAULT_DISPLAY, "kernel_default");
    }

    // `Polarity::from_higher_is_worse` inverts the flag's sense:
    // "higher is worse" maps to `LowerBetter` and vice versa.
    #[test]
    fn polarity_from_higher_is_worse_flips_sense() {
        let table = [
            (true, Polarity::LowerBetter),
            (false, Polarity::HigherBetter),
        ];
        for (higher_is_worse, expected) in table {
            assert_eq!(Polarity::from_higher_is_worse(higher_is_worse), expected);
        }
    }

    /// Round-trip bool → Polarity → bool for HigherBetter /
    /// LowerBetter yields the identity. Pins the "inverse sense"
    /// contract documented on `MetricDef::higher_is_worse` and
    /// `Polarity::from_higher_is_worse` so a future polarity
    /// refactor can't accidentally flip one direction without the
    /// other and silently break delta-classification downstream.
    ///
    /// The test synthesizes a throw-away `MetricDef` for each
    /// polarity because the production `METRICS` table's entries
    /// live in `stats.rs` and are not importable from here —
    /// constructing the struct literal directly keeps the
    /// round-trip self-contained.
    #[test]
    fn higher_is_worse_polarity_round_trip() {
        use crate::stats::{MetricDef, MetricKind};

        // true (higher-is-worse) → LowerBetter → true.
        let m = MetricDef {
            name: "t",
            polarity: Polarity::from_higher_is_worse(true),
            kind: MetricKind::Counter,
            default_abs: 0.0,
            default_rel: 0.0,
            display_unit: "",
            accessor: |_| None,
        };
        assert_eq!(m.polarity, Polarity::LowerBetter);
        assert!(m.higher_is_worse(), "LowerBetter → higher_is_worse = true");

        // false (higher-is-better) → HigherBetter → false.
        let m = MetricDef {
            name: "f",
            polarity: Polarity::from_higher_is_worse(false),
            kind: MetricKind::Counter,
            default_abs: 0.0,
            default_rel: 0.0,
            display_unit: "",
            accessor: |_| None,
        };
        assert_eq!(m.polarity, Polarity::HigherBetter);
        assert!(
            !m.higher_is_worse(),
            "HigherBetter → higher_is_worse = false"
        );
    }

    /// `MetricDef::higher_is_worse` is total over every `Polarity`
    /// variant — the current implementation lumps `LowerBetter`,
    /// `TargetValue`, and `Unknown` all into `true`. Pinned so a
    /// subtle change (e.g. TargetValue → its own category) doesn't
    /// silently flip regression direction for every test using
    /// target metrics.
    #[test]
    fn higher_is_worse_covers_all_polarity_variants() {
        use crate::stats::{MetricDef, MetricKind};
        // Throw-away definition: only `polarity` varies between
        // cases; every other field is a neutral placeholder.
        fn make(p: Polarity) -> MetricDef {
            MetricDef {
                name: "x",
                polarity: p,
                kind: MetricKind::Counter,
                default_abs: 0.0,
                default_rel: 0.0,
                display_unit: "",
                accessor: |_| None,
            }
        }
        // `HigherBetter` is the only variant that reports `false`.
        assert!(!make(Polarity::HigherBetter).higher_is_worse());
        assert!(make(Polarity::LowerBetter).higher_is_worse());
        assert!(make(Polarity::TargetValue(42.0)).higher_is_worse());
        assert!(make(Polarity::Unknown).higher_is_worse());
    }

    /// A finite target passes the `Polarity::target` gate and is
    /// wrapped unchanged in `TargetValue`.
    #[test]
    fn polarity_target_accepts_finite() {
        assert_eq!(Polarity::target(0.5), Polarity::TargetValue(0.5));
    }

    /// `Polarity::target(NaN)` must panic in release too — non-finite
    /// target values produce silent incorrect regression verdicts in
    /// `compare_rows`, so the gate is a runtime `assert!` (not
    /// `debug_assert!`). Pins that a release build won't silently
    /// let NaN slip through. The `expected` string is matched as a
    /// substring of the panic message.
    #[test]
    #[should_panic(expected = "Polarity::TargetValue target must be finite")]
    fn polarity_target_rejects_nan_panics() {
        let _ = Polarity::target(f64::NAN);
    }

    /// `Polarity::target(+inf)` panics symmetrically with NaN.
    /// `compare_rows` would otherwise produce inf-vs-finite verdicts
    /// that depend on IEEE-754 infinity arithmetic rather than
    /// meaningful regression direction. The panic message is the
    /// same one the NaN case produces.
    #[test]
    #[should_panic(expected = "Polarity::TargetValue target must be finite")]
    fn polarity_target_rejects_positive_infinity_panics() {
        let _ = Polarity::target(f64::INFINITY);
    }

    /// `Polarity::target(-inf)` panics with the same "must be finite"
    /// message as the NaN and `+inf` cases: negative infinity is
    /// rejected by the same finiteness gate.
    #[test]
    #[should_panic(expected = "Polarity::TargetValue target must be finite")]
    fn polarity_target_rejects_negative_infinity_panics() {
        let _ = Polarity::target(f64::NEG_INFINITY);
    }

    /// `Polarity::TargetValue(NaN)` — which bypasses the
    /// `Polarity::target` constructor's runtime assert when the
    /// enum variant is built directly — serializes to
    /// `{"TargetValue":null}` via serde_json because
    /// `serde_json::Number::from_f64` returns `None` on non-finite
    /// values and the default serializer falls back to `null`.
    /// The resulting document does NOT round-trip: deserialization
    /// fails because `null` can't satisfy the inner `f64` slot.
    /// So NaN cannot survive a sidecar write + read pair, even
    /// though the write step silently coerces it. Pins both halves
    /// of this asymmetric guard so a future serde-attribute change
    /// (e.g. `serialize_with = "serialize_nan_as_zero"`) or a
    /// custom deserializer gets surfaced here.
    #[test]
    fn polarity_target_nan_serializes_as_null_and_fails_to_round_trip() {
        // Built as a bare variant on purpose: `Polarity::target`
        // would panic on NaN before anything could be serialized.
        let p = Polarity::TargetValue(f64::NAN);
        let s = serde_json::to_string(&p).expect("NaN→null serialization is the current behavior");
        assert_eq!(s, "{\"TargetValue\":null}");
        assert!(
            serde_json::from_str::<Polarity>(&s).is_err(),
            "the null-coerced round-trip must fail to deserialize so a NaN written \
             by an un-guarded producer cannot silently re-enter a run",
        );
    }

    /// `NaN`, `Infinity`, and `-Infinity` are not JSON number tokens,
    /// so a sidecar that contains them verbatim (hand-edited, or
    /// written by a non-serde emitter) is refused by the parser.
    /// Companion to the NaN-serializes-as-null test.
    #[test]
    fn polarity_target_nan_cannot_deserialize_from_non_json_literals() {
        let invalid_documents = [
            "{\"TargetValue\":NaN}",
            "{\"TargetValue\":Infinity}",
            "{\"TargetValue\":-Infinity}",
        ];
        for raw in invalid_documents {
            assert!(serde_json::from_str::<Polarity>(raw).is_err());
        }
    }

    /// `MetricCheck::range(metric, lo, hi)` panics when `lo > hi`.
    /// A reversed-bounds range describes an empty interval that no
    /// finite metric can satisfy — almost always a user error.
    /// Failing loudly at the constructor surfaces the typo at the
    /// call site rather than letting the evaluator run an
    /// unsatisfiable check against every probe value.
    ///
    /// `expected` is a substring of the constructor's full panic
    /// message.
    #[test]
    #[should_panic(expected = "lo must be <= hi")]
    fn check_range_reversed_bounds_panics_at_construction() {
        let _ = MetricCheck::range("iops", 100.0, 50.0);
    }

    /// A degenerate interval with `lo == hi` is accepted and stored
    /// verbatim — it demands that the metric equal that one value.
    /// Guards the constructor's `<=` comparison against being
    /// tightened to `<`.
    #[test]
    fn check_range_equal_bounds_construct() {
        let check = MetricCheck::range("iops", 50.0, 50.0);
        let MetricCheck::Range { metric, lo, hi } = check else {
            panic!("expected Range variant");
        };
        assert_eq!(metric, "iops");
        assert_eq!(lo, 50.0);
        assert_eq!(hi, 50.0);
    }

    /// NaN as either bound trips the `lo <= hi` gate (NaN comparisons
    /// always return false), so the constructor panics. Prevents an
    /// always-fails check from slipping into the evaluator pipeline.
    /// This case puts the NaN in the `lo` position.
    #[test]
    #[should_panic(expected = "lo must be <= hi")]
    fn check_range_nan_lo_panics() {
        let _ = MetricCheck::range("iops", f64::NAN, 50.0);
    }

    /// NaN in the `hi` position also makes `lo <= hi` false, so
    /// `MetricCheck::range` panics with the same message as for
    /// reversed bounds.
    #[test]
    #[should_panic(expected = "lo must be <= hi")]
    fn check_range_nan_hi_panics() {
        let _ = MetricCheck::range("iops", 50.0, f64::NAN);
    }

    // Debug + helper method surface.
    // `{:?}` on a `Payload` names the type, the wrapped scheduler,
    // and renders the kind as `Scheduler("eevdf")`.
    #[test]
    fn payload_debug_renders_identity_fields() {
        let s = format!("{:?}", Payload::KERNEL_DEFAULT);
        for needle in ["Payload", "eevdf", "kind: Scheduler(\"eevdf\")"] {
            assert!(s.contains(needle), "debug output: {s}");
        }
    }

    /// `{:?}` on a `PayloadKind` shows both the variant name and the
    /// identity it wraps (binary name or scheduler name).
    #[test]
    fn payload_kind_debug_renders_variant_and_identity() {
        let cases = [
            (PayloadKind::Binary("fio"), ["Binary", "fio"]),
            (Payload::KERNEL_DEFAULT.kind, ["Scheduler", "eevdf"]),
        ];
        for (kind, needles) in cases {
            let s = format!("{kind:?}");
            for needle in needles {
                assert!(s.contains(needle), "debug output: {s}");
            }
        }
    }

    /// Compile-time check that `OutputFormat` is `Copy` (and thus
    /// `Clone`, its supertrait) and `Debug`: `a` stays usable after
    /// being assigned to `b`, and both are formatted with `{:?}`.
    #[test]
    fn output_format_derive_debug_clone_copy() {
        let a = OutputFormat::Json;
        let b = a; // Copy
        let _ = format!("{a:?} {b:?}"); // Debug
    }

    /// `as_scheduler` returns `Some` for a scheduler-kind payload and
    /// exposes the wrapped scheduler, whose name is "eevdf" for
    /// `Payload::KERNEL_DEFAULT`.
    #[test]
    fn as_scheduler_extracts_ref_for_scheduler_kind() {
        let s = Payload::KERNEL_DEFAULT
            .as_scheduler()
            .expect("Scheduler kind");
        assert_eq!(s.name, "eevdf");
    }

    /// A by-value binding of `Payload::KERNEL_DEFAULT` keeps the
    /// same name, scheduler flag, and wrapped scheduler as the
    /// constant itself.
    // NOTE(review): despite the test name, no explicit `.clone()` is
    // called here — the binding is a fresh instantiation of the
    // const. Confirm whether an explicit clone was intended.
    #[test]
    fn payload_clone_preserves_identity() {
        let a = Payload::KERNEL_DEFAULT;
        assert_eq!(a.name, Payload::KERNEL_DEFAULT.name);
        assert_eq!(a.is_scheduler(), Payload::KERNEL_DEFAULT.is_scheduler());
        assert_eq!(a.as_scheduler().map(|s| s.name), Some("eevdf"));
    }

    /// Round-trip a [`RawPayloadOutput`] carrying NON-empty stdout
    /// AND non-empty stderr through serde_json::to_vec /
    /// from_slice — the wire format the guest's
    /// `emit_raw_payload_output_to_shm` actually uses on the SHM ring
    /// (see src/scenario/payload_run.rs `emit_raw_payload_output_to_shm`).
    /// Pins the wire-format invariant for the deferred LlmExtract
    /// path: both raw streams MUST survive the guest→host transport,
    /// because the host's `host_side_llm_extract` runs the model
    /// stdout-primary with stderr-fallback, and a regression that
    /// dropped either stream on the wire would silently degrade the
    /// extraction (e.g. a schbench-style payload that emits its
    /// summary on stderr would land empty on the host).
    ///
    /// Round-trip rather than direct value comparison so a future
    /// change to field naming, serde rename, or representation
    /// surfaces here as a failed deserialize rather than a silent
    /// per-field shape drift.
    ///
    /// Every field is checked after the round-trip, including the
    /// absent `metric_bounds`, so no part of the record can be
    /// dropped or invented by the transport unnoticed.
    #[test]
    fn raw_payload_output_serde_round_trip_carries_both_streams() {
        let original = RawPayloadOutput {
            payload_index: 17,
            stdout: "stdout document with metrics: {\"iops\": 100}\n".to_string(),
            stderr: "stderr fallback document: {\"latency\": 42}\n".to_string(),
            hint: Some("focus on iops".to_string()),
            metric_hints: vec![
                WireMetricHint {
                    name: "iops".to_string(),
                    polarity: Polarity::HigherBetter,
                    unit: "iops".to_string(),
                },
                WireMetricHint {
                    name: "latency".to_string(),
                    polarity: Polarity::LowerBetter,
                    unit: "ns".to_string(),
                },
            ],
            metric_bounds: None,
        };
        // Same encoder/decoder pair as the empty-streams sibling test
        // and the documented SHM wire format.
        let bytes =
            serde_json::to_vec(&original).expect("RawPayloadOutput must always serialize");
        let restored: RawPayloadOutput =
            serde_json::from_slice(&bytes).expect("wire format must round-trip");

        assert_eq!(restored.payload_index, original.payload_index);
        assert_eq!(
            restored.stdout, original.stdout,
            "stdout must round-trip byte-for-byte; lost stdout would degrade \
             the host's stdout-primary extraction silently",
        );
        assert_eq!(
            restored.stderr, original.stderr,
            "stderr must round-trip byte-for-byte; lost stderr would silently \
             defeat the stderr-fallback contract for payloads (e.g. schbench) \
             that emit structured output on stderr only",
        );
        assert_eq!(restored.hint, original.hint);
        // Length is compared first: `zip` stops at the shorter side
        // and would otherwise hide a truncated hint list.
        assert_eq!(restored.metric_hints.len(), original.metric_hints.len());
        for (got, want) in restored
            .metric_hints
            .iter()
            .zip(original.metric_hints.iter())
        {
            assert_eq!(got.name, want.name);
            assert_eq!(got.polarity, want.polarity);
            assert_eq!(got.unit, want.unit);
        }
        assert!(
            restored.metric_bounds.is_none(),
            "absent metric_bounds must survive as None"
        );
    }

    /// Boundary case for the round-trip pin: a `RawPayloadOutput` with
    /// empty stdout AND empty stderr, no hint, no metric hints, and no
    /// metric bounds must deserialize cleanly with every field intact.
    /// Empty strings must come back as empty strings rather than
    /// collapsing into a None / null: the host needs to distinguish
    /// "stream had no bytes" from "stream was missing" (the host's
    /// stderr fallback is gated on `!stderr.is_empty()`, so an absent
    /// vs empty stderr would behave differently if the encoding
    /// dropped the field).
    ///
    /// NOTE(review): this test round-trips through `serde_json`, while
    /// `raw_payload_output_serde_round_trip_carries_both_streams`
    /// treats bincode as the wire format. Confirm which codec the SHM
    /// transport actually uses and align this test if it is bincode.
    #[test]
    fn raw_payload_output_serde_round_trip_empty_streams_preserved() {
        let original = RawPayloadOutput {
            payload_index: 0,
            stdout: String::new(),
            stderr: String::new(),
            hint: None,
            metric_hints: Vec::new(),
            metric_bounds: None,
        };
        let bytes = serde_json::to_vec(&original).expect("serialize");
        let restored: RawPayloadOutput = serde_json::from_slice(&bytes).expect("deserialize");
        assert_eq!(restored.payload_index, 0);
        assert!(
            restored.stdout.is_empty(),
            "empty stdout must survive round-trip as empty string, not vanish to None or null"
        );
        assert!(
            restored.stderr.is_empty(),
            "empty stderr must survive round-trip as empty string"
        );
        assert!(restored.hint.is_none(), "absent hint must survive as None");
        assert!(
            restored.metric_hints.is_empty(),
            "empty metric_hints must survive as empty Vec"
        );
        // Every other field of the fixture is pinned above; without this
        // check the all-empty boundary case leaves `metric_bounds` unverified.
        assert!(
            restored.metric_bounds.is_none(),
            "absent metric_bounds must survive as None"
        );
    }

    /// Asymmetric streams, stdout side: stdout carries a document and
    /// stderr is empty — the usual shape for well-behaved payloads
    /// (fio, stress-ng JSON mode) where stdout is canonical. Guards
    /// against an encoding that "collapses" the empty stderr into the
    /// populated stdout (e.g. a serializer that omits empty fields
    /// paired with a deserializer that fills the gap with a duplicate
    /// of the other field).
    ///
    /// NOTE(review): the round-trip here goes through `serde_json`,
    /// whereas `raw_payload_output_serde_round_trip_carries_both_streams`
    /// uses bincode — confirm which codec is the real wire format.
    #[test]
    fn raw_payload_output_serde_round_trip_stdout_only() {
        let sent = RawPayloadOutput {
            payload_index: 3,
            stdout: String::from(r#"{"throughput": 9000}"#),
            stderr: String::new(),
            hint: None,
            metric_hints: Vec::new(),
            metric_bounds: None,
        };

        let encoded = serde_json::to_vec(&sent).expect("serialize");
        let received: RawPayloadOutput = serde_json::from_slice(&encoded).expect("deserialize");

        // Only the two streams matter for this case; pull them out and
        // ignore the remaining fields.
        let RawPayloadOutput { stdout, stderr, .. } = received;
        assert_eq!(stdout, sent.stdout);
        assert!(stderr.is_empty(), "stderr must remain empty");
    }

    /// Mirror image of `raw_payload_output_serde_round_trip_stdout_only`:
    /// stdout is empty and stderr carries the document — the
    /// schbench-style shape behind the host's stderr-fallback
    /// contract. If the encoding lost this asymmetric variant, every
    /// stderr-only payload's metrics would silently break.
    ///
    /// NOTE(review): the round-trip here goes through `serde_json`,
    /// whereas `raw_payload_output_serde_round_trip_carries_both_streams`
    /// uses bincode — confirm which codec is the real wire format.
    #[test]
    fn raw_payload_output_serde_round_trip_stderr_only() {
        let sent = RawPayloadOutput {
            payload_index: 9,
            stdout: String::new(),
            stderr: String::from(r#"{"latency_p99": 1234}"#),
            hint: None,
            metric_hints: Vec::new(),
            metric_bounds: None,
        };

        let encoded = serde_json::to_vec(&sent).expect("serialize");
        let received: RawPayloadOutput = serde_json::from_slice(&encoded).expect("deserialize");

        // Only the two streams matter for this case; pull them out and
        // ignore the remaining fields.
        let RawPayloadOutput { stdout, stderr, .. } = received;
        assert!(stdout.is_empty(), "stdout must remain empty");
        assert_eq!(stderr, sent.stderr);
    }
}