ktstr 0.18.0

Test harness for Linux process schedulers
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
use super::*;

// -- MetricKind temporal aggregation ----------------------------

/// Sum semantics for `Counter`: only finite samples contribute to the
/// total, and an input with no finite sample at all (empty, or made
/// up solely of NaN / infinity) reduces to `None`.
#[test]
fn aggregate_samples_counter_sums_finite_values() {
    let kind = MetricKind::Counter;

    let all_finite = aggregate_samples(&[1.0, 2.0, 3.0], kind);
    assert_eq!(all_finite, Some(6.0));

    let with_nan = aggregate_samples(&[1.0, f64::NAN, 3.0], kind);
    assert_eq!(with_nan, Some(4.0), "NaN samples drop from the sum");

    let empty = aggregate_samples(&[], kind);
    assert_eq!(empty, None, "empty input → None");

    let no_finite = aggregate_samples(&[f64::NAN, f64::INFINITY], kind);
    assert_eq!(no_finite, None, "all-non-finite → None");
}

/// Mean semantics for `Gauge(Avg)`: `[1, 2, 3]` reduces to the
/// arithmetic mean `2`.
#[test]
fn aggregate_samples_gauge_avg_means_finite() {
    assert_eq!(
        aggregate_samples(&[1.0, 2.0, 3.0], MetricKind::Gauge(GaugeAgg::Avg)),
        Some(2.0),
    );
}

/// Last-value semantics for `Gauge(Last)`: the reduction is the final
/// FINITE sample of the slice.
#[test]
fn aggregate_samples_gauge_last_returns_last() {
    let kind = MetricKind::Gauge(GaugeAgg::Last);

    assert_eq!(aggregate_samples(&[1.0, 2.0, 3.0], kind), Some(3.0));

    // A trailing NaN is dropped before the pick, so the answer is the
    // last finite sample (2.0), not the NaN and not `None`.
    assert_eq!(aggregate_samples(&[1.0, 2.0, f64::NAN], kind), Some(2.0));
}

/// Max semantics shared by two kinds: `Gauge(Max)` and `Peak` each
/// reduce a sample slice to its largest value.
#[test]
fn aggregate_samples_max_and_peak_pick_largest() {
    for kind in [MetricKind::Gauge(GaugeAgg::Max), MetricKind::Peak] {
        assert_eq!(aggregate_samples(&[1.0, 5.0, 3.0], kind), Some(5.0));
    }
}

/// Latest-snapshot semantics for `Timestamp`: the reduction is the
/// last sample of the slice.
#[test]
fn aggregate_samples_timestamp_returns_last() {
    assert_eq!(
        aggregate_samples(&[100.0, 200.0, 300.0], MetricKind::Timestamp),
        Some(300.0),
    );
}

// -- Weighted Gauge(Avg) ------------------------------------

/// Weighted-mean semantics for `Gauge(Avg)`: with explicit per-sample
/// weights the reduction is `sum(v*w) / sum(w)`. Here
/// (10*5 + 20*15) / (5 + 15) = 350 / 20 = 17.5 — the 3x heavier weight
/// on the higher value pulls the result above the unweighted midpoint
/// of 15.
#[test]
fn aggregate_samples_weighted_gauge_avg_pulls_toward_heavier_sample() {
    assert_eq!(
        aggregate_samples_weighted(&[(10.0, 5), (20.0, 15)], MetricKind::Gauge(GaugeAgg::Avg)),
        Some(17.5),
    );
}

/// The unweighted entry point keeps the legacy `Gauge(Avg)` semantic:
/// a plain arithmetic mean. The values `[10, 20]` reduce to 15.0 here,
/// versus 17.5 through the weighted entry point with weights 5 and 15
/// — the gap between the two is the cross-RUN bias.
#[test]
fn aggregate_samples_gauge_avg_unweighted_is_arithmetic_mean() {
    assert_eq!(
        aggregate_samples(&[10.0, 20.0], MetricKind::Gauge(GaugeAgg::Avg)),
        Some(15.0),
    );
}

/// When every weight is zero, weighted `Gauge(Avg)` must not divide by
/// the zero weight total; it falls back to the unweighted arithmetic
/// mean instead: (10 + 30) / 2 = 20. Mirrors `merge_metric_values` at
/// `crate::assert::merge_matched_phase_buckets` per
/// single-source-of-truth.
#[test]
fn aggregate_samples_weighted_gauge_avg_zero_total_weight_falls_back_to_mean() {
    assert_eq!(
        aggregate_samples_weighted(&[(10.0, 0), (30.0, 0)], MetricKind::Gauge(GaugeAgg::Avg)),
        Some(20.0),
    );
}

/// A `Counter` sum does not depend on weights: 10 + 20 = 30 whatever
/// the weights are. Pinned so that a refactor introducing
/// weight-sensitive Counter semantics fails here.
#[test]
fn aggregate_samples_weighted_counter_ignores_weights() {
    assert_eq!(
        aggregate_samples_weighted(&[(10.0, 5), (20.0, 15)], MetricKind::Counter),
        Some(30.0),
    );
}

/// A `Peak` max does not depend on weights: max(10, 20) = 20.
#[test]
fn aggregate_samples_weighted_peak_ignores_weights() {
    assert_eq!(
        aggregate_samples_weighted(&[(10.0, 5), (20.0, 15)], MetricKind::Peak),
        Some(20.0),
    );
}

/// A `Gauge(Max)` max does not depend on weights: max(10, 20) = 20.
#[test]
fn aggregate_samples_weighted_gauge_max_ignores_weights() {
    assert_eq!(
        aggregate_samples_weighted(&[(10.0, 5), (20.0, 15)], MetricKind::Gauge(GaugeAgg::Max)),
        Some(20.0),
    );
}

/// Cross-RUN fold of the derived kinds. This is the ONLY route by which
/// Distribution / WorstLowest reach aggregate_finite (WITHIN a run they
/// are `is_derived` and are never reduced from a slice).
///
/// - `Distribution { reduction: Worst }` folds by MAX, independent of
///   weight.
/// - Every other Distribution reduction, and every WorstLowest, folds by
///   the UNWEIGHTED mean.
///
/// The weights are deliberately UNEQUAL (5 vs 15): a
/// run_sample_count-weighted mean would yield 25.0, which is
/// distinguishable from the unweighted 20.0 asserted here.
#[test]
fn aggregate_samples_weighted_distribution_worstlowest_arms() {
    // Worst → MAX, weight-independent: max(10, 20) = 20.
    let worst = MetricKind::Distribution {
        source: SampleSource::RunDelayNs,
        reduction: SampleReduction::Worst,
    };
    let folded = aggregate_samples_weighted(&[(10.0, 5), (20.0, 15)], worst);
    assert_eq!(folded, Some(20.0));

    // Distribution (non-Worst) → UNWEIGHTED mean: (10 + 30)/2 = 20.0,
    // NOT the run_sample_count-weighted (10*5 + 30*15)/20 = 25.0.
    let p99 = MetricKind::Distribution {
        source: SampleSource::WakeLatencyNs,
        reduction: SampleReduction::P99,
    };
    let folded = aggregate_samples_weighted(&[(10.0, 5), (30.0, 15)], p99);
    assert_eq!(folded, Some(20.0));

    // WorstLowest → UNWEIGHTED mean as well: (10 + 30)/2 = 20.0, not 25.0.
    let worst_lowest = MetricKind::WorstLowest {
        numerator: WorstLowestNumerator::Iterations,
        denominator: WorstLowestDenominator::CpuTimeNs,
    };
    let folded = aggregate_samples_weighted(&[(10.0, 5), (30.0, 15)], worst_lowest);
    assert_eq!(folded, Some(20.0));
}

/// Cross-RUN contract for weight-0 contributors: a monitor-off /
/// no-periodic-capture run (`run_sample_count == 0`) that DID emit a
/// Distribution / WorstLowest key is COUNTED in the unweighted mean and
/// never zero-weighted out. `aggregate_finite`'s Distribution/WorstLowest
/// arm is `sum / finite.len()` (it structurally ignores the weight
/// closure — see the monitor-off rationale at the arm: a weighted fold
/// "would silently zero-weight a monitor-off run"), so a `(value, 0)`
/// pair that passes the `is_finite` filter is included. Guards against a
/// future weight-aware refactor silently dropping a monitor-off run's
/// distributional value.
#[test]
fn aggregate_samples_weighted_distribution_worstlowest_counts_zero_weight_contributor() {
    // (10.0, 0) stands for a monitor-off run that emitted the key and
    // (30.0, 15) for a normal run. Counting the weight-0 run gives
    // (10 + 30)/2 = 20.0; a weight-aware fold that dropped it would
    // give 30.0 instead.
    let p99 = MetricKind::Distribution {
        source: SampleSource::WakeLatencyNs,
        reduction: SampleReduction::P99,
    };
    let folded = aggregate_samples_weighted(&[(10.0, 0), (30.0, 15)], p99);
    assert_eq!(folded, Some(20.0));

    let worst_lowest = MetricKind::WorstLowest {
        numerator: WorstLowestNumerator::Iterations,
        denominator: WorstLowestDenominator::CpuTimeNs,
    };
    let folded = aggregate_samples_weighted(&[(10.0, 0), (30.0, 15)], worst_lowest);
    assert_eq!(folded, Some(20.0));
}

/// The last-finite pick used by `Gauge(Last)` and `Timestamp` does not
/// depend on weights: both kinds return the final value, 20.
#[test]
fn aggregate_samples_weighted_gauge_last_and_timestamp_ignore_weights() {
    for kind in [MetricKind::Gauge(GaugeAgg::Last), MetricKind::Timestamp] {
        assert_eq!(
            aggregate_samples_weighted(&[(10.0, 5), (20.0, 15)], kind),
            Some(20.0),
        );
    }
}

/// A pair whose value is NaN is discarded as a unit — value AND weight.
/// Because the (f64, usize) pair type binds each weight to its own
/// value, filtering NaN cannot shift a weight onto a different sample.
/// With the middle pair gone: (10*5 + 30*20) / (5 + 20) = 650 / 25 = 26.0.
#[test]
fn aggregate_samples_weighted_gauge_avg_drops_nan_pairs_in_lockstep() {
    assert_eq!(
        aggregate_samples_weighted(
            &[(10.0, 5), (f64::NAN, 10), (30.0, 20)],
            MetricKind::Gauge(GaugeAgg::Avg),
        ),
        Some(26.0),
    );
}

// -- Per-phase reductions --------------------------------------

/// `phase_counter_delta` reduces a phase window to `last - first` over
/// its finite samples, which is the right per-phase semantic for a
/// cumulative-since-boot counter. It differs from
/// `aggregate_samples(..., Counter)`, which SUMS the samples (correct
/// for cross-run aggregation, wrong for per-phase deltas).
#[test]
fn phase_counter_delta_returns_last_minus_first() {
    // Cumulative counter rising 100 → 200 across the window: delta 100.
    let clean_window = phase_counter_delta(&[100.0, 150.0, 175.0, 200.0]);
    assert_eq!(clean_window, Some(100.0));

    // NaN at either edge is filtered out first, so first/last come from
    // the remaining finite sequence: 175 - 150 = 25.
    let nan_edges = phase_counter_delta(&[f64::NAN, 150.0, 175.0, f64::NAN]);
    assert_eq!(nan_edges, Some(25.0));
}

/// With exactly one finite sample `phase_counter_delta` yields
/// `Some(0.0)` (a self-delta: the metric was observed, but no change
/// across the phase can be computed). `None` is reserved for the case
/// where no sample is finite. The bucket renderer relies on the
/// difference: `Some(0.0)` paints "phase has data, delta is 0", `None`
/// paints "no data".
#[test]
fn phase_counter_delta_one_finite_sample_is_self_delta() {
    let lone_sample = phase_counter_delta(&[42.0]);
    assert_eq!(lone_sample, Some(0.0));

    let lone_finite_among_nan = phase_counter_delta(&[f64::NAN, 42.0, f64::NAN]);
    assert_eq!(lone_finite_among_nan, Some(0.0));

    let empty = phase_counter_delta(&[]);
    assert_eq!(empty, None);

    let nothing_finite = phase_counter_delta(&[f64::NAN, f64::INFINITY]);
    assert_eq!(nothing_finite, None);
}

/// When a counter regresses within a phase window (scheduler-restart
/// counter reset, kernel module reload, etc.) the delta is clamped to 0
/// instead of going negative — a negative value would be misread by, or
/// trip, a downstream "negative count is impossible" assertion. Mirrors
/// the existing `monitor::counter_delta` clamp pattern.
#[test]
fn phase_counter_delta_clamps_negative_to_zero_on_counter_reset() {
    // last (100) < first (500): raw delta would be -400.
    let after_reset = phase_counter_delta(&[500.0, 600.0, 100.0]);
    assert_eq!(
        after_reset,
        Some(0.0),
        "last < first clamps to 0 (counter reset detected)",
    );
}

/// Derived kinds (`Rate` / `Distribution` / `WorstLowest`) have NO
/// per-phase value: each one reports `is_derived()`, merges as
/// `MergeKind::Recompute`, and `aggregate_samples_for_phase` returns
/// `None` for it even when handed finite samples. Pins the within-run
/// skip-routing that keeps derived kinds off the single-slice reducers
/// (their value is produced post-merge by derive_rate_metrics /
/// populate_run_distribution_metrics).
#[test]
fn aggregate_samples_for_phase_returns_none_for_derived_kinds() {
    let derived_kinds = [
        MetricKind::Rate {
            numerator: "a",
            denominator: "b",
        },
        MetricKind::Distribution {
            source: SampleSource::WakeLatencyNs,
            reduction: SampleReduction::P99,
        },
        MetricKind::WorstLowest {
            numerator: WorstLowestNumerator::Iterations,
            denominator: WorstLowestDenominator::NumWorkers,
        },
    ];

    for kind in derived_kinds {
        assert!(kind.is_derived(), "{kind:?} must be is_derived");
        assert_eq!(kind.merge_kind(), MergeKind::Recompute);

        // Minimal definition: only `kind` matters to the reducer here.
        let def = MetricDef {
            name: "x",
            accessor: |_| None,
            display_unit: "",
            polarity: crate::test_support::Polarity::LowerBetter,
            default_abs: 0.0,
            default_rel: 0.0,
            kind,
        };
        assert_eq!(
            aggregate_samples_for_phase(&def, &[1.0, 2.0, 3.0]),
            None,
            "derived kind {kind:?} must have no per-phase reduction",
        );
    }
}

/// `aggregate_samples_for_phase` selects the per-phase reduction from
/// the metric's kind: `Counter` reduces by `last - first` (the
/// `phase_counter_delta` semantic), while `Peak`, `Gauge(Avg)` and
/// `DeltaSum` keep the flat-run `aggregate_samples` semantic (max /
/// mean / sum). The invariant pinned here is that a Counter-kind metric
/// must NOT collapse to a sum across the phase window.
#[test]
fn aggregate_samples_for_phase_dispatches_on_kind() {
    // The fixtures below differ only in name, polarity and kind.
    let def = |name, polarity, kind: MetricKind| MetricDef {
        name,
        accessor: |_| None,
        display_unit: "",
        polarity,
        default_abs: 0.0,
        default_rel: 0.0,
        kind,
    };

    // Counter: routed through `phase_counter_delta`, NOT through
    // `aggregate_samples`'s sum. 175 - 100 = 75; the sum would be 425.
    let counter = def(
        "total_test_counter",
        crate::test_support::Polarity::HigherBetter,
        MetricKind::Counter,
    );
    assert_eq!(
        aggregate_samples_for_phase(&counter, &[100.0, 150.0, 175.0]),
        Some(75.0),
        "Counter kind must reduce by last - first, not by sum",
    );
    assert_ne!(
        aggregate_samples_for_phase(&counter, &[100.0, 150.0, 175.0]),
        Some(425.0),
        "Counter kind MUST NOT collapse to flat-run sum across a phase",
    );

    // Peak: routed through `aggregate_samples` -> max.
    let peak = def(
        "max_test_peak",
        crate::test_support::Polarity::LowerBetter,
        MetricKind::Peak,
    );
    assert_eq!(
        aggregate_samples_for_phase(&peak, &[1.0, 5.0, 3.0]),
        Some(5.0),
        "Peak kind must reduce by max",
    );

    // Gauge(Avg): arithmetic mean of the window.
    let gauge_avg = def(
        "worst_test_gauge",
        crate::test_support::Polarity::LowerBetter,
        MetricKind::Gauge(GaugeAgg::Avg),
    );
    assert_eq!(
        aggregate_samples_for_phase(&gauge_avg, &[2.0, 4.0, 6.0]),
        Some(4.0),
        "Gauge(Avg) kind must reduce by arithmetic mean",
    );

    // DeltaSum: the samples are ALREADY per-read deltas, so the
    // per-phase reduction SUMS them. A last-minus-first would
    // difference two deltas: [10, 20, 5] -> 35, not 5 - 10 (which a
    // Counter would clamp to 0).
    let delta_sum = def(
        "total_test_delta",
        crate::test_support::Polarity::LowerBetter,
        MetricKind::DeltaSum,
    );
    assert_eq!(
        aggregate_samples_for_phase(&delta_sum, &[10.0, 20.0, 5.0]),
        Some(35.0),
        "DeltaSum kind must reduce by sum of per-read deltas",
    );
    assert_eq!(
        aggregate_samples(&[10.0, 20.0, 5.0], MetricKind::DeltaSum),
        Some(35.0),
        "DeltaSum flat-run reduction is also a sum",
    );
}

/// Core contract of [`MetricKind::Rate`]: per phase the rate is derived
/// as numerator/denominator, and across a merge it is RE-POOLED
/// (Σnum/Σdenom) from the merged components — a ready-made ratio is
/// never averaged.
#[test]
fn rate_derives_per_phase_and_repools_across_merge() {
    use std::collections::BTreeMap;

    // Single phase: rate = num / denom = 1000 / 4 = 250.
    let mut single_phase = BTreeMap::from([
        ("iters".to_string(), 1000.0),
        ("secs".to_string(), 4.0),
    ]);
    derive_rate_metrics_from(
        &mut single_phase,
        std::iter::once(("rate", "iters", "secs")),
    );
    assert_eq!(
        single_phase.get("rate").copied(),
        Some(250.0),
        "per-phase rate = num/denom",
    );

    // Merge across phases: each component folds by its own kind FIRST
    // (Counter -> sum), and only then is the rate re-derived from the
    // pooled components. Phase A = 1000 iters / 1 s = 1000/s; phase B =
    // 10 iters / 9 s ≈ 1.11/s. The correct merged rate is
    // (1000+10)/(1+9) = 101.0, NOT the mean of the two ratios
    // (1000 + 1.11)/2 ≈ 500.6.
    let mut pooled = BTreeMap::from([
        ("iters".to_string(), 1000.0 + 10.0), // Counter sum
        ("secs".to_string(), 1.0 + 9.0),      // Counter sum
    ]);
    derive_rate_metrics_from(&mut pooled, std::iter::once(("rate", "iters", "secs")));
    assert_eq!(
        pooled.get("rate").copied(),
        Some(101.0),
        "merged rate must re-pool Σnum/Σdenom",
    );

    // The re-pooled value must be far from the naive mean of ratios.
    let mean_of_ratios = (1000.0 + (10.0 / 9.0)) / 2.0;
    let gap = (pooled.get("rate").copied().unwrap() - mean_of_ratios).abs();
    assert!(
        gap > 100.0,
        "re-pool must differ from mean-of-ratios (got {:?}, mean-of-ratios {mean_of_ratios})",
        pooled.get("rate"),
    );
}

/// The rate key stays ABSENT — which is distinct from a real 0.0 —
/// whenever it cannot be derived: a component is missing, the
/// denominator is zero, a component is non-finite, or the quotient of
/// two finite components overflows to infinity.
#[test]
fn rate_absent_on_missing_component_zero_or_nonfinite() {
    use std::collections::BTreeMap;

    // Case 1: only the numerator is present.
    let mut partial = BTreeMap::from([("iters".to_string(), 5.0)]);
    derive_rate_metrics_from(&mut partial, std::iter::once(("rate", "iters", "secs")));
    assert!(!partial.contains_key("rate"), "absent denom -> no rate key");

    // Case 2: same map, denominator now present but zero. The result
    // must be absent rather than an inserted inf.
    partial.insert("secs".to_string(), 0.0);
    derive_rate_metrics_from(&mut partial, std::iter::once(("rate", "iters", "secs")));
    assert!(!partial.contains_key("rate"), "zero denom -> no rate key");

    // Case 3: numerator is NaN.
    let mut nan_numerator = BTreeMap::from([
        ("iters".to_string(), f64::NAN),
        ("secs".to_string(), 2.0),
    ]);
    derive_rate_metrics_from(
        &mut nan_numerator,
        std::iter::once(("rate", "iters", "secs")),
    );
    assert!(
        !nan_numerator.contains_key("rate"),
        "NaN numerator -> no rate key"
    );

    // Case 4: both inputs finite, but MAX / MIN_POSITIVE overflows to
    // inf — the key must be absent, not inf.
    let mut overflowing = BTreeMap::from([
        ("iters".to_string(), f64::MAX),
        ("secs".to_string(), f64::MIN_POSITIVE),
    ]);
    derive_rate_metrics_from(
        &mut overflowing,
        std::iter::once(("rate", "iters", "secs")),
    );
    assert!(
        !overflowing.contains_key("rate"),
        "inf quotient -> no rate key"
    );
}

/// A Rate owns no samples, so handing one to the per-phase reducer
/// yields `None`; the build post-pass derives the value from the
/// components instead.
#[test]
fn rate_kind_returns_none_from_per_phase_reducer() {
    let kind = MetricKind::Rate {
        numerator: "n",
        denominator: "d",
    };
    let rate = MetricDef {
        name: "test_rate",
        accessor: |_| None,
        display_unit: "",
        polarity: crate::test_support::Polarity::HigherBetter,
        default_abs: 0.0,
        default_rel: 0.0,
        kind,
    };

    let reduced = aggregate_samples_for_phase(&rate, &[1.0, 2.0, 3.0]);
    assert_eq!(
        reduced, None,
        "Rate reduces to None per-phase; derive_rate_metrics owns it",
    );
}

/// Sending a Rate through the single-slice reducer is a bug, because a
/// single slice cannot express Σnum/Σdenom. `aggregate_finite` therefore
/// panics instead of silently returning a meaningless one-slice value.
#[test]
#[should_panic(expected = "must be derived via derive_rate_metrics")]
fn rate_kind_panics_in_single_slice_reducer() {
    let rate = MetricKind::Rate {
        numerator: "n",
        denominator: "d",
    };
    // The call itself is expected to panic; the result is irrelevant.
    let _ = aggregate_samples(&[1.0, 2.0], rate);
}

/// Inputs with no finite sample — empty, or all NaN / infinity — reduce
/// to `None` for both the Counter route and the Peak route. The phase
/// renderer reads an absent value as "no finite samples for this metric
/// in this phase", which is distinct from `Some(0.0)` (a real reduced
/// zero from finite samples), so the `None` shape must round-trip.
#[test]
fn aggregate_samples_for_phase_returns_none_on_empty_or_all_nan() {
    // The two fixtures differ only in name, polarity and kind.
    let def = |name, polarity, kind: MetricKind| MetricDef {
        name,
        accessor: |_| None,
        display_unit: "",
        polarity,
        default_abs: 0.0,
        default_rel: 0.0,
        kind,
    };

    let counter = def(
        "total_x",
        crate::test_support::Polarity::HigherBetter,
        MetricKind::Counter,
    );
    let counter_empty = aggregate_samples_for_phase(&counter, &[]);
    assert_eq!(counter_empty, None);
    let counter_all_nan = aggregate_samples_for_phase(&counter, &[f64::NAN, f64::NAN]);
    assert_eq!(counter_all_nan, None);

    let peak = def(
        "max_x",
        crate::test_support::Polarity::LowerBetter,
        MetricKind::Peak,
    );
    let peak_empty = aggregate_samples_for_phase(&peak, &[]);
    assert_eq!(peak_empty, None);
    let peak_non_finite = aggregate_samples_for_phase(&peak, &[f64::NAN, f64::INFINITY]);
    assert_eq!(peak_non_finite, None);
}

/// Registry-wide consistency gates: walks every entry of `METRICS` and
/// asserts that its `kind` agrees with its name, its polarity, and (for
/// `Rate`) its components. Per entry it checks:
///
/// - `Counter` / `DeltaSum` names start with `total_` or end with
///   `_count`;
/// - `Peak` names start with `max_`, or are exactly `worst_gap_ms`;
/// - `Distribution` entries are `LowerBetter`;
/// - `WorstLowest` entries are `HigherBetter`;
/// - `Rate` names end with `_rate` or contain `_per_`, and both
///   components resolve through `metric_def` to a non-`Rate` entry;
/// - the reverse gate: a name ending in `_rate`, or containing
///   `_per_sec` / `_per_cpu_sec`, must be a `Rate`, except the
///   allowlisted `worst_iterations_per_cpu_sec`.
///
/// Drift in either direction — a kind changed without renaming, or a
/// name chosen without the matching kind — trips here.
#[test]
fn every_metric_has_kind_consistent_with_naming() {
    for m in METRICS {
        // Counter and DeltaSum metrics are both cumulative totals
        // (Counter = since-boot, DeltaSum = sum of per-read deltas),
        // so both must be named with `total_` / `_count` /
        // `total_iterations` / `stuck_count` per the established
        // convention.
        if matches!(m.kind, MetricKind::Counter | MetricKind::DeltaSum) {
            assert!(
                m.name.starts_with("total_") || m.name.ends_with("_count"),
                "Counter/DeltaSum-kind metric must follow total_*/*_count naming, got {:?}",
                m.name,
            );
        }
        // Peak metrics must be named with `max_` or be the documented
        // worst-case high-water entry `worst_gap_ms`. (worst_run_delay_us
        // is now MetricKind::Distribution{RunDelayNs, Worst}, not Peak.)
        if matches!(m.kind, MetricKind::Peak) {
            assert!(
                m.name.starts_with("max_") || m.name == "worst_gap_ms",
                "Peak-kind metric must use max_* naming OR be a documented worst-* peak, got {:?}",
                m.name,
            );
        }
        // Distribution metrics are re-pooled run-level by
        // `populate_run_distribution_metrics`; for cgroups with no carried
        // samples (backdrop / stripped) it folds the per-cgroup
        // `distribution_cgroup_reduction` worst-wins via `f64::max` (in the
        // `populate_run_distribution_metrics_from` loop), which is the
        // correct worst-wins ONLY for LowerBetter metrics. Enforce that
        // coupling so a future HigherBetter Distribution cannot silently
        // invert the degraded-path regression signal (max would pick the
        // BEST cgroup as the "worst"). A HigherBetter Distribution must
        // first make that worst-wins fold polarity-aware.
        if matches!(m.kind, MetricKind::Distribution { .. }) {
            assert_eq!(
                m.polarity,
                crate::test_support::Polarity::LowerBetter,
                "Distribution-kind metric {:?} must be LowerBetter \
                     (the carrier-less fold maxes); got {:?}",
                m.name,
                m.polarity,
            );
        }
        // WorstLowest metrics are re-pooled by
        // `populate_run_distribution_metrics`'s lowest-wins fold
        // (`worst.is_none_or(|w| v < w)`), which treats the LOWEST per-cgroup
        // value as the worst — correct ONLY for HigherBetter metrics.
        // Enforce the mirror of the Distribution gate so a future
        // LowerBetter WorstLowest cannot silently invert the regression
        // signal (select the least-bad cgroup, mask the starved one); such a
        // metric must first make the lowest-wins fold polarity-aware.
        if matches!(m.kind, MetricKind::WorstLowest { .. }) {
            assert_eq!(
                m.polarity,
                crate::test_support::Polarity::HigherBetter,
                "WorstLowest-kind metric {:?} must be HigherBetter \
                     (the lowest-wins fold treats lowest as worst); got {:?}",
                m.name,
                m.polarity,
            );
        }
        // Rate metrics are derived ratios; name them `*_rate` or
        // `*_per_*` so the registry reads as a rate at a glance.
        if let MetricKind::Rate {
            numerator,
            denominator,
        } = m.kind
        {
            assert!(
                m.name.ends_with("_rate") || m.name.contains("_per_"),
                "Rate-kind metric must use *_rate or *_per_* naming, got {:?}",
                m.name,
            );
            // Components must be registered AND not themselves Rate:
            // derive_rate_metrics is a pure function of non-derived
            // components, which is what keeps the re-pool associative.
            // A rate-of-a-rate would make the post-pass order-dependent
            // on METRICS declaration order (the inner rate's key is
            // skipped in the merge loop, so a stale value could be read).
            for comp in [numerator, denominator] {
                let cd = metric_def(comp).unwrap_or_else(|| {
                    panic!(
                        "Rate metric {:?} component {comp:?} is not registered",
                        m.name
                    )
                });
                assert!(
                    !matches!(cd.kind, MetricKind::Rate { .. }),
                    "Rate metric {:?} component {comp:?} must not itself be Rate \
                         (a rate-of-a-rate breaks the associative re-derive)",
                    m.name,
                );
            }
        }

        // REVERSE gate: a metric NAMED like a per-second rate MUST be a
        // Rate, so a future per-second metric cannot silently ship as a
        // Gauge that averages ready-made ratios (the (r₁+r₂)/2 bug). Scoped
        // to per-SECOND tokens (`_rate` / `_per_sec` / `_per_cpu_sec`) — NOT
        // bare `_per_` — so a count-denominator metric like
        // `worst_iterations_per_worker` (a `WorstLowest` min-selection) is
        // not falsely flagged. `worst_iterations_per_cpu_sec` is the
        // documented exception: it is a `MetricKind::WorstLowest` (the
        // lowest cgroup's iterations / CPU-second, the per-cgroup
        // starvation signal selected lowest-wins), NOT a Σnum/Σdenom pooled
        // rate — so it is correctly NOT a Rate and keeps its accurate
        // `_per_cpu_sec` name. (The pooled cohort rate IS a Rate, under the
        // distinct name `iterations_per_cpu_sec`.)
        let looks_like_rate = m.name.ends_with("_rate")
            || m.name.contains("_per_sec")
            || m.name.contains("_per_cpu_sec");
        if looks_like_rate && m.name != "worst_iterations_per_cpu_sec" {
            assert!(
                matches!(m.kind, MetricKind::Rate { .. }),
                "metric {:?} is named like a per-second rate but is not \
                     MetricKind::Rate (register it as a Rate, or allowlist it \
                     here if it is intentionally a non-re-pooled gauge)",
                m.name,
            );
        }
    }
}

/// Replaces the legacy `col_mean_std_basic` polars-side check
/// with a direct test on the hand-rolled [`mean`] / [`std_dev`]
/// helpers that the new aggregation path uses end-to-end.
///
/// `1..=5` is symmetric around 3, so the mean is 3.0. The sum of
/// squared deviations is 4 + 1 + 0 + 1 + 4 = 10, so the
/// Bessel-corrected sample std is √(10/4) = √2.5 ≈ 1.581, whereas the
/// population std would be √(10/5) = √2 ≈ 1.414. The std is pinned to
/// √2.5 exactly (within 1e-9): a looser "greater than 1.0" bound would
/// also accept the population std and so would not detect a lost Bessel
/// correction.
#[test]
fn mean_std_basic() {
    let xs = [1.0_f64, 2.0, 3.0, 4.0, 5.0];
    let m = mean(xs.iter().copied());
    let s = std_dev(xs.iter().copied());

    assert!(
        (m - 3.0).abs() < 1e-9,
        "mean of 1..=5 must be 3.0, got {m}",
    );

    // Expected sample (n - 1 denominator) standard deviation.
    let sample_std = 2.5_f64.sqrt();
    assert!(
        (s - sample_std).abs() < 1e-9,
        "std_dev of 1..=5 must be the Bessel-corrected sample std \
         (sqrt(2.5) ~ 1.581; the population std would be sqrt(2) ~ 1.414), got {s}",
    );
}

/// Counterpart of the legacy `col_mean_std_missing_column` guard:
/// with nothing to aggregate, [`mean`] and [`std_dev`] must both
/// yield 0.0 instead of NaN or a panic — the iterator-surface
/// version of the old "missing column → (0.0, 0.0)" contract.
/// Also covers the one-sample case, where the mean is the sample
/// itself and the std is 0.0.
#[test]
fn mean_std_empty_returns_zero() {
    let no_samples: [f64; 0] = [];
    let empty_mean = mean(no_samples.iter().copied());
    let empty_std = std_dev(no_samples.iter().copied());
    assert_eq!(empty_mean, 0.0);
    assert_eq!(empty_std, 0.0);

    // One sample: Bessel's correction needs at least two values, so
    // the std collapses to 0.0 (the original notes this matches
    // polars `.std(1)`).
    let lone = [7.5_f64];
    let lone_mean = mean(lone.iter().copied());
    let lone_std = std_dev(lone.iter().copied());
    assert!((lone_mean - 7.5).abs() < f64::EPSILON);
    assert_eq!(lone_std, 0.0);
}

/// Non-finite values (NaN, +inf, -inf) must be ignored — a sentinel
/// metric value can't poison the cohort mean. Pins the
/// finite-value filter in both [`mean`] and [`std_dev`].
///
/// The fixture carries all three non-finite classes so that a
/// filter handling only NaN, or only positive infinity, is caught.
#[test]
fn mean_std_skips_non_finite() {
    let xs = [
        1.0_f64,
        f64::NAN,
        3.0,
        f64::INFINITY,
        5.0,
        f64::NEG_INFINITY,
    ];
    // Only 1, 3, 5 contribute. Mean = 3.0; squared deviations are
    // 4 + 0 + 4 = 8, so the sample std is sqrt(8 / 2) = 2.0.
    let m = mean(xs.iter().copied());
    let s = std_dev(xs.iter().copied());
    assert!((m - 3.0).abs() < 1e-9, "mean must skip NaN/±inf, got {m}");
    assert!((s - 2.0).abs() < 1e-9, "std must skip NaN/±inf, got {s}");
}

/// Negative samples are ordinary inputs to [`mean`] and
/// [`std_dev`]; the original notes that metrics such as
/// migration_ratio can legitimately take negative values.
#[test]
fn mean_std_handles_negative_values() {
    let samples = [-2.0_f64, -1.0, 0.0, 1.0, 2.0];
    let avg = mean(samples.iter().copied());
    let sigma = std_dev(samples.iter().copied());

    // Symmetric around zero, so the mean must vanish.
    assert!(
        avg.abs() < 1e-9,
        "mean of symmetric values should be 0, got {avg}"
    );

    // Sample std: squared deviations 4 + 1 + 0 + 1 + 4 = 10, divided
    // by n - 1 = 4, gives sqrt(2.5) ≈ 1.581.
    let expected_sigma = 1.58113883;
    assert!(
        (sigma - expected_sigma).abs() < 1e-6,
        "std dev mismatch, got {sigma}"
    );
}

/// [`mean`] and [`std_dev`] stay accurate for very large magnitudes.
/// The samples are multiples of 1e150: their squares are on the
/// order of 1e300, which is still below `f64::MAX` (≈ 1.8e308), so
/// a sum-of-squares computation has headroom and any non-finite
/// result would indicate a bug rather than a fixture artifact.
#[test]
fn mean_std_handles_large_values() {
    let base = 1e150_f64;
    let samples = [base, 2.0 * base, 3.0 * base];
    let avg = mean(samples.iter().copied());
    let sigma = std_dev(samples.iter().copied());

    // Errors are measured relative to `base`, since absolute
    // differences at this scale are meaningless.
    let mean_rel_err = (avg - 2.0 * base).abs() / base;
    assert!(mean_rel_err < 1e-12, "mean of large values");

    // Deviations are (-base, 0, +base), so the sample std is
    // base * sqrt((1 + 0 + 1) / 2) = base.
    let std_rel_err = (sigma - base).abs() / base;
    assert!(std_rel_err < 1e-12, "std dev of large values");
}

/// Subnormal inputs are finite, so [`mean`] must include them
/// rather than flushing or filtering them away.
#[test]
fn mean_std_handles_subnormal_values() {
    // Half of the smallest positive normal f64, i.e. a subnormal.
    let tiny = f64::MIN_POSITIVE / 2.0;
    let samples = [tiny, tiny * 2.0, tiny * 3.0];
    let avg = mean(samples.iter().copied());

    // The samples are 1x, 2x and 3x `tiny`; their sum (6x) and the
    // quotient 6x / 3 = 2x are all exactly representable, so strict
    // equality is appropriate (assuming a plain sum / count mean).
    // Equality also rejects wrong-divisor, partial-sum and
    // first-element bugs that a mere "finite and positive" check
    // would let through (e.g. dividing by count - 1 gives 3 * tiny,
    // returning the first sample gives tiny).
    let expected = 2.0 * tiny;
    assert_eq!(avg, expected, "subnormals must be summed/averaged exactly");
}

/// With exactly two samples [`std_dev`] applies Bessel's correction
/// (ddof = 1). For `[a, b]` with midpoint m = (a + b) / 2, each
/// deviation has magnitude |a - b| / 2, so
/// std = sqrt(2 * (|a - b| / 2)^2 / 1) = |a - b| / sqrt(2).
/// A population std (ddof = 0) would instead give |a - b| / 2.
#[test]
fn std_dev_two_values_bessel_corrected() {
    let pair = [3.0_f64, 7.0];
    let sigma = std_dev(pair.iter().copied());

    // Mean 5.0, deviations (-2, +2), squared sum 8, variance 8 / 1,
    // std = sqrt(8) = 4 / sqrt(2) ≈ 2.828. The population value
    // would be 2.0, well outside the tolerance.
    let expected = 2.8284271247461903;
    assert!(
        (sigma - expected).abs() < 1e-9,
        "Bessel-corrected std dev for two values"
    );
}

// -- find_outliers and find_worst_topos tests --

/// With no rows at all, `find_outliers` has nothing to report and
/// must return an empty collection.
#[test]
fn find_outliers_empty_input() {
    let no_rows: Vec<GauntletRow> = Vec::new();
    let found = find_outliers(&no_rows);
    assert!(found.is_empty(), "empty input should yield no outliers");
}

/// `find_outliers` yields nothing when no row counts as a pass.
/// The fixture has one failed row and one row created as passing
/// but then marked skipped; neither is expected to feed outlier
/// detection.
#[test]
fn find_outliers_no_pass_rows() {
    let failed = make_row("s1", "t1", false, 10.0);

    // Built as a pass, then flagged skipped so it no longer counts.
    let mut skipped = make_row("s2", "t2", true, 20.0);
    skipped.skipped = true;

    let rows = vec![failed, skipped];
    let found = find_outliers(&rows);
    assert!(found.is_empty(), "no pass rows should yield no outliers");
}

/// A lone scenario can never be an outlier: its mean *is* the
/// overall mean, so it cannot sit above a mean-plus-2-sigma
/// threshold.
#[test]
fn find_outliers_single_scenario_no_outlier() {
    // Three passing rows, all in the scenario "only".
    let rows: Vec<GauntletRow> = [("t1", 10.0), ("t2", 12.0), ("t3", 11.0)]
        .iter()
        .map(|&(topo, spread)| make_row("only", topo, true, spread))
        .collect();

    let found = find_outliers(&rows);
    assert!(found.is_empty(), "single scenario cannot produce outliers");
}

/// find_outliers detects a clear outlier scenario.
///
/// Three scenarios: `normal1` and `normal2` contribute five rows
/// each at spread 5.0, and `outlier` contributes two rows at 100.0
/// and 110.0. The high-spread scenario must be reported on the
/// "spread" metric with sigma above 2.0, and both of its topologies
/// must be listed as the worst offenders.
#[test]
fn find_outliers_detects_clear_outlier() {
    // Ten "normal" rows: t0..t4 in normal1, t5..t9 in normal2.
    let mut rows: Vec<GauntletRow> = (0..10)
        .map(|i| {
            let scenario = if i < 5 { "normal1" } else { "normal2" };
            make_row(scenario, &format!("t{i}"), true, 5.0)
        })
        .collect();
    // One outlier scenario with high spread (100+ to ensure > 2-sigma).
    rows.push(make_row("outlier", "t10", true, 100.0));
    rows.push(make_row("outlier", "t11", true, 110.0));

    let outliers = find_outliers(&rows);
    assert!(!outliers.is_empty(), "should detect outlier scenario");

    // `expect` replaces the former `is_some()` assert + `unwrap()`:
    // a single lookup that fails with the same message.
    let outlier = outliers
        .iter()
        .find(|o| o.metric == "spread")
        .expect("should have spread metric outlier");
    assert_eq!(outlier.scenario, "outlier");
    assert!(
        outlier.sigma > 2.0,
        "sigma should exceed 2.0 threshold, got {}",
        outlier.sigma
    );

    // Worst topos should include t10 and t11 (both exceed threshold).
    // NOTE(review): assuming threshold = overall mean + 2 × sample
    // std, it is ≈ 99.6 here, so t10 (100.0) clears it only
    // narrowly — confirm before changing the fixture values.
    for topo in ["t10", "t11"] {
        assert!(
            outlier.worst_topos.contains(&topo.to_string()),
            "worst_topos should include {topo}, got {:?}",
            outlier.worst_topos
        );
    }
}

/// Below-threshold side of the outlier comparison.
///
/// `find_outliers` is described as flagging a scenario only when
/// `scenario_mean > threshold` (strict, so a scenario sitting
/// exactly on the threshold is not reported). Constructing a
/// fixture whose scenario mean equals `mean + 2*std` exactly is
/// impractical: the simple shapes (two zero rows plus one row at X,
/// or ten zero rows plus one row at V) have no solution, and
/// floating-point rounding would make an exact hit fragile anyway.
/// This test therefore pins only the "clearly below the threshold
/// means not flagged" half; the companion test
/// `find_outliers_flags_scenario_above_threshold` pins the other.
#[test]
fn find_outliers_threshold_is_strictly_greater() {
    // Two scenarios, two rows each:
    //   A: spread 0.0, 0.0   (mean 0.0)
    //   B: spread 10.0, 10.0 (mean 10.0)
    // Overall mean = 5.0; sample std = sqrt((4 * 25) / 3) ≈ 5.77;
    // threshold = 5.0 + 2 * 5.77 ≈ 16.55. Neither scenario mean
    // reaches it, so no spread outlier may be reported.
    let fixture = [
        ("A", "t1", 0.0),
        ("A", "t2", 0.0),
        ("B", "t3", 10.0),
        ("B", "t4", 10.0),
    ];
    let rows: Vec<GauntletRow> = fixture
        .iter()
        .map(|&(scenario, topo, spread)| make_row(scenario, topo, true, spread))
        .collect();

    let outliers = find_outliers(&rows);
    let any_spread_outlier = outliers.iter().any(|o| o.metric == "spread");
    assert!(!any_spread_outlier, "no outlier when below threshold");
}

/// Presence-side companion to the below-threshold test: a scenario
/// far above `overall_mean + 2*std` MUST be reported. On its own the
/// below-threshold case (10.0 < 16.55) would also pass for a
/// detector that never flags anything, or one with the comparison
/// inverted. Fixture: ten scenarios at spread 10 plus one at 100,
/// giving overall mean ~18, std ~27 and threshold ~72, which the
/// 100 scenario clears by a wide margin.
#[test]
fn find_outliers_flags_scenario_above_threshold() {
    let mut rows: Vec<GauntletRow> = Vec::new();
    for i in 0..10 {
        let scenario = format!("normal{i}");
        rows.push(make_row(&scenario, "t", true, 10.0));
    }
    rows.push(make_row("hot", "t", true, 100.0));

    let outliers = find_outliers(&rows);
    let hot_flagged = outliers
        .iter()
        .any(|o| o.metric == "spread" && o.scenario == "hot");
    assert!(
        hot_flagged,
        "a scenario far above the 2-sigma threshold must be flagged as a spread outlier",
    );
}

/// A metric with (near-)zero standard deviation carries no
/// measurable spread, so flagging outliers on it would only report
/// noise. `find_outliers` is expected to skip such a metric (the
/// original describes the cut-off as `std < f64::EPSILON`).
#[test]
fn find_outliers_skips_zero_std_metrics() {
    // Three scenarios with the identical spread value: std is 0.0.
    let rows: Vec<GauntletRow> = [("s1", "t1"), ("s2", "t2"), ("s3", "t3")]
        .iter()
        .map(|&(scenario, topo)| make_row(scenario, topo, true, 10.0))
        .collect();

    let outliers = find_outliers(&rows);
    let any_spread_outlier = outliers.iter().any(|o| o.metric == "spread");
    assert!(!any_spread_outlier, "zero std dev should skip metric");
}

/// Results from `find_outliers` come back ordered by sigma, largest
/// first, so the most extreme outlier leads the list.
#[test]
fn find_outliers_sorts_by_sigma_descending() {
    // Baseline: 30 rows at spread 5.0 (t0..t14 in normal1, t15..t29
    // in normal2). The large baseline keeps the std stable.
    let mut rows: Vec<GauntletRow> = (0..30)
        .map(|i| {
            let scenario = if i < 15 { "normal1" } else { "normal2" };
            make_row(scenario, &format!("t{i}"), true, 5.0)
        })
        .collect();
    // Two outlier scenarios; outlier2 is the more extreme one, so it
    // should carry the higher sigma.
    rows.extend([
        make_row("outlier1", "t30", true, 100.0),
        make_row("outlier1", "t31", true, 110.0),
        make_row("outlier2", "t40", true, 140.0),
        make_row("outlier2", "t41", true, 150.0),
    ]);

    let outliers = find_outliers(&rows);
    let spread_outliers: Vec<_> = outliers.iter().filter(|o| o.metric == "spread").collect();
    assert!(
        spread_outliers.len() >= 2,
        "should have at least 2 spread outliers, got {}",
        spread_outliers.len()
    );

    // Safe to index: the length was asserted just above.
    let (leader, runner_up) = (spread_outliers[0], spread_outliers[1]);
    assert!(
        leader.sigma >= runner_up.sigma,
        "outliers should be sorted by sigma descending"
    );
    assert_eq!(
        leader.scenario, "outlier2",
        "extreme outlier should be first"
    );
}

/// Asking `find_worst_topos` about a scenario that no row belongs
/// to produces an empty result.
#[test]
fn find_worst_topos_empty_when_no_matching_scenario() {
    let rows = vec![make_row("s1", "t1", true, 10.0)];
    let by_spread: MetricAccessor = |row| row.spread;

    let worst = find_worst_topos(&rows, "nonexistent", by_spread, 5.0);
    assert!(
        worst.is_empty(),
        "no matching scenario should yield empty vec"
    );
}

/// `find_worst_topos` keeps only the topologies whose metric is
/// strictly above the threshold; a value equal to the threshold is
/// excluded.
#[test]
fn find_worst_topos_filters_by_threshold() {
    // Same scenario, three topologies: below, exactly at, and above
    // the 15.0 threshold. The spread field is overwritten after
    // construction, as in the original fixture.
    let mut rows = Vec::new();
    for (topo, spread) in [("t1", 5.0), ("t2", 15.0), ("t3", 25.0)] {
        let mut row = make_row("s1", topo, true, 10.0);
        row.spread = spread;
        rows.push(row);
    }

    let by_spread: MetricAccessor = |row| row.spread;
    // t1 (5.0) no, t2 (15.0) no because the test is strict, t3 (25.0) yes.
    let worst = find_worst_topos(&rows, "s1", by_spread, 15.0);

    assert_eq!(worst.len(), 1, "only t3 should exceed threshold");
    assert!(worst.contains(&String::from("t3")));
    assert!(
        !worst.contains(&String::from("t2")),
        "t2 at threshold should not be included"
    );
}

/// Pass/fail status does not filter `find_worst_topos`: it exists
/// to attribute an outlier to topologies, and a failing topology
/// that drives the metric up is exactly what should surface.
#[test]
fn find_worst_topos_includes_failed_rows() {
    // One passing and one failing row, both above the 20.0 threshold.
    let mut rows = Vec::new();
    for (topo, passed, spread) in [("t1", true, 30.0), ("t2", false, 40.0)] {
        let mut row = make_row("s1", topo, passed, 10.0);
        row.spread = spread;
        rows.push(row);
    }

    let by_spread: MetricAccessor = |row| row.spread;
    let worst = find_worst_topos(&rows, "s1", by_spread, 20.0);

    // Both topologies are reported, regardless of pass/fail.
    assert_eq!(worst.len(), 2);
    assert!(worst.contains(&String::from("t1")));
    assert!(worst.contains(&String::from("t2")));
}

/// `group_field` resolves the dimensions "scenario", "topology" and
/// "work_type", and answers `None` for anything else — including
/// the empty string and a differently-cased metric name.
#[test]
fn group_field_unknown_column_returns_none() {
    let row = make_row("s1", "t1", true, 10.0);

    for known in ["scenario", "topology", "work_type"] {
        assert!(
            group_field(&row, known).is_some(),
            "{known:?} should be a recognized dimension"
        );
    }

    // "Spread" checks that lookup is case-sensitive.
    for unknown in ["invalid", "", "Spread"] {
        assert!(
            group_field(&row, unknown).is_none(),
            "{unknown:?} should not be a recognized dimension"
        );
    }
}

/// Each valid dimension name maps to the matching field of the row.
#[test]
fn group_field_extracts_correct_dimension() {
    let row = make_row("my_scenario", "my_topo", true, 10.0);

    // "SpinWait" is the work_type this fixture row carries without
    // any override.
    let expectations = [
        ("scenario", "my_scenario"),
        ("topology", "my_topo"),
        ("work_type", "SpinWait"),
    ];
    for (column, expected) in expectations {
        assert_eq!(group_field(&row, column), Some(expected));
    }
}

// -- format_dimension_summary tests --

/// Checks the numbers rendered by `format_dimension_summary` when
/// grouping by scenario: ordering by average spread (highest
/// first), the pass tally, and the optional per-group fields.
#[test]
fn format_dimension_summary_computed_values() {
    // "slow": a single failing row with spread 20.0 and every
    // optional field set to a value that should be rendered.
    let mut slow = make_row("slow", "tiny-1llc", false, 20.0);
    slow.gap_ms = 200;
    slow.imbalance_ratio = 2.5; // > 1.0, expected as imbal=2.5
    slow.max_dsq_depth = 8; // > 0, expected as dsq=8
    slow.stuck_count = 2.0; // > 0, expected as stuck=2
    slow.fallback_count = 15; // > 0, expected as fallback=15
    // "fast": a single passing row with spread 4.0.
    let fast = make_row("fast", "tiny-1llc", true, 4.0);

    let rows = vec![slow, fast];
    let out = format_dimension_summary(&rows, "scenario");

    // Higher average spread sorts first, so "slow" precedes "fast".
    let (slow_pos, fast_pos) = (out.find("slow").unwrap(), out.find("fast").unwrap());
    assert!(
        slow_pos < fast_pos,
        "slow should sort before fast, got:\n{out}"
    );

    // Every fragment expected on the "slow" line.
    let slow_fragments = [
        "0/1 passed",
        "avg_spread=20.0%",
        "avg_gap=200ms",
        "imbal=2.5",
        "dsq=8",
        "stuck=2",
        "fallback=15",
    ];
    for fragment in slow_fragments {
        assert!(out.contains(fragment), "slow: {fragment}, got:\n{out}");
    }

    // "fast" should show 1/1 passed.
    assert!(out.contains("1/1 passed"), "fast: 1/1 passed, got:\n{out}");
}

/// An inconclusive row must show up in the "inconclusive" bucket of
/// the dimension-summary line instead of being absorbed into
/// "failed", which is what a residual of `total - pass - skip`
/// would do. The summary is described as computing
/// `fail = total - pass - skip - inconc` and rendering
/// `"({} skipped, {} inconclusive, {} failed)"`.
///
/// This guards against a later aggregation refactor that drops the
/// inconclusive count, or its subtraction from the residual, and so
/// re-introduces the misclassification.
#[test]
fn format_dimension_summary_renders_inconclusive_bucket_distinctly() {
    // One dimension, three outcomes: Pass, Inconclusive
    // (passed=false, inconclusive=true) and Fail (all flags false).
    let mut rows = Vec::new();
    for (passed, inconclusive) in [(true, false), (false, true), (false, false)] {
        let mut row = make_row("group_a", "t1", passed, 5.0);
        row.skipped = false;
        row.inconclusive = inconclusive;
        rows.push(row);
    }

    let out = format_dimension_summary(&rows, "scenario");

    // (expected fragment, failure-message prefix); the rendered
    // summary is appended to the prefix on failure.
    let checks = [
        (
            "1/3 passed",
            "expected '1/3 passed' for 1-pass-of-3: got:\n",
        ),
        (
            "1 inconclusive",
            "inconclusive row must NOT silently fold into the failed bucket; got:\n",
        ),
        (
            "1 failed",
            "real Fail row must render as 1 failed (not be hidden by the inconclusive subtraction); got:\n",
        ),
        (
            "0 skipped",
            "no Skip contributor; skipped bucket must be 0: got:\n",
        ),
    ];
    for (fragment, prefix) in checks {
        assert!(out.contains(fragment), "{prefix}{out}");
    }
}

// -- analyze_rows tests --

/// With no rows, `analyze_rows` produces an empty report.
#[test]
fn analyze_rows_empty() {
    let report = analyze_rows(&[]);
    assert!(report.is_empty());
}

/// When the rows span more than one work type, the report includes
/// a "By work_type" section that names each of them.
#[test]
fn analyze_rows_with_work_type_diversity() {
    let mut spin = make_row("a", "t1", true, 5.0);
    spin.work_type = "SpinWait".into();
    let mut bursty = make_row("a", "t1", true, 6.0);
    bursty.work_type = "Bursty".into();

    let rows = vec![spin, bursty];
    let report = analyze_rows(&rows);

    assert!(
        report.contains("By work_type"),
        "should show work_type section when diverse"
    );
    for work_type in ["SpinWait", "Bursty"] {
        assert!(report.contains(work_type), "should list {work_type}");
    }
}

/// When every row carries the same work type, the report omits the
/// "By work_type" section.
#[test]
fn analyze_rows_no_work_type_section_when_uniform() {
    // Neither row overrides work_type, so both carry the same value.
    let rows: Vec<GauntletRow> = [("a", "t1", 5.0), ("b", "t2", 8.0)]
        .iter()
        .map(|&(scenario, topo, spread)| make_row(scenario, topo, true, spread))
        .collect();

    let report = analyze_rows(&rows);
    let has_work_type_section = report.contains("By work_type");
    assert!(
        !has_work_type_section,
        "should not show work_type when uniform"
    );
}