dsfb-gpu-debug-core 0.1.0

Deterministic CPU reference, hash chain, and semantic authority for dsfb-gpu-debug.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
//! Detector-motif evaluation.
//!
//! Sixteen deterministic detectors, each a closed-form function over the
//! entity's residual and sign grids. Bit `i` of the output cell's
//! `detector_mask` is set when `MOTIF_CATALOG[i]` fires.
//!
//! Design constraints:
//!
//! * **Per-entity, history-bounded.** Each detector reads at most
//!   `DetectorThresholds::history_window` cells back from the current
//!   window. This is what keeps the CUDA mirror tractable — a kernel
//!   thread can index into the entity's contiguous slice of the grid
//!   without atomics or shared state.
//! * **Threshold-driven, not learned.** Every decision is a comparison
//!   against a contract-locked Q16.16 threshold. No probability, no
//!   confidence score, no learned weight.
//! * **Pure functions.** No allocation in the inner loop, no
//!   floating-point. Two calls with the same inputs produce identical
//!   output buffers, which is the property the case-file hash chain
//!   depends on.

#![cfg(feature = "std")]

use std::vec::Vec;

use crate::fixed::Q16;
use crate::motif::MotifClass;
use crate::residual::ResidualCell;
use crate::sign::SignCell;

/// Configuration table for the 16 detectors.
///
/// Naming convention: a field carries the `_q16_raw` suffix when the
/// value is a raw Q16.16 `i32`, the `_q16` suffix when it is a `Q16`
/// (no field in this struct currently uses that form), and is a plain
/// `u32` for window and alternation counts. All fields are part of the
/// contract hash.
#[derive(Copy, Clone, Eq, PartialEq, Debug)]
pub struct DetectorThresholds {
    /// Norm above this fires `ResidualSpike`. v0 default: 10 ms (~ ten
    /// times the baseline latency).
    pub spike_q16_raw: i32,
    /// EWMA drift above this fires `SustainedResidualElevation`.
    pub sustain_q16_raw: i32,
    /// Absolute slew above this fires `SlewShock`.
    pub slew_shock_q16_raw: i32,
    /// Minimum norm during a plateau.
    pub plateau_min_q16_raw: i32,
    /// Maximum absolute slew tolerated inside a plateau.
    pub plateau_slew_max_q16_raw: i32,
    /// Number of consecutive cells the plateau condition must hold.
    pub plateau_windows: u32,
    /// Lookback window for the oscillation detector.
    pub oscillation_window: u32,
    /// Number of sign alternations in slew that fires `Oscillation`.
    pub oscillation_alternations: u32,
    /// Low edge of the deadband. A previous-cell norm below this is the
    /// "below" side of the deadband-exit transition.
    pub deadband_low_q16_raw: i32,
    /// High edge of the deadband. A current-cell norm above this is the
    /// "above" side of the deadband-exit transition.
    pub deadband_high_q16_raw: i32,
    /// Residual error rate above this fires `ErrorRateBurst`.
    pub error_burst_q16_raw: i32,
    /// Latency-residual threshold for the coupling detector.
    pub coupling_lat_q16_raw: i32,
    /// Error-residual threshold for the coupling detector.
    pub coupling_err_q16_raw: i32,
    /// Number of cells contributing to the variance detector's window.
    pub variance_window: u32,
    /// Max-minus-min norm above this fires `VarianceExpansion`. v0 uses
    /// the spread (a deterministic proxy for variance) so we don't need
    /// a Q16 multiply-accumulate over the window.
    pub variance_threshold_q16_raw: i32,
    /// Number of consecutive cells over which the drift must
    /// monotonically rise to fire `DriftRamp`. The `ramp_window` cells
    /// are the most recent `ramp_window` cells in entity order.
    pub ramp_window: u32,
    /// Minimum norm at a recovery edge. Below this the recovery edge is
    /// suppressed (we are not in a meaningful recovery if the level is
    /// near zero).
    pub recovery_min_norm_q16_raw: i32,
    /// All-axes upper bound for `CleanWindowStability`. If norm, drift,
    /// and |slew| are all at or under this, and no other detector
    /// fired, the clean bit is set.
    pub clean_band_q16_raw: i32,
    /// Confuser detector: cur.norm above this and prev.norm below
    /// `clean_band` makes the cell a transient-spike candidate.
    pub confuser_min_q16_raw: i32,
    /// Coupling between drift and error rate that fires
    /// `FanoutPrecursor`. The drift threshold reuses `sustain` and the
    /// error threshold reuses `error_burst`/8, so the fan-out detector
    /// fires earlier than the burst detector — capturing precursor
    /// conditions rather than the burst itself.
    ///
    /// NOTE(review): the paragraph above says the drift threshold
    /// reuses `sustain`, yet this dedicated drift field exists. Confirm
    /// against `fanout_precursor_fires` which value is actually
    /// compared and update this text accordingly.
    pub fanout_drift_q16_raw: i32,
    /// EntityLocalAnomaly: norm exceeds this many times the drift.
    /// v0 default: 4. Stored as Q16.16 multiplier so the threshold can
    /// be tuned later without changing the comparison form.
    pub entity_anomaly_factor_q16_raw: i32,
    /// Number of windows of history any detector may read back from the
    /// current cell. The window sizes above are required to be ≤ this
    /// number; this is a structural bound.
    pub history_window: u32,
}

impl DetectorThresholds {
    /// Canonical v0 thresholds. Pinned by the contract; any change is a
    /// contract breach. Chosen so the three injected fixture episodes
    /// (latency ramp, error burst, slew shock + recovery) cleanly trigger
    /// the relevant detectors while clean windows stay silent.
    ///
    /// Reading the table: `65_536` is `1 << 16`, i.e. 1.0 in Q16.16, so
    /// `N * 65_536` encodes the integer `N`. The hexadecimal entries are
    /// fractions below 1.0. Plain integers are cell/alternation counts.
    pub const CANONICAL: Self = Self {
        spike_q16_raw: 10 * 65_536,
        sustain_q16_raw: 5 * 65_536,
        slew_shock_q16_raw: 20 * 65_536,
        plateau_min_q16_raw: 5 * 65_536,
        plateau_slew_max_q16_raw: 65_536, // 1 ms
        plateau_windows: 3,
        oscillation_window: 6,
        oscillation_alternations: 3,
        deadband_low_q16_raw: 2 * 65_536,
        deadband_high_q16_raw: 4 * 65_536,
        error_burst_q16_raw: 0x4000, // 0.25 in Q16
        coupling_lat_q16_raw: 5 * 65_536,
        coupling_err_q16_raw: 0x1000, // 0.0625 in Q16
        variance_window: 5,
        variance_threshold_q16_raw: 30 * 65_536,
        ramp_window: 4,
        recovery_min_norm_q16_raw: 5 * 65_536,
        clean_band_q16_raw: 65_536, // 1 ms
        confuser_min_q16_raw: 10 * 65_536,
        fanout_drift_q16_raw: 3 * 65_536,
        entity_anomaly_factor_q16_raw: 4 * 65_536, // multiplier of 4.0
        // Every window count above (3, 6, 5, 4) is below this ceiling.
        history_window: 8,
    };
}

/// One `(window, entity)` detector cell.
///
/// `detector_mask` is the OR of every motif bit that fired on this
/// cell. It is stored as a `u32`; with sixteen motifs in the catalog
/// only the low 16 bits are expected to be populated. The cell carries
/// its position metadata so the downstream consensus stage can address
/// cells by `(window, entity)`.
///
/// `repr(C)` fixes the field order in memory: `window_idx`,
/// `entity_id`, `detector_mask` (three `u32`s, 12 bytes).
#[repr(C)]
#[derive(Copy, Clone, Eq, PartialEq, Debug, Default)]
pub struct DetectorCell {
    /// Window index this cell belongs to.
    pub window_idx: u32,
    /// Entity this cell belongs to.
    pub entity_id: u32,
    /// Bitmask of fired motif bits. Bit `i` corresponds to
    /// `motif::MotifClass::from_bit_index(i)`.
    pub detector_mask: u32,
}

/// R.9.b — fixed-width detector bitset, sized for the D2000 headline
/// profile: 32 words × 64 bits = 2 048 bits = 256 bytes. Every wide
/// detector profile (`DetectorProfile::D64..D2000`) uses this one
/// layout so the cell ABI does not depend on the profile; profiles
/// that need fewer bits leave the unused high bits zero. Detector id
/// `i` lives in word `i / 64`, bit `i % 64`.
pub type DetectorMask2048 = [u64; 32];

/// R.9.b — wide-mask detector cell used by every profile other than
/// `DetectorProfile::D16`. The struct is `repr(C)` so the GPU sees the
/// same byte form. Byte layout (264 bytes total, 8-byte aligned by
/// construction):
///
/// ```text
///   offset  field
///        0  window_idx: u32
///        4  entity_id:  u32
///        8  detector_mask: [u64; 32]   (256 bytes)
/// ```
///
/// **Why a separate cell type**: widening `DetectorCell.detector_mask`
/// from `u32` to `[u64; 32]` would shift the canonical compact byte
/// form for D16 and break Audit-mode golden hashes. R.9.b keeps the
/// legacy cell intact, byte for byte, and introduces this wider cell
/// only on the new wide dispatch path.
///
/// **Memory budget** (256x4096 K=1): 1 048 576 cells × 264 bytes
/// ≈ 277 MB for the detector stage. That fits comfortably at K=1
/// single-catalog; K>1 batched needs per-profile cell-size
/// optimisation before it fits on a 16 GB GPU (deferred to R.9.c).
#[repr(C)]
#[derive(Copy, Clone, Eq, PartialEq, Debug, Default)]
pub struct DetectorCellWide {
    /// Index of the window this cell describes.
    pub window_idx: u32,
    /// Id of the entity this cell describes.
    pub entity_id: u32,
    /// 2 048-bit detector mask. Bit `i` stands for detector id `i` of
    /// the active profile; what a given id means depends on the
    /// `DetectorProfile` (e.g. for D64: `detector_id = motif_id * 4
    /// + variant_id`).
    pub detector_mask: DetectorMask2048,
}

impl DetectorCellWide {
    /// Report whether the detector with id `detector_id` fired on this
    /// cell. Ids at or beyond the mask width (≥ 2 048) are never set,
    /// so they yield `false` rather than indexing out of range.
    #[must_use]
    pub const fn fired_by_id(&self, detector_id: u32) -> bool {
        // `&&` short-circuits, so the word index is only formed for
        // ids that are guaranteed to land inside the 32-word array.
        detector_id < 2048
            && ((self.detector_mask[(detector_id / 64) as usize] >> (detector_id % 64)) & 1) == 1
    }

    /// Count how many detector bits are set across the whole mask.
    #[must_use]
    pub fn popcount(&self) -> u32 {
        // At most 2 048 bits can be set, so the `u32` sum cannot overflow.
        self.detector_mask
            .iter()
            .map(|word| word.count_ones())
            .sum()
    }

    /// Mark detector `detector_id` as fired. Ids ≥ 2 048 fall outside
    /// the mask and are silently ignored.
    pub fn set_bit(&mut self, detector_id: u32) {
        if detector_id < 2048 {
            self.detector_mask[(detector_id / 64) as usize] |= 1u64 << (detector_id % 64);
        }
    }
}

/// R.9.b — number of threshold-scale variants each canonical motif is
/// evaluated at under the D64 profile. The D64 profile runs each of
/// the 16 canonical motifs at four threshold scales (the values are
/// listed in `D64_VARIANT_SCALES_Q16`), producing 64 distinct detector
/// slots. `detector_id = motif_id * 4 + variant_id`.
///
/// Variant semantics (locked by the R.9.b design):
///
/// * V0 = canonical (× 1.0). For V0 the scaled-threshold vector is
///   byte-identical to the canonical `DetectorThresholds`, so the
///   V0 bit at slot `motif_id * 4 + 0` equals the D16 mask bit at
///   slot `motif_id`. This gives D64 the strict-superset property:
///   every D16 firing is recoverable from the D64 mask.
/// * V1 = sensitive (× 0.5). Lower thresholds and shorter windows
///   ⇒ more firings.
/// * V2 = strict (× 1.5). Higher thresholds and longer windows ⇒
///   fewer firings.
/// * V3 = persistence-biased (× 0.75). A different scale that
///   produces a distinct firing pattern from V0/V1/V2 in most
///   cases.
///
/// Future R.9 phases (D128 / D205 / D2000) may add more variants
/// or compose family-parameter combinations on top of these scales;
/// they are not constrained to keep these specific values.
pub const D64_VARIANT_COUNT: u32 = 4;

/// R.9.b — Total detector count for the D64 profile (16 motifs × 4
/// variants = 64). Matches `DetectorProfile::D64.active_detector_count()`.
pub const D64_TOTAL_DETECTORS: u32 = 16 * D64_VARIANT_COUNT;

/// R.9.b — Variant scale factors in Q16.16 (`1 << 16` is 1.0). The
/// array has one entry per variant, i.e. `D64_VARIANT_COUNT` entries.
/// Order is canonical (V0, V1, V2, V3) and must never be reordered:
/// `detector_registry_hash` is sensitive to this list via the
/// profile-id metadata, and the per-cell firing pattern is sensitive
/// to the precise scale values and their ordering across variants.
pub const D64_VARIANT_SCALES_Q16: [i32; 4] = [
    1 << 16,               // V0: 1.0 (canonical)
    1 << 15,               // V1: 0.5 (sensitive)
    (1 << 16) + (1 << 15), // V2: 1.5 (strict)
    (1 << 16) - (1 << 14), // V3: 0.75 (persistence-biased)
];

/// R.9.d.1 — D128 profile variant count. 16 canonical motifs ×
/// 8 threshold-scaled variants = 128 active detectors. Active bits
/// `0..=127` populate `DetectorCellWide::detector_mask[0..2]`
/// (D128 spans words 0 and 1; bits 128..2047 stay zero).
pub const D128_VARIANT_COUNT: u32 = 8;

/// R.9.d.1 — total D128 detector count (16 motifs × 8 variants = 128).
pub const D128_TOTAL_DETECTORS: u32 = 16 * D128_VARIANT_COUNT;

/// R.9.d.1 — D128 variant scale factors in Q16.16 (`1 << 16` is
/// 1.0). Order is canonical (V0..V7) and must never be reordered: the
/// detector-registry hash binds to the profile id + ordered scale
/// values, and the per-cell firing pattern is sensitive to the
/// precise scales.
///
/// V0..V3 mirror the D64 scales bit-for-bit so the D128 V0-only
/// projection equals the D64 V0-only projection (and therefore the
/// canonical D16 mask) — the same R.9.b "bridge invariant"
/// extended to a wider variant set. V4..V7 add a finer-grained
/// threshold sweep on both the strict and sensitive sides; because
/// they only add variants, the per-motif OR-projection can fire on
/// at least as many cells as D64's, never fewer.
pub const D128_VARIANT_SCALES_Q16: [i32; 8] = [
    1 << 16,               // V0: 1.0   (canonical — matches D64.V0)
    1 << 15,               // V1: 0.5   (sensitive — matches D64.V1)
    (1 << 16) + (1 << 15), // V2: 1.5   (strict    — matches D64.V2)
    (1 << 16) - (1 << 14), // V3: 0.75  (persistence-biased — matches D64.V3)
    1 << 14,               // V4: 0.25  (very sensitive)
    (1 << 16) + (1 << 14), // V5: 1.25
    (1 << 17),             // V6: 2.0   (very strict)
    (1 << 17) + (1 << 16), // V7: 3.0   (extreme strict)
];

/// R.9.d.2 — D205 profile variant count. 16 canonical motifs ×
/// 13 threshold-scaled variants = **208 iterated slots**, of
/// which the bottom **205** are reported as active detectors
/// (`DetectorProfile::D205.active_detector_count()`). The
/// remaining three slots (bit indices 205, 206, 207) are
/// deterministically held at zero by the gate
/// `det_id < D205_ACTIVE_BITS` inside `evaluate_wide`; bits
/// 208..2047 are never iterated. The "205" canonical name
/// mirrors the dsfb-debug mature taxonomy count.
///
/// **The three reserved-not-fired slots are intentional**: they
/// are the scaling-ladder bridge to the dsfb-debug 27-tier
/// taxonomy, which has an uneven per-motif distribution. We do
/// NOT split the variants per motif (that would break the
/// regular kernel iteration). Instead we iterate the full 16 ×
/// 13 grid and gate firings by `det_id < 205`. This keeps the
/// CPU/GPU paths simple and the high-bit slots deterministic.
pub const D205_VARIANT_COUNT: u32 = 13;

/// R.9.d.2 — number of bits that are allowed to fire for D205.
/// Equals `DetectorProfile::D205.active_detector_count()` = 205.
/// The 16 × 13 iteration produces 208 candidate slots; the gate
/// `det_id < D205_ACTIVE_BITS` masks the top 3 to zero.
pub const D205_ACTIVE_BITS: u32 = 205;

/// R.9.d.2 — total iterated slots for D205. Equals 16 × 13 =
/// 208. The three slots `[205, 206, 207]` (motif 15 variants
/// 10/11/12, since `15 * 13 = 195`) are iterated but their bits
/// are NOT set in the output mask. Bits 208..2047 are never
/// touched.
pub const D205_TOTAL_SLOTS: u32 = 16 * D205_VARIANT_COUNT;

/// R.9.d.2 — D205 variant scale factors in Q16.16 (`1 << 16` is
/// 1.0). Order is canonical (V0..V12) and must never be reordered:
/// the detector-registry hash binds to the profile id + ordered
/// scale values, and the per-cell firing pattern is sensitive to
/// the precise scales.
///
/// **Bridge invariants (panel-locked)**:
/// - V0..V7 mirror the D128 scales bit-for-bit so the D205 V0-only
///   projection equals D128.V0 (= D64.V0 = canonical D16).
/// - V0..V7 produce the same per-cell firings as D128 → the
///   D205 per-motif OR-projection over V0..V7 equals D128's
///   per-motif OR-projection. The additional variants V8..V12
///   can only add cells to the OR-projection, so
///   `D205 OR ⊇ D128 OR ⊇ D64 OR ⊇ D16`.
///
/// V8..V12 sample five additional deterministic dyadic
/// fractions between the existing scales, broadening the
/// threshold sweep without introducing floating-point arithmetic.
pub const D205_VARIANT_SCALES_Q16: [i32; 13] = [
    1 << 16,               // V0: 1.0    (canonical — matches D128.V0)
    1 << 15,               // V1: 0.5    (matches D128.V1)
    (1 << 16) + (1 << 15), // V2: 1.5    (matches D128.V2)
    (1 << 16) - (1 << 14), // V3: 0.75   (matches D128.V3)
    1 << 14,               // V4: 0.25   (matches D128.V4)
    (1 << 16) + (1 << 14), // V5: 1.25   (matches D128.V5)
    1 << 17,               // V6: 2.0    (matches D128.V6)
    (1 << 17) + (1 << 16), // V7: 3.0    (matches D128.V7)
    (1 << 14) + (1 << 13), // V8: 0.375  (between V4 and V1)
    (1 << 15) + (1 << 13), // V9: 0.625  (between V1 and V3)
    (1 << 16) - (1 << 13), // V10: 0.875 (slight sensitive bias)
    (1 << 16) + (1 << 13), // V11: 1.125 (slight strict bias)
    (1 << 17) - (1 << 14), // V12: 1.75  (between V2 and V6)
];

/// R.9.b — multiply a raw Q16.16 threshold by a Q16.16 factor.
///
/// Computes `(value_raw × scale_q16) >> 16`. The product is formed in
/// `i64`, which can hold any `i32 × i32` result, and is then shifted
/// back down to Q16.16. The shift is an arithmetic right shift, so
/// the result is rounded toward negative infinity (floor): exact for
/// products that are multiples of 2^16, and e.g. `-1 × 0.5` yields
/// raw `-1`, not `0`.
///
/// For `scale_q16 = 1 << 16` (= 1.0) the function is the identity on
/// every `i32` input — this is what gives D64.V0 its
/// byte-identical-to-canonical property.
///
/// Results outside the `i32` range saturate to `i32::MAX` /
/// `i32::MIN`. The thresholds and scales defined in this module stay
/// far below that bound; saturation is a deterministic backstop in
/// case a future profile pushes past it.
#[must_use]
pub fn scale_q16_threshold(value_raw: i32, scale_q16: i32) -> i32 {
    let product = i64::from(value_raw) * i64::from(scale_q16);
    let rescaled = product >> 16;
    // After clamping, the value is inside the i32 range, so the cast
    // below is lossless.
    rescaled.clamp(i64::from(i32::MIN), i64::from(i32::MAX)) as i32
}

/// R.9.b — multiply a `u32` window count by a Q16.16 factor.
///
/// The product is rounded to the nearest integer (half a unit,
/// `1 << 15`, is added before the `>> 16`, so exact halves round up)
/// and then clamped into `1..=u32::MAX`. A zero or negative result —
/// including any negative `scale_q16` — therefore comes back as 1.
///
/// Used for the window-count fields of `DetectorThresholds`
/// (`plateau_windows`, `oscillation_window`, `variance_window`,
/// `ramp_window`), whose primary parameter is a cell count rather
/// than a Q16.16 level.
///
/// For `scale_q16 = 1 << 16` (= 1.0) the result equals `window` for
/// every `window ≥ 1`, preserving the V0 = canonical property. A
/// `window` of 0 is lifted to 1 by the lower clamp.
#[must_use]
pub fn scale_window(window: u32, scale_q16: i32) -> u32 {
    const HALF: i64 = 1 << 15;
    // u32 × i32 plus HALF always fits in i64, so this cannot overflow.
    let rounded = (i64::from(window) * i64::from(scale_q16) + HALF) >> 16;
    // After clamping, the value is inside the u32 range, so the cast
    // below is lossless.
    rounded.clamp(1, i64::from(u32::MAX)) as u32
}

/// R.9.b — build a copy of `t` with every scalable field multiplied
/// by `scale_q16`.
///
/// * Q16.16 levels go through `scale_q16_threshold`.
/// * Window counts (`plateau_windows`, `oscillation_window`,
///   `variance_window`, `ramp_window`) go through `scale_window`.
/// * `oscillation_alternations` (an integer count) and
///   `history_window` (a hard ceiling) do not scale semantically and
///   are copied through unchanged.
///
/// With `scale_q16 = 1 << 16` the level fields come back unchanged,
/// and so do the window fields as long as they are ≥ 1 — for the
/// canonical table that makes the result a byte-identical copy,
/// which is the V0-equals-canonical invariant.
///
/// NOTE(review): because `history_window` is not scaled, a factor
/// above 1.0 can push a window count past it — e.g. the canonical
/// `oscillation_window` of 6 becomes 9 at × 1.5 and 18 at × 3.0,
/// against a `history_window` of 8. Confirm that the detector
/// predicates bound their lookback by `history_window`.
#[must_use]
pub fn scale_thresholds(t: &DetectorThresholds, scale_q16: i32) -> DetectorThresholds {
    let level = |raw: i32| scale_q16_threshold(raw, scale_q16);
    let span = |cells: u32| scale_window(cells, scale_q16);

    DetectorThresholds {
        // Q16.16 levels.
        spike_q16_raw: level(t.spike_q16_raw),
        sustain_q16_raw: level(t.sustain_q16_raw),
        slew_shock_q16_raw: level(t.slew_shock_q16_raw),
        plateau_min_q16_raw: level(t.plateau_min_q16_raw),
        plateau_slew_max_q16_raw: level(t.plateau_slew_max_q16_raw),
        deadband_low_q16_raw: level(t.deadband_low_q16_raw),
        deadband_high_q16_raw: level(t.deadband_high_q16_raw),
        error_burst_q16_raw: level(t.error_burst_q16_raw),
        coupling_lat_q16_raw: level(t.coupling_lat_q16_raw),
        coupling_err_q16_raw: level(t.coupling_err_q16_raw),
        variance_threshold_q16_raw: level(t.variance_threshold_q16_raw),
        recovery_min_norm_q16_raw: level(t.recovery_min_norm_q16_raw),
        clean_band_q16_raw: level(t.clean_band_q16_raw),
        confuser_min_q16_raw: level(t.confuser_min_q16_raw),
        fanout_drift_q16_raw: level(t.fanout_drift_q16_raw),
        entity_anomaly_factor_q16_raw: level(t.entity_anomaly_factor_q16_raw),

        // Window counts.
        plateau_windows: span(t.plateau_windows),
        oscillation_window: span(t.oscillation_window),
        variance_window: span(t.variance_window),
        ramp_window: span(t.ramp_window),

        // Copied through unscaled.
        oscillation_alternations: t.oscillation_alternations,
        history_window: t.history_window,
    }
}

impl DetectorCell {
    /// Report whether `class` is among the motifs recorded on this
    /// cell, i.e. whether `detector_mask` shares any bit with
    /// `class.bit_mask()`.
    #[must_use]
    pub const fn fired(&self, class: MotifClass) -> bool {
        let overlap = self.detector_mask & class.bit_mask();
        overlap != 0
    }

    /// How many motifs fired on this cell (the number of set bits in
    /// `detector_mask`).
    #[must_use]
    pub const fn count(&self) -> u32 {
        u32::count_ones(self.detector_mask)
    }
}

/// Compute the flat index of cell `(entity_id, window_idx)` in an
/// entity-major grid with `n_windows` cells per entity:
/// `entity_id * n_windows + window_idx`. Sign and residual grids share
/// this layout because they are produced by the upstream stages with
/// the same convention.
///
/// Each operand is widened to `usize` *before* the arithmetic. Doing
/// the multiply in `u32` and casting afterwards would overflow for
/// grids with more than `u32::MAX` cells — a panic in debug builds and
/// a silently wrapped (wrong) index in release builds. Widening first
/// also matches how `evaluate` sizes the grid
/// (`n_windows as usize * n_entities as usize`). On targets with a
/// 64-bit `usize` the result cannot overflow for any `u32` inputs.
///
/// The function does not check that `window_idx < n_windows`; callers
/// are expected to pass an in-range window index.
#[inline]
const fn flat(entity_id: u32, window_idx: u32, n_windows: u32) -> usize {
    (entity_id as usize) * (n_windows as usize) + (window_idx as usize)
}

/// Run the 16-detector evaluation over every cell of the entity-major
/// residual and sign grids.
///
/// `residuals` and `signs` must each hold `n_windows * n_entities`
/// cells. The returned `Vec<DetectorCell>` uses the same entity-major
/// order as the inputs: all windows of entity 0, then all windows of
/// entity 1, and so on. Each output cell records its own
/// `(window_idx, entity_id)` next to the mask produced by
/// `eval_motifs_for_cell`.
///
/// Determinism: identical inputs produce byte-identical output.
///
/// # Panics
///
/// In builds with debug assertions enabled, panics if either grid's
/// length differs from `n_windows * n_entities`. In any build, a grid
/// that is too short leads to an out-of-bounds panic when the per-cell
/// evaluator indexes it.
#[must_use]
pub fn evaluate(
    residuals: &[ResidualCell],
    signs: &[SignCell],
    thresholds: &DetectorThresholds,
    n_windows: u32,
    n_entities: u32,
) -> Vec<DetectorCell> {
    let total = (n_windows as usize) * (n_entities as usize);
    debug_assert_eq!(residuals.len(), total, "residual grid shape mismatch");
    debug_assert_eq!(signs.len(), total, "sign grid shape mismatch");

    // Entity-major walk: the outer range is the entity, the inner one
    // the window, so cells are produced in flat-index order.
    let cells = (0..n_entities).flat_map(|entity_id| {
        (0..n_windows).map(move |window_idx| DetectorCell {
            window_idx,
            entity_id,
            detector_mask: eval_motifs_for_cell(
                residuals, signs, thresholds, entity_id, window_idx, n_windows,
            ),
        })
    });

    // Capacity is reserved up front because `flat_map` cannot report
    // its length to `extend`.
    let mut out: Vec<DetectorCell> = Vec::with_capacity(total);
    out.extend(cells);
    out
}

/// R.9.b — Per-cell 16-motif evaluator. Returns a `u32` mask where
/// bit `i` is set when `MotifClass::from_bit_index(i)` fires on the
/// `(entity_id, window_idx)` cell under the supplied `thresholds`.
///
/// Extracted from `evaluate` so the wide-mask path (`evaluate_wide`)
/// can call it once per variant with a scaled-threshold copy. Single
/// source of truth: a future refactor to the motif predicates only
/// needs to touch this function, not two copies.
///
/// `residuals` and `signs` are the full entity-major grids, not
/// per-entity slices; the cell is located with
/// `flat(entity_id, window_idx, n_windows)`. The whole `signs` grid is
/// also handed to the history-based predicates (`drift_ramp_fires`,
/// `plateau_fires`, ...), which are defined outside this function.
///
/// **Evaluation order matters.** The mask is built incrementally and
/// two checks read bits set earlier in the same call:
/// `RouteLocalAnomaly` tests the `ResidualSpike` bit, and
/// `CleanWindowStability` requires every other bit to be clear, so it
/// must stay last.
///
/// **Byte stability**: calling this with the canonical
/// `DetectorThresholds::CANONICAL` produces exactly the bytes the
/// pre-R.9.b `evaluate` produced for the same cell. The function
/// is unsafe-code-free and host-side; the CUDA kernel mirrors the
/// same predicate set bit-for-bit.
///
/// # Panics
///
/// Panics if the computed flat index is out of bounds for `residuals`
/// or `signs`.
#[must_use]
pub fn eval_motifs_for_cell(
    residuals: &[ResidualCell],
    signs: &[SignCell],
    thresholds: &DetectorThresholds,
    entity_id: u32,
    window_idx: u32,
    n_windows: u32,
) -> u32 {
    // Both grids share one layout, so a single index addresses the
    // residual cell and the sign cell of this (entity, window).
    let idx = flat(entity_id, window_idx, n_windows);
    let r = residuals[idx];
    let s = signs[idx];
    let mut mask = 0u32;

    // All threshold comparisons below are strict (`>`): a value exactly
    // on the threshold does not fire.
    if s.norm_q.raw() > thresholds.spike_q16_raw {
        mask |= MotifClass::ResidualSpike.bit_mask();
    }
    if s.drift_q.raw() > thresholds.sustain_q16_raw {
        mask |= MotifClass::SustainedResidualElevation.bit_mask();
    }
    if drift_ramp_fires(signs, entity_id, window_idx, n_windows, thresholds) {
        mask |= MotifClass::DriftRamp.bit_mask();
    }
    // Slew is compared by magnitude, so shocks in either direction fire.
    if s.slew_q.abs().raw() > thresholds.slew_shock_q16_raw {
        mask |= MotifClass::SlewShock.bit_mask();
    }
    if plateau_fires(signs, entity_id, window_idx, n_windows, thresholds) {
        mask |= MotifClass::Plateau.bit_mask();
    }
    if oscillation_fires(signs, entity_id, window_idx, n_windows, thresholds) {
        mask |= MotifClass::Oscillation.bit_mask();
    }
    if deadband_exit_fires(signs, entity_id, window_idx, n_windows, thresholds) {
        mask |= MotifClass::DeadbandExit.bit_mask();
    }
    if r.residual_error_q.raw() > thresholds.error_burst_q16_raw {
        mask |= MotifClass::ErrorRateBurst.bit_mask();
    }
    // Coupling needs both residual axes over their own thresholds on
    // the same cell.
    if r.residual_latency_q.raw() > thresholds.coupling_lat_q16_raw
        && r.residual_error_q.raw() > thresholds.coupling_err_q16_raw
    {
        mask |= MotifClass::LatencyErrorCoupling.bit_mask();
    }
    if entity_local_anomaly_fires(&s, thresholds) {
        mask |= MotifClass::EntityLocalAnomaly.bit_mask();
    }
    // Route-local anomaly: v0 single-cell proxy — fires when the
    // spike condition holds *and* the error axis is also non-zero,
    // marking a candidate that the consensus pass refines using
    // route distribution. Carrying a deterministic proxy here
    // keeps the bit position meaningful even before the route-
    // distribution pass lands.
    //
    // Order-dependent: this reads the `ResidualSpike` bit set at the
    // top of the function. Note the error test is `> 0`, i.e. strictly
    // positive, not merely non-zero.
    if (mask & MotifClass::ResidualSpike.bit_mask()) != 0 && r.residual_error_q.raw() > 0 {
        mask |= MotifClass::RouteLocalAnomaly.bit_mask();
    }
    if fanout_precursor_fires(&s, &r, thresholds) {
        mask |= MotifClass::FanoutPrecursor.bit_mask();
    }
    if variance_expansion_fires(signs, entity_id, window_idx, n_windows, thresholds) {
        mask |= MotifClass::VarianceExpansion.bit_mask();
    }
    if recovery_edge_fires(signs, entity_id, window_idx, n_windows, thresholds) {
        mask |= MotifClass::RecoveryEdge.bit_mask();
    }
    if confuser_like_transient_fires(signs, entity_id, window_idx, n_windows, thresholds) {
        mask |= MotifClass::ConfuserLikeTransient.bit_mask();
    }

    // Clean-window stability is the catch-all sentinel: every
    // non-clean bit must be zero for clean to fire. Mask out the
    // clean bit position itself before testing so the sentinel
    // doesn't reference itself.
    //
    // The band comparisons are inclusive (`<=`), unlike the strict
    // comparisons used by the firing detectors above. This check must
    // remain the final step because it inspects the finished mask.
    let any_non_clean = mask & !MotifClass::CleanWindowStability.bit_mask();
    if any_non_clean == 0
        && s.norm_q.abs().raw() <= thresholds.clean_band_q16_raw
        && s.drift_q.abs().raw() <= thresholds.clean_band_q16_raw
        && s.slew_q.abs().raw() <= thresholds.clean_band_q16_raw
    {
        mask |= MotifClass::CleanWindowStability.bit_mask();
    }

    mask
}

/// R.9.b — wide-mask detector evaluator. Runs the canonical 16-motif
/// predicate set once per variant (each variant at its own scaled
/// thresholds) and packs the resulting bits into one
/// `DetectorCellWide` per cell. The bit position for a
/// `(motif_id, variant_id)` pair is
/// `motif_id * variants_per_motif + variant_id`; `variants_per_motif`
/// is fixed by `profile` and therefore constant across cells.
///
/// Cells are emitted entity-major: the outer loop walks `entity_id`,
/// the inner loop walks `window_idx`.
///
/// **Profile support**: D16, D64, D128, and D205 are all routed
/// here at the current head. D16 produces a single variant
/// (V0 = canonical) so its wide-mask bits 0..15 are bit-identical
/// to the legacy `DetectorCell.detector_mask`; bits 16..2047 are
/// zero. D64 emits 16 motifs × 4 variants = 64 bits (bits 0..63).
/// D128 emits 16 motifs × 8 variants = 128 bits (bits 0..127,
/// occupying the first two words of the `[u64; 32]` mask). D205
/// (R.9.d.2) iterates 16 motifs × 13 variants = 208 slots but gates
/// firings by `det_id < D205_ACTIVE_BITS = 205`, so the mask carries
/// the bottom 205 active bits plus three deterministic
/// reserved-not-fired slots (205, 206, 207). D512 / D1024 / D2000
/// are deferred to paper section 16 future work and panic at this
/// entry point until they are routed.
///
/// **Bridge invariants (panel-locked)**: V0..V3 of every wider
/// profile use the same scale factors as D64's V0..V3, and V0..V7 of
/// D205 use the same scale factors as D128's, so the OR-projection
/// chain `D205 OR ⊇ D128 OR ⊇ D64 OR ⊇ canonical D16` holds. Pinned
/// by the 10 R.9.d.1 acceptance tests in
/// `tests/r9_d_d128_acceptance.rs` and the additional R.9.d.2 tests
/// in `tests/r9_d2_d205_acceptance.rs`.
///
/// **Byte equivalence**: the GPU's wide kernel mirrors this
/// per-cell evaluation order bit-for-bit at every supported
/// profile. GPU dispatch for D205 is deferred to the R.9.d.2.1
/// follow-on commit; this evaluator is the CPU scaling-ladder proof.
///
/// In debug builds the function additionally asserts that both
/// `residuals` and `signs` hold exactly `n_windows * n_entities`
/// cells.
///
/// # Panics
///
/// Panics if `profile` is a wider variant not yet implemented
/// (D512 and above). The dispatch sites guard against this; the
/// panic is a defensive backstop for in-process callers.
#[must_use]
pub fn evaluate_wide(
    profile: crate::motif::DetectorProfile,
    residuals: &[ResidualCell],
    signs: &[SignCell],
    thresholds: &DetectorThresholds,
    n_windows: u32,
    n_entities: u32,
) -> Vec<DetectorCellWide> {
    use crate::motif::DetectorProfile;

    // One exhaustive table per profile: (variants per motif, variant
    // scale table, exclusive upper bound on detector ids that may
    // fire). Keeping all three in a single wildcard-free match means a
    // newly added profile fails to compile here instead of silently
    // falling into a catch-all arm.
    //
    // * D16 borrows the D64 table but only ever reads its V0 entry.
    // * D16/D64/D128 fill their motif × variant product exactly, so
    //   their bound is `u32::MAX` (the gate never rejects).
    // * D205 iterates 16 × 13 = 208 slots and masks the top three via
    //   `D205_ACTIVE_BITS`.
    let (variants_per_motif, scales_slice, active_bit_limit): (u32, &[i32], u32) = match profile {
        DetectorProfile::D16 => (1, &D64_VARIANT_SCALES_Q16, u32::MAX),
        DetectorProfile::D64 => (D64_VARIANT_COUNT, &D64_VARIANT_SCALES_Q16, u32::MAX),
        DetectorProfile::D128 => (D128_VARIANT_COUNT, &D128_VARIANT_SCALES_Q16, u32::MAX),
        DetectorProfile::D205 => (
            D205_VARIANT_COUNT,
            &D205_VARIANT_SCALES_Q16,
            D205_ACTIVE_BITS,
        ),
        DetectorProfile::D512 | DetectorProfile::D1024 | DetectorProfile::D2000 => panic!(
            "DetectorProfile::{} not yet implemented in R.9.d.2; expected D16, D64, D128, or D205",
            profile.name()
        ),
    };

    let total = (n_windows as usize) * (n_entities as usize);
    debug_assert_eq!(residuals.len(), total, "residual grid shape mismatch");
    debug_assert_eq!(signs.len(), total, "sign grid shape mismatch");

    // Scale the caller's thresholds once per variant, up front, so the
    // per-cell loop only indexes. Every profile (D16 included) goes
    // through `scale_thresholds`; D16 simply uses the V0 scale alone.
    // Slicing panics if a scale table is shorter than the variant
    // count, exactly as per-element indexing would.
    let scaled: Vec<DetectorThresholds> = scales_slice[..variants_per_motif as usize]
        .iter()
        .map(|&scale_q16| scale_thresholds(thresholds, scale_q16))
        .collect();

    let mut out: Vec<DetectorCellWide> = Vec::with_capacity(total);
    for entity_id in 0..n_entities {
        for window_idx in 0..n_windows {
            let mut wide = DetectorCellWide {
                window_idx,
                entity_id,
                detector_mask: [0u64; 32],
            };
            for (variant, scaled_thresh) in (0u32..).zip(scaled.iter()) {
                let d16_mask = eval_motifs_for_cell(
                    residuals,
                    signs,
                    scaled_thresh,
                    entity_id,
                    window_idx,
                    n_windows,
                );
                for motif_id in 0..16u32 {
                    if (d16_mask & (1u32 << motif_id)) == 0 {
                        continue;
                    }
                    // Variants of one motif occupy adjacent bit positions.
                    let det_id = motif_id * variants_per_motif + variant;
                    if det_id < active_bit_limit {
                        wide.set_bit(det_id);
                    }
                }
            }
            out.push(wide);
        }
    }
    out
}

/// Detector 3: drift_ramp.
///
/// Fires when the raw drift is strictly increasing across the
/// `t.ramp_window` cells that end at `window_idx` (inclusive) for
/// `entity_id`. Returns `false` when fewer than `t.ramp_window` cells
/// of history exist.
///
/// The comparison is seeded with `i32::MIN`, so a first sample whose
/// raw drift equals `i32::MIN` fails the strict test and the detector
/// does not fire.
fn drift_ramp_fires(
    signs: &[SignCell],
    entity_id: u32,
    window_idx: u32,
    n_windows: u32,
    t: &DetectorThresholds,
) -> bool {
    let span = t.ramp_window;
    let end = window_idx + 1; // exclusive upper bound of the look-back range
    if end < span {
        return false;
    }
    let mut floor = i32::MIN;
    // `all` stops at the first non-rising sample, so later cells are
    // never read once the ramp is broken.
    (end - span..end).all(|w| {
        let drift = signs[flat(entity_id, w, n_windows)].drift_q.raw();
        let rising = drift > floor;
        floor = drift;
        rising
    })
}

/// Detector 5: plateau.
///
/// Fires when every one of the `t.plateau_windows` cells ending at
/// `window_idx` (inclusive) has a raw norm of at least
/// `t.plateau_min_q16_raw` and an absolute raw slew of at most
/// `t.plateau_slew_max_q16_raw`. Returns `false` when fewer than
/// `t.plateau_windows` cells of history exist.
fn plateau_fires(
    signs: &[SignCell],
    entity_id: u32,
    window_idx: u32,
    n_windows: u32,
    t: &DetectorThresholds,
) -> bool {
    let span = t.plateau_windows;
    let end = window_idx + 1; // exclusive upper bound of the look-back range
    if end < span {
        return false;
    }
    (end - span..end).all(|w| {
        let cell = &signs[flat(entity_id, w, n_windows)];
        let elevated = cell.norm_q.raw() >= t.plateau_min_q16_raw;
        // The slew check is only evaluated for cells that are elevated.
        elevated && cell.slew_q.abs().raw() <= t.plateau_slew_max_q16_raw
    })
}

/// Detector 6: oscillation.
///
/// Walks the `t.oscillation_window` cells ending at `window_idx`
/// (inclusive) and counts how often the sign of the raw slew flips
/// between consecutive non-zero samples. Zero-slew cells are skipped:
/// they neither count as a flip nor reset the remembered direction.
/// Fires when the flip count reaches `t.oscillation_alternations`;
/// returns `false` when fewer than `t.oscillation_window` cells of
/// history exist.
fn oscillation_fires(
    signs: &[SignCell],
    entity_id: u32,
    window_idx: u32,
    n_windows: u32,
    t: &DetectorThresholds,
) -> bool {
    let span = t.oscillation_window;
    let end = window_idx + 1; // exclusive upper bound of the look-back range
    if end < span {
        return false;
    }
    let mut flips = 0u32;
    let mut prev_dir: i32 = 0; // 0 = no non-zero slew seen yet
    for w in end - span..end {
        let dir = signs[flat(entity_id, w, n_windows)].slew_q.raw().signum();
        if dir == 0 {
            continue;
        }
        if prev_dir != 0 && dir != prev_dir {
            flips += 1;
        }
        prev_dir = dir;
    }
    flips >= t.oscillation_alternations
}

/// Detector 7: deadband_exit.
///
/// Compares the current cell with the immediately preceding one for
/// the same entity: fires when the previous raw norm was below
/// `t.deadband_low_q16_raw` and the current raw norm is above
/// `t.deadband_high_q16_raw`. The first window has no predecessor
/// and never fires.
fn deadband_exit_fires(
    signs: &[SignCell],
    entity_id: u32,
    window_idx: u32,
    n_windows: u32,
    t: &DetectorThresholds,
) -> bool {
    match window_idx.checked_sub(1) {
        None => false,
        Some(prev_window) => {
            let before = &signs[flat(entity_id, prev_window, n_windows)];
            let now = &signs[flat(entity_id, window_idx, n_windows)];
            let was_inside = before.norm_q.raw() < t.deadband_low_q16_raw;
            let is_outside = now.norm_q.raw() > t.deadband_high_q16_raw;
            was_inside && is_outside
        }
    }
}

/// Detector 10: entity_local_anomaly. Single-cell proxy: fires when
/// the drift is strictly positive and the norm exceeds the drift
/// multiplied by the configured Q16 factor
/// (`t.entity_anomaly_factor_q16_raw`).
fn entity_local_anomaly_fires(s: &SignCell, t: &DetectorThresholds) -> bool {
    let drift_raw = s.drift_q.raw();
    if drift_raw <= 0 {
        return false;
    }
    // Test `norm > factor * drift` in i64. The product of two Q16 raws
    // carries 32 fractional bits, so the norm is shifted left by 16 to
    // sit on the same scale before comparing.
    let norm_q32 = i64::from(s.norm_q.raw()) << 16;
    let scaled_drift =
        i64::from(t.entity_anomaly_factor_q16_raw).saturating_mul(i64::from(drift_raw));
    norm_q32 > scaled_drift
}

/// Detector 12: fanout_precursor. In v0 the precursor signal is a
/// cell whose raw drift is above `t.fanout_drift_q16_raw` while its
/// error residual is already strictly positive.
fn fanout_precursor_fires(s: &SignCell, r: &ResidualCell, t: &DetectorThresholds) -> bool {
    let drift_past_threshold = s.drift_q.raw() > t.fanout_drift_q16_raw;
    let has_error_residual = r.residual_error_q.raw() > 0;
    drift_past_threshold && has_error_residual
}

/// Detector 13: variance_expansion.
///
/// Uses the max-minus-min spread of the raw norm over the
/// `t.variance_window` cells ending at `window_idx` (inclusive) as a
/// deterministic, sqrt-free stand-in for variance. Fires when that
/// spread, computed with a saturating Q16 subtraction, is greater
/// than `t.variance_threshold_q16_raw`. Returns `false` when fewer
/// than `t.variance_window` cells of history exist.
fn variance_expansion_fires(
    signs: &[SignCell],
    entity_id: u32,
    window_idx: u32,
    n_windows: u32,
    t: &DetectorThresholds,
) -> bool {
    let span = t.variance_window;
    let end = window_idx + 1; // exclusive upper bound of the look-back range
    if end < span {
        return false;
    }
    // Running (min, max) over the window, seeded with the extreme
    // values so the first sample replaces both.
    let (lo, hi) = (end - span..end)
        .map(|w| signs[flat(entity_id, w, n_windows)].norm_q.raw())
        .fold((i32::MAX, i32::MIN), |(lo, hi), raw| {
            (lo.min(raw), hi.max(raw))
        });
    let spread = Q16::from_raw(hi).sat_sub(Q16::from_raw(lo));
    spread.raw() > t.variance_threshold_q16_raw
}

/// Detector 14: recovery_edge.
///
/// Fires when the current raw drift is lower than the previous
/// cell's raw drift while the current raw norm is still above
/// `t.recovery_min_norm_q16_raw`. The first window has no
/// predecessor and never fires.
fn recovery_edge_fires(
    signs: &[SignCell],
    entity_id: u32,
    window_idx: u32,
    n_windows: u32,
    t: &DetectorThresholds,
) -> bool {
    if let Some(prev_window) = window_idx.checked_sub(1) {
        let before = &signs[flat(entity_id, prev_window, n_windows)];
        let now = &signs[flat(entity_id, window_idx, n_windows)];
        let drift_falling = now.drift_q.raw() < before.drift_q.raw();
        drift_falling && now.norm_q.raw() > t.recovery_min_norm_q16_raw
    } else {
        false
    }
}

/// Detector 16: confuser_like_transient.
///
/// Fires when the current raw norm is above `t.confuser_min_q16_raw`
/// while the previous cell's absolute raw norm was within
/// `t.clean_band_q16_raw`, i.e. a jump straight out of the clean
/// band. The first window has no predecessor and never fires.
fn confuser_like_transient_fires(
    signs: &[SignCell],
    entity_id: u32,
    window_idx: u32,
    n_windows: u32,
    t: &DetectorThresholds,
) -> bool {
    if window_idx < 1 {
        return false;
    }
    let earlier = &signs[flat(entity_id, window_idx - 1, n_windows)];
    let latest = &signs[flat(entity_id, window_idx, n_windows)];
    let spiking_now = latest.norm_q.raw() > t.confuser_min_q16_raw;
    let was_clean = earlier.norm_q.abs().raw() <= t.clean_band_q16_raw;
    spiking_now && was_clean
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::fixture::{synthesize, DEFAULT_SEED, N_ENTITIES, N_WINDOWS, WINDOW_SIZE_NS};
    use crate::motif::DetectorProfile;
    use crate::residual::{compute as residual_compute, Baseline};
    use crate::sign::compute as sign_compute;
    use crate::window::{compute_features, WindowFeature};

    const ALPHA: Q16 = Q16::from_raw(0x2000);

    /// End-to-end pipeline against the synthesized fixture, returning the
    /// legacy detector grid for inspection.
    fn full_pipeline() -> Vec<DetectorCell> {
        let events = synthesize(DEFAULT_SEED);
        let features = compute_features(&events, N_WINDOWS, N_ENTITIES, WINDOW_SIZE_NS);
        let residuals = residual_compute(&features, &Baseline::CANONICAL);
        let signs = sign_compute(&residuals, ALPHA, N_WINDOWS, N_ENTITIES);
        evaluate(
            &residuals,
            &signs,
            &DetectorThresholds::CANONICAL,
            N_WINDOWS,
            N_ENTITIES,
        )
    }

    /// Runs the fixture pipeline once and feeds the *same* residual and
    /// sign grids to both the legacy `evaluate` and
    /// `evaluate_wide(profile, ..)`, both at canonical thresholds.
    /// Returns `(legacy, wide)` for cross-comparison.
    fn evaluate_legacy_and_wide(
        profile: DetectorProfile,
    ) -> (Vec<DetectorCell>, Vec<DetectorCellWide>) {
        let events = synthesize(DEFAULT_SEED);
        let features = compute_features(&events, N_WINDOWS, N_ENTITIES, WINDOW_SIZE_NS);
        let residuals = residual_compute(&features, &Baseline::CANONICAL);
        let signs = sign_compute(&residuals, ALPHA, N_WINDOWS, N_ENTITIES);
        let legacy = evaluate(
            &residuals,
            &signs,
            &DetectorThresholds::CANONICAL,
            N_WINDOWS,
            N_ENTITIES,
        );
        let wide = evaluate_wide(
            profile,
            &residuals,
            &signs,
            &DetectorThresholds::CANONICAL,
            N_WINDOWS,
            N_ENTITIES,
        );
        (legacy, wide)
    }

    #[test]
    fn detector_grid_has_expected_shape() {
        let grid = full_pipeline();
        assert_eq!(grid.len(), (N_WINDOWS as usize) * (N_ENTITIES as usize));
    }

    #[test]
    fn detector_evaluation_is_deterministic() {
        let a = full_pipeline();
        let b = full_pipeline();
        assert_eq!(a, b);
    }

    #[test]
    fn ramp_episode_fires_spike_and_sustain_and_ramp() {
        let grid = full_pipeline();
        // Pick a cell deep in the ramp on entity 3.
        let idx = WindowFeature::flat_index(3, 34, N_WINDOWS);
        let cell = grid[idx];
        assert!(cell.fired(MotifClass::ResidualSpike));
        assert!(cell.fired(MotifClass::SustainedResidualElevation));
        assert!(cell.fired(MotifClass::DriftRamp));
    }

    #[test]
    fn burst_episode_fires_error_rate_burst_and_coupling() {
        let grid = full_pipeline();
        // Middle of the error burst on entity 7.
        let idx = WindowFeature::flat_index(7, 62, N_WINDOWS);
        let cell = grid[idx];
        assert!(cell.fired(MotifClass::ErrorRateBurst));
        // NOTE: despite the test name, coupling is deliberately NOT
        // asserted. Burst events still carry baseline latency, so the
        // coupling bit may or may not fire; only the error-axis bit is
        // verified here.
    }

    #[test]
    fn shock_episode_fires_slew_shock_and_recovery_edge_in_subsequent_windows() {
        let grid = full_pipeline();
        let shock_idx = WindowFeature::flat_index(11, 90, N_WINDOWS);
        assert!(grid[shock_idx].fired(MotifClass::SlewShock));
        // At least one cell in the recovery range must report a recovery edge.
        let any_recovery = (91..96).any(|w| {
            let idx = WindowFeature::flat_index(11, w, N_WINDOWS);
            grid[idx].fired(MotifClass::RecoveryEdge)
        });
        assert!(
            any_recovery,
            "no recovery edge fired in the post-shock window range"
        );
    }

    #[test]
    fn clean_windows_fire_only_clean_stability_bit() {
        let grid = full_pipeline();
        // Pick a clean entity (entity 0) at a quiet window (window 5 —
        // far from all three episodes).
        let idx = WindowFeature::flat_index(0, 5, N_WINDOWS);
        let cell = grid[idx];
        // Conditional check: this test does not require the clean bit to
        // fire at this cell; it only requires that *if* it fired, no
        // other bit is set alongside it.
        if cell.fired(MotifClass::CleanWindowStability) {
            let non_clean = cell.detector_mask & !MotifClass::CleanWindowStability.bit_mask();
            assert_eq!(non_clean, 0);
        }
    }

    #[test]
    fn confuser_detector_does_not_fire_on_sustained_ramp() {
        let grid = full_pipeline();
        // Confuser is meant to catch single-window spikes, not sustained
        // ramps. Entity 3's deep-ramp cells should not see the confuser bit.
        let idx = WindowFeature::flat_index(3, 34, N_WINDOWS);
        assert!(!grid[idx].fired(MotifClass::ConfuserLikeTransient));
    }

    // ====================================================================
    // R.9.b acceptance tests — wide-mask detector evaluator + D64.
    // ====================================================================

    #[test]
    fn d16_legacy_and_wide_masks_match_bit_for_bit() {
        // Load-bearing R.9.b invariant: the canonical 16-detector
        // path produces a wide mask whose low 16 bits equal the
        // legacy `DetectorCell.detector_mask` exactly, and whose
        // remaining 2032 bits are zero. If this broke we'd have
        // silently divergent D16 byte forms in two code paths.
        let (legacy, wide) = evaluate_legacy_and_wide(DetectorProfile::D16);
        assert_eq!(legacy.len(), wide.len());
        for (i, (a, b)) in legacy.iter().zip(wide.iter()).enumerate() {
            assert_eq!(a.window_idx, b.window_idx, "cell {i} window_idx mismatch");
            assert_eq!(a.entity_id, b.entity_id, "cell {i} entity_id mismatch");
            assert_eq!(
                u64::from(a.detector_mask),
                b.detector_mask[0],
                "cell {i} mask divergence: legacy={:08x} wide[0]={:016x}",
                a.detector_mask,
                b.detector_mask[0]
            );
            // Every higher word must be zero in D16.
            for (w, &word) in b.detector_mask.iter().enumerate().skip(1) {
                assert_eq!(word, 0, "cell {i} word {w} non-zero in D16 wide mask");
            }
        }
    }

    #[test]
    fn d64_v0_bits_match_d16_bits() {
        // The D64 design lock states that variant V0 uses the
        // canonical thresholds verbatim, so D64.detector_id =
        // motif_id * 4 + 0 must fire on exactly the same cells as
        // D16.detector_id = motif_id. This is the "strict superset"
        // property — every D16 firing is recoverable from the D64
        // mask by reading bit (motif_id * 4) of the wide cell.
        let (d16, d64) = evaluate_legacy_and_wide(DetectorProfile::D64);
        assert_eq!(d16.len(), d64.len());
        for (i, (cell16, cell64)) in d16.iter().zip(d64.iter()).enumerate() {
            for motif_id in 0..16u32 {
                let d16_bit = (cell16.detector_mask & (1u32 << motif_id)) != 0;
                let d64_det_id = motif_id * D64_VARIANT_COUNT;
                let d64_bit = cell64.fired_by_id(d64_det_id);
                assert_eq!(
                    d16_bit, d64_bit,
                    "cell {i} motif_id {motif_id}: D16.bit_{motif_id} != D64.bit_{d64_det_id}"
                );
            }
        }
    }

    #[test]
    fn d64_evaluation_is_deterministic_across_runs() {
        // Two consecutive end-to-end runs produce identical wide masks.
        // Catches any non-determinism the variant-scaling logic
        // might have introduced (e.g. address-dependent iteration
        // order or floating-point sneaking in).
        let (_, a) = evaluate_legacy_and_wide(DetectorProfile::D64);
        let (_, b) = evaluate_legacy_and_wide(DetectorProfile::D64);
        assert_eq!(a.len(), b.len());
        for (i, (ca, cb)) in a.iter().zip(b.iter()).enumerate() {
            assert_eq!(ca, cb, "D64 cell {i} differs between runs");
        }
    }

    #[test]
    fn d64_total_firings_strictly_exceed_d16() {
        // What is actually asserted (despite "strictly" in the name):
        // the total D64 bit-count over the grid is >= the total D16
        // bit-count. That must hold because D64's V0 slots reproduce
        // the D16 firings (pinned separately by
        // `d64_v0_bits_match_d16_bits`) and V1..V3 can only add bits in
        // their own slots.
        //
        // A stronger per-variant claim is intentionally not made here:
        // lowering a threshold can only add firings for the standalone
        // scalar-comparison motifs (ResidualSpike,
        // SustainedResidualElevation, etc.), but the window-based
        // motifs are not strictly monotonic in window length.
        let (d16, d64) = evaluate_legacy_and_wide(DetectorProfile::D64);
        let d16_total: u64 = d16
            .iter()
            .map(|c| u64::from(c.detector_mask.count_ones()))
            .sum();
        let d64_total: u64 = d64.iter().map(|c| u64::from(c.popcount())).sum();
        assert!(
            d64_total >= d16_total,
            "D64 total firings ({d64_total}) must be >= D16 total firings ({d16_total}) \
             because V0 ≡ canonical and V1..V3 add or repeat firings"
        );
    }

    #[test]
    fn scale_threshold_identity_at_unit_scale() {
        // For scale_q16 = 1.0 (= 1 << 16) every threshold value
        // must round-trip unchanged. This is the V0-equals-canonical
        // proof at the primitive level.
        let canon = DetectorThresholds::CANONICAL;
        let scaled = scale_thresholds(&canon, 1 << 16);
        assert_eq!(scaled, canon, "scale_thresholds(_, 1.0) must be identity");
    }

    #[test]
    fn scale_window_clamps_below_one() {
        // A scale factor of 0 would produce 0, which is invalid for
        // every motif that uses a window count. The function clamps
        // to ≥ 1 so the kernel can safely use the scaled value as a
        // loop bound.
        assert_eq!(scale_window(8, 0), 1, "scale × 0 must clamp to 1");
        assert_eq!(scale_window(8, 1), 1, "extreme small scale still clamps");
        assert_eq!(scale_window(8, 1 << 16), 8, "scale × 1.0 is identity");
        assert_eq!(scale_window(8, 1 << 17), 16, "scale × 2.0 doubles");
    }

    #[test]
    fn d64_variant_scales_v0_is_unity() {
        // The variant-scale array's V0 slot MUST be 1.0 in Q16.16
        // (= 1 << 16). If a future refactor changes this constant
        // accidentally, the D64 V0 bits would diverge from D16 and
        // every wider-profile case file would mis-commit.
        assert_eq!(
            D64_VARIANT_SCALES_Q16[0],
            1 << 16,
            "D64_VARIANT_SCALES_Q16[V0] must equal 1.0 in Q16.16"
        );
    }
}