repotoire 0.8.0

Graph-powered code analysis CLI. 110 detectors for security, architecture, bus factor, and code quality.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
//! Dual-branch findings: typed output shape for speculative detector
//! results.
//!
//! See `docs/superpowers/specs/2026-05-09-dual-branch-findings-design.md`
//! for the RFC and `docs/superpowers/specs/2026-05-09-dual-branch-phase1-architecture.md`
//! for the Phase 1 architecture amendment.
//!
//! # Design
//!
//! Some findings have two interpretations: a `RealBug` branch (this is a
//! vulnerability) and a `Benign` branch (this is intentional / non-security
//! use). Rather than forcing the detector to commit to a single severity at
//! emission time, dual-branch findings carry both interpretations with:
//!
//! - a **predicted** branch (rendered by default, fields on the parent
//!   `Finding`),
//! - an **alternative** branch (`Finding::alternative_branch`),
//! - **prediction reasons** (`Finding::prediction_reasons`) — typed
//!   evidence the predictor used,
//! - **resolution signals** (`Finding::resolution_signals`) — code changes
//!   the developer can make to definitively collapse the ambiguity.
//!
//! # Phase 1a scope (this file)
//!
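//! This module ships **types only**. No detector emits dual-branch findings
//! yet; no postprocess pass populates these fields yet; no CLI or SARIF
//! consumer reads them yet. All new fields on `Finding` are
//! `#[serde(default)]` so existing JSON (which lacks these keys) parses
//! unchanged.
//!
//! NOTE(review): this paragraph previously said the fields were also
//! `skip_serializing_if = ...` and that existing output is byte-identical.
//! The field docs on `AlternativeBranch::suggested_fix` in this file state
//! that `skip_serializing_if` is deliberately *not* used (the `bitcode`
//! cache rejects it) and that absent values serialize as `null`, so the
//! byte-identical claim looks stale. Confirm against the `Finding`
//! definition.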
//!
//! Per RFC decision **D1**, this is intentionally an additive (non-breaking)
//! schema change on the existing `Finding` type, not a parallel
//! `DualBranchFinding` type. See the architecture note for rationale.
//!
//! # Existing-signal alignment
//!
//! The variants of `PredictionReasonKind` are deliberately named to match
//! existing signal sources in the codebase, so Phase 1b can map each
//! signal one-to-one without renaming:
//!
//! | This module's variant            | Existing source                                                      |
//! |----------------------------------|----------------------------------------------------------------------|
//! | `BundledCode`                    | `confidence_enrichment::ConfidenceSignal { signal: "bundled_code" }` |
//! | `NonProductionPath`              | `confidence_enrichment::ConfidenceSignal { signal: "non_production_path" }` |
//! | `MultiDetectorAgreement { count }` | `confidence_enrichment::ConfidenceSignal { signal: "multi_detector_agreement" }` |
//! | `TestFixtureFile`                | `confidence_enrichment::ConfidenceSignal { signal: "test_fixture_file" }` |
//! | `HierarchicalLevel { level_name, z_score }` | `predictive::LevelScore` (one per `predictive::Level`) |
//! | `KeywordArgument { name, value }`, `FirstArgIdentifier { name }`, `EnclosingScope`, `ImportPresence`, `FilePath`, `StructuralPattern` | new in Phase 2 (per-detector AST/graph signals) |
//! | `Custom { description }`         | escape hatch for one-off detector evidence                            |
//!
//! # Rejected alternative: single `dual_branch: Option<DualBranchPayload>` field
//!
//! An earlier draft considered consolidating the three new `Finding`
//! fields into a single nested payload:
//!
//! ```ignore
//! pub dual_branch: Option<DualBranchPayload>,  // { alt, reasons, signals }
//! ```
//!
//! This was rejected for three reasons:
//!
//! 1. **JSON shape uniformity.** Every other optional field on `Finding`
//!    is a flat sibling of the top-level keys (`category`, `cwe_id`,
//!    `confidence`, `original_severity`, ...). A nested payload would be
//!    the only top-level "envelope" object and would stand out visually
//!    in JSON output and SARIF mappings.
//!
//! 2. **Cardinality mismatch.** The three pieces of dual-branch data
//!    have different cardinalities: at most one alternative branch, N
//!    prediction reasons, N resolution signals. Modelling them as a
//!    single all-or-nothing payload misrepresents the data.
//!
//! 3. **Non-dual-branch findings benefit from prediction reasons too.**
//!    A `multi_detector_agreement` confidence boost is a prediction
//!    reason for a single-interpretation finding. With a payload
//!    envelope, attaching one reason would force constructing a fake
//!    "alternative branch" or making the alternative `Option` inside the
//!    payload — both of which are awkward. With three top-level fields,
//!    the bridge in Phase 1b just pushes onto `prediction_reasons`.
//!
//! The cost is `Finding` widens from 17 to 20 fields; that is acceptable
//! given the existing schema already has many flat optional fields.

use serde::{Deserialize, Serialize};

use crate::models::Severity;

// ─────────────────────────────────────────────────────────────────────────────
// BranchLabel
// ─────────────────────────────────────────────────────────────────────────────

/// Which branch of a dual-branch interpretation this describes.
///
/// A finding with `alternative_branch.is_some()` always covers both
/// interpretations: the primary `Finding` fields describe the predicted
/// branch (whichever the predictor chose); `alternative_branch` describes
/// the other.
///
/// # Serialization
///
/// Variants are serialized as snake_case strings (`"real_bug"` and
/// `"benign"`) because of `#[serde(rename_all = "snake_case")]`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum BranchLabel {
    /// The finding describes a real vulnerability that should be fixed by
    /// changing the code's behavior (not by annotating it).
    ///
    /// Serialized as `"real_bug"`.
    RealBug,
    /// The finding describes intentional, non-vulnerable usage. The
    /// remediation (if any) is to *annotate* the code so future scans
    /// agree.
    ///
    /// Serialized as `"benign"`.
    Benign,
}

impl BranchLabel {
    /// Returns the other branch: `RealBug` for `Benign` and `Benign` for
    /// `RealBug`.
    ///
    /// The mapping is an involution, so applying it twice yields the
    /// original label.
    ///
    /// This is a `const fn` so the result can be used in `const` and
    /// `static` initializers as well as at runtime.
    ///
    /// `#[must_use]` is applied because the method has no side effects;
    /// discarding the returned label is always a mistake.
    #[must_use]
    pub const fn opposite(self) -> Self {
        // Exhaustive match on purpose: adding a third variant must force a
        // compile error here rather than silently falling into a default.
        match self {
            Self::RealBug => Self::Benign,
            Self::Benign => Self::RealBug,
        }
    }
}

// ─────────────────────────────────────────────────────────────────────────────
// AlternativeBranch
// ─────────────────────────────────────────────────────────────────────────────

/// The alternative interpretation of a dual-branch finding — the branch
/// that was *not* chosen as the predicted default.
///
/// `AlternativeBranch` mirrors only the subset of `Finding` fields that
/// vary between the two interpretations. Identity and location fields
/// (id, detector, affected_files, line_start/end, category, cwe_id) are
/// shared with the primary interpretation and are not duplicated here.
///
/// # Naming
///
/// We use `title` and `description` (matching `Finding`'s field names)
/// rather than RFC-internal terms like `message`. This keeps the rendering
/// path uniform: any code that displays a `Finding` can display an
/// `AlternativeBranch` by reading the same field names.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct AlternativeBranch {
    /// Which interpretation this alternative represents.
    pub label: BranchLabel,
    /// Severity if this alternative were the primary interpretation.
    pub severity: Severity,
    /// One-line summary.
    pub title: String,
    /// Longer explanation of when this interpretation would apply.
    pub description: String,
    /// Suggested fix specific to this interpretation. `None` is valid
    /// (e.g. the fix is "do nothing, this is correct").
    ///
    /// # Serialization
    ///
    /// - On output, `None` is written as an explicit `null`; the key is
    ///   never omitted. See the comment on `Finding::alternative_branch`
    ///   for why we don't use `skip_serializing_if` — the in-house
    ///   `bitcode` cache rejects it.
    /// - On input, `#[serde(default)]` lets a payload that omits the key
    ///   entirely deserialize to `None`.
    #[serde(default)]
    pub suggested_fix: Option<String>,
}

// ─────────────────────────────────────────────────────────────────────────────
// PredictionReasonKind
// ─────────────────────────────────────────────────────────────────────────────

/// Typed evidence kinds the predictor uses to choose between branches.
///
/// Each variant captures a specific class of signal a detector or a
/// postprocess pass might supply. Variants are deliberately aligned with
/// existing signal sources (see module-level docs for the mapping); new
/// variants should only be added when an existing one cannot be reused.
///
/// # Serialization
///
/// Serialized as an internally-tagged enum with `kind` as the
/// discriminator and the variant fields flattened alongside, e.g.
/// `{"kind":"keyword_argument","name":"verify","value":"False"}`.
/// Unit variants carry only the tag, e.g. `{"kind":"bundled_code"}`.
/// This format follows the convention of the `confidence_enrichment`
/// provenance strings already shipped, so JSON consumers see a familiar
/// shape.
//
// `Eq` is not derived because `HierarchicalLevel { z_score: f64 }`
// carries an f64, and `f64` implements only `PartialEq` (NaN is not equal
// to itself), so `Eq` cannot be derived. `PartialEq` is sufficient for
// the test assertions in this module.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(tag = "kind", rename_all = "snake_case")]
pub enum PredictionReasonKind {
    // ── Existing signals (Phase 1b will populate these from
    // confidence_enrichment::ConfidenceSignal). ──
    /// Path matches a bundled-code pattern (`dist/`, `*.min.js`, etc.).
    /// Phase 1b source: `confidence_enrichment` signal `"bundled_code"`.
    BundledCode,

    /// Path is in a non-production directory (`scripts/`, `examples/`, etc.).
    /// Phase 1b source: `confidence_enrichment` signal `"non_production_path"`.
    NonProductionPath,

    /// Multiple detectors flagged the same location, increasing confidence
    /// the finding is real. Phase 1b source: `confidence_enrichment`
    /// signal `"multi_detector_agreement"`.
    MultiDetectorAgreement {
        /// How many detectors agree (always >= 2 when this variant fires).
        /// The lower bound is a convention for producers; the type itself
        /// does not enforce it.
        count: u32,
    },

    /// File path matches `/test`, `/fixture`, `/mock`. Phase 1b source:
    /// `confidence_enrichment` signal `"test_fixture_file"`.
    TestFixtureFile,

    /// One level of the hierarchical predictive coding engine flagged the
    /// entity as surprising. Phase 1b source: `predictive::LevelScore`.
    ///
    /// - `level_name` matches `predictive::Level::label()` output (e.g.
    ///   `"L1 Token"`, `"L4 Architectural"`).
    /// - `z_score` is the score reported for that level; presumably it is
    ///   copied from the corresponding `predictive::LevelScore` — confirm
    ///   in the Phase 1b bridge.
    HierarchicalLevel { level_name: String, z_score: f64 },

    // ── New in Phase 2 (per-detector AST/graph evidence). ──
    /// A keyword argument's name and value at the call site informed the
    /// prediction (e.g. `verify=False`).
    KeywordArgument { name: String, value: String },

    /// The first positional argument's identifier name informed the
    /// prediction (e.g. argument named `password` vs `s`).
    FirstArgIdentifier { name: String },

    /// The enclosing scope (class, function, or module) informed the
    /// prediction. Sourced from the graph in Phase 1c.
    ///
    /// NOTE(review): the section header above says "New in Phase 2" while
    /// this says Phase 1c — confirm which phase populates this variant.
    EnclosingScope { scope_kind: String, name: String },

    /// The set of imports in the file informed the prediction. Sourced
    /// from the graph in Phase 1c.
    ///
    /// NOTE(review): same Phase 2 vs Phase 1c question as `EnclosingScope`.
    ImportPresence { module: String },

    /// The file path informed the prediction (e.g. matches a known
    /// non-security path glob).
    FilePath { hint: String },

    /// A structural pattern at the call site informed the prediction
    /// (e.g. result truncated with `[:N]`, input includes `os.urandom`).
    StructuralPattern { description: String },

    /// One-off signal not covered by typed variants.
    ///
    /// # Allowed callers
    ///
    /// Only two call sites should ever produce `Custom`:
    ///
    /// 1. **Phase 1b bridge from `confidence_enrichment::ConfidenceSignal`.**
    ///    That struct's `signal: String` field is open-typed; when the
    ///    string doesn't match any known variant above, the bridge wraps
    ///    it in `Custom { description: signal_string }`. This preserves
    ///    forward-compatibility with signals defined before the
    ///    enrichment pipeline knows about typed variants.
    ///
    /// 2. **External / plugin detectors not shipped in-tree.** A plugin
    ///    that emits findings via the `Finding` schema may legitimately
    ///    have evidence the in-tree enum can't model.
    ///
    /// # Disallowed: in-tree detectors
    ///
    /// In-tree detectors authored in Phase 2+ **must** add a typed
    /// variant rather than reach for `Custom`. The point of typing the
    /// signals is to make the predictor's evidence machine-readable
    /// (for weight aggregation, IDE plugins, SARIF mappings); a `Custom`
    /// string defeats every downstream consumer. The
    /// `custom_variant_is_documented_escape_hatch_only` test below pins
    /// this policy.
    Custom { description: String },
}

// ─────────────────────────────────────────────────────────────────────────────
// PredictionReason
// ─────────────────────────────────────────────────────────────────────────────

/// A single piece of evidence the predictor used.
///
/// Conceptually this is the typed version of the existing
/// `confidence_enrichment::ConfidenceSignal { signal, delta, reason }`
/// struct — with `kind` replacing the string `signal`, `weight` replacing
/// the f64 `delta` (note that `weight` is an `f32` here), and `note`
/// replacing `reason`.
///
/// # Weight conventions
///
/// `weight` is a contribution toward the predictor's softmax over the two
/// branches:
///
/// - **Positive** values lean toward the `Benign` branch.
/// - **Negative** values lean toward the `RealBug` branch.
/// - Magnitude near `1.0` is a near-collapsing signal; for true collapses,
///   use `ResolutionSignal` instead.
/// - Magnitude near `0.0` is a weak nudge.
///
/// The aggregated prediction is computed in Phase 1c; Phase 1a only
/// provides the carrier type.
///
/// # JSON shape
///
/// Because `kind` is a `#[serde(tag = "kind")]` enum, `PredictionReason`
/// uses `#[serde(flatten)]` so the JSON looks like:
///
/// ```json
/// {
///   "kind": "keyword_argument",
///   "name": "verify",
///   "value": "False",
///   "weight": -0.4,
///   "note": "verify=False on a TLS call leans RealBug."
/// }
/// ```
///
/// rather than a nested `kind` object. This is intentional and matches
/// how `confidence_enrichment` already serializes its provenance.
///
/// # Caveat (`serde(flatten)` + tagged enums)
///
/// Combining `#[serde(flatten)]` with internally-tagged enums has known
/// edge cases around `deny_unknown_fields` and field-order; we don't use
/// either here. The `prediction_reason_full_shape_roundtrips` test pins
/// the JSON shape so we'd catch a future regression.
//
// NOTE(review): both `#[serde(flatten)]` and internally-tagged enums
// deserialize through a self-describing data model (they buffer the input
// as a map). JSON satisfies that; compact binary formats generally do not.
// Other docs in this file mention an in-house `bitcode` cache — confirm
// that cache can round-trip this type before Phase 1b populates it.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct PredictionReason {
    /// What kind of evidence this is. Flattened, so the variant's tag and
    /// fields appear as siblings of `weight` and `note` in the serialized
    /// object.
    #[serde(flatten)]
    pub kind: PredictionReasonKind,
    /// Weight contribution in `[-1.0, 1.0]`. Positive leans `Benign`,
    /// negative leans `RealBug`. Phase 1a does not enforce the range
    /// (matches the existing `ConfidenceSignal::delta` which is also
    /// unconstrained at the type level); Phase 1c clamps when
    /// aggregating.
    pub weight: f32,
    /// Human-readable explanation, surfaced when the user asks "why this
    /// prediction?".
    pub note: String,
}

// ─────────────────────────────────────────────────────────────────────────────
// ResolutionKind
// ─────────────────────────────────────────────────────────────────────────────

/// Typed mechanism for a resolution signal — a code change the developer
/// can make that *definitively* collapses the dual-branch finding to a
/// single interpretation.
///
/// Resolution signals are authoritative: their presence in source code
/// causes the predictor to skip its weighted-evidence calculation entirely
/// and commit to the indicated branch.
///
/// # Serialization
///
/// Serialized as an internally-tagged enum with `kind` as the
/// discriminator (variant names in snake_case) and the variant fields
/// alongside it, e.g.
/// `{"kind":"keyword_argument","name":"usedforsecurity","value":"False"}`
/// or `{"kind":"structural_pattern","pattern_description":"..."}`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(tag = "kind", rename_all = "snake_case")]
pub enum ResolutionKind {
    /// A keyword argument that, if added, collapses the finding (e.g.
    /// `usedforsecurity=False` on Python `hashlib` calls).
    KeywordArgument { name: String, value: String },
    /// A source-level annotation that, if added, collapses the finding
    /// (e.g. `# repotoire:protocol-required[RFC7616]`).
    SourceAnnotation { syntax: String },
    /// A structural pattern that, if matched at the call site, collapses
    /// the finding (e.g. "first argument identifier matches sensitive
    /// lexicon").
    ///
    /// # Field name
    ///
    /// In Rust the inner field is `description`, but on the wire it is
    /// `pattern_description` (renamed via
    /// `#[serde(rename = "pattern_description")]`) rather than the more
    /// natural `description`. This is necessary because `ResolutionKind`
    /// is flattened into `ResolutionSignal` with `#[serde(flatten)]`,
    /// and `ResolutionSignal` itself has a top-level `description: String`
    /// field. Two flattened siblings with the same name would collide on
    /// serialize (last-write-wins in JSON object key order) and fail to
    /// round-trip on deserialize with `missing field "description"`.
    /// See `docs/superpowers/specs/2026-05-11-resolution-signal-flatten-fix.md`.
    StructuralPattern {
        #[serde(rename = "pattern_description")]
        description: String,
    },
    /// The presence of an import that, if added, collapses the finding
    /// (e.g. `import defusedxml.ElementTree`).
    ImportPresence { module: String },
    /// The enclosing scope matching a known pattern collapses the finding
    /// (e.g. enclosing class implements a known-protocol marker).
    EnclosingScope { scope_kind: String, name: String },
}

// ─────────────────────────────────────────────────────────────────────────────
// ResolutionSignal
// ─────────────────────────────────────────────────────────────────────────────

/// A definitive resolution signal — a code change that, if applied,
/// collapses the dual-branch finding to a single interpretation.
///
/// Each signal documents:
/// - what to add (`kind`),
/// - how to describe it to the user (`description`, `example`),
/// - which branch the addition collapses to (`collapses_to`).
///
/// When a finding's resolution signal is detected as already present in
/// source, the predictor in Phase 1c skips evidence aggregation and
/// commits to `collapses_to` directly.
///
/// # JSON shape
///
/// `kind` is flattened, so its tag and variant fields sit next to the
/// struct's own fields in a single object:
///
/// ```json
/// {
///   "kind": "keyword_argument",
///   "name": "usedforsecurity",
///   "value": "False",
///   "description": "Mark the hash as non-security usage.",
///   "example": "hashlib.sha1(s, usedforsecurity=False)",
///   "collapses_to": "benign"
/// }
/// ```
///
/// Variant field names of `ResolutionKind` must therefore never clash
/// with `description`, `example`, or `collapses_to`.
//
// NOTE(review): `#[serde(flatten)]` over an internally-tagged enum needs a
// self-describing format on deserialize. JSON is fine; confirm that the
// in-house `bitcode` cache mentioned elsewhere in this file can round-trip
// this type.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct ResolutionSignal {
    /// What the developer can add to definitively resolve this finding.
    #[serde(flatten)]
    pub kind: ResolutionKind,
    /// Human-readable description of what to add.
    pub description: String,
    /// Optional code snippet showing the addition in context.
    /// Serialized as `null` when absent; `#[serde(default)]` also lets
    /// input that omits the key deserialize to `None`.
    #[serde(default)]
    pub example: Option<String>,
    /// Which branch the addition collapses to.
    pub collapses_to: BranchLabel,
}

// ─────────────────────────────────────────────────────────────────────────────
// Tests
// ─────────────────────────────────────────────────────────────────────────────

#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    #[test]
    fn branch_label_opposite_is_involutive() {
        // Each label flips to the other one.
        let from_real_bug = BranchLabel::RealBug.opposite();
        assert_eq!(from_real_bug, BranchLabel::Benign);
        let from_benign = BranchLabel::Benign.opposite();
        assert_eq!(from_benign, BranchLabel::RealBug);

        // Flipping twice lands back on the starting label.
        let flipped_twice = BranchLabel::RealBug.opposite().opposite();
        assert_eq!(flipped_twice, BranchLabel::RealBug);
    }

    #[test]
    fn branch_label_serializes_snake_case() {
        // The wire name of each label is the snake_case form of its
        // variant name, emitted as a bare JSON string.
        for (label, wire_name) in [
            (BranchLabel::RealBug, "\"real_bug\""),
            (BranchLabel::Benign, "\"benign\""),
        ] {
            let encoded = serde_json::to_string(&label).expect("serialize");
            assert_eq!(encoded, wire_name);
        }
    }

    #[test]
    fn branch_label_roundtrips() {
        // Encode and decode every label; the value must come back unchanged.
        let labels = [BranchLabel::RealBug, BranchLabel::Benign];
        for label in &labels {
            let encoded = serde_json::to_string(label).expect("serialize");
            let decoded = serde_json::from_str::<BranchLabel>(&encoded).expect("deserialize");
            assert_eq!(*label, decoded);
        }
    }

    #[test]
    fn alternative_branch_roundtrips() {
        // A fully populated branch, optional fix text included.
        let branch = AlternativeBranch {
            label: BranchLabel::RealBug,
            severity: Severity::High,
            title: "If `s` carries sensitive data, SHA-1 is broken".into(),
            description: "Use SHA-256 or SHA-3 for security-sensitive hashing.".into(),
            suggested_fix: Some("hashlib.sha256(s)".into()),
        };
        let encoded = serde_json::to_string(&branch).expect("serialize");
        let decoded =
            serde_json::from_str::<AlternativeBranch>(&encoded).expect("deserialize");
        assert_eq!(branch, decoded);
    }

    #[test]
    fn alternative_branch_serializes_suggested_fix_as_null_when_none() {
        // Optional fields in the existing Finding schema (`category`,
        // `cwe_id`, etc.) are emitted as `null`, and this type keeps
        // the same shape. `skip_serializing_if` is not an option here
        // because `bitcode` rejects it.
        let without_fix = AlternativeBranch {
            label: BranchLabel::Benign,
            severity: Severity::Info,
            title: "Annotated".into(),
            description: "No fix needed.".into(),
            suggested_fix: None,
        };
        let json = serde_json::to_value(&without_fix).expect("serialize");
        let absent = serde_json::Value::Null;
        assert_eq!(
            json["suggested_fix"], absent,
            "suggested_fix should be null when None, got: {json}"
        );
    }

    #[test]
    fn prediction_reason_kind_serializes_with_flat_tag() {
        // The `kind` tag and the variant's own fields must sit side by
        // side in one flat JSON object.
        let encoded = serde_json::to_value(&PredictionReasonKind::KeywordArgument {
            name: "usedforsecurity".into(),
            value: "False".into(),
        })
        .expect("serialize");
        for (key, expected) in [
            ("kind", "keyword_argument"),
            ("name", "usedforsecurity"),
            ("value", "False"),
        ] {
            assert_eq!(encoded[key], expected);
        }
    }

    #[test]
    fn prediction_reason_kind_unit_variants_serialize_as_kind_only() {
        // A field-less variant carries nothing but its tag: `{"kind": ...}`.
        for (variant, tag) in [
            (PredictionReasonKind::BundledCode, "bundled_code"),
            (PredictionReasonKind::NonProductionPath, "non_production_path"),
            (PredictionReasonKind::TestFixtureFile, "test_fixture_file"),
        ] {
            let encoded = serde_json::to_value(&variant).expect("serialize");
            assert_eq!(encoded, json!({ "kind": tag }));
        }
    }

    #[test]
    fn prediction_reason_full_shape_roundtrips() {
        // Guards the exact JSON layout, so a serde upgrade that changes
        // how `flatten` interacts with tagged enums shows up here first.
        let reason = PredictionReason {
            kind: PredictionReasonKind::EnclosingScope {
                scope_kind: "class".into(),
                name: "DigestAuth".into(),
            },
            weight: 0.4,
            note: "Class name suggests RFC 7616 Digest authentication.".into(),
        };
        let encoded = serde_json::to_value(&reason).expect("serialize");

        // Flattened layout: the tag, the variant's fields, and the
        // parent's `weight` / `note` are all top-level keys.
        assert_eq!(encoded["kind"], "enclosing_scope");
        assert_eq!(encoded["scope_kind"], "class");
        assert_eq!(encoded["name"], "DigestAuth");
        let weight = encoded["weight"].as_f64().expect("weight is f64");
        assert!((weight - 0.4).abs() < 1e-6);
        assert_eq!(
            encoded["note"],
            "Class name suggests RFC 7616 Digest authentication."
        );

        let decoded =
            serde_json::from_value::<PredictionReason>(encoded).expect("deserialize roundtrip");
        assert_eq!(reason, decoded);
    }

    #[test]
    fn prediction_reason_existing_signal_aliases_match_strings() {
        // Phase 1b will translate from
        // `confidence_enrichment::ConfidenceSignal { signal: String }`
        // to typed variants. Each serde tag has to equal the existing
        // `signal` string exactly so that mapping stays mechanical.
        let aliases = [
            (PredictionReasonKind::BundledCode, "bundled_code"),
            (
                PredictionReasonKind::NonProductionPath,
                "non_production_path",
            ),
            (
                PredictionReasonKind::MultiDetectorAgreement { count: 3 },
                "multi_detector_agreement",
            ),
            (PredictionReasonKind::TestFixtureFile, "test_fixture_file"),
        ];
        for (variant, expected_tag) in aliases {
            let encoded = serde_json::to_value(&variant).expect("serialize");
            assert_eq!(
                encoded["kind"], expected_tag,
                "variant {variant:?} must serialize with kind={expected_tag:?} \
                 to align with confidence_enrichment::ConfidenceSignal.signal"
            );
        }
    }

    #[test]
    fn prediction_reason_hierarchical_level_matches_predictive_label() {
        // `level_name` is meant to be filled from
        // `predictive::Level::label()`, whose strings look like
        // "L1 Token" or "L4 Architectural". Fixing the convention here
        // keeps Phase 1b on the same spelling.
        let encoded = serde_json::to_value(&PredictionReasonKind::HierarchicalLevel {
            level_name: "L4 Architectural".into(),
            z_score: 3.2,
        })
        .expect("serialize");
        assert_eq!(encoded["kind"], "hierarchical_level");
        assert_eq!(encoded["level_name"], "L4 Architectural");
        let z_score = encoded["z_score"].as_f64().expect("z_score is f64");
        assert!((z_score - 3.2).abs() < 1e-6);
    }

    #[test]
    fn resolution_signal_collapses_to_serialized() {
        // A populated signal exposes the variant tag, the target
        // branch, and the example snippet as top-level keys.
        let kind = ResolutionKind::KeywordArgument {
            name: "usedforsecurity".into(),
            value: "False".into(),
        };
        let encoded = serde_json::to_value(&ResolutionSignal {
            kind,
            description: "Python 3.9+ stdlib non-security annotation.".into(),
            example: Some("hashlib.sha1(s, usedforsecurity=False)".into()),
            collapses_to: BranchLabel::Benign,
        })
        .expect("serialize");
        assert_eq!(encoded["kind"], "keyword_argument");
        assert_eq!(encoded["collapses_to"], "benign");
        assert_eq!(encoded["example"], "hashlib.sha1(s, usedforsecurity=False)");
    }

    #[test]
    fn resolution_signal_serializes_example_as_null_when_none() {
        // Same convention as the existing `Finding` fields: an absent
        // optional value is written as `null` rather than dropped. The
        // in-house `bitcode` cache rejects `skip_serializing_if`, so
        // omitting the key is not available.
        let kind = ResolutionKind::SourceAnnotation {
            syntax: "# repotoire:ignore[InsecureCryptoDetector]".into(),
        };
        let without_example = ResolutionSignal {
            kind,
            description: "Suppress this finding on this line.".into(),
            example: None,
            collapses_to: BranchLabel::Benign,
        };
        let json = serde_json::to_value(&without_example).expect("serialize");
        let absent = serde_json::Value::Null;
        assert_eq!(
            json["example"], absent,
            "example should be null when None, got: {json}"
        );
    }

    #[test]
    fn resolution_signal_parses_legacy_payload_without_example_field() {
        // Defensive contract: a client may leave `example` out of the
        // payload entirely, and `serde(default)` on the
        // `Option<String>` field must then produce `None`.
        //
        // The literal uses `r##"..."##` delimiters because the JSON
        // body contains the sequence `"#` (a quote immediately followed
        // by a hash, at the start of the `syntax` value), which would
        // terminate a single-hash raw string early.
        let payload = r##"{
            "kind": "source_annotation",
            "syntax": "# repotoire:protocol-required",
            "description": "Mark as protocol-required",
            "collapses_to": "benign"
        }"##;
        let signal = serde_json::from_str::<ResolutionSignal>(payload)
            .expect("deserialize without example");
        assert_eq!(signal.example, None);
        assert_eq!(signal.collapses_to, BranchLabel::Benign);
    }

    /// Records the policy stated in the `PredictionReasonKind::Custom`
    /// docstring. Nothing can statically stop an in-tree detector from
    /// using `Custom { description: "verify=False on TLS call" }` where
    /// `KeywordArgument { name: "verify", value: "False" }` was the
    /// correct typed variant, but a clearly named test makes the policy
    /// discoverable to any reviewer who greps the source.
    ///
    /// The policy is asymmetric:
    /// - The Phase 1b bridge from `confidence_enrichment::ConfidenceSignal`
    ///   may emit `Custom { description: signal_string }` when `signal`
    ///   matches no known typed variant. That is the forward-compat
    ///   escape hatch.
    /// - In-tree detectors authored in Phase 2+ must add a typed
    ///   variant instead of emitting `Custom`.
    ///
    /// Phase 1b will add a real test asserting that no in-tree detector
    /// produces `Custom`. In Phase 1a (types only, no detector
    /// emission) this test only documents the intent.
    #[test]
    fn custom_variant_is_documented_escape_hatch_only() {
        // The Phase 1b bridging code will depend on this variant, so
        // confirm that it exists and survives a round-trip.
        let escape_hatch = PredictionReasonKind::Custom {
            description: "Unknown signal from external plugin".into(),
        };
        let encoded = serde_json::to_value(&escape_hatch).expect("serialize");
        assert_eq!(encoded["kind"], "custom");
        assert_eq!(encoded["description"], "Unknown signal from external plugin");
        let decoded =
            serde_json::from_value::<PredictionReasonKind>(encoded).expect("roundtrip");
        assert_eq!(decoded, escape_hatch);

        // Policy check keyed on the variant name: renaming `Custom`
        // breaks this test, which prompts the author to update the
        // docstring and the bridge contract as well.
        let variant_name = if matches!(escape_hatch, PredictionReasonKind::Custom { .. }) {
            "Custom"
        } else {
            "not Custom"
        };
        assert_eq!(
            variant_name, "Custom",
            "the escape-hatch variant must be named `Custom`; \
             see PredictionReasonKind::Custom docstring for the policy"
        );
    }

    // ─────────────────────────────────────────────────────────────────────────
    // Exhaustive round-trip tripwires
    //
    // `ResolutionSignal` uses `#[serde(flatten)]` to fold an
    // internally-tagged enum into the parent struct. This pattern
    // silently breaks if any variant of the flattened enum has a field
    // whose name collides with a parent sibling field (the parent and
    // the variant fight over the same JSON key; see
    // `2026-05-11-resolution-signal-flatten-fix.md`).
    //
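    // The test below names every variant in an exhaustive `match` (no
    // wildcard arm), so adding a new variant fails to compile until
    // that `match` is updated. The author must then also add a sentinel
    // for the new variant to `cases`; the compiler cannot enforce that
    // part. A colliding field on any variant listed in `cases` fails
    // the round-trip assertion deterministically.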
    // ─────────────────────────────────────────────────────────────────────────

    /// Wraps `kind` in a `ResolutionSignal` with sentinel parent
    /// values, serializes it, checks that the parent `description` key
    /// still holds the sentinel, then deserializes and compares the
    /// result with the input. Panics with the variant in the message
    /// on any failure.
    fn assert_resolution_signal_roundtrips(kind: ResolutionKind) {
        let signal = ResolutionSignal {
            kind,
            description: "TRIPWIRE_PARENT_DESCRIPTION".into(),
            example: Some("TRIPWIRE_EXAMPLE".into()),
            collapses_to: BranchLabel::Benign,
        };
        let json = match serde_json::to_value(&signal) {
            Ok(value) => value,
            Err(e) => panic!("serialize failed for {:?}: {e}", signal.kind),
        };
        // The parent's `description` has to come through untouched. A
        // variant field that serializes under the same key would
        // replace it, and this check would then fail.
        assert_eq!(
            json["description"], "TRIPWIRE_PARENT_DESCRIPTION",
            "parent `description` was overwritten by a flattened \
             variant field for {:?}; JSON: {json}",
            signal.kind
        );
        let decoded: ResolutionSignal = match serde_json::from_value(json.clone()) {
            Ok(value) => value,
            Err(e) => panic!(
                "deserialize failed for {:?}: {e}; JSON: {json}",
                signal.kind
            ),
        };
        assert_eq!(
            decoded, signal,
            "round-trip mismatch for {:?}",
            signal.kind
        );
    }

    #[test]
    fn resolution_kind_every_variant_roundtrips_through_signal() {
        // One sentinel value per `ResolutionKind` variant.
        let cases = [
            ResolutionKind::KeywordArgument {
                name: "usedforsecurity".into(),
                value: "False".into(),
            },
            ResolutionKind::SourceAnnotation {
                syntax: "# repotoire:protocol-required[RFC7616]".into(),
            },
            ResolutionKind::StructuralPattern {
                description: "TRIPWIRE_INNER_DESCRIPTION_DIFFERENT_FROM_PARENT".into(),
            },
            ResolutionKind::ImportPresence {
                module: "defusedxml.ElementTree".into(),
            },
            ResolutionKind::EnclosingScope {
                scope_kind: "class".into(),
                name: "DigestAuth".into(),
            },
        ];

        for case in cases {
            // Compile-time exhaustiveness guard: every variant is named
            // explicitly and there is deliberately no wildcard `_ =>`
            // arm, so a newly added variant breaks the build here. When
            // that happens, also add a sentinel for it to `cases` above.
            match &case {
                ResolutionKind::KeywordArgument { .. }
                | ResolutionKind::SourceAnnotation { .. }
                | ResolutionKind::StructuralPattern { .. }
                | ResolutionKind::ImportPresence { .. }
                | ResolutionKind::EnclosingScope { .. } => {}
            }
            assert_resolution_signal_roundtrips(case);
        }
    }

    #[test]
    fn resolution_kind_structural_pattern_inner_description_does_not_collide() {
        // Regression pin for the v0.7.0 bug
        // (`docs/superpowers/specs/2026-05-11-resolution-signal-flatten-fix.md`).
        // `StructuralPattern`'s inner `description` has to be written
        // under a JSON key other than the parent's `description`, so
        // that both values survive the flatten round-trip.
        let kind = ResolutionKind::StructuralPattern {
            description: "INNER_VARIANT_VALUE".into(),
        };
        let signal = ResolutionSignal {
            kind,
            description: "PARENT_PROSE_VALUE".into(),
            example: None,
            collapses_to: BranchLabel::RealBug,
        };
        let encoded = serde_json::to_value(&signal).expect("serialize");

        // The parent's prose keeps the `description` key.
        assert_eq!(encoded["description"], "PARENT_PROSE_VALUE");
        // The variant's value lands under the renamed key
        // `pattern_description`.
        assert_eq!(encoded["pattern_description"], "INNER_VARIANT_VALUE");
        // Decoding restores both values.
        let decoded =
            serde_json::from_value::<ResolutionSignal>(encoded).expect("deserialize");
        assert_eq!(decoded, signal);
    }

    // ─────────────────────────────────────────────────────────────────────────
    // PredictionReason audit
    //
    // Same flatten-collision class as `ResolutionSignal` above. The
    // audit in `2026-05-11-resolution-signal-flatten-fix.md` confirmed
    // no `PredictionReasonKind` variant currently collides with the
    // parent `PredictionReason`'s `weight` or `note` fields, but a
    // future variant could. The exhaustive round-trip test below is
    // the tripwire: it names every variant in a `match` with no
    // wildcard arm, so adding a new variant fails to compile until
    // that `match` is updated. The author must then also add the new
    // variant to `cases`; the compiler cannot enforce that part. Any
    // colliding-field regression on a variant listed in `cases` fails
    // the assertion deterministically with the variant name in the
    // message.
    // ─────────────────────────────────────────────────────────────────────────

    /// Wraps `kind` in a `PredictionReason` with sentinel parent
    /// values, serializes it, checks that the parent `note` and
    /// `weight` keys still hold the sentinels, then deserializes and
    /// compares the result with the input. Panics with the variant in
    /// the message on any failure.
    fn assert_prediction_reason_roundtrips(kind: PredictionReasonKind) {
        let reason = PredictionReason {
            kind,
            weight: -0.42,
            note: "TRIPWIRE_PARENT_NOTE".into(),
        };
        let json = match serde_json::to_value(&reason) {
            Ok(value) => value,
            Err(e) => panic!("serialize failed for {:?}: {e}", reason.kind),
        };
        // Whatever the variant, the parent's `note` and `weight` keys
        // have to come through the flatten intact. A variant that
        // gains its own `note` or `weight` field trips these checks.
        assert_eq!(
            json["note"], "TRIPWIRE_PARENT_NOTE",
            "parent `note` was overwritten by a flattened variant field \
             for {:?}; JSON: {json}",
            reason.kind
        );
        let weight = json["weight"].as_f64().expect("weight is f64");
        assert!(
            (weight - (-0.42)).abs() < 1e-6,
            "parent `weight` was overwritten or mistyped for {:?}; JSON: {json}",
            reason.kind
        );
        let decoded: PredictionReason = match serde_json::from_value(json.clone()) {
            Ok(value) => value,
            Err(e) => panic!(
                "deserialize failed for {:?}: {e}; JSON: {json}",
                reason.kind
            ),
        };
        assert_eq!(
            decoded, reason,
            "round-trip mismatch for {:?}",
            reason.kind
        );
    }

    #[test]
    fn prediction_reason_kind_every_variant_roundtrips_through_reason() {
        // One sentinel value per `PredictionReasonKind` variant.
        let cases = [
            PredictionReasonKind::BundledCode,
            PredictionReasonKind::NonProductionPath,
            PredictionReasonKind::MultiDetectorAgreement { count: 3 },
            PredictionReasonKind::TestFixtureFile,
            PredictionReasonKind::HierarchicalLevel {
                level_name: "L4 Architectural".into(),
                z_score: 3.2,
            },
            PredictionReasonKind::KeywordArgument {
                name: "verify".into(),
                value: "False".into(),
            },
            PredictionReasonKind::FirstArgIdentifier {
                name: "password".into(),
            },
            PredictionReasonKind::EnclosingScope {
                scope_kind: "function".into(),
                name: "load_user".into(),
            },
            PredictionReasonKind::ImportPresence {
                module: "defusedxml".into(),
            },
            PredictionReasonKind::FilePath {
                hint: "matches scripts/ glob".into(),
            },
            PredictionReasonKind::StructuralPattern {
                description: "first arg identifier matches sensitive lexicon".into(),
            },
            PredictionReasonKind::Custom {
                description: "Unknown plugin signal".into(),
            },
        ];

        for case in cases {
            // Compile-time exhaustiveness guard: every variant is named
            // explicitly and there is deliberately no wildcard `_ =>`
            // arm, so a newly added variant breaks the build here. When
            // that happens, also add a sentinel for it to `cases` above.
            match &case {
                PredictionReasonKind::BundledCode
                | PredictionReasonKind::NonProductionPath
                | PredictionReasonKind::MultiDetectorAgreement { .. }
                | PredictionReasonKind::TestFixtureFile
                | PredictionReasonKind::HierarchicalLevel { .. }
                | PredictionReasonKind::KeywordArgument { .. }
                | PredictionReasonKind::FirstArgIdentifier { .. }
                | PredictionReasonKind::EnclosingScope { .. }
                | PredictionReasonKind::ImportPresence { .. }
                | PredictionReasonKind::FilePath { .. }
                | PredictionReasonKind::StructuralPattern { .. }
                | PredictionReasonKind::Custom { .. } => {}
            }
            assert_prediction_reason_roundtrips(case);
        }
    }
}