m1nd-mcp 1.2.0

Local MCP runtime for coding agents: structural retrieval, change reasoning, document grounding, and continuity.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
// === m1nd-mcp/src/trust_envelope.rs ===
//
// OMEGA Move 1 — the TRUST-GATED ANSWER ENVELOPE (first slice, on `seek`).
//
// Wraps every `seek` answer in a per-answer trust RECEIPT so an agent can
// mechanically decide to ACT on it, re-verify it, abstain, or treat it as
// unprovable — in one round-trip, without re-deriving the evidence by hand.
//
// The verdict is a CALIBRATED WEIGHTING over the available trust factors, NOT an
// any-red AND-fold. The critic's hard rule (§O.3 #1 / §O.10 Move 1): an AND over
// noisy probes spuriously abstains ~23% of the time, so agents route around the
// gate and the moat dies. Here a single red factor never forces abstention if
// the weighted majority is clean.
//
// REUSE-FIRST: the final decision reuses `calibration::verdict_for` (the same
// conformal τ/τ_low binning `predict` uses) over the `envelope` calibration row.
// The only net-new code is the two band→reliability maps and the ~one pure
// weighting function below — no new dependency, no new engine.
//
// HONESTY INVARIANTS (all enforced + unit-tested):
//   * `known:false` factors drop from BOTH numerator and denominator — never
//     counted as a pass OR a fail.
//   * A single red factor must NOT force `abstain` when the weighted majority is
//     clean (the anti-AND property).
//   * No `envelope` calibration row ⇒ verdict capped at `reverify`, `act`
//     UNREACHABLE, `calibrated:false`.
//   * ALL factors unknown (or a zero/degenerate weighted denominator, or a
//     non-finite score) ⇒ `unprovable` — never a fake number, never `act`.

use crate::protocol::layers::{TrustEnvelope, TrustFactor};
use m1nd_core::calibration::{self, CalibrationRow};

/// Verdict string for the honest "no provable signal" state. Sibling of
/// calibration's `VERDICT_ACT`/`VERDICT_REVERIFY`/`VERDICT_ABSTAIN`; the envelope
/// adds this fourth state for "nothing here is even measurable".
pub const VERDICT_UNPROVABLE: &str = "unprovable";

/// Named factors, so the composition site and the maps agree on stable keys.
/// This one keys the worst-of-top-results risk band factor.
pub const FACTOR_TRUST_BAND: &str = "trust_band";
/// Stable key for the cheap in-memory binding (trust-mode) factor.
pub const FACTOR_BINDING: &str = "binding";

// Default per-factor weights for slice 1. Stored as constants here but consumed
// through the weighting fn; a later move can lift these into the calibration
// table without touching the weighting math.
/// Weight given to the `trust_band` factor when a `seek` envelope is composed.
pub const WEIGHT_TRUST_BAND: f32 = 1.0;
/// Weight given to the `binding` factor when a `seek` envelope is composed.
pub const WEIGHT_BINDING: f32 = 1.0;

/// One composed factor fed to the pure weighting function.
///
/// `reliability` is the band already mapped to a [0,1] reliability value (higher
/// = more trustworthy). It is only consulted when `known` is true.
#[derive(Clone, Debug)]
pub struct FactorInput {
    /// Stable factor key (e.g. `FACTOR_TRUST_BAND`); copied into the receipt.
    pub name: String,
    /// Band label copied into the receipt and quoted in reason strings. Factors
    /// that could not be measured carry a `deferred: …` explanation here.
    pub band: String,
    /// Relative weight in the weighted average. `weigh_factors` skips factors
    /// whose weight is non-finite or not strictly positive.
    pub weight: f32,
    /// Whether the factor was actually measured. `false` keeps it out of both
    /// the numerator and the denominator of the score.
    pub known: bool,
    /// Reliability in [0,1]; ignored when `known == false`.
    pub reliability: f32,
    /// Optional repair call to surface if this factor drags the verdict off act.
    pub repair_hint: Option<&'static str>,
}

impl FactorInput {
    /// Build the receipt-side [`TrustFactor`] for this input.
    ///
    /// Only `name`, `band`, `weight` and `known` are carried over; the
    /// `reliability` value and the `repair_hint` are not part of the receipt
    /// constructed here.
    fn to_factor(&self) -> TrustFactor {
        let Self {
            name,
            band,
            weight,
            known,
            ..
        } = self;
        TrustFactor {
            name: name.clone(),
            band: band.clone(),
            weight: *weight,
            known: *known,
        }
    }
}

/// Translate a risk-named `trust_band` (as produced by
/// `m1nd_core::trust::trust_band`) into a reliability in [0,1].
///
/// The band vocabulary names RISK, so the scale is inverted: `"high"` (high
/// risk) yields the LOWEST reliability and `"low"` the highest.
///
/// Returns `None` for the cold-start band `insufficient_evidence` and for any
/// band outside the known vocabulary. There is no evidence to weigh in either
/// case, so the composition site reports the factor as `known:false` instead of
/// inventing a middle number.
pub fn trust_band_reliability(band: &str) -> Option<f32> {
    // (risk band, reliability). A band missing from this table has no
    // reliability at all — that includes `insufficient_evidence`.
    const RISK_TO_RELIABILITY: [(&str, f32); 3] = [("high", 0.2), ("medium", 0.5), ("low", 0.8)];

    RISK_TO_RELIABILITY
        .iter()
        .find(|(label, _)| *label == band)
        .map(|&(_, reliability)| reliability)
}

/// Translate a lightweight trust-mode band (derived from cheap in-memory binding
/// reads) into a reliability in [0,1].
///
/// `full_trust` is fully reliable, `needs_ingest`/`orientation_only` sit below
/// the midpoint, and `stale_binding_suspected`/`degraded` are the weakest known
/// bands. Any other band returns `None`.
pub fn binding_reliability(band: &str) -> Option<f32> {
    let reliability = if band == "full_trust" {
        1.0
    } else if matches!(band, "needs_ingest" | "orientation_only") {
        0.4
    } else if matches!(band, "stale_binding_suspected" | "degraded") {
        0.15
    } else {
        // Unknown vocabulary: no reliability can be claimed.
        return None;
    };
    Some(reliability)
}

/// Pure, deterministic weighting: fold the composed factors into a calibrated
/// [`TrustEnvelope`]. No I/O, no clock, no state — fully unit-testable.
///
/// score = Σ_i (w_i · v_i · known_i) / Σ_i (w_i · known_i)
///
/// with `v_i` the per-factor reliability clamped to [0,1]. A factor enters the
/// sums only when it is `known` AND has a finite, strictly positive weight AND a
/// finite reliability; everything else contributes to NEITHER sum. The verdict
/// then bins `score` through the `envelope` calibration row's τ/τ_low
/// (`calibration::verdict_for`) — a weighted DECISION, not a conjunction.
///
/// Degradation rules:
///   * no contributing factor, or a weighted denominator `<= f32::EPSILON`, or a
///     non-finite score ⇒ verdict `unprovable` with `score: 0.0`;
///   * `cal_row == None` ⇒ verdict capped at `reverify`, `calibrated: false`;
///   * otherwise the verdict is whatever `calibration::verdict_for` returns.
///
/// The `reasons` describe exactly what was scored: "weak" entries are emitted
/// only for factors that contributed (quoting the clamped reliability that was
/// actually used), and a factor that is marked `known` but was excluded for an
/// unusable weight/reliability is named explicitly rather than dropped silently.
///
/// `next_repair_call` is `None` for an `act` verdict; otherwise it is the hint
/// of the weakest known factor, falling back to the first hint available.
pub fn weigh_factors(factors: &[FactorInput], cal_row: Option<&CalibrationRow>) -> TrustEnvelope {
    let factor_receipts: Vec<TrustFactor> = factors.iter().map(FactorInput::to_factor).collect();

    // Accumulate ONLY the contributing factors into the weighted score. Unknown
    // or unusable factors never touch either sum.
    let mut numerator = 0.0f32;
    let mut denominator = 0.0f32;
    let mut known_count = 0usize;
    for f in factors.iter().filter(|f| contributes_to_score(f)) {
        numerator += f.weight * f.reliability.clamp(0.0, 1.0);
        denominator += f.weight;
        known_count += 1;
    }

    // Nothing contributed, or the denominator is degenerate ⇒ honestly
    // `unprovable` (never a fake number, never `act`).
    if known_count == 0 || denominator <= f32::EPSILON {
        return unprovable_envelope(
            factors,
            factor_receipts,
            cal_row.is_some(),
            "no provable trust factor was available on this path — the answer is UNPROVABLE, not trusted; re-verify against local files",
        );
    }

    let score = numerator / denominator;
    // Belt-and-suspenders: very large weights can overflow both sums, and the
    // resulting inf/inf is NaN. Report that as unprovable, not as a verdict.
    if !score.is_finite() {
        return unprovable_envelope(
            factors,
            factor_receipts,
            cal_row.is_some(),
            "the weighted trust score was non-finite — reporting UNPROVABLE rather than a fabricated verdict",
        );
    }

    // Decide. With a calibration row, bin the score through the row's τ/τ_low.
    // WITHOUT a row the envelope is honestly uncalibrated: `act` is UNREACHABLE
    // and the verdict is capped at `reverify`.
    let (verdict, calibrated) = match cal_row {
        Some(row) => (
            calibration::verdict_for(score, row.tau, row.tau_low()).to_string(),
            true,
        ),
        None => (calibration::VERDICT_REVERIFY.to_string(), false),
    };

    let headline = if calibrated {
        format!(
            "weighted trust score {score:.2} over {known_count} known factor(s), binned by the calibrated `envelope` threshold → {verdict}"
        )
    } else {
        format!(
            "weighted trust score {score:.2} over {known_count} known factor(s), but the `envelope` signal is UNCALIBRATED — `act` is unreachable and the verdict is capped at `reverify` until a calibration row is measured"
        )
    };
    let mut reasons = vec![headline];

    // Name every contributing factor that fell below a middling reliability so
    // the agent sees WHY the verdict is not `act`. The value quoted is the
    // clamped one that actually entered the score.
    reasons.extend(
        factors
            .iter()
            .filter(|f| contributes_to_score(f))
            .map(|f| (f, f.reliability.clamp(0.0, 1.0)))
            .filter(|(_, reliability)| *reliability < 0.5)
            .map(|(f, reliability)| {
                format!(
                    "factor `{}` is weak (band `{}`, reliability {:.2})",
                    f.name, f.band, reliability
                )
            }),
    );
    // Name factors that claim to be known but could not be scored, so the
    // receipt never silently disagrees with the "known factor(s)" count above.
    reasons.extend(
        factors
            .iter()
            .filter(|f| f.known && !contributes_to_score(f))
            .map(|f| {
                format!(
                    "factor `{}` is marked known but was excluded from the score (its weight or reliability is not usable evidence)",
                    f.name
                )
            }),
    );
    // Name the deferred (unknown) factors honestly.
    reasons.extend(
        factors
            .iter()
            .filter(|f| !f.known)
            .map(|f| format!("factor `{}` deferred ({})", f.name, f.band)),
    );

    // A repair call is meaningful only when the verdict is not `act`. Prefer the
    // hint from the weakest known factor; fall back to the first available hint.
    let next_repair_call = if verdict == calibration::VERDICT_ACT {
        None
    } else {
        weakest_repair_hint(factors).or_else(|| first_repair_hint(factors))
    };

    TrustEnvelope {
        verdict,
        score,
        calibrated,
        factors: factor_receipts,
        reasons,
        next_repair_call,
    }
}

/// Whether a factor takes part in the weighted score: it must be known and carry
/// a finite, strictly positive weight and a finite reliability.
fn contributes_to_score(factor: &FactorInput) -> bool {
    factor.known
        && factor.weight.is_finite()
        && factor.weight > 0.0
        && factor.reliability.is_finite()
}

/// Build the `unprovable` envelope: score pinned to 0.0, a single explanatory
/// reason, and the first repair hint any factor offers.
fn unprovable_envelope(
    factors: &[FactorInput],
    factor_receipts: Vec<TrustFactor>,
    calibrated: bool,
    reason: &str,
) -> TrustEnvelope {
    TrustEnvelope {
        verdict: VERDICT_UNPROVABLE.to_string(),
        score: 0.0,
        calibrated,
        factors: factor_receipts,
        reasons: vec![reason.to_string()],
        next_repair_call: first_repair_hint(factors),
    }
}

/// Classify the binding into a lightweight trust-mode band using only the cheap,
/// in-memory reads available inside `seek` (no re-hash, no file I/O).
///
/// Only what the in-memory graph state proves is classified:
///   * a finalized graph with at least one node AND one edge → `full_trust`
///   * anything else (empty or unfinalized graph)            → `needs_ingest`
///
/// This is a deliberately small subset of the handshake's trust-mode
/// classification. Signals that cannot be observed from inside `seek` (degraded
/// host surface, wrong workspace, content poisoning) are left to the full
/// handshake / `trust_selftest`, and graph-file existence is NOT used as a
/// staleness signal: a populated in-memory graph that has not been persisted yet
/// has no backing file, and flagging it would be a false alarm on the normal
/// path. Consequently this function never returns `stale_binding_suspected` or
/// `degraded`, even though `binding_reliability` knows those bands.
pub fn cheap_trust_mode_band(node_count: u64, edge_count: u64, finalized: bool) -> &'static str {
    let populated = node_count > 0 && edge_count > 0;
    match (finalized, populated) {
        (true, true) => "full_trust",
        _ => "needs_ingest",
    }
}

/// Compose the slice-1 trust factors for a `seek` answer from the cheap/available
/// signals, then fold them into a [`TrustEnvelope`] via [`weigh_factors`].
///
/// KNOWN (composed here):
///   * `trust_band` — the worst trust band across the top results (already
///     computed per-result). `insufficient_evidence`/absent ⇒ `known:false`.
///   * `binding`    — the cheap in-memory trust-mode band (see above).
///
/// DEFERRED (honest `known:false`, each with a reason naming the probe that
/// would produce it — NOT faked): `cross_verify` evidence-freshness (ingest-only,
/// structurally unavailable in seek), `am_i_stale` (per-file I/O), `closure`
/// (built only in why/impact), `mission_verify` evidence-class (needs an open
/// mission).
pub fn compose_seek_trust_envelope(
    top_trust_bands: &[String],
    binding_band: &str,
    cal_row: Option<&CalibrationRow>,
) -> TrustEnvelope {
    let mut factors: Vec<FactorInput> = Vec::new();

    // Factor: trust_band — worst-of-top band (most conservative). A "high" band
    // is HIGH RISK ⇒ low reliability, so "worst" = the band with the LOWEST
    // reliability. `insufficient_evidence`/unmappable ⇒ known:false.
    let worst = worst_trust_band(top_trust_bands);
    match worst.as_deref().and_then(trust_band_reliability) {
        Some(reliability) => factors.push(FactorInput {
            name: FACTOR_TRUST_BAND.to_string(),
            band: worst.unwrap_or_default(),
            weight: WEIGHT_TRUST_BAND,
            known: true,
            reliability,
            repair_hint: Some("cross_verify"),
        }),
        None => factors.push(FactorInput {
            name: FACTOR_TRUST_BAND.to_string(),
            band: worst
                .map(|b| format!("deferred: {b}"))
                .unwrap_or_else(|| "deferred: no results to band".to_string()),
            weight: WEIGHT_TRUST_BAND,
            known: false,
            reliability: 0.0,
            repair_hint: Some("cross_verify"),
        }),
    }

    // Factor: binding — the cheap in-memory trust-mode band.
    match binding_reliability(binding_band) {
        Some(reliability) => factors.push(FactorInput {
            name: FACTOR_BINDING.to_string(),
            band: binding_band.to_string(),
            weight: WEIGHT_BINDING,
            known: true,
            reliability,
            repair_hint: Some(binding_repair(binding_band)),
        }),
        None => factors.push(FactorInput {
            name: FACTOR_BINDING.to_string(),
            band: format!("deferred: {binding_band}"),
            weight: WEIGHT_BINDING,
            known: false,
            reliability: 0.0,
            repair_hint: Some("trust_selftest"),
        }),
    }

    // Deferred factors — structurally unavailable inside seek. Marked known:false
    // with a reason naming the probe; they touch neither sum.
    for (name, probe) in DEFERRED_FACTORS {
        factors.push(FactorInput {
            name: (*name).to_string(),
            band: format!("deferred: {probe}"),
            weight: 1.0,
            known: false,
            reliability: 0.0,
            repair_hint: None,
        });
    }

    weigh_factors(&factors, cal_row)
}

/// Deferred (structurally-unavailable-in-seek) factors and the probe that would
/// make each provable. Kept honest: named, not faked.
///
/// Each entry is `(factor name, probe description)`. The composition site turns
/// every entry into a `known:false` factor whose band text is
/// `deferred: <probe description>`.
const DEFERRED_FACTORS: &[(&str, &str)] = &[
    (
        "evidence_freshness",
        "cross_verify (ingest-only, unavailable in seek)",
    ),
    ("am_i_stale", "am_i_stale (per-file I/O)"),
    ("closure", "why/impact closure (not built in seek)"),
    ("evidence_class", "mission_verify (needs an open mission)"),
];

/// Pick the highest-risk band among the top results, or `None` when `bands` is
/// empty.
///
/// Risk order, lowest to highest: `low` < `medium` < (`insufficient_evidence`
/// and any unrecognised band) < `high`. When several bands share the highest
/// rank, the LAST one in `bands` is returned.
///
/// NOTE(review): unmappable bands outrank `low`/`medium`, so a single such band
/// (with no `high` present) is returned here. `trust_band_reliability` maps it
/// to `None`, which means the composed factor becomes `known:false` even though
/// a mappable band was present. Confirm that masking is intended.
fn worst_trust_band(bands: &[String]) -> Option<String> {
    let mut worst: Option<(u8, &String)> = None;
    for candidate in bands {
        // Higher rank = riskier. Absent/unknown signals sit just below "high" so
        // they never look safer than a measured low/medium band.
        let rank: u8 = match candidate.as_str() {
            "low" => 0,
            "medium" => 1,
            "high" => 3,
            _ => 2,
        };
        // `>=` (not `>`) so that a later band replaces an equally ranked one.
        let replaces = match worst {
            Some((current, _)) => rank >= current,
            None => true,
        };
        if replaces {
            worst = Some((rank, candidate));
        }
    }
    worst.map(|(_, band)| band.clone())
}

/// Name the repair call that best addresses a given binding band.
///
/// `needs_ingest` → `ingest`; `stale_binding_suspected`/`degraded` →
/// `recovery_playbook`; every other band (including `orientation_only`) →
/// `trust_selftest`.
fn binding_repair(band: &str) -> &'static str {
    if band == "needs_ingest" {
        "ingest"
    } else if matches!(band, "stale_binding_suspected" | "degraded") {
        "recovery_playbook"
    } else {
        "trust_selftest"
    }
}

/// The repair hint of the weakest SCORED factor (lowest reliability), if any.
///
/// A factor is considered only when it is `known`, has a finite and strictly
/// positive weight, has a finite reliability, and actually carries a hint.
/// These are the same conditions `weigh_factors` uses to admit a factor into the
/// score, so the hint always comes from a factor that influenced the verdict
/// rather than from one that was discarded as "not evidence".
///
/// Ties on reliability resolve to the earliest factor in `factors`. Returns
/// `None` when no factor qualifies.
fn weakest_repair_hint(factors: &[FactorInput]) -> Option<String> {
    factors
        .iter()
        .filter(|f| {
            f.known
                && f.weight.is_finite()
                && f.weight > 0.0
                && f.reliability.is_finite()
                && f.repair_hint.is_some()
        })
        .min_by(|a, b| {
            // Both sides are finite here, so `partial_cmp` cannot fail; the
            // fallback only keeps the comparison total.
            a.reliability
                .partial_cmp(&b.reliability)
                .unwrap_or(std::cmp::Ordering::Equal)
        })
        .and_then(|f| f.repair_hint.map(str::to_string))
}

/// Return the repair hint of the earliest factor that has one, regardless of
/// whether that factor is known. `None` when no factor carries a hint.
fn first_repair_hint(factors: &[FactorInput]) -> Option<String> {
    for factor in factors {
        if let Some(hint) = factor.repair_hint {
            return Some(hint.to_string());
        }
    }
    None
}

// Unit tests for the pure weighting function and the band → reliability maps.
// Every test drives `weigh_factors` (or a map) directly with hand-built inputs;
// nothing here touches I/O or a real graph.
#[cfg(test)]
mod tests {
    use super::*;

    // A calibration row with tau=0.6 ⇒ tau_low=0.3. Mirrors calibration.rs's
    // `sample_row`: act ≥ 0.6, reverify in [0.3, 0.6), abstain < 0.3.
    fn cal_row() -> CalibrationRow {
        CalibrationRow {
            tau: 0.6,
            target_alpha: calibration::DEFAULT_TARGET_ALPHA,
            measured_precision: 0.85,
            coverage: 0.4,
            n: 100,
            calibrated_at_ms: 1_700_000_000_000,
        }
    }

    // A known factor with the given reliability and weight. The band text is a
    // placeholder and every such factor carries the `cross_verify` repair hint.
    fn known(name: &str, reliability: f32, weight: f32) -> FactorInput {
        FactorInput {
            name: name.to_string(),
            band: "band".to_string(),
            weight,
            known: true,
            reliability,
            repair_hint: Some("cross_verify"),
        }
    }

    // A deferred (known:false) factor with no repair hint.
    fn unknown(name: &str) -> FactorInput {
        FactorInput {
            name: name.to_string(),
            band: "deferred: probe".to_string(),
            weight: WEIGHT_TRUST_BAND,
            known: false,
            reliability: 0.0,
            repair_hint: None,
        }
    }

    // ── Exact weighting oracle: fixed factors → EXACT score → EXACT verdict ──
    // Two equal-weight known factors at reliability 0.8 and 1.0:
    // score = (1·0.8 + 1·1.0) / (1 + 1) = 1.8 / 2 = 0.90. 0.90 ≥ τ(0.6) ⇒ act.
    // NOTE(review): the score is compared with exact f32 equality, so this test
    // depends on the accumulation order and rounding of the fold; compare with
    // a tolerance if the weighting arithmetic is ever rearranged.
    #[test]
    fn exact_score_and_act_verdict() {
        let factors = [known("binding", 1.0, 1.0), known("trust_band", 0.8, 1.0)];
        let env = weigh_factors(&factors, Some(&cal_row()));
        assert_eq!(env.score, 0.90, "exact weighted score");
        assert_eq!(env.verdict, "act");
        assert!(env.calibrated);
        assert!(env.next_repair_call.is_none(), "act ⇒ no repair call");
    }

    // ── Unknown factors drop out of BOTH sums ──
    // known-clean(0.8) + one-unknown MUST equal known-clean(0.8) alone: same
    // score, same verdict. The unknown touches neither numerator nor denominator.
    #[test]
    fn unknown_factor_drops_out_of_both_sums() {
        let clean_only = [known("trust_band", 0.8, 1.0)];
        let clean_plus_unknown = [known("trust_band", 0.8, 1.0), unknown("closure")];

        let a = weigh_factors(&clean_only, Some(&cal_row()));
        let b = weigh_factors(&clean_plus_unknown, Some(&cal_row()));

        assert_eq!(a.score, b.score, "unknown factor must not move the score");
        assert_eq!(a.verdict, b.verdict);
        assert_eq!(a.score, 0.80);
    }

    // ── ANTI-AND: a single red factor must NOT force abstain when the weighted
    //    majority is clean. THREE clean factors (1.0) + ONE red (0.05):
    // score = (1.0+1.0+1.0+0.05)/4 = 3.05/4 = 0.7625 ≥ τ(0.6) ⇒ act (NOT abstain).
    // An any-red AND-fold would have abstained here; the calibrated weighting acts.
    #[test]
    fn single_red_factor_does_not_force_abstain() {
        let factors = [
            known("a", 1.0, 1.0),
            known("b", 1.0, 1.0),
            known("c", 1.0, 1.0),
            known("red", 0.05, 1.0),
        ];
        let env = weigh_factors(&factors, Some(&cal_row()));
        assert!(
            (env.score - 0.7625).abs() < 1e-6,
            "weighted score should be 0.7625, got {}",
            env.score
        );
        assert_eq!(
            env.verdict, "act",
            "one red factor must NOT force abstain when the weighted majority is clean"
        );
    }

    // ── No calibration row ⇒ NEVER act; capped at reverify; calibrated:false. ──
    // Even an all-clean 1.0 score cannot reach `act` without a measured row.
    #[test]
    fn no_calibration_row_caps_at_reverify_never_act() {
        let factors = [known("binding", 1.0, 1.0), known("trust_band", 1.0, 1.0)];
        let env = weigh_factors(&factors, None);
        assert_eq!(env.score, 1.0, "score still computed");
        assert_eq!(env.verdict, "reverify", "uncalibrated ⇒ capped at reverify");
        assert!(!env.calibrated);
        assert_ne!(env.verdict, "act", "act is UNREACHABLE without calibration");
        assert!(env.next_repair_call.is_some());
    }

    // ── All factors unknown ⇒ unprovable (never a fake number, never act). ──
    #[test]
    fn all_unknown_is_unprovable() {
        let factors = [unknown("trust_band"), unknown("binding")];
        let env = weigh_factors(&factors, Some(&cal_row()));
        assert_eq!(env.verdict, "unprovable");
        assert_eq!(env.score, 0.0);
        assert_ne!(env.verdict, "act");
    }

    // Empty factor set ⇒ unprovable, no divide-by-zero.
    #[test]
    fn empty_factor_set_is_unprovable() {
        let env = weigh_factors(&[], Some(&cal_row()));
        assert_eq!(env.verdict, "unprovable");
        assert_eq!(env.score, 0.0);
    }

    // NaN / non-finite reliability on the only factor ⇒ that factor is skipped ⇒
    // no known factor remains ⇒ unprovable, never a NaN score.
    #[test]
    fn nan_reliability_yields_unprovable_not_nan() {
        let mut f = known("trust_band", f32::NAN, 1.0);
        // `known()` already sets this flag; the assignment only makes the
        // precondition of the test explicit.
        f.known = true;
        let env = weigh_factors(&[f], Some(&cal_row()));
        assert_eq!(env.verdict, "unprovable");
        assert!(env.score.is_finite(), "score must never be NaN");
    }

    // Zero / non-finite weight on the only known factor ⇒ skipped ⇒ unprovable.
    #[test]
    fn zero_weight_factor_is_dropped() {
        let env = weigh_factors(&[known("trust_band", 0.9, 0.0)], Some(&cal_row()));
        assert_eq!(env.verdict, "unprovable");
    }

    // A genuinely weak weighted score bins to abstain (not act) — the honest
    // low end still works. score = 0.10 < τ_low(0.3) ⇒ abstain, with a repair.
    #[test]
    fn weak_score_abstains_with_repair() {
        let env = weigh_factors(&[known("binding", 0.10, 1.0)], Some(&cal_row()));
        assert_eq!(env.verdict, "abstain");
        assert!(env.next_repair_call.is_some());
    }

    // Borderline score bins to reverify. score = 0.45 ∈ [0.3, 0.6) ⇒ reverify.
    #[test]
    fn borderline_score_reverifies() {
        let env = weigh_factors(&[known("binding", 0.45, 1.0)], Some(&cal_row()));
        assert_eq!(env.verdict, "reverify");
        assert!(env.next_repair_call.is_some());
    }

    // Band → reliability maps are exact and risk-aware.
    #[test]
    fn band_maps_are_exact() {
        // Risk-named vocabulary: "high" risk maps to the LOWEST reliability.
        assert_eq!(trust_band_reliability("high"), Some(0.2));
        assert_eq!(trust_band_reliability("medium"), Some(0.5));
        assert_eq!(trust_band_reliability("low"), Some(0.8));
        assert_eq!(trust_band_reliability("insufficient_evidence"), None);
        assert_eq!(trust_band_reliability("garbage"), None);

        assert_eq!(binding_reliability("full_trust"), Some(1.0));
        assert_eq!(binding_reliability("needs_ingest"), Some(0.4));
        assert_eq!(binding_reliability("orientation_only"), Some(0.4));
        assert_eq!(binding_reliability("stale_binding_suspected"), Some(0.15));
        assert_eq!(binding_reliability("degraded"), Some(0.15));
        assert_eq!(binding_reliability("garbage"), None);
    }
}