// tacet_core/result.rs

1//! Result types for adaptive Bayesian timing analysis.
2//!
3//! See spec Section 4.1 (Result Types) for the full specification.
4
5extern crate alloc;
6
7use alloc::string::String;
8use alloc::vec::Vec;
9use core::fmt;
10use serde::{Deserialize, Serialize};
11
12// ============================================================================
13// Outcome - The top-level result type
14// ============================================================================
15
/// Top-level outcome of a timing test.
///
/// The adaptive Bayesian oracle returns one of five outcomes:
/// - `Pass`: No timing leak detected (leak_probability < pass_threshold)
/// - `Fail`: Timing leak confirmed (leak_probability > fail_threshold)
/// - `Inconclusive`: Cannot reach a definitive conclusion
/// - `Unmeasurable`: Operation too fast to measure on this platform
/// - `Research`: Research-mode characterization (no Pass/Fail verdict)
///
/// `Pass`, `Fail` and `Inconclusive` all report the same threshold triple
/// (`theta_user`, `theta_eff`, `theta_floor`) alongside the posterior
/// probability, effect estimate, sample count, quality and diagnostics.
///
/// See spec Section 4.1 (Result Types).
// NOTE(review): `Debug` is not derived here; presumably it is implemented by
// hand elsewhere in the crate — confirm.
#[derive(Clone, Serialize, Deserialize)]
// NOTE(review): the data-carrying variants embed `EffectEstimate` and
// `Diagnostics` inline, which is presumably why the size lint is silenced
// instead of boxing the payloads — confirm.
#[allow(clippy::large_enum_variant)]
pub enum Outcome {
    /// No timing leak detected.
    ///
    /// The posterior probability of a timing leak is below the pass threshold
    /// (default 0.05), meaning we're confident there is no exploitable leak.
    Pass {
        /// Posterior probability of timing leak: P(effect > theta | data).
        /// Will be < pass_threshold (default 0.05).
        leak_probability: f64,

        /// Effect size estimate (max effect, credible interval, top quantiles).
        effect: EffectEstimate,

        /// Number of samples used in the analysis.
        samples_used: usize,

        /// Measurement quality assessment.
        quality: MeasurementQuality,

        /// Diagnostic information for debugging.
        diagnostics: Diagnostics,

        /// User's requested threshold in nanoseconds.
        theta_user: f64,

        /// Effective threshold used for inference (may be elevated due to measurement floor).
        theta_eff: f64,

        /// Measurement floor at final sample count.
        theta_floor: f64,
    },

    /// Timing leak confirmed.
    ///
    /// The posterior probability of a timing leak exceeds the fail threshold
    /// (default 0.95), meaning we're confident there is an exploitable leak.
    Fail {
        /// Posterior probability of timing leak: P(effect > theta | data).
        /// Will be > fail_threshold (default 0.95).
        leak_probability: f64,

        /// Effect size estimate (max effect, credible interval, top quantiles).
        effect: EffectEstimate,

        /// Exploitability assessment based on effect magnitude.
        exploitability: Exploitability,

        /// Number of samples used in the analysis.
        samples_used: usize,

        /// Measurement quality assessment.
        quality: MeasurementQuality,

        /// Diagnostic information for debugging.
        diagnostics: Diagnostics,

        /// User's requested threshold in nanoseconds.
        theta_user: f64,

        /// Effective threshold used for inference (may be elevated due to measurement floor).
        theta_eff: f64,

        /// Measurement floor at final sample count.
        theta_floor: f64,
    },

    /// Cannot reach a definitive conclusion.
    ///
    /// The posterior probability is between pass_threshold and fail_threshold,
    /// or the analysis hit a limit (timeout, sample budget, noise).
    Inconclusive {
        /// Reason why the result is inconclusive.
        reason: InconclusiveReason,

        /// Current posterior probability of timing leak.
        leak_probability: f64,

        /// Effect size estimate (may have wide credible intervals).
        effect: EffectEstimate,

        /// Number of samples used in the analysis.
        samples_used: usize,

        /// Measurement quality assessment.
        quality: MeasurementQuality,

        /// Diagnostic information for debugging.
        diagnostics: Diagnostics,

        /// User's requested threshold in nanoseconds.
        theta_user: f64,

        /// Effective threshold used for inference (may be elevated due to measurement floor).
        theta_eff: f64,

        /// Measurement floor at final sample count.
        theta_floor: f64,
    },

    /// Operation too fast to measure reliably on this platform.
    ///
    /// The operation completes faster than the timer's resolution allows
    /// for meaningful measurement, even with adaptive batching.
    Unmeasurable {
        /// Estimated operation duration in nanoseconds.
        operation_ns: f64,

        /// Minimum measurable duration on this platform, in nanoseconds.
        threshold_ns: f64,

        /// Platform description (e.g., "Apple Silicon (cntvct)").
        platform: String,

        /// Suggested actions to make the operation measurable.
        recommendation: String,
    },

    /// Research mode result.
    ///
    /// Returned when using `AttackerModel::Research`. Unlike Pass/Fail/Inconclusive
    /// which make threshold-based decisions, research mode characterizes the
    /// timing behavior relative to the measurement floor using CI-based semantics.
    ///
    /// See `ResearchOutcome` for details on the stopping conditions.
    Research(ResearchOutcome),
}
153
154// ============================================================================
155// InconclusiveReason - Why we couldn't reach a conclusion
156// ============================================================================
157
/// Reason why a timing test result is inconclusive.
///
/// Variants fall into three groups: free-form explanations (`message` +
/// `guidance`), budget/time limits that snapshot the state when the limit was
/// hit, and `ThresholdElevated`, which records the thresholds involved.
///
/// Only `PartialEq` is derived (not `Eq`) because several variants carry `f64`.
///
/// See spec Section 4.1 (Result Types).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub enum InconclusiveReason {
    /// Data is too noisy to reach a conclusion.
    ///
    /// The measurement noise is high enough that we cannot distinguish
    /// between "no leak" and "small leak" with the available samples.
    DataTooNoisy {
        /// Human-readable explanation.
        message: String,
        /// Suggested actions to improve measurement quality.
        guidance: String,
    },

    /// Posterior is not converging toward either threshold.
    ///
    /// After collecting samples, the leak probability remains in the
    /// inconclusive range and isn't trending toward pass or fail.
    NotLearning {
        /// Human-readable explanation.
        message: String,
        /// Suggested actions.
        guidance: String,
    },

    /// Reaching a conclusion would take too long.
    ///
    /// Based on current convergence rate, reaching the pass or fail
    /// threshold would exceed the configured time budget.
    WouldTakeTooLong {
        /// Estimated time in seconds to reach a conclusion.
        estimated_time_secs: f64,
        /// Estimated samples needed to reach a conclusion.
        samples_needed: usize,
        /// Suggested actions.
        guidance: String,
    },

    /// Time budget exhausted.
    ///
    /// The configured time limit was reached before the posterior
    /// converged to a conclusive result.
    TimeBudgetExceeded {
        /// Posterior probability when budget was exhausted.
        current_probability: f64,
        /// Number of samples collected.
        samples_collected: usize,
    },

    /// Sample budget exhausted.
    ///
    /// The maximum number of samples was collected without reaching
    /// a conclusive result.
    SampleBudgetExceeded {
        /// Posterior probability when budget was exhausted.
        current_probability: f64,
        /// Number of samples collected.
        samples_collected: usize,
    },

    /// Measurement conditions changed during the test.
    ///
    /// Detected by comparing calibration statistics with post-test statistics.
    /// This can indicate environmental interference (CPU frequency scaling,
    /// concurrent processes, etc.) that invalidates the covariance estimate.
    /// See spec §3.5.4, Gate 4 (Condition Drift).
    ConditionsChanged {
        /// Human-readable explanation.
        message: String,
        /// Suggested actions.
        guidance: String,
    },

    /// Threshold was elevated and pass criterion was met at effective threshold.
    ///
    /// The measurement floor exceeded the user's requested threshold, so inference
    /// was performed at an elevated effective threshold. The posterior probability
    /// dropped below pass_threshold at θ_eff, but since θ_eff > θ_user + ε, we
    /// cannot guarantee the user's original requirement is met.
    ///
    /// This is NOT a quality gate failure - it's a semantic constraint: Pass requires
    /// both P < pass_threshold AND θ_eff ≤ θ_user + ε.
    ///
    /// See spec Section 3.5.3 (v5.5 Threshold Elevation Decision Rule).
    ThresholdElevated {
        /// User's requested threshold in nanoseconds (θ_user).
        theta_user: f64,
        /// Effective threshold used for inference, in nanoseconds
        /// (θ_eff = max(θ_user, θ_floor)).
        theta_eff: f64,
        /// Posterior probability at θ_eff (was < pass_threshold).
        leak_probability_at_eff: f64,
        /// True: P(leak > θ_eff) < pass_threshold (pass criterion met at elevated threshold).
        meets_pass_criterion_at_eff: bool,
        /// True: θ_floor at max_samples would be ≤ θ_user + ε (more samples could achieve user threshold).
        achievable_at_max: bool,
        /// Human-readable explanation.
        message: String,
        /// Suggested actions.
        guidance: String,
    },
}
261
262// ============================================================================
263// EffectEstimate - Timing effect summary (spec §5.2)
264// ============================================================================
265
/// Estimated timing effect with credible interval and top quantiles.
///
/// This struct summarizes the timing difference between baseline and sample classes.
/// The effect is characterized by the maximum absolute quantile difference across
/// all 9 deciles, with a 95% credible interval and details about which quantiles
/// contribute most to any detected leak.
///
/// All durations are expressed in nanoseconds.
///
/// See spec Section 5.2 (Effect Reporting).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EffectEstimate {
    /// Posterior mean of max_k |δ_k| in nanoseconds.
    ///
    /// This is the maximum absolute timing difference across all 9 deciles,
    /// averaged over posterior samples. Positive values indicate detectable
    /// timing differences between the two input classes.
    pub max_effect_ns: f64,

    /// 95% credible interval for max|δ| in nanoseconds, as `(lower, upper)`.
    ///
    /// This is a Bayesian credible interval: there is a 95% posterior probability
    /// that the true maximum effect lies within this range.
    pub credible_interval_ns: (f64, f64),

    /// Top 2-3 quantiles by exceedance probability.
    ///
    /// When a timing leak is detected, these are the specific quantiles that
    /// contribute most to the leak detection. Each entry includes the quantile
    /// probability (e.g., 0.9 for 90th percentile), the posterior mean effect,
    /// the 95% marginal credible interval, and the exceedance probability
    /// (see `TopQuantile`).
    ///
    /// Empty when no leak is detected or effect is negligible.
    pub top_quantiles: Vec<TopQuantile>,
}
299
300impl EffectEstimate {
301    /// Create a new EffectEstimate with the given values.
302    pub fn new(
303        max_effect_ns: f64,
304        credible_interval_ns: (f64, f64),
305        top_quantiles: Vec<TopQuantile>,
306    ) -> Self {
307        Self {
308            max_effect_ns,
309            credible_interval_ns,
310            top_quantiles,
311        }
312    }
313
314    /// Check if the effect is negligible (max effect below threshold).
315    pub fn is_negligible(&self, threshold_ns: f64) -> bool {
316        self.max_effect_ns.abs() < threshold_ns
317    }
318
319    /// Get the total effect magnitude (same as max_effect_ns for API compatibility).
320    pub fn total_effect_ns(&self) -> f64 {
321        self.max_effect_ns
322    }
323}
324
325impl Default for EffectEstimate {
326    fn default() -> Self {
327        Self {
328            max_effect_ns: 0.0,
329            credible_interval_ns: (0.0, 0.0),
330            top_quantiles: Vec::new(),
331        }
332    }
333}
334
335// ============================================================================
336// Exploitability - Risk assessment
337// ============================================================================
338
/// Exploitability assessment based on effect magnitude.
///
/// Based on Crosby et al. (2009) thresholds for timing attack feasibility.
/// These thresholds are heuristics based on academic research for risk
/// prioritization, not guarantees. The thresholds reflect modern attack
/// techniques including HTTP/2 multiplexing (Timeless Timing Attacks) and
/// shared-hardware attacks (KyberSlash, Flush+Reload).
///
/// The bands below are half-open on the absolute effect size, matching
/// `Exploitability::from_effect_ns`.
///
/// See spec Section 5.4 (Exploitability).
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
pub enum Exploitability {
    /// Effect < 10 ns: Requires shared hardware to exploit.
    ///
    /// Only exploitable by attackers with physical co-location: SGX enclaves,
    /// hyperthreading on same core, containers on same host, or cross-VM on
    /// shared cache. Remote exploitation is impractical.
    ///
    /// References: KyberSlash (2024), Flush+Reload, Prime+Probe literature
    SharedHardwareOnly,

    /// 10 ns ≤ effect < 100 ns: Exploitable via HTTP/2 request multiplexing.
    ///
    /// Requires ~100k concurrent HTTP/2 requests to exploit. The "Timeless
    /// Timing Attacks" technique eliminates network jitter by sending requests
    /// that arrive simultaneously, making response order reveal timing differences.
    ///
    /// Reference: Van Goethem et al., "Timeless Timing Attacks" (USENIX Security 2020)
    Http2Multiplexing,

    /// 100 ns ≤ effect < 10 μs: Exploitable with standard remote timing.
    ///
    /// Requires ~1k-10k requests using traditional timing techniques.
    /// Exploitable on LAN with any protocol, or over internet with HTTP/2.
    ///
    /// References: Crosby et al. (2009), Brumley & Boneh (2005)
    StandardRemote,

    /// Effect ≥ 10 μs: Obvious timing leak, trivially exploitable.
    ///
    /// Detectable with < 100 requests. Exploitable over the internet even
    /// with high-jitter connections using traditional timing techniques.
    ObviousLeak,
}
382
383impl Exploitability {
384    /// Determine exploitability from effect size in nanoseconds.
385    ///
386    /// Thresholds are based on:
387    /// - < 10 ns: Below HTTP/2 timing precision, requires shared hardware
388    /// - 10-100 ns: Within HTTP/2 "Timeless Timing Attacks" range
389    /// - 100 ns - 10 μs: Standard remote timing attack range
390    /// - > 10 μs: Trivially observable
391    pub fn from_effect_ns(effect_ns: f64) -> Self {
392        let effect_ns = effect_ns.abs();
393        if effect_ns < 10.0 {
394            Exploitability::SharedHardwareOnly
395        } else if effect_ns < 100.0 {
396            Exploitability::Http2Multiplexing
397        } else if effect_ns < 10_000.0 {
398            Exploitability::StandardRemote
399        } else {
400            Exploitability::ObviousLeak
401        }
402    }
403}
404
405// ============================================================================
406// MeasurementQuality - Assessment of measurement reliability
407// ============================================================================
408
/// Measurement quality assessment based on noise level.
///
/// Quality is determined primarily by the minimum detectable effect (MDE)
/// relative to the configured threshold. The bands below mirror
/// `MeasurementQuality::from_mde_ns`.
///
/// See spec Section 5.5 (Quality Assessment).
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
pub enum MeasurementQuality {
    /// Low noise, high confidence (0.01 ns < MDE < 5 ns).
    Excellent,

    /// Normal noise levels (5 ns ≤ MDE < 20 ns).
    Good,

    /// High noise, results less reliable (20 ns ≤ MDE < 100 ns).
    Poor,

    /// Cannot produce meaningful results (MDE ≥ 100 ns, or an invalid MDE:
    /// ≤ 0.01 ns or non-finite).
    TooNoisy,
}
429
430impl MeasurementQuality {
431    /// Determine quality from minimum detectable effect.
432    ///
433    /// Invalid MDE values (less than or equal to 0 or non-finite) indicate a measurement problem
434    /// and are classified as `TooNoisy`.
435    ///
436    /// Very small MDE (< 0.01 ns) also indicates timer resolution issues
437    /// where most samples have identical values.
438    pub fn from_mde_ns(mde_ns: f64) -> Self {
439        // Invalid MDE indicates measurement failure
440        if mde_ns <= 0.01 || !mde_ns.is_finite() {
441            return MeasurementQuality::TooNoisy;
442        }
443
444        if mde_ns < 5.0 {
445            MeasurementQuality::Excellent
446        } else if mde_ns < 20.0 {
447            MeasurementQuality::Good
448        } else if mde_ns < 100.0 {
449            MeasurementQuality::Poor
450        } else {
451            MeasurementQuality::TooNoisy
452        }
453    }
454}
455
456// ============================================================================
457// ResearchOutcome - Result type for research mode
458// ============================================================================
459
/// Status of a research mode run.
///
/// Research mode (AttackerModel::Research) doesn't make Pass/Fail decisions.
/// Instead, it characterizes the timing behavior with respect to the measurement floor.
///
/// Only `PartialEq` is derived (not `Eq`): the `QualityIssue` payload,
/// `InconclusiveReason`, contains `f64` fields.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub enum ResearchStatus {
    /// CI clearly above θ_floor — timing difference detected.
    ///
    /// The 95% credible interval lower bound is clearly above the measurement
    /// floor (> 1.1 × θ_floor), indicating a confidently detectable effect.
    EffectDetected,

    /// CI clearly below θ_floor — no timing difference above noise.
    ///
    /// The 95% credible interval upper bound is clearly below the measurement
    /// floor (< 0.9 × θ_floor), indicating no detectable effect.
    NoEffectDetected,

    /// Hit timer resolution limit; θ_floor is as good as it gets.
    ///
    /// Further sampling won't improve the measurement floor because we've
    /// hit the fundamental timer tick resolution.
    ResolutionLimitReached,

    /// Data quality issue detected.
    ///
    /// A quality gate triggered during research mode. Unlike standard mode,
    /// this doesn't block the result but is reported for transparency.
    /// The payload is the same reason type used by `Outcome::Inconclusive`.
    QualityIssue(InconclusiveReason),

    /// Ran out of time/samples before reaching conclusion.
    ///
    /// The budget was exhausted before the CI could confidently settle
    /// above or below the measurement floor.
    BudgetExhausted,
}
496
/// Research mode outcome (spec v4.1 research mode).
///
/// This struct is returned when using `AttackerModel::Research`. Unlike the
/// standard `Outcome` which makes Pass/Fail decisions, research mode characterizes
/// the timing behavior relative to the measurement floor.
///
/// Key differences from standard mode:
/// - No Pass/Fail verdict (no threshold comparison)
/// - Reports measurement floor (`theta_floor`) at final sample size
/// - `detectable` field indicates if CI lower bound > floor
/// - `model_mismatch` is non-blocking (tracked but doesn't stop analysis)
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ResearchOutcome {
    /// Research outcome status.
    pub status: ResearchStatus,

    /// Maximum effect across quantiles: max_k |(Xβ)_k| in nanoseconds.
    /// This is the posterior mean of the maximum absolute predicted effect.
    pub max_effect_ns: f64,

    /// 95% credible interval for maximum effect: (2.5th, 97.5th percentile),
    /// in nanoseconds.
    pub max_effect_ci: (f64, f64),

    /// Measurement floor at final sample size, in nanoseconds.
    /// This is the minimum detectable effect given measurement noise.
    pub theta_floor: f64,

    /// True if the effect is detectable: CI lower bound > theta_floor.
    pub detectable: bool,

    /// True if model mismatch was detected (Q > q_thresh).
    /// In research mode, this is non-blocking but adds a caveat to interpretation.
    pub model_mismatch: bool,

    /// Effect size estimate (max effect, credible interval, top quantiles).
    ///
    /// NOTE(review): earlier docs said an `interpretation_caveat` is set when
    /// `model_mismatch` is true, but `EffectEstimate` as defined in this file
    /// has no such field — confirm whether that behavior still exists.
    pub effect: EffectEstimate,

    /// Number of samples used.
    pub samples_used: usize,

    /// Measurement quality assessment.
    pub quality: MeasurementQuality,

    /// Diagnostic information.
    pub diagnostics: Diagnostics,
}
544
545impl ResearchOutcome {
546    /// Check if a timing effect was confidently detected.
547    pub fn is_effect_detected(&self) -> bool {
548        matches!(self.status, ResearchStatus::EffectDetected)
549    }
550
551    /// Check if no effect was confidently detected.
552    pub fn is_no_effect_detected(&self) -> bool {
553        matches!(self.status, ResearchStatus::NoEffectDetected)
554    }
555
556    /// Check if the resolution limit was reached.
557    pub fn is_resolution_limit_reached(&self) -> bool {
558        matches!(self.status, ResearchStatus::ResolutionLimitReached)
559    }
560
561    /// Check if there was a quality issue.
562    pub fn has_quality_issue(&self) -> bool {
563        matches!(self.status, ResearchStatus::QualityIssue(_))
564    }
565
566    /// Get the effect estimate.
567    pub fn effect(&self) -> &EffectEstimate {
568        &self.effect
569    }
570
571    /// Get the measurement quality.
572    pub fn quality(&self) -> MeasurementQuality {
573        self.quality
574    }
575
576    /// Get the diagnostics.
577    pub fn diagnostics(&self) -> &Diagnostics {
578        &self.diagnostics
579    }
580}
581
582impl fmt::Display for ResearchStatus {
583    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
584        match self {
585            ResearchStatus::EffectDetected => write!(f, "effect detected"),
586            ResearchStatus::NoEffectDetected => write!(f, "no effect detected"),
587            ResearchStatus::ResolutionLimitReached => write!(f, "resolution limit reached"),
588            ResearchStatus::QualityIssue(reason) => write!(f, "quality issue: {}", reason),
589            ResearchStatus::BudgetExhausted => write!(f, "budget exhausted"),
590        }
591    }
592}
593
594impl fmt::Display for ResearchOutcome {
595    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
596        writeln!(f, "Research Mode: {}", self.status)?;
597        writeln!(
598            f,
599            "  Max effect: {:.2}ns (CI: {:.2}-{:.2}ns)",
600            self.max_effect_ns, self.max_effect_ci.0, self.max_effect_ci.1
601        )?;
602        writeln!(f, "  Measurement floor: {:.2}ns", self.theta_floor)?;
603        writeln!(
604            f,
605            "  Detectable: {}",
606            if self.detectable { "yes" } else { "no" }
607        )?;
608        if self.model_mismatch {
609            writeln!(f, "  Warning: model mismatch detected")?;
610        }
611        writeln!(f, "  Samples: {}", self.samples_used)?;
612        writeln!(f, "  Quality: {}", self.quality)?;
613        Ok(())
614    }
615}
616
617// ============================================================================
618// TopQuantile - Information about significant quantiles (spec §5.2)
619// ============================================================================
620
/// Information about a quantile with high exceedance probability.
///
/// When a timing leak is detected, this struct provides information about
/// which specific quantiles (deciles) contribute most to the leak detection.
/// The top 2-3 quantiles by exceedance probability are included in
/// `EffectEstimate.top_quantiles` to help users understand where timing
/// differences are concentrated.
///
/// See spec Section 5.2 (Effect Reporting).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct TopQuantile {
    /// Quantile probability (e.g., 0.9 for 90th percentile).
    pub quantile_p: f64,

    /// Posterior mean δ_k in nanoseconds.
    pub mean_ns: f64,

    /// 95% marginal credible interval (lower, upper) in nanoseconds.
    pub ci95_ns: (f64, f64),

    /// P(|δ_k| > θ_eff | Δ) - per-quantile exceedance probability.
    ///
    /// This is the probability that this individual quantile's effect
    /// exceeds the threshold, computed from the marginal posterior.
    pub exceed_prob: f64,
}
647
648impl TopQuantile {
649    /// Create a new TopQuantile entry.
650    pub fn new(quantile_p: f64, mean_ns: f64, ci95_ns: (f64, f64), exceed_prob: f64) -> Self {
651        Self {
652            quantile_p,
653            mean_ns,
654            ci95_ns,
655            exceed_prob,
656        }
657    }
658}
659
660// ============================================================================
661// Diagnostics - Detailed diagnostic information (spec §6)
662// ============================================================================
663
/// Diagnostic information for debugging and analysis.
///
/// Groups, in field order: dependence/stationarity/outlier checks, timer
/// characteristics, preflight and bookkeeping, warnings and quality issues,
/// reproduction info, and Gibbs sampler diagnostics for λ (v5.4) and κ (v5.6).
///
/// `Diagnostics::all_ok()` (also used by `Default`) fills the numeric fields
/// with placeholders; prefer real measured values.
///
/// See spec Section 6 (Quality Metrics).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Diagnostics {
    /// Block size used for bootstrap (Politis-White automatic selection).
    pub dependence_length: usize,

    /// Effective sample size accounting for autocorrelation (ESS approx n / dependence_length).
    pub effective_sample_size: usize,

    /// Non-stationarity: ratio of inference to calibration variance.
    /// Values 0.5-2.0 are normal; >5.0 indicates non-stationarity.
    ///
    /// NOTE(review): the 2.0-5.0 range is not described here — confirm how it
    /// is meant to be interpreted.
    pub stationarity_ratio: f64,

    /// True if stationarity ratio is within acceptable bounds (0.5-2.0).
    pub stationarity_ok: bool,

    /// Outlier rate for baseline class (fraction trimmed).
    pub outlier_rate_baseline: f64,

    /// Outlier rate for sample class (fraction trimmed).
    pub outlier_rate_sample: f64,

    /// True if outlier rates are symmetric (both <1%, ratio <3x, diff <2%).
    pub outlier_asymmetry_ok: bool,

    /// Whether discrete timer mode was used (low timer resolution).
    pub discrete_mode: bool,

    /// Timer resolution in nanoseconds.
    pub timer_resolution_ns: f64,

    /// Fraction of samples with duplicate timing values (0.0-1.0).
    pub duplicate_fraction: f64,

    /// True if preflight checks passed (sanity, generator, system).
    pub preflight_ok: bool,

    /// Number of samples used for calibration (covariance estimation).
    pub calibration_samples: usize,

    /// Total time spent on the analysis in seconds.
    pub total_time_secs: f64,

    /// Human-readable warnings (empty if all checks pass).
    pub warnings: Vec<String>,

    /// Quality issues detected during measurement.
    pub quality_issues: Vec<QualityIssue>,

    /// Preflight warnings from calibration phase.
    ///
    /// These warnings are categorized by severity:
    /// - `Informational`: Sampling efficiency issues (results still valid)
    /// - `ResultUndermining`: Statistical assumption violations (results may be unreliable)
    pub preflight_warnings: Vec<PreflightWarningInfo>,

    // =========================================================================
    // Reproduction info (for verbose/debug output)
    // =========================================================================
    /// Measurement seed used for reproducibility.
    pub seed: Option<u64>,

    /// Attacker model name (e.g., "AdjacentNetwork", "SharedHardware").
    pub attacker_model: Option<String>,

    /// Effect threshold (theta) in nanoseconds.
    pub threshold_ns: f64,

    /// Timer implementation name (e.g., "rdtsc", "cntvct_el0", "kperf").
    pub timer_name: String,

    /// Platform description (e.g., "macos-aarch64").
    pub platform: String,

    /// Reason the timer fell back from high-precision PMU (if applicable).
    ///
    /// Used to generate context-aware recommendations in output.
    /// - "concurrent access": kperf locked by another process
    /// - "no sudo": not running with elevated privileges
    /// - "unavailable": PMU init failed despite privileges
    /// - None: using high-precision timer or x86_64 (rdtsc is already ~0.3ns)
    ///
    /// Omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub timer_fallback_reason: Option<String>,

    // =========================================================================
    // v5.4 Gibbs sampler diagnostics
    // =========================================================================
    /// v5.4: Total number of Gibbs iterations.
    pub gibbs_iters_total: usize,

    /// v5.4: Number of burn-in iterations.
    pub gibbs_burnin: usize,

    /// v5.4: Number of retained samples.
    pub gibbs_retained: usize,

    /// v5.4: Posterior mean of latent scale λ.
    pub lambda_mean: f64,

    /// v5.4: Posterior standard deviation of λ.
    pub lambda_sd: f64,

    /// v5.4: Coefficient of variation of λ (λ_sd / λ_mean).
    pub lambda_cv: f64,

    /// v5.4: Effective sample size of λ chain.
    pub lambda_ess: f64,

    /// v5.4: Whether λ chain mixed well (CV ≥ 0.1 AND ESS ≥ 20).
    pub lambda_mixing_ok: bool,

    // =========================================================================
    // v5.6 Gibbs sampler κ (kappa) diagnostics - robust t-likelihood
    // =========================================================================
    /// v5.6: Posterior mean of likelihood precision κ.
    pub kappa_mean: f64,

    /// v5.6: Posterior standard deviation of κ.
    pub kappa_sd: f64,

    /// v5.6: Coefficient of variation of κ (kappa_sd / kappa_mean).
    pub kappa_cv: f64,

    /// v5.6: Effective sample size of κ chain.
    pub kappa_ess: f64,

    /// v5.6: Whether κ chain mixed well (CV ≥ 0.1 AND ESS ≥ 20).
    pub kappa_mixing_ok: bool,
}
795
796impl Diagnostics {
797    /// Create diagnostics indicating all checks passed.
798    ///
799    /// Uses placeholder values for numeric fields; prefer constructing
800    /// explicitly with actual measured values.
801    pub fn all_ok() -> Self {
802        Self {
803            dependence_length: 1,
804            effective_sample_size: 0,
805            stationarity_ratio: 1.0,
806            stationarity_ok: true,
807            outlier_rate_baseline: 0.0,
808            outlier_rate_sample: 0.0,
809            outlier_asymmetry_ok: true,
810            discrete_mode: false,
811            timer_resolution_ns: 1.0,
812            duplicate_fraction: 0.0,
813            preflight_ok: true,
814            calibration_samples: 0,
815            total_time_secs: 0.0,
816            warnings: Vec::new(),
817            quality_issues: Vec::new(),
818            preflight_warnings: Vec::new(),
819            seed: None,
820            attacker_model: None,
821            threshold_ns: 0.0,
822            timer_name: String::new(),
823            platform: String::new(),
824            timer_fallback_reason: None,
825            // v5.4 Gibbs sampler diagnostics
826            gibbs_iters_total: 256,
827            gibbs_burnin: 64,
828            gibbs_retained: 192,
829            lambda_mean: 1.0,
830            lambda_sd: 0.0,
831            lambda_cv: 0.0,
832            lambda_ess: 0.0,
833            lambda_mixing_ok: true,
834            // v5.6 kappa diagnostics
835            kappa_mean: 1.0,
836            kappa_sd: 0.0,
837            kappa_cv: 0.0,
838            kappa_ess: 0.0,
839            kappa_mixing_ok: true,
840        }
841    }
842
843    /// Check if all diagnostics are OK.
844    pub fn all_checks_passed(&self) -> bool {
845        self.stationarity_ok && self.outlier_asymmetry_ok && self.preflight_ok
846    }
847}
848
849impl Default for Diagnostics {
850    fn default() -> Self {
851        Self::all_ok()
852    }
853}
854
855// ============================================================================
856// QualityIssue - Specific quality problems
857// ============================================================================
858
859/// A specific quality issue detected during measurement.
860#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
861pub struct QualityIssue {
862    /// Issue code for programmatic handling.
863    pub code: IssueCode,
864
865    /// Human-readable description of the issue.
866    pub message: String,
867
868    /// Suggested actions to address the issue.
869    pub guidance: String,
870}
871
872/// Issue codes for programmatic handling of quality problems.
873///
874/// Consolidated to 8 categories per spec §6.1 (v6.0).
875#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash)]
876pub enum IssueCode {
877    /// High temporal dependence reduces effective sample size.
878    ///
879    /// Covers: high autocorrelation, low effective sample size.
880    /// The block bootstrap accounts for this, but it means more samples
881    /// were needed to reach the same confidence level.
882    DependenceHigh,
883
884    /// Low measurement precision due to setup issues.
885    ///
886    /// Covers: small sample count for discrete mode, generator cost asymmetry,
887    /// low entropy in random inputs. These affect measurement quality but
888    /// results are still valid.
889    PrecisionLow,
890
891    /// Timer has low resolution, using discrete mode.
892    ///
893    /// The timer resolution is coarse enough that many samples have identical
894    /// values. The bootstrap handles this, but sensitivity is reduced.
895    DiscreteMode,
896
897    /// Threshold was adjusted due to measurement limitations.
898    ///
899    /// Covers: threshold elevated due to measurement floor, threshold clamped
900    /// to timer resolution. The effective threshold may differ from the
901    /// user-requested threshold.
902    ThresholdIssue,
903
904    /// Outlier filtering was applied to the data.
905    ///
906    /// Covers: high winsorization rate, quantiles filtered from analysis.
907    /// Some data points were trimmed as outliers. This is normal but
908    /// excessive filtering may indicate environmental issues.
909    FilteringApplied,
910
911    /// Stationarity of timing distribution is suspect.
912    ///
913    /// The timing distribution may have changed during measurement,
914    /// violating the i.i.d. assumption. This can occur due to CPU
915    /// frequency scaling, thermal throttling, or concurrent processes.
916    StationarityIssue,
917
918    /// Numerical issues in Gibbs sampler.
919    ///
920    /// Covers: lambda chain poor mixing, kappa chain poor mixing.
921    /// The MCMC chains showed poor convergence (CV < 0.1 or ESS < 20).
922    /// Results may be less reliable.
923    NumericalIssue,
924
925    /// Likelihood covariance was inflated for robustness.
926    ///
927    /// The robust t-likelihood inflated covariance by ~1/κ_mean to accommodate
928    /// data that doesn't match the estimated Σₙ. Effect estimates remain valid
929    /// but uncertainty was increased for robustness (kappa_mean < 0.3).
930    LikelihoodInflated,
931}
932
933// ============================================================================
934// PreflightWarning - Preflight check results
935// ============================================================================
936
937/// Severity of a preflight warning.
938///
939/// This distinction is critical for interpreting results:
940///
941/// - **Informational**: Affects sampling efficiency but not result validity.
942///   The Bayesian posterior is still trustworthy; you just needed more samples
943///   to reach the same confidence level. Examples: high autocorrelation,
944///   coarse timer resolution, suboptimal CPU governor.
945///
946/// - **ResultUndermining**: Violates statistical assumptions the Bayesian model
947///   relies on. The posterior confidence may be misplaced because the model's
948///   assumptions don't hold. Examples: non-monotonic timer (measurements are
949///   garbage), severe non-stationarity (distribution changed during measurement),
950///   broken harness with mutable state (Fixed-vs-Fixed inconsistency).
951#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
952pub enum PreflightSeverity {
953    /// Sampling efficiency issue - doesn't invalidate results.
954    ///
955    /// These warnings indicate that the measurement setup is suboptimal and
956    /// required more samples to reach a conclusion, but the Bayesian posterior
957    /// is still valid. The result can be trusted.
958    ///
959    /// Examples:
960    /// - High autocorrelation (reduces effective sample size)
961    /// - Coarse timer resolution (requires more samples)
962    /// - Suboptimal CPU governor (adds variance)
963    /// - Generator cost asymmetry (may inflate differences but doesn't invalidate)
964    Informational,
965
966    /// Statistical assumption violation - undermines result confidence.
967    ///
968    /// These warnings indicate that fundamental assumptions of the Bayesian
969    /// model may be violated. Even if the posterior appears confident, that
970    /// confidence may be misplaced.
971    ///
972    /// Examples:
973    /// - Non-monotonic timer (measurements are meaningless)
974    /// - Severe non-stationarity (distribution changed during measurement)
975    /// - Fixed-vs-Fixed inconsistency with randomization (likely mutable state bug)
976    ResultUndermining,
977}
978
979/// Category of preflight check.
980///
981/// Used for organizing warnings in output and for programmatic filtering.
982#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash)]
983pub enum PreflightCategory {
984    /// Timer sanity checks (monotonicity, basic functionality).
985    ///
986    /// **Severity**: ResultUndermining if failed - measurements are unreliable.
987    TimerSanity,
988
989    /// Fixed-vs-Fixed internal consistency check.
990    ///
991    /// **Severity**: ResultUndermining if triggered - may indicate mutable state
992    /// captured in test closure, or severe environmental interference.
993    /// Note: May be intentional for FPR validation testing.
994    Sanity,
995
996    /// Autocorrelation in timing samples.
997    ///
998    /// **Severity**: Informational - reduces effective sample size but the
999    /// block bootstrap accounts for this.
1000    Autocorrelation,
1001
1002    /// System configuration (CPU governor, turbo boost, etc.).
1003    ///
1004    /// **Severity**: Informational - suboptimal config adds variance but
1005    /// doesn't invalidate results.
1006    System,
1007
1008    /// Timer resolution and precision.
1009    ///
1010    /// **Severity**: Informational - coarse timers require more samples but
1011    /// adaptive batching compensates for this.
1012    Resolution,
1013
1014    /// Stationarity of timing distribution.
1015    ///
1016    /// **Severity**: ResultUndermining if severely violated - indicates the
1017    /// timing distribution changed during measurement.
1018    Stationarity,
1019}
1020
1021/// Information about a preflight warning.
1022///
1023/// Preflight warnings are collected during the calibration phase and reported
1024/// to help users understand measurement quality and potential issues.
1025#[derive(Debug, Clone, Serialize, Deserialize)]
1026pub struct PreflightWarningInfo {
1027    /// Category of the check that generated this warning.
1028    pub category: PreflightCategory,
1029
1030    /// Severity of this warning.
1031    ///
1032    /// - `Informational`: Sampling efficiency issue, results still valid.
1033    /// - `ResultUndermining`: Statistical assumption violation, results may be unreliable.
1034    pub severity: PreflightSeverity,
1035
1036    /// Human-readable description of the warning.
1037    pub message: String,
1038
1039    /// Optional guidance for addressing the issue.
1040    pub guidance: Option<String>,
1041}
1042
1043impl PreflightWarningInfo {
1044    /// Create a new preflight warning.
1045    pub fn new(
1046        category: PreflightCategory,
1047        severity: PreflightSeverity,
1048        message: impl Into<String>,
1049    ) -> Self {
1050        Self {
1051            category,
1052            severity,
1053            message: message.into(),
1054            guidance: None,
1055        }
1056    }
1057
1058    /// Create a new preflight warning with guidance.
1059    pub fn with_guidance(
1060        category: PreflightCategory,
1061        severity: PreflightSeverity,
1062        message: impl Into<String>,
1063        guidance: impl Into<String>,
1064    ) -> Self {
1065        Self {
1066            category,
1067            severity,
1068            message: message.into(),
1069            guidance: Some(guidance.into()),
1070        }
1071    }
1072
1073    /// Check if this warning undermines result confidence.
1074    pub fn is_result_undermining(&self) -> bool {
1075        self.severity == PreflightSeverity::ResultUndermining
1076    }
1077}
1078
1079// ============================================================================
1080// MinDetectableEffect - Sensitivity information (spec §3.3)
1081// ============================================================================
1082
1083/// Minimum detectable effect at current noise level.
1084///
1085/// The MDE tells you the smallest effect that could be reliably detected
1086/// given the measurement noise. If MDE > threshold, a "pass" result means
1087/// insufficient sensitivity, not necessarily safety.
1088///
1089/// See spec Section 3.3 (Minimum Detectable Effect).
1090#[derive(Debug, Clone, Serialize, Deserialize)]
1091pub struct MinDetectableEffect {
1092    /// Minimum detectable effect in nanoseconds.
1093    ///
1094    /// This is the smallest timing difference that could be reliably detected
1095    /// at 50% power given the measurement noise. Computed from the covariance
1096    /// of the quantile differences.
1097    pub mde_ns: f64,
1098}
1099
1100impl MinDetectableEffect {
1101    /// Create a new MinDetectableEffect with the given value.
1102    pub fn new(mde_ns: f64) -> Self {
1103        Self { mde_ns }
1104    }
1105}
1106
1107impl Default for MinDetectableEffect {
1108    fn default() -> Self {
1109        Self {
1110            mde_ns: f64::INFINITY,
1111        }
1112    }
1113}
1114
1115// ============================================================================
1116// BatchingInfo - Metadata about batching
1117// ============================================================================
1118
1119/// Information about batching configuration used during collection.
1120#[derive(Debug, Clone, Serialize, Deserialize)]
1121pub struct BatchingInfo {
1122    /// Whether batching was enabled.
1123    pub enabled: bool,
1124
1125    /// Iterations per batch (1 if batching disabled).
1126    pub k: u32,
1127
1128    /// Effective ticks per batch measurement.
1129    pub ticks_per_batch: f64,
1130
1131    /// Explanation of why batching was enabled/disabled.
1132    pub rationale: String,
1133
1134    /// Whether the operation was too fast to measure reliably.
1135    pub unmeasurable: Option<UnmeasurableInfo>,
1136}
1137
1138/// Information about why an operation is unmeasurable.
1139#[derive(Debug, Clone, Serialize, Deserialize)]
1140pub struct UnmeasurableInfo {
1141    /// Estimated operation duration in nanoseconds.
1142    pub operation_ns: f64,
1143
1144    /// Minimum measurable threshold in nanoseconds.
1145    pub threshold_ns: f64,
1146
1147    /// Ticks per call (below MIN_TICKS_SINGLE_CALL).
1148    pub ticks_per_call: f64,
1149}
1150
1151// ============================================================================
1152// Metadata - Runtime information
1153// ============================================================================
1154
1155/// Metadata for debugging and analysis.
1156#[derive(Debug, Clone, Serialize, Deserialize)]
1157pub struct Metadata {
1158    /// Samples per class after outlier filtering.
1159    pub samples_per_class: usize,
1160
1161    /// Cycles per nanosecond (for conversion).
1162    pub cycles_per_ns: f64,
1163
1164    /// Timer type used.
1165    pub timer: String,
1166
1167    /// Timer resolution in nanoseconds.
1168    pub timer_resolution_ns: f64,
1169
1170    /// Batching configuration and rationale.
1171    pub batching: BatchingInfo,
1172
1173    /// Total runtime in seconds.
1174    pub runtime_secs: f64,
1175}
1176
1177// ============================================================================
1178// UnreliablePolicy - How to handle unreliable results
1179// ============================================================================
1180
/// How test assertions should treat unreliable measurements.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum UnreliablePolicy {
    /// Log a warning and skip the assertions, so the test passes.
    ///
    /// Suited to noisy CI, parallel tests, and situations where
    /// "some coverage is better than none".
    #[default]
    FailOpen,

    /// Panic, so the test fails.
    ///
    /// Suited to security-critical code and dedicated quiet CI runners.
    FailClosed,
}
1193
1194impl UnreliablePolicy {
1195    /// Get policy from environment variable, or use default.
1196    ///
1197    /// Checks `TIMING_ORACLE_UNRELIABLE_POLICY`:
1198    /// - "fail_open" or "skip" → FailOpen
1199    /// - "fail_closed" or "panic" → FailClosed
1200    /// - unset or other → default
1201    #[cfg(feature = "std")]
1202    pub fn from_env_or(default: Self) -> Self {
1203        match std::env::var("TIMING_ORACLE_UNRELIABLE_POLICY")
1204            .ok()
1205            .as_deref()
1206        {
1207            Some("fail_open") | Some("skip") => Self::FailOpen,
1208            Some("fail_closed") | Some("panic") => Self::FailClosed,
1209            _ => default,
1210        }
1211    }
1212
1213    /// Get policy from environment variable, or use default.
1214    ///
1215    /// In no_std mode, always returns the default.
1216    #[cfg(not(feature = "std"))]
1217    pub fn from_env_or(default: Self) -> Self {
1218        default
1219    }
1220}
1221
1222// ============================================================================
1223// Outcome implementation
1224// ============================================================================
1225
1226impl Outcome {
1227    /// Check if the test passed (no timing leak detected).
1228    pub fn passed(&self) -> bool {
1229        matches!(self, Outcome::Pass { .. })
1230    }
1231
1232    /// Check if the test failed (timing leak detected).
1233    pub fn failed(&self) -> bool {
1234        matches!(self, Outcome::Fail { .. })
1235    }
1236
1237    /// Check if the result is conclusive (either Pass or Fail).
1238    pub fn is_conclusive(&self) -> bool {
1239        matches!(self, Outcome::Pass { .. } | Outcome::Fail { .. })
1240    }
1241
1242    /// Check if the operation was measurable.
1243    pub fn is_measurable(&self) -> bool {
1244        !matches!(self, Outcome::Unmeasurable { .. })
1245    }
1246
1247    /// Get the leak probability if available.
1248    ///
1249    /// Returns `None` for `Unmeasurable` and `Research` (research mode uses CI, not probability).
1250    pub fn leak_probability(&self) -> Option<f64> {
1251        match self {
1252            Outcome::Pass {
1253                leak_probability, ..
1254            } => Some(*leak_probability),
1255            Outcome::Fail {
1256                leak_probability, ..
1257            } => Some(*leak_probability),
1258            Outcome::Inconclusive {
1259                leak_probability, ..
1260            } => Some(*leak_probability),
1261            Outcome::Unmeasurable { .. } => None,
1262            Outcome::Research(_) => None, // Research mode uses CI-based semantics
1263        }
1264    }
1265
1266    /// Get the effect estimate if available.
1267    pub fn effect(&self) -> Option<&EffectEstimate> {
1268        match self {
1269            Outcome::Pass { effect, .. } => Some(effect),
1270            Outcome::Fail { effect, .. } => Some(effect),
1271            Outcome::Inconclusive { effect, .. } => Some(effect),
1272            Outcome::Unmeasurable { .. } => None,
1273            Outcome::Research(res) => Some(&res.effect),
1274        }
1275    }
1276
1277    /// Get the measurement quality if available.
1278    pub fn quality(&self) -> Option<MeasurementQuality> {
1279        match self {
1280            Outcome::Pass { quality, .. } => Some(*quality),
1281            Outcome::Fail { quality, .. } => Some(*quality),
1282            Outcome::Inconclusive { quality, .. } => Some(*quality),
1283            Outcome::Unmeasurable { .. } => None,
1284            Outcome::Research(res) => Some(res.quality),
1285        }
1286    }
1287
1288    /// Get the diagnostics if available.
1289    pub fn diagnostics(&self) -> Option<&Diagnostics> {
1290        match self {
1291            Outcome::Pass { diagnostics, .. } => Some(diagnostics),
1292            Outcome::Fail { diagnostics, .. } => Some(diagnostics),
1293            Outcome::Inconclusive { diagnostics, .. } => Some(diagnostics),
1294            Outcome::Unmeasurable { .. } => None,
1295            Outcome::Research(res) => Some(&res.diagnostics),
1296        }
1297    }
1298
1299    /// Get the number of samples used if available.
1300    pub fn samples_used(&self) -> Option<usize> {
1301        match self {
1302            Outcome::Pass { samples_used, .. } => Some(*samples_used),
1303            Outcome::Fail { samples_used, .. } => Some(*samples_used),
1304            Outcome::Inconclusive { samples_used, .. } => Some(*samples_used),
1305            Outcome::Unmeasurable { .. } => None,
1306            Outcome::Research(res) => Some(res.samples_used),
1307        }
1308    }
1309
1310    /// Check if the measurement is reliable enough for assertions.
1311    ///
1312    /// Returns `true` if:
1313    /// - Test is conclusive (Pass or Fail), AND
1314    /// - Quality is not TooNoisy, OR posterior is very conclusive (< 0.1 or > 0.9)
1315    ///
1316    /// The key insight: a very conclusive posterior is trustworthy even with noisy
1317    /// measurements - the signal overcame the noise.
1318    ///
1319    /// For Research mode, reliability is based on whether the CI is clearly above
1320    /// or below the measurement floor.
1321    pub fn is_reliable(&self) -> bool {
1322        match self {
1323            Outcome::Unmeasurable { .. } => false,
1324            Outcome::Inconclusive { .. } => false,
1325            Outcome::Pass {
1326                quality,
1327                leak_probability,
1328                ..
1329            } => *quality != MeasurementQuality::TooNoisy || *leak_probability < 0.01,
1330            Outcome::Fail {
1331                quality,
1332                leak_probability,
1333                ..
1334            } => *quality != MeasurementQuality::TooNoisy || *leak_probability > 0.99,
1335            Outcome::Research(res) => {
1336                // Research mode is reliable if we reached a confident conclusion
1337                matches!(
1338                    res.status,
1339                    ResearchStatus::EffectDetected | ResearchStatus::NoEffectDetected
1340                )
1341            }
1342        }
1343    }
1344
1345    /// Unwrap a Pass result, panicking otherwise.
1346    pub fn unwrap_pass(self) -> (f64, EffectEstimate, MeasurementQuality, Diagnostics) {
1347        match self {
1348            Outcome::Pass {
1349                leak_probability,
1350                effect,
1351                quality,
1352                diagnostics,
1353                ..
1354            } => (leak_probability, effect, quality, diagnostics),
1355            _ => panic!("Expected Pass outcome, got {:?}", self),
1356        }
1357    }
1358
1359    /// Unwrap a Fail result, panicking otherwise.
1360    pub fn unwrap_fail(
1361        self,
1362    ) -> (
1363        f64,
1364        EffectEstimate,
1365        Exploitability,
1366        MeasurementQuality,
1367        Diagnostics,
1368    ) {
1369        match self {
1370            Outcome::Fail {
1371                leak_probability,
1372                effect,
1373                exploitability,
1374                quality,
1375                diagnostics,
1376                ..
1377            } => (
1378                leak_probability,
1379                effect,
1380                exploitability,
1381                quality,
1382                diagnostics,
1383            ),
1384            _ => panic!("Expected Fail outcome, got {:?}", self),
1385        }
1386    }
1387
1388    /// Handle unreliable results according to policy.
1389    ///
1390    /// Returns `Some(self)` if the result is reliable.
1391    /// For unreliable results:
1392    /// - `FailOpen`: prints warning, returns `None`
1393    /// - `FailClosed`: panics
1394    ///
1395    /// # Example
1396    ///
1397    /// ```ignore
1398    /// let outcome = oracle.test(...);
1399    /// if let Some(result) = outcome.handle_unreliable("test_name", UnreliablePolicy::FailOpen) {
1400    ///     assert!(result.passed());
1401    /// }
1402    /// ```
1403    #[cfg(feature = "std")]
1404    pub fn handle_unreliable(self, test_name: &str, policy: UnreliablePolicy) -> Option<Self> {
1405        if self.is_reliable() {
1406            return Some(self);
1407        }
1408
1409        let reason = match &self {
1410            Outcome::Unmeasurable { recommendation, .. } => {
1411                format!("unmeasurable: {}", recommendation)
1412            }
1413            Outcome::Inconclusive { reason, .. } => {
1414                format!("inconclusive: {:?}", reason)
1415            }
1416            Outcome::Pass { quality, .. } | Outcome::Fail { quality, .. } => {
1417                format!("unreliable quality: {:?}", quality)
1418            }
1419            Outcome::Research(research) => {
1420                format!("research mode: {:?}", research.status)
1421            }
1422        };
1423
1424        match policy {
1425            UnreliablePolicy::FailOpen => {
1426                eprintln!("[SKIPPED] {}: {} (fail-open policy)", test_name, reason);
1427                None
1428            }
1429            UnreliablePolicy::FailClosed => {
1430                panic!("[FAILED] {}: {} (fail-closed policy)", test_name, reason);
1431            }
1432        }
1433    }
1434
1435    /// Handle unreliable results according to policy (no_std version).
1436    ///
1437    /// In no_std mode, this always panics on unreliable results with FailClosed,
1438    /// and returns None with FailOpen (no printing).
1439    #[cfg(not(feature = "std"))]
1440    pub fn handle_unreliable(self, _test_name: &str, policy: UnreliablePolicy) -> Option<Self> {
1441        if self.is_reliable() {
1442            return Some(self);
1443        }
1444
1445        match policy {
1446            UnreliablePolicy::FailOpen => None,
1447            UnreliablePolicy::FailClosed => {
1448                panic!("Unreliable result with fail-closed policy");
1449            }
1450        }
1451    }
1452}
1453
1454// ============================================================================
1455// Display implementations
1456// ============================================================================
1457
1458impl fmt::Display for Outcome {
1459    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1460        write!(f, "{}", crate::formatting::format_outcome_plain(self))
1461    }
1462}
1463
1464impl fmt::Display for Exploitability {
1465    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1466        match self {
1467            Exploitability::SharedHardwareOnly => write!(f, "shared hardware only"),
1468            Exploitability::Http2Multiplexing => write!(f, "HTTP/2 multiplexing"),
1469            Exploitability::StandardRemote => write!(f, "standard remote"),
1470            Exploitability::ObviousLeak => write!(f, "obvious leak"),
1471        }
1472    }
1473}
1474
1475impl fmt::Display for MeasurementQuality {
1476    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1477        match self {
1478            MeasurementQuality::Excellent => write!(f, "excellent"),
1479            MeasurementQuality::Good => write!(f, "good"),
1480            MeasurementQuality::Poor => write!(f, "poor"),
1481            MeasurementQuality::TooNoisy => write!(f, "too noisy"),
1482        }
1483    }
1484}
1485
1486impl fmt::Display for InconclusiveReason {
1487    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1488        match self {
1489            InconclusiveReason::DataTooNoisy { message, guidance } => {
1490                write!(f, "Data too noisy: {}\n  \u{2192} {}", message, guidance)
1491            }
1492            InconclusiveReason::NotLearning { message, guidance } => {
1493                write!(f, "Not learning: {}\n  \u{2192} {}", message, guidance)
1494            }
1495            InconclusiveReason::WouldTakeTooLong {
1496                estimated_time_secs,
1497                samples_needed,
1498                guidance,
1499            } => {
1500                write!(
1501                    f,
1502                    "Would take too long: ~{:.0}s / {} samples needed\n  \u{2192} {}",
1503                    estimated_time_secs, samples_needed, guidance
1504                )
1505            }
1506            InconclusiveReason::TimeBudgetExceeded { .. } => {
1507                write!(f, "Time budget exceeded")
1508            }
1509            InconclusiveReason::SampleBudgetExceeded { .. } => {
1510                write!(f, "Sample budget exceeded")
1511            }
1512            InconclusiveReason::ConditionsChanged { message, guidance } => {
1513                write!(
1514                    f,
1515                    "Conditions changed: {}\n  \u{2192} {}",
1516                    message, guidance
1517                )
1518            }
1519            InconclusiveReason::ThresholdElevated {
1520                theta_user,
1521                theta_eff,
1522                leak_probability_at_eff,
1523                achievable_at_max,
1524                guidance,
1525                ..
1526            } => {
1527                let achievability = if *achievable_at_max {
1528                    "achievable with more samples"
1529                } else {
1530                    "not achievable at max samples"
1531                };
1532                write!(
1533                    f,
1534                    "Threshold elevated: requested {:.1}ns, used {:.1}ns (P={:.1}% at θ_eff, {})\n  \u{2192} {}",
1535                    theta_user, theta_eff, leak_probability_at_eff * 100.0, achievability, guidance
1536                )
1537            }
1538        }
1539    }
1540}
1541
1542// ============================================================================
1543// Debug implementation for Outcome
1544// ============================================================================
1545
1546impl fmt::Debug for Outcome {
1547    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1548        write!(f, "{}", crate::formatting::format_debug_summary_plain(self))
1549    }
1550}