tacet_core/result.rs
//! Result types for adaptive Bayesian timing analysis.
//!
//! See spec Section 4.1 (Result Types) for the full specification.

extern crate alloc;

use alloc::string::String;
use alloc::vec::Vec;
use core::fmt;
use serde::{Deserialize, Serialize};

// ============================================================================
// Outcome - The top-level result type
// ============================================================================

/// Top-level outcome of a timing test.
///
/// In standard mode, the adaptive Bayesian oracle returns one of four outcomes:
/// - `Pass`: No timing leak detected (leak_probability < pass_threshold)
/// - `Fail`: Timing leak confirmed (leak_probability > fail_threshold)
/// - `Inconclusive`: Cannot reach a definitive conclusion
/// - `Unmeasurable`: Operation too fast to measure on this platform
///
/// A fifth variant, `Research`, is returned when using `AttackerModel::Research`;
/// it characterizes timing behavior instead of making a threshold-based decision.
///
/// See spec Section 4.1 (Result Types).
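///
/// # Examples
///
/// A minimal sketch of handling each variant (assuming an `outcome: Outcome`
/// value obtained from a test run; the variable name is illustrative):
///
/// ```ignore
/// match outcome {
///     Outcome::Pass { leak_probability, .. } => {
///         println!("no leak detected (P = {:.3})", leak_probability);
///     }
///     Outcome::Fail { effect, exploitability, .. } => {
///         println!("leak: {:.1}ns ({})", effect.total_effect_ns(), exploitability);
///     }
///     Outcome::Inconclusive { reason, .. } => println!("inconclusive: {}", reason),
///     Outcome::Unmeasurable { recommendation, .. } => println!("{}", recommendation),
///     Outcome::Research(res) => println!("{}", res),
/// }
/// ```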
#[derive(Clone, Serialize, Deserialize)]
#[allow(clippy::large_enum_variant)]
pub enum Outcome {
    /// No timing leak detected.
    ///
    /// The posterior probability of a timing leak is below the pass threshold
    /// (default 0.05), meaning we're confident there is no exploitable leak.
    Pass {
        /// Posterior probability of timing leak: P(effect > theta | data).
        /// Will be < pass_threshold (default 0.05).
        leak_probability: f64,

        /// Effect size estimate (shift and tail components).
        effect: EffectEstimate,

        /// Number of samples used in the analysis.
        samples_used: usize,

        /// Measurement quality assessment.
        quality: MeasurementQuality,

        /// Diagnostic information for debugging.
        diagnostics: Diagnostics,

        /// User's requested threshold in nanoseconds.
        theta_user: f64,

        /// Effective threshold used for inference (may be elevated due to measurement floor).
        theta_eff: f64,

        /// Measurement floor at final sample count.
        theta_floor: f64,
    },

    /// Timing leak confirmed.
    ///
    /// The posterior probability of a timing leak exceeds the fail threshold
    /// (default 0.95), meaning we're confident there is an exploitable leak.
    Fail {
        /// Posterior probability of timing leak: P(effect > theta | data).
        /// Will be > fail_threshold (default 0.95).
        leak_probability: f64,

        /// Effect size estimate (shift and tail components).
        effect: EffectEstimate,

        /// Exploitability assessment based on effect magnitude.
        exploitability: Exploitability,

        /// Number of samples used in the analysis.
        samples_used: usize,

        /// Measurement quality assessment.
        quality: MeasurementQuality,

        /// Diagnostic information for debugging.
        diagnostics: Diagnostics,

        /// User's requested threshold in nanoseconds.
        theta_user: f64,

        /// Effective threshold used for inference (may be elevated due to measurement floor).
        theta_eff: f64,

        /// Measurement floor at final sample count.
        theta_floor: f64,
    },

    /// Cannot reach a definitive conclusion.
    ///
    /// The posterior probability is between pass_threshold and fail_threshold,
    /// or the analysis hit a limit (timeout, sample budget, noise).
    Inconclusive {
        /// Reason why the result is inconclusive.
        reason: InconclusiveReason,

        /// Current posterior probability of timing leak.
        leak_probability: f64,

        /// Effect size estimate (may have wide credible intervals).
        effect: EffectEstimate,

        /// Number of samples used in the analysis.
        samples_used: usize,

        /// Measurement quality assessment.
        quality: MeasurementQuality,

        /// Diagnostic information for debugging.
        diagnostics: Diagnostics,

        /// User's requested threshold in nanoseconds.
        theta_user: f64,

        /// Effective threshold used for inference (may be elevated due to measurement floor).
        theta_eff: f64,

        /// Measurement floor at final sample count.
        theta_floor: f64,
    },

    /// Operation too fast to measure reliably on this platform.
    ///
    /// The operation completes faster than the timer's resolution allows
    /// for meaningful measurement, even with adaptive batching.
    Unmeasurable {
        /// Estimated operation duration in nanoseconds.
        operation_ns: f64,

        /// Minimum measurable duration on this platform.
        threshold_ns: f64,

        /// Platform description (e.g., "Apple Silicon (cntvct)").
        platform: String,

        /// Suggested actions to make the operation measurable.
        recommendation: String,
    },

    /// Research mode result.
    ///
    /// Returned when using `AttackerModel::Research`. Unlike Pass/Fail/Inconclusive
    /// which make threshold-based decisions, research mode characterizes the
    /// timing behavior relative to the measurement floor using CI-based semantics.
    ///
    /// See `ResearchOutcome` for details on the stopping conditions.
    Research(ResearchOutcome),
}

// ============================================================================
// InconclusiveReason - Why we couldn't reach a conclusion
// ============================================================================

/// Reason why a timing test result is inconclusive.
///
/// See spec Section 4.1 (Result Types).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub enum InconclusiveReason {
    /// Data is too noisy to reach a conclusion.
    ///
    /// The measurement noise is high enough that we cannot distinguish
    /// between "no leak" and "small leak" with the available samples.
    DataTooNoisy {
        /// Human-readable explanation.
        message: String,
        /// Suggested actions to improve measurement quality.
        guidance: String,
    },

    /// Posterior is not converging toward either threshold.
    ///
    /// After collecting samples, the leak probability remains in the
    /// inconclusive range and isn't trending toward pass or fail.
    NotLearning {
        /// Human-readable explanation.
        message: String,
        /// Suggested actions.
        guidance: String,
    },

    /// Reaching a conclusion would take too long.
    ///
    /// Based on current convergence rate, reaching the pass or fail
    /// threshold would exceed the configured time budget.
    WouldTakeTooLong {
        /// Estimated time in seconds to reach a conclusion.
        estimated_time_secs: f64,
        /// Estimated samples needed to reach a conclusion.
        samples_needed: usize,
        /// Suggested actions.
        guidance: String,
    },

    /// Time budget exhausted.
    ///
    /// The configured time limit was reached before the posterior
    /// converged to a conclusive result.
    TimeBudgetExceeded {
        /// Posterior probability when budget was exhausted.
        current_probability: f64,
        /// Number of samples collected.
        samples_collected: usize,
    },

    /// Sample budget exhausted.
    ///
    /// The maximum number of samples was collected without reaching
    /// a conclusive result.
    SampleBudgetExceeded {
        /// Posterior probability when budget was exhausted.
        current_probability: f64,
        /// Number of samples collected.
        samples_collected: usize,
    },

    /// Measurement conditions changed during the test.
    ///
    /// Detected by comparing calibration statistics with post-test statistics.
    /// This can indicate environmental interference (CPU frequency scaling,
    /// concurrent processes, etc.) that invalidates the covariance estimate.
    /// See spec Section 2.6, Gate 6.
    ConditionsChanged {
        /// Human-readable explanation.
        message: String,
        /// Suggested actions.
        guidance: String,
    },

    /// Threshold was elevated and pass criterion was met at effective threshold.
    ///
    /// The measurement floor exceeded the user's requested threshold, so inference
    /// was performed at an elevated effective threshold. The posterior probability
    /// dropped below pass_threshold at θ_eff, but since θ_eff > θ_user + ε, we
    /// cannot guarantee the user's original requirement is met.
    ///
    /// This is NOT a quality gate failure - it's a semantic constraint: Pass requires
    /// both P < pass_threshold AND θ_eff ≤ θ_user + ε.
    ///
    /// See spec Section 3.5.3 (v5.5 Threshold Elevation Decision Rule).
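    ///
    /// Illustrative example (the numbers are hypothetical): with θ_user = 5 ns and a
    /// measurement floor θ_floor = 20 ns, inference runs at θ_eff = 20 ns. Even if
    /// P(effect > 20 ns) = 0.01 < pass_threshold, the outcome is Inconclusive with
    /// this reason rather than Pass, because θ_eff = 20 ns exceeds θ_user + ε.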
    ThresholdElevated {
        /// User's requested threshold in nanoseconds (θ_user).
        theta_user: f64,
        /// Effective threshold used for inference (θ_eff = max(θ_user, θ_floor)).
        theta_eff: f64,
        /// Posterior probability at θ_eff (was < pass_threshold).
        leak_probability_at_eff: f64,
        /// True: P(leak > θ_eff) < pass_threshold (pass criterion met at elevated threshold).
        meets_pass_criterion_at_eff: bool,
        /// True: θ_floor at max_samples would be ≤ θ_user + ε (more samples could achieve user threshold).
        achievable_at_max: bool,
        /// Human-readable explanation.
        message: String,
        /// Suggested actions.
        guidance: String,
    },
}

// ============================================================================
// EffectEstimate - Decomposed timing effect
// ============================================================================

/// Estimated timing effect decomposed into shift and tail components.
///
/// The effect is decomposed using a 2-component linear model:
/// - **Shift**: Uniform timing difference across all quantiles (e.g., different code path)
/// - **Tail**: Upper quantiles shift more than lower (e.g., cache misses)
///
/// See spec Section 2.5 (Bayesian Inference).
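///
/// # Examples
///
/// A minimal sketch (the literal values are illustrative only):
///
/// ```ignore
/// let effect = EffectEstimate {
///     shift_ns: 12.0,
///     tail_ns: 3.0,
///     credible_interval_ns: (8.0, 16.0),
///     pattern: EffectPattern::UniformShift,
///     interpretation_caveat: None,
/// };
/// assert!(effect.total_effect_ns() > 12.0);
/// assert!(!effect.is_negligible(1.0));
/// ```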
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EffectEstimate {
    /// Uniform shift in nanoseconds.
    ///
    /// Positive value means the sample class is slower (timing leak detected).
    /// Negative value means the sample class is faster (no leak, or unusual).
    /// This captures effects like branch timing where all operations
    /// take a fixed additional time.
    pub shift_ns: f64,

    /// Tail effect in nanoseconds.
    ///
    /// Positive value means the sample class has a heavier upper tail.
    /// This captures effects like cache misses that occur probabilistically.
    pub tail_ns: f64,

    /// 95% credible interval for the total effect magnitude in nanoseconds.
    ///
    /// This is a Bayesian credible interval, not a frequentist confidence interval.
    /// There is a 95% posterior probability that the true effect lies within this range.
    pub credible_interval_ns: (f64, f64),

    /// Classification of the dominant effect pattern.
    pub pattern: EffectPattern,

    /// When Some, the (μ, τ) decomposition may be unreliable.
    ///
    /// This is set when model fit is poor (Q > q_thresh), indicating that the
    /// observed quantile pattern is not well-explained by the shift+tail basis.
    /// The shift_ns and tail_ns values should be interpreted with caution.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub interpretation_caveat: Option<String>,
}

impl EffectEstimate {
    /// Compute the total effect magnitude (L2 norm of shift and tail).
    #[cfg(feature = "std")]
    pub fn total_effect_ns(&self) -> f64 {
        (self.shift_ns.powi(2) + self.tail_ns.powi(2)).sqrt()
    }

    /// Compute the total effect magnitude (L2 norm of shift and tail).
    #[cfg(not(feature = "std"))]
    pub fn total_effect_ns(&self) -> f64 {
        libm::sqrt(self.shift_ns * self.shift_ns + self.tail_ns * self.tail_ns)
    }

    /// Check if the effect is negligible (both components near zero).
    pub fn is_negligible(&self, threshold_ns: f64) -> bool {
        self.shift_ns.abs() < threshold_ns && self.tail_ns.abs() < threshold_ns
    }
}

impl Default for EffectEstimate {
    fn default() -> Self {
        Self {
            shift_ns: 0.0,
            tail_ns: 0.0,
            credible_interval_ns: (0.0, 0.0),
            pattern: EffectPattern::Indeterminate,
            interpretation_caveat: None,
        }
    }
}

// ============================================================================
// EffectPattern - Classification of timing effect type
// ============================================================================

/// Pattern of timing difference.
///
/// Classifies the dominant type of timing difference based on the
/// relative magnitudes of shift and tail components.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)]
pub enum EffectPattern {
    /// Uniform shift across all quantiles.
    ///
    /// All quantiles shift by approximately the same amount.
    /// Typical cause: branch on secret data, different code path.
    UniformShift,

    /// Primarily affects upper tail.
    ///
    /// Upper quantiles (e.g., 80th, 90th percentile) shift more than
    /// lower quantiles. Typical cause: cache misses, memory access patterns.
    TailEffect,

    /// Mixed pattern with both shift and tail components.
    ///
    /// Both uniform shift and tail effect are significant.
    Mixed,

    /// Neither shift nor tail is statistically significant.
    ///
    /// The effect magnitude is below the detection threshold or
    /// uncertainty is too high to classify.
    #[default]
    Indeterminate,
}

// ============================================================================
// Exploitability - Risk assessment
// ============================================================================

/// Exploitability assessment based on effect magnitude.
///
/// Based on Crosby et al. (2009) thresholds for timing attack feasibility.
/// These thresholds are heuristics based on academic research for risk
/// prioritization, not guarantees. The thresholds reflect modern attack
/// techniques including HTTP/2 multiplexing (Timeless Timing Attacks) and
/// shared-hardware attacks (KyberSlash, Flush+Reload).
///
/// See spec Section 5.4 (Exploitability).
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
pub enum Exploitability {
    /// Effect < 10 ns: Requires shared hardware to exploit.
    ///
    /// Only exploitable by attackers with physical co-location: SGX enclaves,
    /// hyperthreading on same core, containers on same host, or cross-VM on
    /// shared cache. Remote exploitation is impractical.
    ///
    /// References: KyberSlash (2024), Flush+Reload, Prime+Probe literature
    SharedHardwareOnly,

    /// 10-100 ns: Exploitable via HTTP/2 request multiplexing.
    ///
    /// Requires ~100k concurrent HTTP/2 requests to exploit. The "Timeless
    /// Timing Attacks" technique eliminates network jitter by sending requests
    /// that arrive simultaneously, making response order reveal timing differences.
    ///
    /// Reference: Van Goethem et al., "Timeless Timing Attacks" (USENIX Security 2020)
    Http2Multiplexing,

    /// 100 ns - 10 μs: Exploitable with standard remote timing.
    ///
    /// Requires ~1k-10k requests using traditional timing techniques.
    /// Exploitable on LAN with any protocol, or over internet with HTTP/2.
    ///
    /// References: Crosby et al. (2009), Brumley & Boneh (2005)
    StandardRemote,

    /// > 10 μs: Obvious timing leak, trivially exploitable.
    ///
    /// Detectable with < 100 requests. Exploitable over the internet even
    /// with high-jitter connections using traditional timing techniques.
    ObviousLeak,
}

impl Exploitability {
    /// Determine exploitability from effect size in nanoseconds.
    ///
    /// Thresholds are based on:
    /// - < 10 ns: Below HTTP/2 timing precision, requires shared hardware
    /// - 10-100 ns: Within HTTP/2 "Timeless Timing Attacks" range
    /// - 100 ns - 10 μs: Standard remote timing attack range
    /// - > 10 μs: Trivially observable
    pub fn from_effect_ns(effect_ns: f64) -> Self {
        let effect_ns = effect_ns.abs();
        if effect_ns < 10.0 {
            Exploitability::SharedHardwareOnly
        } else if effect_ns < 100.0 {
            Exploitability::Http2Multiplexing
        } else if effect_ns < 10_000.0 {
            Exploitability::StandardRemote
        } else {
            Exploitability::ObviousLeak
        }
    }
}

// ============================================================================
// MeasurementQuality - Assessment of measurement reliability
// ============================================================================

/// Measurement quality assessment based on noise level.
///
/// Quality is determined primarily by the minimum detectable effect (MDE)
/// relative to the configured threshold.
///
/// See spec Section 5.5 (Quality Assessment).
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
pub enum MeasurementQuality {
    /// Low noise, high confidence (MDE < 5 ns).
    Excellent,

    /// Normal noise levels (MDE 5-20 ns).
    Good,

    /// High noise, results less reliable (MDE 20-100 ns).
    Poor,

    /// Cannot produce meaningful results (MDE > 100 ns).
    TooNoisy,
}

impl MeasurementQuality {
    /// Determine quality from minimum detectable effect.
    ///
    /// Invalid MDE values (less than or equal to 0 or non-finite) indicate a measurement problem
    /// and are classified as `TooNoisy`.
    ///
    /// Very small MDE (< 0.01 ns) also indicates timer resolution issues
    /// where most samples have identical values.
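    ///
    /// # Examples
    ///
    /// A minimal sketch of the mapping (assuming the type is in scope):
    ///
    /// ```ignore
    /// assert_eq!(MeasurementQuality::from_mde_ns(2.0), MeasurementQuality::Excellent);
    /// assert_eq!(MeasurementQuality::from_mde_ns(10.0), MeasurementQuality::Good);
    /// assert_eq!(MeasurementQuality::from_mde_ns(50.0), MeasurementQuality::Poor);
    /// assert_eq!(MeasurementQuality::from_mde_ns(500.0), MeasurementQuality::TooNoisy);
    /// assert_eq!(MeasurementQuality::from_mde_ns(0.0), MeasurementQuality::TooNoisy);
    /// ```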
    pub fn from_mde_ns(mde_ns: f64) -> Self {
        // Invalid MDE indicates measurement failure
        if mde_ns <= 0.01 || !mde_ns.is_finite() {
            return MeasurementQuality::TooNoisy;
        }

        if mde_ns < 5.0 {
            MeasurementQuality::Excellent
        } else if mde_ns < 20.0 {
            MeasurementQuality::Good
        } else if mde_ns < 100.0 {
            MeasurementQuality::Poor
        } else {
            MeasurementQuality::TooNoisy
        }
    }
}

// ============================================================================
// ResearchOutcome - Result type for research mode
// ============================================================================

/// Status of a research mode run.
///
/// Research mode (AttackerModel::Research) doesn't make Pass/Fail decisions.
/// Instead, it characterizes the timing behavior with respect to the measurement floor.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub enum ResearchStatus {
    /// CI clearly above θ_floor — timing difference detected.
    ///
    /// The 95% credible interval lower bound is clearly above the measurement
    /// floor (> 1.1 × θ_floor), indicating a confidently detectable effect.
    EffectDetected,

    /// CI clearly below θ_floor — no timing difference above noise.
    ///
    /// The 95% credible interval upper bound is clearly below the measurement
    /// floor (< 0.9 × θ_floor), indicating no detectable effect.
    NoEffectDetected,

    /// Hit timer resolution limit; θ_floor is as good as it gets.
    ///
    /// Further sampling won't improve the measurement floor because we've
    /// hit the fundamental timer tick resolution.
    ResolutionLimitReached,

    /// Data quality issue detected.
    ///
    /// A quality gate triggered during research mode. Unlike standard mode,
    /// this doesn't block the result but is reported for transparency.
    QualityIssue(InconclusiveReason),

    /// Ran out of time/samples before reaching a conclusion.
    ///
    /// The budget was exhausted before the CI could confidently settle
    /// above or below the measurement floor.
    BudgetExhausted,
}

/// Research mode outcome (spec v4.1 research mode).
///
/// This struct is returned when using `AttackerModel::Research`. Unlike the
/// standard `Outcome` which makes Pass/Fail decisions, research mode characterizes
/// the timing behavior relative to the measurement floor.
///
/// Key differences from standard mode:
/// - No Pass/Fail verdict (no threshold comparison)
/// - Reports measurement floor (`theta_floor`) at final sample size
/// - `detectable` field indicates if CI lower bound > floor
/// - `model_mismatch` is non-blocking (tracked but doesn't stop analysis)
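///
/// # Examples
///
/// A minimal sketch of interpreting a research result (assuming `res: ResearchOutcome`
/// was produced by a research-mode run; the variable name is illustrative):
///
/// ```ignore
/// if res.is_effect_detected() {
///     println!(
///         "effect {:.1}ns (CI {:.1}-{:.1}ns) above floor {:.1}ns",
///         res.max_effect_ns, res.max_effect_ci.0, res.max_effect_ci.1, res.theta_floor
///     );
/// } else if res.is_no_effect_detected() {
///     println!("no effect above the measurement floor");
/// } else {
///     println!("status: {}", res.status);
/// }
/// ```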
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ResearchOutcome {
    /// Research outcome status.
    pub status: ResearchStatus,

    /// Maximum effect across quantiles: max_k |(Xβ)_k| in nanoseconds.
    /// This is the posterior mean of the maximum absolute predicted effect.
    pub max_effect_ns: f64,

    /// 95% credible interval for maximum effect: (2.5th, 97.5th percentile).
    pub max_effect_ci: (f64, f64),

    /// Measurement floor at final sample size.
    /// This is the minimum detectable effect given measurement noise.
    pub theta_floor: f64,

    /// True if the effect is detectable: CI lower bound > theta_floor.
    pub detectable: bool,

    /// True if model mismatch was detected (Q > q_thresh).
    /// In research mode, this is non-blocking but adds a caveat to interpretation.
    pub model_mismatch: bool,

    /// Effect size estimate with decomposition.
    /// If `model_mismatch` is true, `interpretation_caveat` will be set.
    pub effect: EffectEstimate,

    /// Number of samples used.
    pub samples_used: usize,

    /// Measurement quality assessment.
    pub quality: MeasurementQuality,

    /// Diagnostic information.
    pub diagnostics: Diagnostics,
}

impl ResearchOutcome {
    /// Check if a timing effect was confidently detected.
    pub fn is_effect_detected(&self) -> bool {
        matches!(self.status, ResearchStatus::EffectDetected)
    }

    /// Check if no effect was confidently detected.
    pub fn is_no_effect_detected(&self) -> bool {
        matches!(self.status, ResearchStatus::NoEffectDetected)
    }

    /// Check if the resolution limit was reached.
    pub fn is_resolution_limit_reached(&self) -> bool {
        matches!(self.status, ResearchStatus::ResolutionLimitReached)
    }

    /// Check if there was a quality issue.
    pub fn has_quality_issue(&self) -> bool {
        matches!(self.status, ResearchStatus::QualityIssue(_))
    }

    /// Get the effect estimate.
    pub fn effect(&self) -> &EffectEstimate {
        &self.effect
    }

    /// Get the measurement quality.
    pub fn quality(&self) -> MeasurementQuality {
        self.quality
    }

    /// Get the diagnostics.
    pub fn diagnostics(&self) -> &Diagnostics {
        &self.diagnostics
    }
}

impl fmt::Display for ResearchStatus {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            ResearchStatus::EffectDetected => write!(f, "effect detected"),
            ResearchStatus::NoEffectDetected => write!(f, "no effect detected"),
            ResearchStatus::ResolutionLimitReached => write!(f, "resolution limit reached"),
            ResearchStatus::QualityIssue(reason) => write!(f, "quality issue: {}", reason),
            ResearchStatus::BudgetExhausted => write!(f, "budget exhausted"),
        }
    }
}

impl fmt::Display for ResearchOutcome {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        writeln!(f, "Research Mode: {}", self.status)?;
        writeln!(
            f,
            " Max effect: {:.2}ns (CI: {:.2}-{:.2}ns)",
            self.max_effect_ns, self.max_effect_ci.0, self.max_effect_ci.1
        )?;
        writeln!(f, " Measurement floor: {:.2}ns", self.theta_floor)?;
        writeln!(
            f,
            " Detectable: {}",
            if self.detectable { "yes" } else { "no" }
        )?;
        if self.model_mismatch {
            writeln!(f, " Warning: model mismatch detected")?;
        }
        writeln!(f, " Samples: {}", self.samples_used)?;
        writeln!(f, " Quality: {}", self.quality)?;
        Ok(())
    }
}

// ============================================================================
// TopQuantile - Information about significant quantiles
// ============================================================================

/// Information about a significant quantile (for projection mismatch reporting).
///
/// When the 2D (shift, tail) projection doesn't fit the data well, this struct
/// provides information about which individual quantiles are driving the leak
/// detection. This helps diagnose effects that don't fit the shift+tail model
/// (e.g., effects concentrated at a single quantile).
///
/// See spec Section 7.5 (Per-Quantile Exceedance).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TopQuantile {
    /// Quantile probability (e.g., 0.9 for 90th percentile).
    pub quantile_p: f64,

    /// Posterior mean δ_k in nanoseconds.
    pub mean_ns: f64,

    /// 95% marginal credible interval (lower, upper) in nanoseconds.
    pub ci95_ns: (f64, f64),

    /// P(|δ_k| > θ_eff | Δ) - per-quantile exceedance probability.
    ///
    /// This is the probability that this individual quantile's effect
    /// exceeds the threshold, computed from the marginal posterior.
    pub exceed_prob: f64,
}

impl TopQuantile {
    /// Create a new TopQuantile entry.
    pub fn new(quantile_p: f64, mean_ns: f64, ci95_ns: (f64, f64), exceed_prob: f64) -> Self {
        Self {
            quantile_p,
            mean_ns,
            ci95_ns,
            exceed_prob,
        }
    }
}

// ============================================================================
// Diagnostics - Detailed diagnostic information
// ============================================================================

/// Diagnostic information for debugging and analysis.
///
/// See spec Section 4.1 (Result Types).
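///
/// # Examples
///
/// A minimal sketch (`all_ok` is the placeholder constructor defined below):
///
/// ```ignore
/// let diag = Diagnostics::all_ok();
/// assert!(diag.all_checks_passed());
/// assert!(diag.warnings.is_empty());
/// ```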
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Diagnostics {
    /// Block size used for bootstrap (Politis-White automatic selection).
    pub dependence_length: usize,

    /// Effective sample size accounting for autocorrelation (ESS approx n / dependence_length).
    pub effective_sample_size: usize,

    /// Non-stationarity: ratio of inference to calibration variance.
    /// Values 0.5-2.0 are normal; >5.0 indicates non-stationarity.
    pub stationarity_ratio: f64,

    /// True if stationarity ratio is within acceptable bounds (0.5-2.0).
    pub stationarity_ok: bool,

    /// Projection mismatch Q statistic.
    ///
    /// Measures how well the 2D (shift, tail) model fits the 9D quantile differences.
    /// A high value indicates the effect is concentrated at specific quantiles
    /// rather than following the shift+tail pattern.
    pub projection_mismatch_q: f64,

    /// Bootstrap-calibrated threshold for projection mismatch Q statistic.
    /// Q > threshold indicates the 2D projection may be unreliable.
    pub projection_mismatch_threshold: f64,

    /// True if projection fits well (Q <= threshold).
    ///
    /// When false, the shift_ns and tail_ns estimates should be interpreted
    /// with caution; use top_quantiles for more detailed information.
    pub projection_mismatch_ok: bool,

    /// Top quantiles by exceedance probability (when projection mismatch detected).
    ///
    /// When projection_mismatch_ok is false, this field contains detailed
    /// information about which quantiles drive the leak detection. This helps
    /// diagnose effects that don't fit the shift+tail model (e.g., effects
    /// concentrated at a single quantile).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub top_quantiles: Option<Vec<TopQuantile>>,

    /// Outlier rate for baseline class (fraction trimmed).
    pub outlier_rate_baseline: f64,

    /// Outlier rate for sample class (fraction trimmed).
    pub outlier_rate_sample: f64,

    /// True if outlier rates are symmetric (both <1%, ratio <3x, diff <2%).
    pub outlier_asymmetry_ok: bool,

    /// Whether discrete timer mode was used (low timer resolution).
    pub discrete_mode: bool,

    /// Timer resolution in nanoseconds.
    pub timer_resolution_ns: f64,

    /// Fraction of samples with duplicate timing values (0.0-1.0).
    pub duplicate_fraction: f64,

    /// True if preflight checks passed (sanity, generator, system).
    pub preflight_ok: bool,

    /// Number of samples used for calibration (covariance estimation).
    pub calibration_samples: usize,

    /// Total time spent on the analysis in seconds.
    pub total_time_secs: f64,

    /// Human-readable warnings (empty if all checks pass).
    pub warnings: Vec<String>,

    /// Quality issues detected during measurement.
    pub quality_issues: Vec<QualityIssue>,

    /// Preflight warnings from calibration phase.
    ///
    /// These warnings are categorized by severity:
    /// - `Informational`: Sampling efficiency issues (results still valid)
    /// - `ResultUndermining`: Statistical assumption violations (results may be unreliable)
    pub preflight_warnings: Vec<PreflightWarningInfo>,

    // =========================================================================
    // Reproduction info (for verbose/debug output)
    // =========================================================================
    /// Measurement seed used for reproducibility.
    pub seed: Option<u64>,

    /// Attacker model name (e.g., "AdjacentNetwork", "SharedHardware").
    pub attacker_model: Option<String>,

    /// Effect threshold (theta) in nanoseconds.
    pub threshold_ns: f64,

    /// Timer implementation name (e.g., "rdtsc", "cntvct_el0", "kperf").
    pub timer_name: String,

    /// Platform description (e.g., "macos-aarch64").
    pub platform: String,

    /// Reason the timer fell back from high-precision PMU (if applicable).
    ///
    /// Used to generate context-aware recommendations in output.
    /// - "concurrent access": kperf locked by another process
    /// - "no sudo": not running with elevated privileges
    /// - "unavailable": PMU init failed despite privileges
    /// - None: using high-precision timer or x86_64 (rdtsc is already ~0.3ns)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub timer_fallback_reason: Option<String>,

    // =========================================================================
    // v5.4 Gibbs sampler diagnostics
    // =========================================================================
    /// v5.4: Total number of Gibbs iterations.
    pub gibbs_iters_total: usize,

    /// v5.4: Number of burn-in iterations.
    pub gibbs_burnin: usize,

    /// v5.4: Number of retained samples.
    pub gibbs_retained: usize,

    /// v5.4: Posterior mean of latent scale λ.
    pub lambda_mean: f64,

    /// v5.4: Posterior standard deviation of λ.
    pub lambda_sd: f64,

    /// v5.4: Coefficient of variation of λ (λ_sd / λ_mean).
    pub lambda_cv: f64,

    /// v5.4: Effective sample size of λ chain.
    pub lambda_ess: f64,

    /// v5.4: Whether λ chain mixed well (CV ≥ 0.1 AND ESS ≥ 20).
    pub lambda_mixing_ok: bool,

    // =========================================================================
    // v5.6 Gibbs sampler κ (kappa) diagnostics - robust t-likelihood
    // =========================================================================
    /// v5.6: Posterior mean of likelihood precision κ.
    pub kappa_mean: f64,

    /// v5.6: Posterior standard deviation of κ.
    pub kappa_sd: f64,

    /// v5.6: Coefficient of variation of κ (kappa_sd / kappa_mean).
    pub kappa_cv: f64,

    /// v5.6: Effective sample size of κ chain.
    pub kappa_ess: f64,

    /// v5.6: Whether κ chain mixed well (CV ≥ 0.1 AND ESS ≥ 20).
    pub kappa_mixing_ok: bool,
}

impl Diagnostics {
    /// Create diagnostics indicating all checks passed.
    ///
    /// Uses placeholder values for numeric fields; prefer constructing
    /// explicitly with actual measured values.
    pub fn all_ok() -> Self {
        Self {
            dependence_length: 1,
            effective_sample_size: 0,
            stationarity_ratio: 1.0,
            stationarity_ok: true,
            projection_mismatch_q: 0.0,
            projection_mismatch_threshold: 18.48, // chi-squared(7, 0.99) as default
            projection_mismatch_ok: true,
            top_quantiles: None,
            outlier_rate_baseline: 0.0,
            outlier_rate_sample: 0.0,
            outlier_asymmetry_ok: true,
            discrete_mode: false,
            timer_resolution_ns: 1.0,
            duplicate_fraction: 0.0,
            preflight_ok: true,
            calibration_samples: 0,
            total_time_secs: 0.0,
            warnings: Vec::new(),
            quality_issues: Vec::new(),
            preflight_warnings: Vec::new(),
            seed: None,
            attacker_model: None,
            threshold_ns: 0.0,
            timer_name: String::new(),
            platform: String::new(),
            timer_fallback_reason: None,
            // v5.4 Gibbs sampler diagnostics
            gibbs_iters_total: 256,
            gibbs_burnin: 64,
            gibbs_retained: 192,
            lambda_mean: 1.0,
            lambda_sd: 0.0,
            lambda_cv: 0.0,
            lambda_ess: 0.0,
            lambda_mixing_ok: true,
            // v5.6 kappa diagnostics
            kappa_mean: 1.0,
            kappa_sd: 0.0,
            kappa_cv: 0.0,
            kappa_ess: 0.0,
            kappa_mixing_ok: true,
        }
    }

    /// Check if all diagnostics are OK.
    pub fn all_checks_passed(&self) -> bool {
        self.stationarity_ok
            && self.projection_mismatch_ok
            && self.outlier_asymmetry_ok
            && self.preflight_ok
    }
}

impl Default for Diagnostics {
    fn default() -> Self {
        Self::all_ok()
    }
}

// ============================================================================
// QualityIssue - Specific quality problems
// ============================================================================

/// A specific quality issue detected during measurement.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct QualityIssue {
    /// Issue code for programmatic handling.
    pub code: IssueCode,

    /// Human-readable description of the issue.
    pub message: String,

    /// Suggested actions to address the issue.
    pub guidance: String,
}

/// Issue codes for programmatic handling of quality problems.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash)]
pub enum IssueCode {
    /// High autocorrelation reduces effective sample size.
    HighDependence,

    /// Effective sample size is too low for reliable inference.
    LowEffectiveSamples,

    /// Timing distribution appears to drift during measurement.
    StationaritySuspect,

    /// Timer has low resolution, using discrete mode.
    DiscreteTimer,

    /// Sample count is small for discrete mode bootstrap.
    SmallSampleDiscrete,

    /// Generator cost differs between classes.
    HighGeneratorCost,

    /// Low entropy in random inputs (possible API misuse).
    LowUniqueInputs,

    /// Some quantiles were filtered from analysis.
    QuantilesFiltered,

    /// Threshold was clamped to timer resolution.
    ThresholdClamped,

    /// High fraction of samples were winsorized.
    HighWinsorRate,

    /// User's threshold was elevated due to measurement floor.
    ThresholdElevated,

    /// Slab component dominates posterior (v5.2 mixture prior).
    ///
    /// The wide "slab" prior component has higher posterior weight than the
    /// narrow component, indicating evidence strongly favors a large effect
    /// (well above the threshold). This is informational, not a problem.
    ///
    /// DEPRECATED: v5.4 uses Student's t prior with Gibbs sampling instead
    /// of mixture prior. This code is kept for backwards compatibility.
    SlabDominant,

    /// v5.4: Gibbs sampler's lambda chain did not mix well.
    ///
    /// The latent scale variable λ showed poor mixing (CV < 0.1 or ESS < 20),
    /// indicating the posterior may be unreliable. This typically occurs with
    /// very small or very large effects where the posterior is concentrated.
    LambdaMixingPoor,

    /// v5.6: Gibbs sampler's kappa chain did not mix well.
    ///
    /// The likelihood precision variable κ showed poor mixing (CV < 0.1 or ESS < 20),
    /// indicating the posterior may be unreliable.
    KappaMixingPoor,

    /// v5.6: Likelihood covariance was inflated (kappa_mean < 0.3).
    ///
    /// The robust t-likelihood inflated covariance by ~1/κ_mean to accommodate
    /// data that doesn't match the estimated Σₙ. Effect estimates remain valid
    /// but uncertainty was increased for robustness.
    LikelihoodInflated,
}

// ============================================================================
// PreflightWarning - Preflight check results
// ============================================================================

/// Severity of a preflight warning.
///
/// This distinction is critical for interpreting results:
///
/// - **Informational**: Affects sampling efficiency but not result validity.
///   The Bayesian posterior is still trustworthy; you just needed more samples
///   to reach the same confidence level. Examples: high autocorrelation,
///   coarse timer resolution, suboptimal CPU governor.
///
/// - **ResultUndermining**: Violates statistical assumptions the Bayesian model
///   relies on. The posterior confidence may be misplaced because the model's
///   assumptions don't hold. Examples: non-monotonic timer (measurements are
///   garbage), severe non-stationarity (distribution changed during measurement),
///   broken harness with mutable state (Fixed-vs-Fixed inconsistency).
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
pub enum PreflightSeverity {
    /// Sampling efficiency issue - doesn't invalidate results.
    ///
    /// These warnings indicate that the measurement setup is suboptimal and
    /// required more samples to reach a conclusion, but the Bayesian posterior
    /// is still valid. The result can be trusted.
    ///
    /// Examples:
    /// - High autocorrelation (reduces effective sample size)
    /// - Coarse timer resolution (requires more samples)
    /// - Suboptimal CPU governor (adds variance)
    /// - Generator cost asymmetry (may inflate differences but doesn't invalidate)
    Informational,

    /// Statistical assumption violation - undermines result confidence.
    ///
    /// These warnings indicate that fundamental assumptions of the Bayesian
    /// model may be violated. Even if the posterior appears confident, that
    /// confidence may be misplaced.
    ///
    /// Examples:
    /// - Non-monotonic timer (measurements are meaningless)
    /// - Severe non-stationarity (distribution changed during measurement)
    /// - Fixed-vs-Fixed inconsistency with randomization (likely mutable state bug)
    ResultUndermining,
}

/// Category of preflight check.
///
/// Used for organizing warnings in output and for programmatic filtering.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash)]
pub enum PreflightCategory {
    /// Timer sanity checks (monotonicity, basic functionality).
    ///
    /// **Severity**: ResultUndermining if failed - measurements are unreliable.
    TimerSanity,

    /// Fixed-vs-Fixed internal consistency check.
    ///
    /// **Severity**: ResultUndermining if triggered - may indicate mutable state
    /// captured in test closure, or severe environmental interference.
    /// Note: May be intentional for FPR validation testing.
    Sanity,

    /// Autocorrelation in timing samples.
    ///
    /// **Severity**: Informational - reduces effective sample size but the
    /// block bootstrap accounts for this.
    Autocorrelation,

    /// System configuration (CPU governor, turbo boost, etc.).
    ///
    /// **Severity**: Informational - suboptimal config adds variance but
    /// doesn't invalidate results.
    System,

    /// Timer resolution and precision.
    ///
    /// **Severity**: Informational - coarse timers require more samples but
    /// adaptive batching compensates for this.
    Resolution,

    /// Stationarity of timing distribution.
    ///
    /// **Severity**: ResultUndermining if severely violated - indicates the
    /// timing distribution changed during measurement.
    Stationarity,
}

/// Information about a preflight warning.
///
/// Preflight warnings are collected during the calibration phase and reported
/// to help users understand measurement quality and potential issues.
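///
/// # Examples
///
/// A minimal sketch of constructing a warning (the message text is illustrative):
///
/// ```ignore
/// let warning = PreflightWarningInfo::with_guidance(
///     PreflightCategory::System,
///     PreflightSeverity::Informational,
///     "CPU governor is not set to performance",
///     "Set the governor to performance to reduce variance",
/// );
/// assert!(!warning.is_result_undermining());
/// ```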
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PreflightWarningInfo {
    /// Category of the check that generated this warning.
    pub category: PreflightCategory,

    /// Severity of this warning.
    ///
    /// - `Informational`: Sampling efficiency issue, results still valid.
    /// - `ResultUndermining`: Statistical assumption violation, results may be unreliable.
    pub severity: PreflightSeverity,

    /// Human-readable description of the warning.
    pub message: String,

    /// Optional guidance for addressing the issue.
    pub guidance: Option<String>,
}

impl PreflightWarningInfo {
    /// Create a new preflight warning.
    pub fn new(
        category: PreflightCategory,
        severity: PreflightSeverity,
        message: impl Into<String>,
    ) -> Self {
        Self {
            category,
            severity,
            message: message.into(),
            guidance: None,
        }
    }

    /// Create a new preflight warning with guidance.
    pub fn with_guidance(
        category: PreflightCategory,
        severity: PreflightSeverity,
        message: impl Into<String>,
        guidance: impl Into<String>,
    ) -> Self {
        Self {
            category,
            severity,
            message: message.into(),
            guidance: Some(guidance.into()),
        }
    }

    /// Check if this warning undermines result confidence.
    pub fn is_result_undermining(&self) -> bool {
        self.severity == PreflightSeverity::ResultUndermining
    }
}

// ============================================================================
// MinDetectableEffect - Sensitivity information
// ============================================================================

/// Minimum detectable effect at current noise level.
///
/// The MDE tells you the smallest effect that could be reliably detected
/// given the measurement noise. If MDE > threshold, a "pass" result means
/// insufficient sensitivity, not necessarily safety.
///
/// See spec Section 2.7 (Minimum Detectable Effect).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MinDetectableEffect {
    /// Minimum detectable uniform shift in nanoseconds.
    pub shift_ns: f64,

    /// Minimum detectable tail effect in nanoseconds.
    pub tail_ns: f64,
}

impl Default for MinDetectableEffect {
    fn default() -> Self {
        Self {
            shift_ns: f64::INFINITY,
            tail_ns: f64::INFINITY,
        }
    }
}

// ============================================================================
// BatchingInfo - Metadata about batching
// ============================================================================

/// Information about batching configuration used during collection.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BatchingInfo {
    /// Whether batching was enabled.
    pub enabled: bool,

    /// Iterations per batch (1 if batching disabled).
    pub k: u32,

    /// Effective ticks per batch measurement.
    pub ticks_per_batch: f64,

    /// Explanation of why batching was enabled/disabled.
    pub rationale: String,

    /// Whether the operation was too fast to measure reliably.
    pub unmeasurable: Option<UnmeasurableInfo>,
}

/// Information about why an operation is unmeasurable.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct UnmeasurableInfo {
    /// Estimated operation duration in nanoseconds.
    pub operation_ns: f64,

    /// Minimum measurable threshold in nanoseconds.
    pub threshold_ns: f64,

    /// Ticks per call (below MIN_TICKS_SINGLE_CALL).
    pub ticks_per_call: f64,
}

// ============================================================================
// Metadata - Runtime information
// ============================================================================

/// Metadata for debugging and analysis.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Metadata {
    /// Samples per class after outlier filtering.
    pub samples_per_class: usize,

    /// Cycles per nanosecond (for conversion).
    pub cycles_per_ns: f64,

    /// Timer type used.
    pub timer: String,

    /// Timer resolution in nanoseconds.
    pub timer_resolution_ns: f64,

    /// Batching configuration and rationale.
    pub batching: BatchingInfo,

    /// Total runtime in seconds.
    pub runtime_secs: f64,
}

// ============================================================================
// UnreliablePolicy - How to handle unreliable results
// ============================================================================

/// Policy for handling unreliable measurements in test assertions.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum UnreliablePolicy {
    /// Log warning and skip assertions. Test passes.
    /// Use when: noisy CI, parallel tests, "some coverage is better than none".
    #[default]
    FailOpen,

    /// Panic. Test fails.
    /// Use when: security-critical code, dedicated quiet CI runners.
    FailClosed,
}

impl UnreliablePolicy {
    /// Get policy from environment variable, or use default.
    ///
    /// Checks `TIMING_ORACLE_UNRELIABLE_POLICY`:
    /// - "fail_open" or "skip" → FailOpen
    /// - "fail_closed" or "panic" → FailClosed
    /// - unset or other → default
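    ///
    /// # Examples
    ///
    /// A minimal sketch; when the variable is unset the supplied default is returned:
    ///
    /// ```ignore
    /// let policy = UnreliablePolicy::from_env_or(UnreliablePolicy::FailOpen);
    /// ```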
    #[cfg(feature = "std")]
    pub fn from_env_or(default: Self) -> Self {
        match std::env::var("TIMING_ORACLE_UNRELIABLE_POLICY")
            .ok()
            .as_deref()
        {
            Some("fail_open") | Some("skip") => Self::FailOpen,
            Some("fail_closed") | Some("panic") => Self::FailClosed,
            _ => default,
        }
    }

    /// Get policy from environment variable, or use default.
    ///
    /// In no_std mode, always returns the default.
    #[cfg(not(feature = "std"))]
    pub fn from_env_or(default: Self) -> Self {
        default
    }
}

// ============================================================================
// Outcome implementation
// ============================================================================

impl Outcome {
    /// Check if the test passed (no timing leak detected).
    pub fn passed(&self) -> bool {
        matches!(self, Outcome::Pass { .. })
    }

    /// Check if the test failed (timing leak detected).
    pub fn failed(&self) -> bool {
        matches!(self, Outcome::Fail { .. })
    }

    /// Check if the result is conclusive (either Pass or Fail).
    pub fn is_conclusive(&self) -> bool {
        matches!(self, Outcome::Pass { .. } | Outcome::Fail { .. })
    }

    /// Check if the operation was measurable.
    pub fn is_measurable(&self) -> bool {
        !matches!(self, Outcome::Unmeasurable { .. })
    }

    /// Get the leak probability if available.
    ///
    /// Returns `None` for `Unmeasurable` and `Research` (research mode uses CI, not probability).
    pub fn leak_probability(&self) -> Option<f64> {
        match self {
            Outcome::Pass {
                leak_probability, ..
            } => Some(*leak_probability),
            Outcome::Fail {
                leak_probability, ..
            } => Some(*leak_probability),
            Outcome::Inconclusive {
                leak_probability, ..
            } => Some(*leak_probability),
            Outcome::Unmeasurable { .. } => None,
            Outcome::Research(_) => None, // Research mode uses CI-based semantics
        }
    }

    /// Get the effect estimate if available.
    pub fn effect(&self) -> Option<&EffectEstimate> {
        match self {
            Outcome::Pass { effect, .. } => Some(effect),
            Outcome::Fail { effect, .. } => Some(effect),
            Outcome::Inconclusive { effect, .. } => Some(effect),
            Outcome::Unmeasurable { .. } => None,
            Outcome::Research(res) => Some(&res.effect),
        }
    }

    /// Get the measurement quality if available.
    pub fn quality(&self) -> Option<MeasurementQuality> {
        match self {
            Outcome::Pass { quality, .. } => Some(*quality),
            Outcome::Fail { quality, .. } => Some(*quality),
            Outcome::Inconclusive { quality, .. } => Some(*quality),
            Outcome::Unmeasurable { .. } => None,
            Outcome::Research(res) => Some(res.quality),
        }
    }

    /// Get the diagnostics if available.
    pub fn diagnostics(&self) -> Option<&Diagnostics> {
        match self {
            Outcome::Pass { diagnostics, .. } => Some(diagnostics),
            Outcome::Fail { diagnostics, .. } => Some(diagnostics),
            Outcome::Inconclusive { diagnostics, .. } => Some(diagnostics),
            Outcome::Unmeasurable { .. } => None,
            Outcome::Research(res) => Some(&res.diagnostics),
        }
    }

    /// Get the number of samples used if available.
    pub fn samples_used(&self) -> Option<usize> {
        match self {
            Outcome::Pass { samples_used, .. } => Some(*samples_used),
            Outcome::Fail { samples_used, .. } => Some(*samples_used),
            Outcome::Inconclusive { samples_used, .. } => Some(*samples_used),
            Outcome::Unmeasurable { .. } => None,
            Outcome::Research(res) => Some(res.samples_used),
        }
    }

    /// Check if the measurement is reliable enough for assertions.
    ///
    /// Returns `true` if:
    /// - Test is conclusive (Pass or Fail), AND
    /// - Quality is not TooNoisy, OR the posterior is very conclusive
    ///   (< 0.01 for Pass, > 0.99 for Fail)
    ///
    /// The key insight: a very conclusive posterior is trustworthy even with noisy
    /// measurements - the signal overcame the noise.
    ///
    /// For Research mode, reliability is based on whether the CI is clearly above
    /// or below the measurement floor.
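    ///
    /// # Examples
    ///
    /// A minimal sketch (assuming an `outcome: Outcome` value from a test run):
    ///
    /// ```ignore
    /// if outcome.is_reliable() {
    ///     assert!(outcome.passed(), "timing leak detected: {}", outcome);
    /// }
    /// ```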
    pub fn is_reliable(&self) -> bool {
        match self {
            Outcome::Unmeasurable { .. } => false,
            Outcome::Inconclusive { .. } => false,
            Outcome::Pass {
                quality,
                leak_probability,
                ..
            } => *quality != MeasurementQuality::TooNoisy || *leak_probability < 0.01,
            Outcome::Fail {
                quality,
                leak_probability,
                ..
            } => *quality != MeasurementQuality::TooNoisy || *leak_probability > 0.99,
            Outcome::Research(res) => {
                // Research mode is reliable if we reached a confident conclusion
                matches!(
                    res.status,
                    ResearchStatus::EffectDetected | ResearchStatus::NoEffectDetected
                )
            }
        }
    }

    /// Unwrap a Pass result, panicking otherwise.
    pub fn unwrap_pass(self) -> (f64, EffectEstimate, MeasurementQuality, Diagnostics) {
        match self {
            Outcome::Pass {
                leak_probability,
                effect,
                quality,
                diagnostics,
                ..
            } => (leak_probability, effect, quality, diagnostics),
            _ => panic!("Expected Pass outcome, got {:?}", self),
        }
    }

    /// Unwrap a Fail result, panicking otherwise.
    pub fn unwrap_fail(
        self,
    ) -> (
        f64,
        EffectEstimate,
        Exploitability,
        MeasurementQuality,
        Diagnostics,
    ) {
        match self {
            Outcome::Fail {
                leak_probability,
                effect,
                exploitability,
                quality,
                diagnostics,
                ..
            } => (
                leak_probability,
                effect,
                exploitability,
                quality,
                diagnostics,
            ),
            _ => panic!("Expected Fail outcome, got {:?}", self),
        }
    }

    /// Handle unreliable results according to policy.
    ///
    /// Returns `Some(self)` if the result is reliable.
    /// For unreliable results:
    /// - `FailOpen`: prints warning, returns `None`
    /// - `FailClosed`: panics
    ///
    /// # Example
    ///
    /// ```ignore
    /// let outcome = oracle.test(...);
    /// if let Some(result) = outcome.handle_unreliable("test_name", UnreliablePolicy::FailOpen) {
    ///     assert!(result.passed());
    /// }
    /// ```
    #[cfg(feature = "std")]
    pub fn handle_unreliable(self, test_name: &str, policy: UnreliablePolicy) -> Option<Self> {
        if self.is_reliable() {
            return Some(self);
        }

        let reason = match &self {
            Outcome::Unmeasurable { recommendation, .. } => {
                format!("unmeasurable: {}", recommendation)
            }
            Outcome::Inconclusive { reason, .. } => {
                format!("inconclusive: {:?}", reason)
            }
            Outcome::Pass { quality, .. } | Outcome::Fail { quality, .. } => {
                format!("unreliable quality: {:?}", quality)
            }
            Outcome::Research(research) => {
                format!("research mode: {:?}", research.status)
            }
        };

        match policy {
            UnreliablePolicy::FailOpen => {
                eprintln!("[SKIPPED] {}: {} (fail-open policy)", test_name, reason);
                None
            }
            UnreliablePolicy::FailClosed => {
                panic!("[FAILED] {}: {} (fail-closed policy)", test_name, reason);
            }
        }
    }

    /// Handle unreliable results according to policy (no_std version).
    ///
    /// In no_std mode, this always panics on unreliable results with FailClosed,
    /// and returns None with FailOpen (no printing).
    #[cfg(not(feature = "std"))]
    pub fn handle_unreliable(self, _test_name: &str, policy: UnreliablePolicy) -> Option<Self> {
        if self.is_reliable() {
            return Some(self);
        }

        match policy {
            UnreliablePolicy::FailOpen => None,
            UnreliablePolicy::FailClosed => {
                panic!("Unreliable result with fail-closed policy");
            }
        }
    }
}

// ============================================================================
// Display implementations
// ============================================================================

impl fmt::Display for Outcome {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{}", crate::formatting::format_outcome_plain(self))
    }
}

impl fmt::Display for EffectPattern {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            EffectPattern::UniformShift => write!(f, "uniform shift"),
            EffectPattern::TailEffect => write!(f, "tail effect"),
            EffectPattern::Mixed => write!(f, "mixed"),
            EffectPattern::Indeterminate => write!(f, "indeterminate"),
        }
    }
}

impl fmt::Display for Exploitability {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Exploitability::SharedHardwareOnly => write!(f, "shared hardware only"),
            Exploitability::Http2Multiplexing => write!(f, "HTTP/2 multiplexing"),
            Exploitability::StandardRemote => write!(f, "standard remote"),
            Exploitability::ObviousLeak => write!(f, "obvious leak"),
        }
    }
}

impl fmt::Display for MeasurementQuality {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            MeasurementQuality::Excellent => write!(f, "excellent"),
            MeasurementQuality::Good => write!(f, "good"),
            MeasurementQuality::Poor => write!(f, "poor"),
            MeasurementQuality::TooNoisy => write!(f, "too noisy"),
        }
    }
}

impl fmt::Display for InconclusiveReason {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            InconclusiveReason::DataTooNoisy { message, guidance } => {
                write!(f, "Data too noisy: {}\n \u{2192} {}", message, guidance)
            }
            InconclusiveReason::NotLearning { message, guidance } => {
                write!(f, "Not learning: {}\n \u{2192} {}", message, guidance)
            }
            InconclusiveReason::WouldTakeTooLong {
                estimated_time_secs,
                samples_needed,
                guidance,
            } => {
                write!(
                    f,
                    "Would take too long: ~{:.0}s / {} samples needed\n \u{2192} {}",
                    estimated_time_secs, samples_needed, guidance
                )
            }
            InconclusiveReason::TimeBudgetExceeded { .. } => {
                write!(f, "Time budget exceeded")
            }
            InconclusiveReason::SampleBudgetExceeded { .. } => {
                write!(f, "Sample budget exceeded")
            }
            InconclusiveReason::ConditionsChanged { message, guidance } => {
                write!(
                    f,
                    "Conditions changed: {}\n \u{2192} {}",
                    message, guidance
                )
            }
            InconclusiveReason::ThresholdElevated {
                theta_user,
                theta_eff,
                leak_probability_at_eff,
                achievable_at_max,
                guidance,
                ..
            } => {
                let achievability = if *achievable_at_max {
                    "achievable with more samples"
                } else {
                    "not achievable at max samples"
                };
                write!(
                    f,
                    "Threshold elevated: requested {:.1}ns, used {:.1}ns (P={:.1}% at θ_eff, {})\n \u{2192} {}",
                    theta_user, theta_eff, leak_probability_at_eff * 100.0, achievability, guidance
                )
            }
        }
    }
}

// ============================================================================
// Debug implementation for Outcome
// ============================================================================

impl fmt::Debug for Outcome {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{}", crate::formatting::format_debug_summary_plain(self))
    }
}