nd300 3.4.0

Cross-platform network diagnostic tool
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
//! Diagnostic-driven fix loop driver.
//!
//! The flow:
//!
//! 1. Run baseline diagnostics.
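//! 2. If everything passes, exit cleanly. Otherwise re-run diagnostics once to
//!    confirm the baseline: only failures present in both passes are acted on
//!    in the first iteration; flickering ones are recorded as intermittent.
//! 3. Then, in a bounded loop: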
//!    a. Detect hard blocks (captive portal / ISP outage / no link / enterprise VPN) — exit cleanly with guidance.
//!    b. Compute the actionable failure set, group by root cause, and build a plan.
//!    c. Apply the plan's actions one by one, prompting Y/N for any action that
//!       requires confirmation (High-risk always; medium-risk unless `--yes`).
//!    d. After each action, sleep its `stabilization` window.
//!    e. Re-run diagnostics; if all pass, exit; else continue.
//! 4. Bounded by iteration count, wall clock, and per-action attempt caps.

use std::time::{Duration, Instant};

use crate::config::{Config, OutputFormat};
use crate::diagnostics;

use super::action::{self, DiagnosticKey};
use super::session::{FinalOutcome, Reporter, RestoreRegistry, Session, DEFAULT_ITERATION_DELAY};
use super::triage::{
    actionable_failures, build_plan, confirmed_failures, hard_block_detected,
    intermittent_failures, requires_confirmation, requires_high_risk_consent, HardBlock,
    MAX_ITERATIONS,
};

/// Upper bound on the whole restore drain performed by `run_and_finalize`.
/// That drain runs on every terminal path (normal end, Ctrl-C, panic,
/// wall-clock cap); each op is reportedly bounded individually as well (not
/// visible in this file), and this constant caps the aggregate so cleanup
/// itself can never hang.
const DRAIN_CAP: Duration = Duration::from_secs(90);

/// Seam for injecting scripted diagnostics into the triage loop in tests.
/// The real implementation wraps `diagnostics::run_all` with the loop's
/// speed-skipping config.
pub(crate) trait DiagProbe {
    /// Runs one diagnostics pass and returns its results.
    ///
    /// Takes `&mut self` so an implementation may keep state between passes
    /// (the test double in this file pops results from a scripted queue).
    async fn probe(&mut self) -> diagnostics::DiagnosticResults;
}

/// Production [`DiagProbe`]: runs the real diagnostics suite using the
/// `Config` it owns.
struct RealProbe {
    /// Configuration passed to every `diagnostics::run_all` call.
    config: Config,
}

impl DiagProbe for RealProbe {
    /// Runs the diagnostics suite once with this probe's config, passing the
    /// value returned by `diagnostics::run_all_cap` as the second argument.
    async fn probe(&mut self) -> diagnostics::DiagnosticResults {
        let cfg = &self.config;
        let cap = diagnostics::run_all_cap(cfg);
        diagnostics::run_all(cfg, cap).await
    }
}

/// Entry point of the triage loop against real diagnostics.
///
/// Fills the caller-owned `session` as it goes, so a report can still be built
/// if the run is interrupted or panics, and returns the `FinalOutcome`.
/// Destructive actions register their inverse operations on `restore`;
/// draining that registry on every terminal path is the caller's job.
pub async fn run(
    config: &Config,
    session: &mut Session,
    restore: &RestoreRegistry,
) -> FinalOutcome {
    // The probe gets its own copy of the config with the speed test skipped:
    // no action targets Speed (pinned by
    // `triage::tests::no_action_targets_speed`), and a ~40s+ sequential speed
    // test per pass would spend the 240s wall-clock budget on re-probing
    // instead of repairing.
    let probe_config = config.clone().with_skip_speed();
    let mut real_probe = RealProbe {
        config: probe_config,
    };
    run_with_probe(&mut real_probe, config, session, restore).await
}

/// Core of the fix loop, generic over the diagnostics source so tests can
/// script probe results.
///
/// Sequence:
///
/// 1. Probe once for the baseline and record it on `session`.
/// 2. If the baseline has actionable failures, probe a second time. Only
///    failures seen in both passes are planned against in iteration 1, and
///    the second snapshot becomes the loop's current state.
/// 3. For up to `MAX_ITERATIONS` rounds, stop with `Timeout` when the session's
///    wall clock is exhausted, `Fixed` when nothing is failing, `HardBlock`
///    when a hard block is detected, `Exhausted` when the plan is empty, or
///    `UserDeclined` when a prompt is declined (or every planned action was
///    skipped for lack of a terminal). Otherwise apply the plan, wait,
///    re-probe, and update the effectiveness bookkeeping.
/// 4. If the rounds run out, report `Fixed`, `Partial`, or `Exhausted`
///    depending on what still fails compared with the baseline.
///
/// Every exit funnels through the single epilogue at the bottom, which stores
/// the outcome in `session.final_outcome` and, when interactive, prints the
/// verdict before returning it.
async fn run_with_probe(
    probe: &mut impl DiagProbe,
    config: &Config,
    session: &mut Session,
    restore: &RestoreRegistry,
) -> FinalOutcome {
    let interactive = is_interactive(config);
    let reporter = Reporter::new(config);

    if interactive {
        reporter.header();
    }

    // Iteration 1: baseline diagnostics.
    let baseline = probe.probe().await;
    session.record_baseline(baseline.clone());

    let first_failures = actionable_failures(&baseline);

    if interactive {
        reporter.baseline_summary(first_failures.len());
    }

    let mut current = baseline;

    // Evidence gate: a failing baseline is re-confirmed with a second pass
    // before the first repair plan. Only failures present in BOTH passes are
    // actionable in iteration 1 — a transient blip self-clears here instead
    // of triggering a repair. Failures that flicker between the passes are
    // recorded as intermittent so their later natural recoveries earn no
    // effectiveness credit.
    let mut confirmed_for_iter1: Option<std::collections::HashSet<DiagnosticKey>> = None;
    if !first_failures.is_empty() {
        if interactive {
            reporter.confirmation_pass();
        }
        let second = probe.probe().await;
        let second_failures = actionable_failures(&second);
        let confirmed = confirmed_failures(&first_failures, &second_failures);
        let intermittent = intermittent_failures(&first_failures, &second_failures);
        if interactive {
            reporter.confirmation_result(confirmed.len(), intermittent.len());
        }
        session.record_confirmation(second.clone(), intermittent);
        // The freshest snapshot drives the loop (hard-block detection
        // included), so a persistent hard-block shape still short-circuits
        // and a transient one self-clears.
        current = second;
        confirmed_for_iter1 = Some(confirmed);
    }

    // The labeled block yields the final outcome; each early exit is a
    // `break 'verdict <outcome>` so the bookkeeping below is written once.
    let outcome = 'verdict: {
        for iteration in 1..=MAX_ITERATIONS {
            // Wall-clock cap.
            if session.wall_clock_exhausted() {
                let remaining: Vec<DiagnosticKey> =
                    actionable_failures(&current).into_iter().collect();
                break 'verdict FinalOutcome::Timeout(remaining);
            }

            // Iteration 1 plans against the confirmed evidence set (the
            // `take()` hands it over exactly once); later iterations plan
            // against their single re-probe, since every failure they see has
            // already been observed in at least two runs.
            let failures = confirmed_for_iter1
                .take()
                .unwrap_or_else(|| actionable_failures(&current));
            if failures.is_empty() {
                break 'verdict FinalOutcome::Fixed;
            }

            // Hard-block check — short-circuits before any action runs.
            if let Some(block) = hard_block_detected(&current) {
                break 'verdict FinalOutcome::HardBlock(block);
            }

            if interactive {
                reporter.iteration_header(iteration);
            }

            let registry = action::all_actions();
            let plan = build_plan(
                &failures,
                &session.attempts,
                &session.effectiveness,
                &registry,
            );

            if plan.is_empty() {
                let remaining: Vec<DiagnosticKey> = failures.into_iter().collect();
                break 'verdict FinalOutcome::Exhausted(remaining);
            }

            // Apply actions in cost-order. Fatal env changes break early so we
            // re-probe before applying further actions in the same iteration.
            let mut user_declined_confirmation = false;
            let mut skipped_for_confirmation = false;
            let mut ran_action = false;
            for action in &plan {
                if session.wall_clock_exhausted() {
                    break;
                }

                // Confirmation gates. High-risk always requires explicit Y/N;
                // medium-risk and DNS-changing actions honor --yes.
                if requires_confirmation(action, config.auto_confirm_medium_risk) {
                    if !interactive {
                        // Nobody to ask: log the skip (not a user decline) and
                        // move on to the next planned action.
                        session.record_action(
                            iteration,
                            action,
                            super::action::ActionOutcome::fail(
                                "Skipped: requires confirmation. Re-run `nd300 fix` in a terminal or use `--yes` for medium-risk actions.",
                            ),
                            Duration::ZERO,
                            false,
                            true,
                        );
                        skipped_for_confirmation = true;
                        continue;
                    }

                    let approved = if requires_high_risk_consent(action) {
                        reporter.high_risk_prompt(action)
                    } else {
                        reporter.confirmation_prompt(action)
                    };

                    if !approved {
                        // A declined prompt ends the plan and the whole run.
                        reporter.confirmation_declined(action);
                        session.record_action(
                            iteration,
                            action,
                            super::action::ActionOutcome::fail("User declined the prompt."),
                            Duration::ZERO,
                            true,
                            false,
                        );
                        user_declined_confirmation = true;
                        break;
                    }
                }

                if interactive {
                    reporter.announce_action(action);
                }
                let started = Instant::now();
                let outcome = action.apply(config, restore).await;
                let duration = started.elapsed();
                if interactive {
                    reporter.finish_action(&outcome, duration);
                }

                // Read the flag before `outcome` is moved into the action log.
                let fatal_env_change = outcome.fatal_environment_change;
                session.record_action(iteration, action, outcome, duration, false, false);
                ran_action = true;

                // Stabilize before either re-probing or applying the next action.
                if !action.stabilization.is_zero() {
                    tokio::time::sleep(action.stabilization).await;
                }

                if fatal_env_change {
                    // Break out of the plan-loop and re-probe immediately.
                    break;
                }
            }

            // Stop if the user said no, or if everything planned was skipped
            // for lack of a terminal and nothing actually ran.
            if user_declined_confirmation || (skipped_for_confirmation && !ran_action) {
                let remaining: Vec<DiagnosticKey> =
                    actionable_failures(&current).into_iter().collect();
                break 'verdict FinalOutcome::UserDeclined(remaining);
            }

            // Light delay between iterations to let the OS settle.
            tokio::time::sleep(DEFAULT_ITERATION_DELAY).await;

            // Re-probe, keeping the pre-probe failure set for the
            // effectiveness comparison.
            let prior_failures = actionable_failures(&current);
            current = probe.probe().await;
            let now_failures = actionable_failures(&current);
            session.record_iteration(iteration, current.clone());
            session.update_effectiveness(iteration, &prior_failures, &now_failures);
        }

        // Hit MAX_ITERATIONS without converging.
        let remaining_failures = actionable_failures(&current);
        let remaining: Vec<DiagnosticKey> = remaining_failures.iter().copied().collect();
        if remaining_failures.is_empty() {
            FinalOutcome::Fixed
        } else {
            let baseline_failures = session
                .baseline
                .as_ref()
                .map(actionable_failures)
                .unwrap_or_default();
            // Progress means at least one baseline failure is no longer failing.
            let any_progress = baseline_failures
                .difference(&remaining_failures)
                .next()
                .is_some();
            if any_progress {
                FinalOutcome::Partial(remaining)
            } else {
                FinalOutcome::Exhausted(remaining)
            }
        }
    };

    // Single exit: record the outcome, print the verdict, return it.
    session.final_outcome = Some(outcome.clone());
    if interactive {
        reporter.final_verdict(&outcome, None);
    }
    outcome
}

/// Reports whether the loop may show interactive prompts: the output format
/// is not JSON and stdin is attached to a terminal.
fn is_interactive(config: &Config) -> bool {
    use std::io::IsTerminal;

    // JSON output is never interactive; stdin is not consulted in that case.
    if config.format == OutputFormat::Json {
        return false;
    }
    std::io::stdin().is_terminal()
}

/// Wrapper used by `actions::fix::run`: runs the triage loop, rolls back
/// registered network changes, saves the Markdown report, and returns the exit
/// code derived from the `FinalOutcome`.
///
/// This is the interrupt-safe boundary. The loop future is wrapped in
/// `catch_unwind` and raced against Ctrl-C inside a `tokio::select!`, so
/// neither a panic nor an interrupt skips cleanup. On EVERY terminal path —
/// normal end, user-declined, wall-clock cap, Ctrl-C, or panic — the restore
/// registry is drained so a half-applied network change (a disabled adapter, a
/// disconnected VPN, a removed macOS service) is rolled back before the
/// process exits. After a caught panic the process exits with code 101 once
/// cleanup and reporting are done.
pub async fn run_and_finalize(config: &Config) -> i32 {
    use futures_util::FutureExt;

    let is_json = config.format == OutputFormat::Json;

    // Pre-flight: without elevation nothing is attempted; report and bail.
    if !crate::platform::is_elevated() {
        let outcome = FinalOutcome::PreflightFailed(
            "The fix flow requires elevated privileges. Run with sudo (Unix) or as Administrator (Windows).".to_string(),
        );
        if is_json {
            print_json_outcome(&Session::new(), &outcome, None, &[]);
        } else {
            Reporter::new(config).final_verdict(&outcome, None);
        }
        return outcome.exit_code();
    }

    let mut session = Session::new();
    let restore = RestoreRegistry::new();

    // Race the loop against Ctrl-C, and catch any panic from the loop so we can
    // still drain restores instead of leaving the network half-broken.
    //
    // `AssertUnwindSafe` is sound here: the registry uses a non-poisoning
    // `tokio::sync::Mutex`, and after a caught panic we only READ the partially
    // populated `Session` to build a best-effort report — we never rely on it
    // being in a logically-consistent state.
    let loop_result = {
        let guarded_loop =
            std::panic::AssertUnwindSafe(run(config, &mut session, &restore)).catch_unwind();
        tokio::select! {
            biased;
            _ = tokio::signal::ctrl_c() => None,
            finished = guarded_loop => Some(finished),
        }
    };

    // Classify the terminal path and announce the abnormal ones.
    //   Some(Ok(outcome))   -> loop finished normally (verdict already printed).
    //   Some(Err(_panic))   -> loop panicked (caught); exit 101 after cleanup.
    //   None                -> Ctrl-C interrupted the loop.
    let (outcome, panicked) = match loop_result {
        Some(Ok(finished)) => (finished, false),
        Some(Err(_panic)) => {
            let interrupted = FinalOutcome::Interrupted(remaining_after_interrupt(&session));
            if !is_json {
                println!();
                println!(
                    "  A fatal internal error occurred mid-fix — restoring network state before exiting..."
                );
            }
            (interrupted, true)
        }
        None => {
            // Ctrl-C: print a clear interrupted line now (the loop never
            // returned, so it never printed a verdict).
            if !is_json {
                println!();
                println!("  Interrupted — cleaning up and restoring network state...");
            }
            let interrupted = FinalOutcome::Interrupted(remaining_after_interrupt(&session));
            (interrupted, false)
        }
    };

    // ALWAYS drain restores, regardless of how we got here. The timeout bounds
    // the whole drain so cleanup itself can never hang; hitting it is reported
    // as a single recovery message.
    let drain_failures = tokio::time::timeout(DRAIN_CAP, restore.drain())
        .await
        .unwrap_or_else(|_elapsed| {
            vec![format!(
                "Network-state cleanup did not finish within {}s; some changes may not have been restored.",
                DRAIN_CAP.as_secs()
            )]
        });

    // For the Interrupted path, print the verdict now (after the drain attempt)
    // so the manual-recovery guidance reads in order.
    if !is_json && matches!(outcome, FinalOutcome::Interrupted(_)) {
        Reporter::new(config).final_verdict(&outcome, None);
    }

    // Surface anything that couldn't be restored as explicit manual-recovery
    // guidance (non-JSON; JSON carries it in the structured object).
    if !is_json && !drain_failures.is_empty() {
        println!();
        println!(
            "  {}",
            crate::render::color::yellow("Manual recovery needed:", config)
        );
        for failure in &drain_failures {
            println!("{}", crate::render::color::yellow(failure, config));
        }
    }

    // Record the final outcome on the session so the report reflects it even on
    // the interrupted / panic path.
    session.final_outcome = Some(outcome.clone());

    let report_path =
        super::report::save_session_report_with_recovery(&session, &outcome, &drain_failures);

    if is_json {
        print_json_outcome(&session, &outcome, report_path.as_deref(), &drain_failures);
    } else if let Some(saved) = &report_path {
        // Re-print the path under the verdict so users see where to find it.
        let shown = saved.display().to_string();
        println!(
            "  {} {}",
            crate::render::color::dim("Saved report:", config),
            crate::render::color::dim(&shown, config),
        );
    }

    let code = outcome.exit_code();

    // If the loop panicked, surface the failure as exit 101 AFTER cleanup so
    // the operator sees the standard panic exit code, having had the network
    // restored first.
    if panicked {
        std::process::exit(101);
    }

    code
}

/// Best-effort list of what was still failing when a run was cut short: the
/// actionable failures of the session's most recent snapshot, or an empty list
/// when no snapshot has been recorded.
fn remaining_after_interrupt(session: &Session) -> Vec<DiagnosticKey> {
    match session.snapshots.last() {
        Some(latest) => actionable_failures(&latest.results).into_iter().collect(),
        None => Vec::new(),
    }
}

fn print_json_outcome(
    session: &Session,
    outcome: &FinalOutcome,
    report_path: Option<&std::path::Path>,
    recovery_needed: &[String],
) {
    use serde_json::json;

    let outcome_label = match outcome {
        FinalOutcome::Fixed => "fixed",
        FinalOutcome::Partial(_) => "partial",
        FinalOutcome::Exhausted(_) => "exhausted",
        FinalOutcome::HardBlock(_) => "hard_block",
        FinalOutcome::Timeout(_) => "timeout",
        FinalOutcome::UserDeclined(_) => "user_declined",
        FinalOutcome::PreflightFailed(_) => "preflight_failed",
        FinalOutcome::Interrupted(_) => "interrupted",
    };

    let remaining: Vec<&str> = match outcome {
        FinalOutcome::Partial(rs)
        | FinalOutcome::Exhausted(rs)
        | FinalOutcome::Timeout(rs)
        | FinalOutcome::UserDeclined(rs)
        | FinalOutcome::Interrupted(rs) => rs.iter().map(|k| diagnostic_key_str(*k)).collect(),
        _ => Vec::new(),
    };

    let actions_json: Vec<_> = session
        .action_log
        .iter()
        .map(|r| {
            json!({
                "iteration": r.iteration,
                "action": format!("{:?}", r.action_id),
                "label": r.label,
                "ok": r.outcome.ok,
                "message": r.outcome.message,
                "duration_ms": r.duration.as_millis() as u64,
                "user_declined": r.user_declined,
                "skipped_no_interaction": r.skipped_no_interaction,
            })
        })
        .collect();

    let mut intermittent: Vec<&str> = session
        .intermittent
        .iter()
        .map(|k| diagnostic_key_str(*k))
        .collect();
    intermittent.sort_unstable();

    let value = json!({
        "action": "fix",
        "outcome": outcome_label,
        "exit_code": outcome.exit_code(),
        "iterations": session.snapshots.len().saturating_sub(1),
        "remaining_failures": remaining,
        "intermittent_failures": intermittent,
        "applied_actions": actions_json,
        "elapsed_seconds": session.elapsed().as_secs(),
        "report_path": report_path.map(|p| p.display().to_string()),
        "interrupted": matches!(outcome, FinalOutcome::Interrupted(_)),
        "manual_recovery_needed": recovery_needed,
        "preflight_error": match outcome {
            FinalOutcome::PreflightFailed(s) => Some(s.clone()),
            _ => None,
        },
        "hard_block": match outcome {
            FinalOutcome::HardBlock(b) => Some(hard_block_str(b).to_string()),
            _ => None,
        },
    });

    println!(
        "{}",
        serde_json::to_string_pretty(&value).unwrap_or_else(|_| "{}".to_string())
    );
}

/// Maps a diagnostic key to the snake_case identifier used for it in the JSON
/// output (`remaining_failures`, `intermittent_failures`).
///
/// The match has no wildcard arm, so adding a `DiagnosticKey` variant forces
/// this table to be updated.
fn diagnostic_key_str(k: DiagnosticKey) -> &'static str {
    match k {
        DiagnosticKey::Adapters => "adapters",
        DiagnosticKey::Interfaces => "interfaces",
        DiagnosticKey::Gateway => "gateway",
        DiagnosticKey::Dns => "dns",
        DiagnosticKey::PublicIp => "public_ip",
        DiagnosticKey::Latency => "latency",
        DiagnosticKey::Ports => "ports",
        DiagnosticKey::Speed => "speed",
    }
}

/// Maps a hard-block kind to the snake_case identifier emitted in the JSON
/// `hard_block` field. The payload of `EnterpriseVpnActive` is ignored; only
/// the kind is reported.
fn hard_block_str(b: &HardBlock) -> &'static str {
    match b {
        HardBlock::CaptivePortal => "captive_portal",
        HardBlock::NoPhysicalLink => "no_physical_link",
        HardBlock::IspOutage => "isp_outage",
        HardBlock::EnterpriseVpnActive(_) => "enterprise_vpn_active",
    }
}

#[cfg(test)]
mod loop_tests {
    use super::*;
    use crate::diagnostics::{DiagnosticResult, DiagnosticResults};
    use std::collections::VecDeque;

    /// Scripted diagnostics: each probe pops the next pre-built result set.
    /// Panics if the loop consumes more probes than the test scripted — that
    /// panic IS an assertion on the loop's probe count. Under-consumption is
    /// not detected here; tests check `script.is_empty()` themselves.
    struct ScriptedProbe {
        script: VecDeque<DiagnosticResults>,
    }

    impl ScriptedProbe {
        /// Builds a probe that returns `script`'s entries in order.
        fn new(script: Vec<DiagnosticResults>) -> Self {
            Self {
                script: script.into(),
            }
        }
    }

    impl DiagProbe for ScriptedProbe {
        async fn probe(&mut self) -> DiagnosticResults {
            self.script
                .pop_front()
                .expect("ScriptedProbe ran dry — the loop probed more often than the test scripted")
        }
    }

    /// A fully healthy result set (speed skipped, everything else ok).
    fn all_ok() -> DiagnosticResults {
        DiagnosticResults {
            timestamp: "test".to_string(),
            adapters: DiagnosticResult::ok("Adapters", "1 active"),
            interfaces: DiagnosticResult::ok("Network", "1 up"),
            gateway: DiagnosticResult::ok("Gateway", "reachable"),
            dns: DiagnosticResult::ok("DNS", "resolving"),
            public_ip: DiagnosticResult::ok("Internet", "203.0.113.1"),
            latency: DiagnosticResult::ok("Latency", "low"),
            speed: DiagnosticResult::skip("Speed", "skipped"),
            ports: DiagnosticResult::ok("Ports", "open"),
            interface_details: None,
            adapter_details: None,
            gateway_details: None,
            dns_details: None,
            public_ip_details: None,
            latency_details: None,
            speed_details: None,
            port_details: None,
            technician: None,
            timed_out: false,
        }
    }

    /// Healthy except for a failing DNS check.
    fn dns_failing() -> DiagnosticResults {
        let mut r = all_ok();
        r.dns = DiagnosticResult::fail("DNS", "DNS resolution failed");
        r
    }

    /// Gateway fine, but public IP + ports dark — the ISP-outage shape that
    /// `hard_block_detected` recognizes.
    fn isp_outage() -> DiagnosticResults {
        let mut r = all_ok();
        r.public_ip = DiagnosticResult::fail("Internet", "Cannot determine public IP");
        r.ports = DiagnosticResult::fail("Ports", "All tested ports blocked");
        r
    }

    fn quiet_config() -> Config {
        // JSON format keeps the loop non-interactive regardless of the test
        // runner's TTY, so no prompts and no terminal output.
        Config::new().with_json()
    }

    /// The core evidence-quality acceptance test: a failure on the first pass
    /// that does not reproduce on the second is transient — no repair plan,
    /// outcome Fixed, exactly two probes consumed.
    #[tokio::test]
    async fn transient_blip_is_fixed_without_actions() {
        let mut probe = ScriptedProbe::new(vec![dns_failing(), all_ok()]);
        let config = quiet_config();
        let mut session = Session::new();
        let restore = RestoreRegistry::new();

        let outcome = run_with_probe(&mut probe, &config, &mut session, &restore).await;

        assert!(matches!(outcome, FinalOutcome::Fixed), "got {:?}", outcome);
        assert!(
            session.action_log.is_empty(),
            "no repair may run on unconfirmed evidence"
        );
        assert!(probe.script.is_empty(), "exactly two probes expected");
        assert!(session.baseline_confirmation.is_some());
        assert!(session.intermittent.contains(&DiagnosticKey::Dns));
    }

    /// A hard-block shape present on both passes short-circuits before any
    /// action, after exactly the baseline and confirmation probes.
    #[tokio::test]
    async fn confirmed_hard_block_short_circuits() {
        let mut probe = ScriptedProbe::new(vec![isp_outage(), isp_outage()]);
        let config = quiet_config();
        let mut session = Session::new();
        let restore = RestoreRegistry::new();

        let outcome = run_with_probe(&mut probe, &config, &mut session, &restore).await;

        assert!(
            matches!(outcome, FinalOutcome::HardBlock(HardBlock::IspOutage)),
            "got {:?}",
            outcome
        );
        assert!(session.action_log.is_empty());
        // Guards against the confirmation pass being skipped on this path.
        assert!(probe.script.is_empty(), "exactly two probes expected");
    }

    /// A confirmed failure with every action's attempts pre-exhausted leaves
    /// the planner nothing to schedule: the loop reports Exhausted with the
    /// confirmed DNS failure, after exactly two probes and without any apply
    /// IO. (Both passes fail identically here, so this does not distinguish
    /// the confirmed set from the raw second-pass set.)
    #[tokio::test]
    async fn confirmed_failure_with_no_actions_left_is_exhausted() {
        let mut probe = ScriptedProbe::new(vec![dns_failing(), dns_failing()]);
        let config = quiet_config();
        let mut session = Session::new();
        for action in action::all_actions() {
            session.attempts.insert(action.id, u8::MAX);
        }
        let restore = RestoreRegistry::new();

        let outcome = run_with_probe(&mut probe, &config, &mut session, &restore).await;

        match outcome {
            FinalOutcome::Exhausted(remaining) => {
                assert_eq!(remaining, vec![DiagnosticKey::Dns]);
            }
            other => panic!("expected Exhausted, got {:?}", other),
        }
        assert!(session.action_log.is_empty());
        // Guards against the confirmation pass being skipped on this path.
        assert!(probe.script.is_empty(), "exactly two probes expected");
    }

    /// A healthy baseline ends the run after a single probe — no confirmation
    /// pass when there is nothing to confirm.
    #[tokio::test]
    async fn healthy_baseline_fixed_after_one_probe() {
        let mut probe = ScriptedProbe::new(vec![all_ok()]);
        let config = quiet_config();
        let mut session = Session::new();
        let restore = RestoreRegistry::new();

        let outcome = run_with_probe(&mut probe, &config, &mut session, &restore).await;

        assert!(matches!(outcome, FinalOutcome::Fixed));
        assert!(probe.script.is_empty(), "exactly one probe expected");
        assert!(session.baseline_confirmation.is_none());
        assert!(session.action_log.is_empty());
    }
}