Skip to main content

nd_300/actions/fix/
action.rs

//! Action type system and registry for the diagnostic-driven fix loop.
//!
//! Every fix primitive is wrapped as an [`Action`]. The triage layer picks
//! actions whose `targets` intersect the current failure set; the loop runner
//! calls `Action::apply` and records the [`ActionOutcome`].
6
7use std::time::Duration;
8
9use crate::actions::flush_dns_platform;
10use crate::config::Config;
11
12use super::adapters;
13use super::arp;
14use super::cmd::CmdOutcome;
15use super::dhcp;
16use super::dns::{self, DnsProvider};
17use super::session::{RestoreOp, RestoreRegistry};
18use super::stages;
19use super::vpn;
20
/// Names one of the user-mode core diagnostics.
///
/// Every [`Action`] lists, in its `targets` field, the failure categories
/// (expressed as these keys) that it is able to address.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum DiagnosticKey {
    Adapters,
    Interfaces,
    Gateway,
    Dns,
    PublicIp,
    Latency,
    Ports,
    Speed,
}
34
/// How expensive an action is to run, listed from cheapest to most expensive.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Cost {
    Cheap = 0,
    Medium = 1,
    Expensive = 2,
}

impl Cost {
    /// Numeric ordering key: `Cheap` is 0, `Medium` is 1, `Expensive` is 2.
    pub fn rank(self) -> u8 {
        // The explicit discriminants on the enum are exactly the rank values.
        self as u8
    }
}
51
52#[derive(Debug, Clone)]
53pub enum Risk {
54    Low,
55    Medium,
56    /// High-risk actions REQUIRE an attached [`RiskExplanation`] by
57    /// construction. There is no way to reach the loop's apply path on a
58    /// High-risk action without a complete plain-language explanation that the
59    /// user can read before approving.
60    High(RiskExplanation),
61}
62
63impl Risk {
64    pub fn is_high(&self) -> bool {
65        matches!(self, Risk::High(_))
66    }
67    pub fn rank(&self) -> u8 {
68        match self {
69            Risk::Low => 0,
70            Risk::Medium => 1,
71            Risk::High(_) => 2,
72        }
73    }
74}
75
/// How recoverable an action's effects are.
#[derive(Clone, Copy, Debug)]
pub enum Reversibility {
    /// Can be undone completely, with no user action needed.
    FullyReversible,
    /// A reboot is needed to revert fully (e.g. Winsock reset).
    RebootToFullyRevert,
    /// No undo is possible (e.g. delete Wi-Fi profile — passphrase is gone).
    NotReversible,
}

impl Reversibility {
    /// Short lower-case phrase describing this level of reversibility.
    pub fn label(self) -> &'static str {
        match self {
            Self::FullyReversible => "fully reversible",
            Self::RebootToFullyRevert => "requires reboot to fully revert",
            Self::NotReversible => "not reversible",
        }
    }
}
95
96/// Plain-English explanation rendered before any High-risk action runs. Every
97/// field is mandatory — there is no way to construct a High-risk
98/// [`Action`] without a complete explanation.
99#[derive(Debug, Clone)]
100pub struct RiskExplanation {
101    /// One-line headline of what the action does (e.g. "Reset Windows
102    /// networking stack").
103    pub what: &'static str,
104    /// 1–3 sentence explanation of why this action exists and what kind of
105    /// problem it fixes. Written for non-technical readers.
106    pub why: &'static str,
107    /// Concrete consequences the user should expect, one per bullet.
108    pub side_effects: &'static [&'static str],
109    /// How recoverable the action is.
110    pub reversible: Reversibility,
111    /// Human-readable estimate of how long the user will be disrupted.
112    pub typical_duration: &'static str,
113}
114
/// Identifies one fix action; `Action::apply` dispatches on this value.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum ActionId {
    FlushDns,
    SetDnsCloudflare,
    SetDnsAutomatic,
    FlushArp,
    RestartNetworkServices,
    RenewDhcp,
    DisableConsumerVpns,
    BounceInterface,
    /// Deep stack reset, which differs per platform:
    /// Windows — Winsock+TCPIP+IPv6; macOS — remove/recreate network service;
    /// Linux — nmcli connection delete/recreate.
    DeepStackReset,
}
130
131#[derive(Debug, Clone)]
132pub struct Action {
133    pub id: ActionId,
134    /// Short label used in status lines (e.g. "Flush the DNS cache"). Should
135    /// read naturally to a non-technical user.
136    pub label: &'static str,
137    /// One-sentence "why I'm trying this" rendered before the action runs.
138    pub one_line_why: &'static str,
139    /// Failure categories this action can address. Used by triage to match
140    /// actions to current failures.
141    pub targets: &'static [DiagnosticKey],
142    pub cost: Cost,
143    pub risk: Risk,
144    /// Per-session attempt cap. Once reached, the action is removed from
145    /// future plans even if its targets keep failing — prevents the loop from
146    /// retrying a step that didn't help.
147    pub max_attempts: u8,
148    /// Wait after applying this action before the next iteration's
149    /// diagnostics run. DHCP renew and interface bounce need ~10s to settle;
150    /// a DNS flush only ~1s.
151    pub stabilization: Duration,
152}
153
154#[derive(Debug, Clone)]
155pub struct ActionOutcome {
156    pub ok: bool,
157    pub message: String,
158    /// Captured subprocess invocations from inside this action. Currently
159    /// populated only by actions that build their own commands; legacy
160    /// helpers that return `Result<String, String>` leave this empty. The
161    /// `message` field always carries a human-readable summary either way.
162    pub cmd_outcomes: Vec<CmdOutcome>,
163    /// Set by actions that change the network surface enough that the loop
164    /// should re-probe immediately rather than apply additional actions in
165    /// the same iteration (e.g. interface bounce, deep stack reset).
166    pub fatal_environment_change: bool,
167}
168
169impl ActionOutcome {
170    pub fn ok(msg: impl Into<String>) -> Self {
171        Self {
172            ok: true,
173            message: msg.into(),
174            cmd_outcomes: Vec::new(),
175            fatal_environment_change: false,
176        }
177    }
178    pub fn fail(msg: impl Into<String>) -> Self {
179        Self {
180            ok: false,
181            message: msg.into(),
182            cmd_outcomes: Vec::new(),
183            fatal_environment_change: false,
184        }
185    }
186    pub fn from_result(r: Result<String, String>) -> Self {
187        match r {
188            Ok(msg) => Self::ok(msg),
189            Err(msg) => Self::fail(msg),
190        }
191    }
192    pub fn with_fatal_env_change(mut self) -> Self {
193        self.fatal_environment_change = true;
194        self
195    }
196}
197
198impl Action {
199    /// Run this action against the current system. Always returns an
200    /// `ActionOutcome` — apply failures are encoded inside it, never
201    /// propagated as Rust errors.
202    ///
203    /// `restore` is the run's [`RestoreRegistry`]: destructive actions
204    /// (VPN disable, interface bounce, deep stack reset) register the inverse
205    /// op here *before* mutating state, so any abort (Ctrl-C / timeout / panic)
206    /// can roll the change back. Non-destructive actions (flush / DNS / DHCP /
207    /// service restart) ignore it.
208    pub async fn apply(&self, config: &Config, restore: &RestoreRegistry) -> ActionOutcome {
209        match self.id {
210            ActionId::FlushDns => apply_flush_dns().await,
211            ActionId::SetDnsCloudflare => apply_set_dns(DnsProvider::Cloudflare).await,
212            ActionId::SetDnsAutomatic => apply_set_dns(DnsProvider::Automatic).await,
213            ActionId::FlushArp => apply_flush_arp().await,
214            ActionId::RestartNetworkServices => apply_restart_services().await,
215            ActionId::RenewDhcp => apply_renew_dhcp().await,
216            ActionId::DisableConsumerVpns => apply_disable_consumer_vpns(config, restore).await,
217            ActionId::BounceInterface => apply_bounce_interface(restore).await,
218            ActionId::DeepStackReset => apply_deep_stack_reset(config, restore).await,
219        }
220    }
221}
222
223// ── Apply implementations ───────────────────────────────────────────────────
224
225async fn apply_flush_dns() -> ActionOutcome {
226    ActionOutcome::from_result(flush_dns_platform().await)
227}
228
229async fn apply_set_dns(provider: DnsProvider) -> ActionOutcome {
230    let iface = match adapters::detect_default_interface().await {
231        Some(i) => i,
232        None => return ActionOutcome::fail("Could not detect a default network interface"),
233    };
234    let service_name = service_name_for(&iface).await;
235    ActionOutcome::from_result(dns::set_dns_servers(&iface, &service_name, provider).await)
236}
237
238async fn apply_flush_arp() -> ActionOutcome {
239    ActionOutcome::from_result(arp::flush_arp().await)
240}
241
242async fn apply_restart_services() -> ActionOutcome {
243    ActionOutcome::from_result(stages::restart_services().await)
244}
245
246async fn apply_renew_dhcp() -> ActionOutcome {
247    if let Some(iface) = adapters::detect_default_interface().await {
248        ActionOutcome::from_result(adapters::renew_dhcp_on_interface(&iface).await)
249    } else {
250        ActionOutcome::from_result(dhcp::renew_dhcp().await)
251    }
252}
253
254async fn apply_disable_consumer_vpns(config: &Config, restore: &RestoreRegistry) -> ActionOutcome {
255    if !crate::actions::is_interactive(config) {
256        return ActionOutcome::fail(
257            "Skipped: disabling VPNs requires an interactive session so they can be re-enabled safely.",
258        );
259    }
260
261    let disabled = vpn::detect_and_disable(config).await;
262    if disabled.is_empty() {
263        return ActionOutcome::ok("No consumer VPNs were active");
264    }
265
266    // Register a re-enable op for every VPN we disabled BEFORE we report
267    // success. If the run is interrupted (Ctrl-C / timeout / panic) before the
268    // normal-path offer below runs, the drain re-connects each still-registered
269    // VPN so the user isn't left disconnected.
270    let mut tokens = Vec::with_capacity(disabled.len());
271    for v in &disabled {
272        let token = restore
273            .register(RestoreOp::ReEnableVpn(std::sync::Arc::new(v.clone())))
274            .await;
275        tokens.push(token);
276    }
277
278    let names: Vec<String> = disabled.iter().map(|v| v.name.clone()).collect();
279
280    // Normal path: offer to re-enable now (default No keeps them off for the
281    // re-probe). Whatever the user chooses, the offer has happened, so mark the
282    // restore ops resolved — the drain must not blindly re-enable them on a
283    // normal terminal path (that would undo a successful fix).
284    vpn::offer_reenable(&disabled, config).await;
285    for token in tokens {
286        restore.mark_resolved(token).await;
287    }
288
289    ActionOutcome::ok(format!("Disabled consumer VPNs: {}", names.join(", ")))
290        .with_fatal_env_change()
291}
292
293async fn apply_bounce_interface(restore: &RestoreRegistry) -> ActionOutcome {
294    let iface = match adapters::detect_default_interface().await {
295        Some(i) => i,
296        None => return ActionOutcome::fail("Could not detect a default network interface"),
297    };
298
299    // Register the re-enable BEFORE disabling, so an interrupt between disable
300    // and re-enable still brings the adapter back up via the drain.
301    let token = restore
302        .register(RestoreOp::ReEnableInterface {
303            iface: iface.clone(),
304        })
305        .await;
306
307    if let Err(e) = stages::disable_interface(&iface).await {
308        // Disable never happened — nothing to restore.
309        restore.mark_resolved(token).await;
310        return ActionOutcome::fail(format!("Disable {} failed: {}", iface, e));
311    }
312
313    tokio::time::sleep(Duration::from_secs(3)).await;
314
315    // Re-enable with one retry. Leaving an adapter disabled is far worse than a
316    // slow retry, so mirror the legacy 2s-wait retry that the old Stage 2 had.
317    if let Err(first_err) = stages::enable_interface(&iface).await {
318        tokio::time::sleep(Duration::from_secs(2)).await;
319        if let Err(retry_err) = stages::enable_interface(&iface).await {
320            // Still down. Leave the restore op REGISTERED so the drain retries
321            // it on the terminal path, and surface a loud, actionable message.
322            let cmd_hint = reenable_command_hint(&iface);
323            return ActionOutcome::fail(format!(
324                "Your network adapter \"{}\" is still DISABLED — re-enable failed twice ({}; retry: {}). \
325                 nd300 will try again as it exits. If you still have no connection, run: {}",
326                iface, first_err, retry_err, cmd_hint
327            ))
328            .with_fatal_env_change();
329        }
330    }
331
332    // Adapter is back up — the action restored it itself.
333    restore.mark_resolved(token).await;
334    ActionOutcome::ok(format!("{} bounced (disable → 3s wait → re-enable)", iface))
335        .with_fatal_env_change()
336}
337
/// Builds the manual command a user can run to bring `iface` back up.
///
/// It is embedded in the loud failure message shown when the automated
/// re-enable has failed twice. Exactly one `cfg` branch survives compilation
/// and becomes the function's tail expression:
///
/// * Windows — `netsh interface set interface "<iface>" enabled`
/// * macOS — `networksetup` for Wi-Fi, or `sudo ifconfig <iface> up` for wired
///   (setting interface flags needs root, hence `sudo`, as in the Linux hint)
/// * Linux — `sudo ip link set <iface> up`
/// * any other target — a generic `sudo ifconfig <iface> up`, so the function
///   always has a return value instead of failing to compile
fn reenable_command_hint(iface: &str) -> String {
    #[cfg(windows)]
    {
        format!("netsh interface set interface \"{}\" enabled", iface)
    }
    #[cfg(target_os = "macos")]
    {
        format!(
            "networksetup -setairportpower {} on  (Wi-Fi)  or  sudo ifconfig {} up  (wired)",
            iface, iface
        )
    }
    #[cfg(target_os = "linux")]
    {
        format!("sudo ip link set {} up", iface)
    }
    // Fallback for targets without a dedicated branch above.
    #[cfg(not(any(windows, target_os = "macos", target_os = "linux")))]
    {
        format!("sudo ifconfig {} up", iface)
    }
}
357
358async fn apply_deep_stack_reset(config: &Config, restore: &RestoreRegistry) -> ActionOutcome {
359    let saved_ssid = super::wifi::capture_current_ssid().await;
360    match stages::platform_stage3(config, &saved_ssid, restore).await {
361        Ok(steps) => {
362            if steps.is_empty() {
363                ActionOutcome::fail("Stack reset attempted but no steps succeeded")
364                    .with_fatal_env_change()
365            } else {
366                ActionOutcome::ok(format!("Stack reset: {}", steps.join("; ")))
367                    .with_fatal_env_change()
368            }
369        }
370        Err(e) => ActionOutcome::fail(e).with_fatal_env_change(),
371    }
372}
373
/// macOS: resolves the network service name for `iface`.
///
/// When the lookup yields nothing, the interface name itself is returned —
/// networksetup will reject unknown service names cleanly.
#[cfg(target_os = "macos")]
async fn service_name_for(iface: &str) -> String {
    stages::detect_macos_service(iface)
        .await
        .unwrap_or_else(|| iface.to_string())
}
384
/// Non-macOS targets: the service name is simply the interface name.
#[cfg(not(target_os = "macos"))]
async fn service_name_for(iface: &str) -> String {
    String::from(iface)
}
389
390// ── Registry ────────────────────────────────────────────────────────────────
391
392/// Returns every [`Action`] available on the current platform. Order is not
393/// significant — [`super::triage::build_plan`] sorts by cost / risk /
394/// effectiveness when assembling each iteration's plan.
395pub fn all_actions() -> Vec<Action> {
396    let mut actions = vec![
397        Action {
398            id: ActionId::FlushDns,
399            label: "Flush the DNS cache",
400            one_line_why: "Clears stale DNS records that often cause resolution failures.",
401            targets: &[DiagnosticKey::Dns],
402            cost: Cost::Cheap,
403            risk: Risk::Low,
404            max_attempts: 2,
405            stabilization: Duration::from_secs(1),
406        },
407        Action {
408            id: ActionId::SetDnsCloudflare,
409            label: "Switch DNS to Cloudflare (1.1.1.1)",
410            one_line_why: "Bypasses a broken or filtered DNS server provided by your network.",
411            targets: &[DiagnosticKey::Dns],
412            cost: Cost::Cheap,
413            risk: Risk::Low,
414            max_attempts: 1,
415            stabilization: Duration::from_secs(2),
416        },
417        Action {
418            id: ActionId::SetDnsAutomatic,
419            label: "Reset DNS to your router's defaults (DHCP)",
420            one_line_why: "Removes any custom DNS servers and lets your router choose.",
421            targets: &[DiagnosticKey::Dns],
422            cost: Cost::Cheap,
423            risk: Risk::Low,
424            max_attempts: 1,
425            stabilization: Duration::from_secs(2),
426        },
427        Action {
428            id: ActionId::FlushArp,
429            label: "Flush the ARP cache",
430            one_line_why: "Clears stale gateway entries that block traffic to your router.",
431            targets: &[DiagnosticKey::Gateway, DiagnosticKey::Latency],
432            cost: Cost::Cheap,
433            risk: Risk::Low,
434            max_attempts: 1,
435            stabilization: Duration::from_secs(1),
436        },
437        Action {
438            id: ActionId::RestartNetworkServices,
439            label: "Restart networking services",
440            one_line_why: "Brings the OS-level DNS / DHCP services back to a clean state.",
441            targets: &[
442                DiagnosticKey::Dns,
443                DiagnosticKey::Gateway,
444                DiagnosticKey::PublicIp,
445            ],
446            cost: Cost::Medium,
447            risk: Risk::Low,
448            max_attempts: 1,
449            stabilization: Duration::from_secs(3),
450        },
451        Action {
452            id: ActionId::RenewDhcp,
453            label: "Renew the DHCP lease",
454            one_line_why: "Asks your router for a fresh IP address and gateway.",
455            targets: &[
456                DiagnosticKey::Gateway,
457                DiagnosticKey::PublicIp,
458                DiagnosticKey::Adapters,
459                DiagnosticKey::Interfaces,
460            ],
461            cost: Cost::Medium,
462            risk: Risk::Low,
463            max_attempts: 1,
464            stabilization: Duration::from_secs(8),
465        },
466        Action {
467            id: ActionId::DisableConsumerVpns,
468            label: "Temporarily disable consumer VPNs",
469            one_line_why: "Some consumer VPNs (NordVPN, ExpressVPN, Tailscale, etc.) interfere with diagnostics. Enterprise VPNs are never auto-disabled.",
470            targets: &[
471                DiagnosticKey::PublicIp,
472                DiagnosticKey::Latency,
473                DiagnosticKey::Dns,
474            ],
475            cost: Cost::Medium,
476            risk: Risk::Medium,
477            max_attempts: 1,
478            stabilization: Duration::from_secs(2),
479        },
480        Action {
481            id: ActionId::BounceInterface,
482            label: "Restart your network adapter (disable → re-enable)",
483            one_line_why: "Forces the adapter to reset its link, re-associate Wi-Fi, and re-DHCP.",
484            targets: &[
485                DiagnosticKey::Adapters,
486                DiagnosticKey::Interfaces,
487                DiagnosticKey::Gateway,
488                DiagnosticKey::Dns,
489                DiagnosticKey::PublicIp,
490                DiagnosticKey::Latency,
491            ],
492            cost: Cost::Expensive,
493            risk: Risk::Medium,
494            max_attempts: 1,
495            stabilization: Duration::from_secs(10),
496        },
497    ];
498
499    // Deep stack reset — High risk, requires Y/N. Wording is per-platform.
500    let deep_reset_explanation = make_deep_reset_explanation();
501    actions.push(Action {
502        id: ActionId::DeepStackReset,
503        label: deep_reset_explanation.what,
504        one_line_why: "Last-resort recovery when nothing else worked.",
505        targets: &[
506            DiagnosticKey::Dns,
507            DiagnosticKey::Gateway,
508            DiagnosticKey::PublicIp,
509            DiagnosticKey::Adapters,
510            DiagnosticKey::Interfaces,
511        ],
512        cost: Cost::Expensive,
513        risk: Risk::High(deep_reset_explanation),
514        max_attempts: 1,
515        stabilization: Duration::from_secs(15),
516    });
517
518    actions
519}
520
/// Windows wording of the deep-stack-reset risk explanation.
#[cfg(windows)]
fn make_deep_reset_explanation() -> RiskExplanation {
    const SIDE_EFFECTS: &[&str] = &[
        "You will lose internet for ~10–15 seconds.",
        "Open VPN sessions and SSH connections will drop.",
        "A reboot is recommended afterward; nd300 will remind you at the end.",
    ];

    RiskExplanation {
        what: "Reset Windows networking stack",
        why: "This rebuilds Windows' TCP/IP, Winsock, and IPv6 catalogs from scratch — the standard fix when simpler steps haven't recovered the connection.",
        typical_duration: "10–15 seconds",
        reversible: Reversibility::RebootToFullyRevert,
        side_effects: SIDE_EFFECTS,
    }
}
535
/// macOS wording of the deep-stack-reset risk explanation.
#[cfg(target_os = "macos")]
fn make_deep_reset_explanation() -> RiskExplanation {
    const SIDE_EFFECTS: &[&str] = &[
        "You will lose internet for ~10–20 seconds.",
        "Wi-Fi will need to reconnect; nd300 will try to restore it from Keychain.",
        "nd300 snapshots and attempts to restore DNS, proxy, service order, and IP mode settings.",
    ];

    RiskExplanation {
        what: "Recreate the macOS network service",
        why: "This removes and recreates your active network service in System Settings — the standard fix when a service entry is corrupted.",
        typical_duration: "10–20 seconds",
        reversible: Reversibility::NotReversible,
        side_effects: SIDE_EFFECTS,
    }
}
550
551#[cfg(target_os = "linux")]
552fn make_deep_reset_explanation() -> RiskExplanation {
553    RiskExplanation {
554        what: "Recreate the NetworkManager connection profile",
555        why: "This deletes and recreates the active NetworkManager connection profile — the standard fix when a profile is corrupted.",
556        side_effects: &[
557            "You will lose internet briefly.",
558            "Saved settings on the deleted profile are gone.",
559            "For Wi-Fi, you'll need to provide the SSID and passphrase again.",
560        ],
561        reversible: Reversibility::NotReversible,
562        typical_duration: "10–20 seconds",
563    }
564}
565
#[cfg(test)]
mod tests {
    use super::*;
    use crate::config::Config;

    /// With a JSON-mode config the VPN action must report a failure whose
    /// message explains that an interactive session is required.
    #[tokio::test]
    async fn json_mode_does_not_disable_consumer_vpns() {
        // The JSON path returns early, before the registry is touched, so a
        // fresh, empty registry is all this test needs.
        let json_config = Config::new().with_json();
        let registry = RestoreRegistry::new();

        let outcome = apply_disable_consumer_vpns(&json_config, &registry).await;

        assert!(!outcome.ok);
        let explains_why = outcome.message.contains("requires an interactive session");
        assert!(explains_why, "unexpected outcome: {:?}", outcome);
    }
}
586}