Skip to main content

cli/cli/commands/daemon/
cmd.rs

1// SPDX-License-Identifier: Apache-2.0
2//! `heddle daemon …` CLI dispatchers.
3//!
4//! Three verbs:
5//!
6//! * `serve` — runs the foreground daemon. Linux + `--features mount`
7//!   only; everywhere else it returns the standard
8//!   `virtualized_unsupported_error`.
9//! * `status` — sends `health` to a running daemon and prints the
10//!   reply. No-op success when the daemon isn't running, so
11//!   operators can run `heddle daemon status` as a probe.
12//! * `stop` — sends `shutdown`, waits for the endpoint file to
13//!   disappear *and* the daemon PID to die, then sweeps any
14//!   leftover mounts as a safety net. The combined wait gives
15//!   callers a hard post-condition (see `cmd_daemon_stop`).
16
17use std::time::Duration;
18
19use anyhow::{Result, anyhow};
20use repo::daemon::{
21    MountDaemonRequest, MountDaemonResponse, load_endpoint, mount_daemon_endpoint_path, pid_alive,
22};
23
24use super::client::{rpc, sweep_stale_mounts};
25use crate::cli::Cli;
26
/// `heddle daemon serve`: runs the mount daemon for the resolved
/// repository root.
///
/// This variant is compiled only on Linux with the `mount` feature
/// enabled.
///
/// # Errors
///
/// Propagates any failure from resolving the repository root or from
/// `run_mount_daemon` itself.
#[cfg(all(target_os = "linux", feature = "mount"))]
pub fn cmd_daemon_serve(cli: &Cli) -> Result<()> {
    resolve_repo_root(cli).and_then(|root| super::server::run_mount_daemon(&root))
}
32
33#[cfg(not(all(target_os = "linux", feature = "mount")))]
34pub fn cmd_daemon_serve(_cli: &Cli) -> Result<()> {
35    Err(
36        crate::cli::commands::mount_lifecycle::virtualized_unsupported_error()
37            .context("heddle daemon serve"),
38    )
39}
40
41pub fn cmd_daemon_status(cli: &Cli) -> Result<()> {
42    let repo_root = resolve_repo_root(cli)?;
43    let response = rpc(&repo_root, &MountDaemonRequest::Health {}, false)?;
44    match response {
45        Some(MountDaemonResponse::Health {
46            version,
47            ok,
48            uptime_s,
49            mount_count,
50        }) => {
51            println!(
52                "daemon: ok={ok} version={version} uptime_s={uptime_s} mount_count={mount_count}"
53            );
54            Ok(())
55        }
56        Some(MountDaemonResponse::Error { code, message, .. }) => {
57            Err(anyhow!("daemon health failed: [{code}] {message}"))
58        }
59        Some(other) => Err(anyhow!("unexpected daemon response: {other:?}")),
60        None => {
61            println!(
62                "daemon: not running (no live endpoint at {})",
63                mount_daemon_endpoint_path(&repo_root).display()
64            );
65            Ok(())
66        }
67    }
68}
69
70/// Post-condition contract for `cmd_daemon_stop`: when this returns
71/// `Ok(())` after a live-daemon shutdown, the caller may rely on
72/// **all four** of the following being true:
73///
74/// 1. The daemon process (whose PID was advertised in the endpoint
75///    file) has exited (`kill -0` returns `ESRCH`).
76/// 2. `<repo>/.heddle/state/heddled.endpoint.json` no longer exists.
77/// 3. `<repo>/.heddle/state/mounts.json` no longer exists. The
78///    daemon's `MountRegistry::shutdown_all` removes it before
79///    `remove_endpoint`, and the CLI-side `sweep_stale_mounts` runs
80///    as a safety-net (idempotent — both use `fs::remove_file` and
81///    swallow `NotFound`).
82/// 4. Any FUSE mountpoints the daemon owned are unmounted (best-effort
83///    via the `BackgroundSession` drop in `LiveMount::shutdown`, with
84///    `fusermount -u` as a fallback inside `sweep_stale_mounts`).
85///
86/// Two timeouts are layered to make the contract observable rather
87/// than hopeful: 2 s for the endpoint file to disappear (proof the
88/// daemon's `run_mount_daemon` reached its post-shutdown cleanup), and
89/// a further 2 s for the PID to be reaped. Either can elapse without
90/// failing the call — the safety-net sweep still runs — but together
91/// they make the integration-test assertions deterministic.
92pub fn cmd_daemon_stop(cli: &Cli) -> Result<()> {
93    let repo_root = resolve_repo_root(cli)?;
94    let endpoint_path = mount_daemon_endpoint_path(&repo_root);
95    // Capture the daemon PID *before* sending shutdown so we can
96    // probe it via `kill -0` after the endpoint file is gone. If the
97    // endpoint file has no recorded PID (v1-era files, or a future
98    // schema change) we just skip the PID wait — the endpoint-gone
99    // observation is still load-bearing.
100    let recorded_pid = load_endpoint(&endpoint_path).ok().and_then(|e| e.pid);
101    match rpc(&repo_root, &MountDaemonRequest::Shutdown {}, false)? {
102        Some(MountDaemonResponse::Shutdown { ok: true, .. }) => {}
103        Some(MountDaemonResponse::Error { code, message, .. }) => {
104            return Err(anyhow!("daemon refused shutdown: [{code}] {message}"));
105        }
106        Some(other) => return Err(anyhow!("unexpected daemon response: {other:?}")),
107        None => {
108            println!("daemon: not running");
109            return Ok(());
110        }
111    }
112    // Phase 1: wait up to 2 s for the endpoint file to disappear.
113    // The daemon's `run_mount_daemon` removes it *after*
114    // `MountRegistry::shutdown_all` (which removes `mounts.json`),
115    // so endpoint-gone implies mounts.json-gone on the daemon side.
116    for _ in 0..40 {
117        if !endpoint_path.exists() {
118            break;
119        }
120        std::thread::sleep(Duration::from_millis(50));
121    }
122    // Phase 2: wait up to a further 2 s for the daemon process
123    // itself to exit. Without this, the endpoint-gone observation
124    // races the daemon's final `info!("heddle daemon exiting")` +
125    // process teardown — a caller probing PID liveness right after
126    // `daemon stop` returns could still see the PID briefly. Polling
127    // here turns the post-condition from "shutdown is in flight"
128    // into "shutdown is complete".
129    if let Some(pid) = recorded_pid {
130        for _ in 0..40 {
131            if !pid_alive(pid) {
132                break;
133            }
134            std::thread::sleep(Duration::from_millis(50));
135        }
136    }
137    // Sweep any leftover registry entries as a last-resort safety
138    // net for crash-during-shutdown scenarios. Idempotent: in the
139    // happy path the daemon has already removed `mounts.json`, so
140    // this is a no-op.
141    sweep_stale_mounts(&repo_root);
142    println!("daemon: stopped");
143    Ok(())
144}
145
146fn resolve_repo_root(cli: &Cli) -> Result<std::path::PathBuf> {
147    if let Some(root) = cli.repo.as_ref() {
148        return Ok(root.clone());
149    }
150    let repo = repo::Repository::open(&std::env::current_dir()?)?;
151    Ok(repo.root().to_path_buf())
152}