cli/cli/commands/daemon/cmd.rs
1// SPDX-License-Identifier: Apache-2.0
2//! `heddle daemon …` CLI dispatchers.
3//!
4//! Three verbs:
5//!
6//! * `serve` — runs the foreground daemon. Linux + `--features mount`
7//! only; everywhere else it returns the standard
8//! `virtualized_unsupported_error`.
9//! * `status` — sends `health` to a running daemon and prints the
10//! reply. No-op success when the daemon isn't running, so
11//! operators can run `heddle daemon status` as a probe.
12//! * `stop` — sends `shutdown`, waits for the endpoint file to
13//! disappear *and* the daemon PID to die, then sweeps any
14//! leftover mounts as a safety net. The combined wait gives
15//! callers a hard post-condition (see `cmd_daemon_stop`).
16
17use std::time::Duration;
18
19use anyhow::{Result, anyhow};
20use repo::daemon::{
21 MountDaemonRequest, MountDaemonResponse, load_endpoint, mount_daemon_endpoint_path, pid_alive,
22};
23
24use super::client::{rpc, sweep_stale_mounts};
25use crate::cli::Cli;
26
/// `heddle daemon serve`: runs the mount daemon for the resolved repository.
///
/// Compiled only on Linux with the `mount` feature enabled. Any failure to
/// resolve the repository root, or any error returned by the daemon itself,
/// is propagated to the caller unchanged.
#[cfg(all(target_os = "linux", feature = "mount"))]
pub fn cmd_daemon_serve(cli: &Cli) -> Result<()> {
    resolve_repo_root(cli).and_then(|root| super::server::run_mount_daemon(&root))
}
32
33#[cfg(not(all(target_os = "linux", feature = "mount")))]
34pub fn cmd_daemon_serve(_cli: &Cli) -> Result<()> {
35 Err(
36 crate::cli::commands::mount_lifecycle::virtualized_unsupported_error()
37 .context("heddle daemon serve"),
38 )
39}
40
41pub fn cmd_daemon_status(cli: &Cli) -> Result<()> {
42 let repo_root = resolve_repo_root(cli)?;
43 let response = rpc(&repo_root, &MountDaemonRequest::Health {}, false)?;
44 match response {
45 Some(MountDaemonResponse::Health {
46 version,
47 ok,
48 uptime_s,
49 mount_count,
50 }) => {
51 println!(
52 "daemon: ok={ok} version={version} uptime_s={uptime_s} mount_count={mount_count}"
53 );
54 Ok(())
55 }
56 Some(MountDaemonResponse::Error { code, message, .. }) => {
57 Err(anyhow!("daemon health failed: [{code}] {message}"))
58 }
59 Some(other) => Err(anyhow!("unexpected daemon response: {other:?}")),
60 None => {
61 println!(
62 "daemon: not running (no live endpoint at {})",
63 mount_daemon_endpoint_path(&repo_root).display()
64 );
65 Ok(())
66 }
67 }
68}
69
70/// Post-condition contract for `cmd_daemon_stop`: when this returns
71/// `Ok(())` after a live-daemon shutdown, the caller may rely on
72/// **all four** of the following being true:
73///
74/// 1. The daemon process (whose PID was advertised in the endpoint
75/// file) has exited (`kill -0` returns `ESRCH`).
76/// 2. `<repo>/.heddle/state/heddled.endpoint.json` no longer exists.
77/// 3. `<repo>/.heddle/state/mounts.json` no longer exists. The
78/// daemon's `MountRegistry::shutdown_all` removes it before
79/// `remove_endpoint`, and the CLI-side `sweep_stale_mounts` runs
80/// as a safety-net (idempotent — both use `fs::remove_file` and
81/// swallow `NotFound`).
82/// 4. Any FUSE mountpoints the daemon owned are unmounted (best-effort
83/// via the `BackgroundSession` drop in `LiveMount::shutdown`, with
84/// `fusermount -u` as a fallback inside `sweep_stale_mounts`).
85///
86/// Two timeouts are layered to make the contract observable rather
87/// than hopeful: 2 s for the endpoint file to disappear (proof the
88/// daemon's `run_mount_daemon` reached its post-shutdown cleanup), and
89/// a further 2 s for the PID to be reaped. Either can elapse without
90/// failing the call — the safety-net sweep still runs — but together
91/// they make the integration-test assertions deterministic.
92pub fn cmd_daemon_stop(cli: &Cli) -> Result<()> {
93 let repo_root = resolve_repo_root(cli)?;
94 let endpoint_path = mount_daemon_endpoint_path(&repo_root);
95 // Capture the daemon PID *before* sending shutdown so we can
96 // probe it via `kill -0` after the endpoint file is gone. If the
97 // endpoint file has no recorded PID (v1-era files, or a future
98 // schema change) we just skip the PID wait — the endpoint-gone
99 // observation is still load-bearing.
100 let recorded_pid = load_endpoint(&endpoint_path).ok().and_then(|e| e.pid);
101 match rpc(&repo_root, &MountDaemonRequest::Shutdown {}, false)? {
102 Some(MountDaemonResponse::Shutdown { ok: true, .. }) => {}
103 Some(MountDaemonResponse::Error { code, message, .. }) => {
104 return Err(anyhow!("daemon refused shutdown: [{code}] {message}"));
105 }
106 Some(other) => return Err(anyhow!("unexpected daemon response: {other:?}")),
107 None => {
108 println!("daemon: not running");
109 return Ok(());
110 }
111 }
112 // Phase 1: wait up to 2 s for the endpoint file to disappear.
113 // The daemon's `run_mount_daemon` removes it *after*
114 // `MountRegistry::shutdown_all` (which removes `mounts.json`),
115 // so endpoint-gone implies mounts.json-gone on the daemon side.
116 for _ in 0..40 {
117 if !endpoint_path.exists() {
118 break;
119 }
120 std::thread::sleep(Duration::from_millis(50));
121 }
122 // Phase 2: wait up to a further 2 s for the daemon process
123 // itself to exit. Without this, the endpoint-gone observation
124 // races the daemon's final `info!("heddle daemon exiting")` +
125 // process teardown — a caller probing PID liveness right after
126 // `daemon stop` returns could still see the PID briefly. Polling
127 // here turns the post-condition from "shutdown is in flight"
128 // into "shutdown is complete".
129 if let Some(pid) = recorded_pid {
130 for _ in 0..40 {
131 if !pid_alive(pid) {
132 break;
133 }
134 std::thread::sleep(Duration::from_millis(50));
135 }
136 }
137 // Sweep any leftover registry entries as a last-resort safety
138 // net for crash-during-shutdown scenarios. Idempotent: in the
139 // happy path the daemon has already removed `mounts.json`, so
140 // this is a no-op.
141 sweep_stale_mounts(&repo_root);
142 println!("daemon: stopped");
143 Ok(())
144}
145
146fn resolve_repo_root(cli: &Cli) -> Result<std::path::PathBuf> {
147 if let Some(root) = cli.repo.as_ref() {
148 return Ok(root.clone());
149 }
150 let repo = repo::Repository::open(&std::env::current_dir()?)?;
151 Ok(repo.root().to_path_buf())
152}