Skip to main content

openlatch_client/cli/commands/
lifecycle.rs

//! Daemon lifecycle commands: start, stop, restart.
//!
//! Provides OS-aware process management:
//! - Unix: `process_group(0)` for clean background spawning
//! - Windows: `CREATE_NO_WINDOW` to suppress console windows
//!
//! All path references use `config::openlatch_dir()` per PLAT-02.
8use std::process::Stdio;
9
10use crate::cli::output::OutputConfig;
11use crate::cli::StartArgs;
12use crate::config;
13use crate::error::{OlError, ERR_DAEMON_START_FAILED, ERR_INVALID_CONFIG};
14
15/// Run the `openlatch start` command.
16///
17/// Starts the daemon in the background, or in foreground if `--foreground` is set.
18/// Idempotent: if the daemon is already running, exits 0 with a message.
19///
20/// # Errors
21///
22/// Returns an error if the daemon fails to spawn.
23pub fn run_start(args: &StartArgs, output: &OutputConfig) -> Result<(), OlError> {
24    let cfg = config::Config::load(args.port, None, false)?;
25
26    // Idempotency: check if daemon is already running
27    if let Some(pid) = read_pid_file() {
28        if is_process_alive(pid) {
29            output.print_info(&format!("Daemon is already running (PID {pid})"));
30            return Ok(());
31        }
32    }
33
34    let token = load_or_generate_token()?;
35
36    if args.foreground {
37        run_daemon_foreground(cfg.port, &token)?;
38    } else {
39        let pid = spawn_daemon_background(cfg.port, &token)?;
40        if !wait_for_health(cfg.port, 5) {
41            return Err(OlError::new(
42                ERR_DAEMON_START_FAILED,
43                format!("Daemon spawned (PID {pid}) but health check failed within 5s"),
44            )
45            .with_suggestion("Check ~/.openlatch/logs/daemon.log for errors.")
46            .with_docs("https://docs.openlatch.ai/errors/OL-1502"));
47        }
48        output.print_step(&format!("Daemon started on port {} (PID {pid})", cfg.port));
49    }
50
51    Ok(())
52}
53
54/// Run the `openlatch stop` command.
55///
56/// Sends a graceful shutdown request to the daemon via POST /shutdown.
57/// Idempotent: if the daemon is not running, exits 0 with a message.
58///
59/// # Errors
60///
61/// Returns an error if the shutdown request fails.
62pub fn run_stop(output: &OutputConfig) -> Result<(), OlError> {
63    let Some(pid) = read_pid_file() else {
64        output.print_info("Daemon is not running");
65        return Ok(());
66    };
67
68    if !is_process_alive(pid) {
69        output.print_info("Daemon is not running");
70        // Clean up stale PID file
71        let _ = std::fs::remove_file(config::openlatch_dir().join("daemon.pid"));
72        return Ok(());
73    }
74
75    // Load config to get the port
76    let cfg = config::Config::load(None, None, false)?;
77
78    // Prefer graceful shutdown via POST /shutdown endpoint (works cross-platform, DAEM-14)
79    let token = load_or_generate_token().unwrap_or_default();
80    if send_shutdown_request(cfg.port, &token) {
81        // Wait for process to exit (poll PID file deletion, 5s timeout)
82        let start = std::time::Instant::now();
83        while start.elapsed() < std::time::Duration::from_secs(5) {
84            if !is_process_alive(pid) {
85                break;
86            }
87            std::thread::sleep(std::time::Duration::from_millis(200));
88        }
89    }
90
91    // Clean up PID file if process is gone
92    if !is_process_alive(pid) {
93        let _ = std::fs::remove_file(config::openlatch_dir().join("daemon.pid"));
94        output.print_step("Daemon stopped");
95    } else {
96        // Fallback: force kill on Unix
97        #[cfg(unix)]
98        {
99            unsafe {
100                libc::kill(pid as libc::pid_t, libc::SIGTERM);
101            }
102            std::thread::sleep(std::time::Duration::from_millis(500));
103        }
104
105        // Final cleanup
106        let _ = std::fs::remove_file(config::openlatch_dir().join("daemon.pid"));
107        output.print_step("Daemon stopped");
108    }
109
110    Ok(())
111}
112
113/// Run the `openlatch restart` command.
114///
115/// Stops the daemon, waits for it to exit, then starts it again.
116/// Per Pitfall 4 from RESEARCH.md: waits for stop to complete before starting.
117///
118/// # Errors
119///
120/// Returns an error if start fails.
121pub fn run_restart(output: &OutputConfig) -> Result<(), OlError> {
122    // Stop — ignore "not running" case
123    run_stop(output)?;
124
125    // Wait until PID file is gone or health check fails before starting
126    let timeout = std::time::Duration::from_secs(5);
127    let start = std::time::Instant::now();
128    let cfg = config::Config::load(None, None, false)?;
129
130    while start.elapsed() < timeout {
131        let pid_file_gone = read_pid_file().is_none();
132        let health_down = !check_health(cfg.port);
133        if pid_file_gone || health_down {
134            break;
135        }
136        std::thread::sleep(std::time::Duration::from_millis(200));
137    }
138
139    let start_args = StartArgs {
140        foreground: false,
141        port: None,
142    };
143    run_start(&start_args, output)
144}
145
146/// Spawn the daemon as a detached background process.
147///
148/// Gets the path to the current executable and re-executes with `daemon start --foreground`.
149///
150/// Platform-specific detachment:
151/// - Unix: `process_group(0)` creates a new process group (survives parent exit)
152/// - Windows: `CREATE_NO_WINDOW` suppresses the console window
153///
154/// Writes PID to `config::openlatch_dir().join("daemon.pid")` per PLAT-02.
155///
156/// # Errors
157///
158/// Returns an error if the child process cannot be spawned or PID file cannot be written.
159pub fn spawn_daemon_background(port: u16, token: &str) -> Result<u32, OlError> {
160    let exe = std::env::current_exe().map_err(|e| {
161        OlError::new(
162            ERR_INVALID_CONFIG,
163            format!("Cannot locate current executable: {e}"),
164        )
165    })?;
166
167    #[cfg(unix)]
168    let child = {
169        use std::os::unix::process::CommandExt;
170        std::process::Command::new(&exe)
171            .args([
172                "daemon",
173                "start",
174                "--foreground",
175                "--port",
176                &port.to_string(),
177            ])
178            .env("OPENLATCH_TOKEN", token)
179            .stdin(Stdio::null())
180            .stdout(Stdio::null())
181            .stderr(Stdio::null())
182            .process_group(0)
183            .spawn()
184            .map_err(|e| {
185                OlError::new(
186                    ERR_INVALID_CONFIG,
187                    format!("Failed to spawn daemon process: {e}"),
188                )
189                .with_suggestion("Check that the openlatch binary is executable.")
190            })?
191    };
192
193    #[cfg(windows)]
194    let child = {
195        use std::os::windows::process::CommandExt;
196        // CREATE_NO_WINDOW: suppress console window for background daemon
197        // CREATE_NEW_PROCESS_GROUP: detach from parent so daemon survives parent exit
198        const CREATE_NO_WINDOW: u32 = 0x0800_0000;
199        const CREATE_NEW_PROCESS_GROUP: u32 = 0x0000_0200;
200        std::process::Command::new(&exe)
201            .args([
202                "daemon",
203                "start",
204                "--foreground",
205                "--port",
206                &port.to_string(),
207            ])
208            .env("OPENLATCH_TOKEN", token)
209            .stdin(Stdio::null())
210            .stdout(Stdio::null())
211            .stderr(Stdio::null())
212            .creation_flags(CREATE_NO_WINDOW | CREATE_NEW_PROCESS_GROUP)
213            .spawn()
214            .map_err(|e| {
215                OlError::new(
216                    ERR_INVALID_CONFIG,
217                    format!("Failed to spawn daemon process: {e}"),
218                )
219                .with_suggestion("Check that the openlatch binary is executable.")
220            })?
221    };
222
223    let pid = child.id();
224
225    // PID file is written by the child process in run_daemon_foreground(),
226    // not here — writing it here causes the child's idempotency check to
227    // see its own PID and exit immediately.
228
229    Ok(pid)
230}
231
232/// Read the daemon PID from the PID file.
233///
234/// Returns `None` if the file doesn't exist or can't be parsed.
235pub(crate) fn read_pid_file() -> Option<u32> {
236    let pid_path = config::openlatch_dir().join("daemon.pid");
237    let content = std::fs::read_to_string(&pid_path).ok()?;
238    content.trim().parse::<u32>().ok()
239}
240
241/// Check whether a process with the given PID is alive.
242///
243/// Uses OS-appropriate process existence checks.
244/// Per T-02-06: verifies the process exists, not just the PID file.
245pub(crate) fn is_process_alive(pid: u32) -> bool {
246    #[cfg(unix)]
247    {
248        // send signal 0 — tests process existence without actually sending a signal
249        let result = unsafe { libc::kill(pid as libc::pid_t, 0) };
250        result == 0
251    }
252
253    #[cfg(windows)]
254    {
255        // Use OpenProcess to check if the process exists
256        let handle = unsafe {
257            winapi::um::processthreadsapi::OpenProcess(
258                winapi::um::winnt::PROCESS_QUERY_INFORMATION,
259                0,
260                pid,
261            )
262        };
263        if handle.is_null() {
264            return false;
265        }
266        let mut exit_code: u32 = 0;
267        let alive = unsafe {
268            winapi::um::processthreadsapi::GetExitCodeProcess(handle, &mut exit_code) != 0
269                && exit_code == winapi::um::minwinbase::STILL_ACTIVE
270        };
271        unsafe { winapi::um::handleapi::CloseHandle(handle) };
272        alive
273    }
274
275    // Fallback for non-unix, non-windows (should not happen in practice)
276    #[cfg(not(any(unix, windows)))]
277    {
278        let _ = pid;
279        false
280    }
281}
282
283/// Send a graceful shutdown request to the daemon via POST /shutdown.
284///
285/// Returns true if the request was sent successfully.
286fn send_shutdown_request(port: u16, token: &str) -> bool {
287    let url = format!("http://127.0.0.1:{port}/shutdown");
288    let client = reqwest::blocking::Client::builder()
289        .timeout(std::time::Duration::from_secs(2))
290        .build();
291
292    match client {
293        Ok(c) => c
294            .post(&url)
295            .header("Authorization", format!("Bearer {token}"))
296            .send()
297            .is_ok(),
298        Err(_) => false,
299    }
300}
301
302/// Wait for the daemon's /health endpoint to return 200.
303///
304/// Returns true if health check passed within the timeout, false otherwise.
305pub(crate) fn wait_for_health(port: u16, timeout_secs: u64) -> bool {
306    let url = format!("http://127.0.0.1:{port}/health");
307    let start = std::time::Instant::now();
308    let timeout = std::time::Duration::from_secs(timeout_secs);
309
310    while start.elapsed() < timeout {
311        if let Ok(resp) = reqwest::blocking::get(&url) {
312            if resp.status().is_success() {
313                return true;
314            }
315        }
316        std::thread::sleep(std::time::Duration::from_millis(200));
317    }
318    false
319}
320
321/// Check if the daemon's /health endpoint is reachable (non-blocking, single attempt).
322fn check_health(port: u16) -> bool {
323    let url = format!("http://127.0.0.1:{port}/health");
324    reqwest::blocking::get(url)
325        .map(|r| r.status().is_success())
326        .unwrap_or(false)
327}
328
329/// Load the daemon token or generate a new one if missing.
330fn load_or_generate_token() -> Result<String, OlError> {
331    let ol_dir = config::openlatch_dir();
332    config::ensure_token(&ol_dir)
333}
334
335/// Start the daemon in foreground mode (blocking call).
336///
337/// Creates a tokio runtime and runs the daemon server directly.
338fn run_daemon_foreground(port: u16, token: &str) -> Result<(), OlError> {
339    let mut cfg = config::Config::load(Some(port), None, true)?;
340    cfg.foreground = true;
341
342    let rt = tokio::runtime::Runtime::new().map_err(|e| {
343        OlError::new(
344            ERR_INVALID_CONFIG,
345            format!("Failed to create async runtime: {e}"),
346        )
347    })?;
348
349    let token_owned = token.to_string();
350    rt.block_on(async move {
351        use crate::daemon;
352        use crate::envelope;
353        use crate::logging;
354        use crate::privacy;
355
356        let _guard = logging::daemon_log::init_daemon_logging(&cfg.log_dir);
357
358        if let Ok(deleted) = logging::cleanup_old_logs(&cfg.log_dir, cfg.retention_days) {
359            if deleted > 0 {
360                tracing::info!(deleted = deleted, "cleaned up old log files");
361            }
362        }
363
364        privacy::init_filter(&cfg.extra_patterns);
365
366        let pid = std::process::id();
367
368        // Write PID file so status/stop can find us
369        let pid_path = config::openlatch_dir().join("daemon.pid");
370        if let Err(e) = std::fs::write(&pid_path, pid.to_string()) {
371            tracing::warn!(error = %e, "failed to write PID file");
372        }
373
374        logging::daemon_log::log_startup(
375            env!("CARGO_PKG_VERSION"),
376            cfg.port,
377            pid,
378            envelope::os_string(),
379            envelope::arch_string(),
380        );
381
382        eprintln!(
383            "openlatch v{} \u{2022} listening on 127.0.0.1:{} \u{2022} pid {}",
384            env!("CARGO_PKG_VERSION"),
385            cfg.port,
386            pid,
387        );
388
389        match daemon::start_server(cfg.clone(), token_owned).await {
390            Ok((uptime_secs, events)) => {
391                eprintln!(
392                    "openlatch daemon stopped \u{2022} uptime {} \u{2022} {} events processed",
393                    daemon::format_uptime(uptime_secs),
394                    events
395                );
396            }
397            Err(e) => {
398                tracing::error!(error = %e, "daemon exited with error");
399                eprintln!("Error: daemon exited unexpectedly: {e}");
400            }
401        }
402
403        // Clean up PID file on exit
404        let _ = std::fs::remove_file(&pid_path);
405    });
406
407    Ok(())
408}