Skip to main content

trusty_memory/commands/
service.rs

//! Handler for `trusty-memory service` (macOS launchd integration).
//!
//! Why: launchd is the canonical way to keep a long-lived foreground daemon
//! alive on macOS — it survives logout, restarts on crash, and integrates with
//! `launchctl` for diagnostics. Wrapping the plist mechanics in `service`
//! subcommands keeps users from having to hand-edit XML. This mirrors the
//! pattern used by `trusty-search service`, sharing the
//! [`trusty_common::launchd`] implementation so the two tools cannot drift.
//! What: macOS routes to `service_install` / `service_start` / `service_stop`
//! / `service_logs`. Non-macOS prints a "not supported" error and exits 1.
//! Test: on Linux, every action returns Err with the platform message; on
//! macOS, `service install` writes the plist without loading it, `start`
//! bootstraps it, `stop` boots it out, and `logs` tails the log files.
14
15use anyhow::Result;
16use clap::Subcommand;
17#[cfg(target_os = "macos")]
18use colored::Colorize;
19
/// Subcommands for `trusty-memory service` (macOS launchd integration).
///
/// Why: the four lifecycle actions (install, start, stop, logs) are the
/// minimum surface needed to manage a launchd-backed daemon without
/// hand-editing plists or shelling out to `launchctl` directly.
/// What: a clap-derived enum dispatched by [`handle_service`].
/// Test: clap's `--help` enumerates all four; integration via
/// `cargo run -p trusty-memory -- service --help`.
// NOTE(review): clap's derive macros turn `///` comments into help text, so
// the one-line comments on the variants below are user-visible output — treat
// them as runtime strings when editing. The variant order is presumably the
// order `--help` lists them in; confirm before reordering.
#[derive(Debug, Clone, Subcommand)]
pub enum ServiceAction {
    /// Install the LaunchAgent plist (does not load it).
    Install,
    /// Install and load the LaunchAgent (start the daemon).
    Start,
    /// Unload the LaunchAgent (stop the daemon).
    Stop,
    /// Tail the launchd stdout / stderr logs.
    Logs,
}
39
/// Reverse-DNS label for the LaunchAgent.
///
/// Why: launchd identifies agents by their `Label`, which must also be the
/// plist filename's stem. Centralising the constant keeps install / start /
/// stop in lockstep.
/// What: `com.trusty.memory` — matches the naming convention used by
/// `trusty-search` (`com.trusty.trusty-search`) and follows reverse-DNS.
/// Test: covered indirectly by `service install` integration runs.
#[cfg(target_os = "macos")]
pub const LAUNCHD_LABEL: &str = "com.trusty.memory";
50
51/// Dispatch a `trusty-memory service <action>` invocation.
52///
53/// Why: the binary's `main.rs` should not contain `#[cfg]` blocks โ€” it
54/// always calls this function and lets the module decide what is and isn't
55/// supported on the current platform.
56/// What: on macOS, dispatches to the per-action helper. On every other
57/// platform, returns an error with a friendly message pointing operators to
58/// their native service manager.
59/// Test: on Linux CI, asserts the Err message contains "not supported".
60pub fn handle_service(action: &ServiceAction) -> Result<()> {
61    #[cfg(target_os = "macos")]
62    {
63        match action {
64            ServiceAction::Install => service_install(),
65            ServiceAction::Start => service_start(),
66            ServiceAction::Stop => service_stop(),
67            ServiceAction::Logs => service_logs(),
68        }
69    }
70    #[cfg(not(target_os = "macos"))]
71    {
72        let _ = action;
73        anyhow::bail!(
74            "`trusty-memory service` is not supported on this platform โ€” \
75             use your distro's service manager (systemd, OpenRC, etc.) directly."
76        );
77    }
78}
79
/// Locate (and create, if missing) the directory that receives the
/// launchd-managed daemon's log files.
///
/// Why: the plist names concrete `stdout` / `stderr` files, so their parent
/// directory has to exist before the daemon starts. Resolving the path in a
/// single function keeps `install` and `logs` pointing at the same place.
/// What: returns `<data_dir>/trusty-memory/logs`, with `<data_dir>` taken
/// from `dirs::data_dir()` (`~/Library/Application Support` on macOS). The
/// directory tree is created when absent.
/// Test: covered indirectly by `service install` integration runs.
///
/// # Errors
/// Fails when the user data directory cannot be resolved or the log
/// directory cannot be created.
#[cfg(target_os = "macos")]
pub(crate) fn launchd_log_dir() -> Result<std::path::PathBuf> {
    let Some(data_root) = dirs::data_dir() else {
        anyhow::bail!("could not resolve user data directory");
    };
    let logs = data_root.join("trusty-memory").join("logs");
    if let Err(e) = std::fs::create_dir_all(&logs) {
        anyhow::bail!("create log dir {}: {e}", logs.display());
    }
    Ok(logs)
}
98
/// Assemble the `LaunchdConfig` that describes the trusty-memory agent.
///
/// Why: install / start / stop must all agree on the plist label, log
/// paths, and argument vector. Constructing the config in exactly one place
/// keeps them aligned and leaves XML rendering and the `launchctl` glue to
/// the shared [`trusty_common::launchd`] module.
///
/// 🔴 The args MUST be `serve --foreground`, never bare `serve`. Plain
/// `serve` self-spawns a detached child and exits 0 (the same
/// background-mode behaviour as `trusty-search start`). launchd reads that
/// exit as "service stopped" and re-launches the agent in a tight loop,
/// which leaves orphan daemon processes behind and breaks auto-restart on
/// reboot (issue #132). With `--foreground` the HTTP daemon stays in this
/// process, so launchd supervises the real daemon PID and `KeepAlive`
/// behaves as intended.
///
/// 🔴 fd limits: the default soft fd ceiling launchd gives user agents on
/// macOS is 256. trusty-memory opens ~3 redb files per palace (data, KG,
/// vector index) on top of sockets and log descriptors, so at ~85 palaces
/// the process hits EMFILE and every palace open call fails. The generated
/// plist therefore always sets both `SoftResourceLimits` and
/// `HardResourceLimits` to [`trusty_common::launchd::LAUNCHD_FD_LIMIT`]
/// (8192), making the limit permanent across `service start` regeneration.
/// `ThrottleInterval` (10 s) ensures KeepAlive cannot hot-loop respawn into
/// a zombie herd.
///
/// What: returns a [`trusty_common::launchd::LaunchdConfig`] that runs the
/// given binary with `serve --foreground`, uses `KeepAlive::OnSuccess` so a
/// clean shutdown does not crash-loop, and carries the
/// `FASTEMBED_CACHE_DIR=$HOME/.cache/fastembed` environment entry so the
/// embedder model download does not try to write into launchd's read-only
/// sandbox `TMPDIR` (GH #58).
/// Test: `build_launchd_config_uses_canonical_shape` asserts the
/// `--foreground` flag, fd limits, and throttle interval (issue #132
/// regression guard + fd-exhaustion fix);
/// `build_launchd_config_sets_fastembed_cache_dir` asserts the env var
/// wiring. End-to-end exercised via `service install` / `service start`.
#[cfg(target_os = "macos")]
pub(crate) fn build_launchd_config(
    exe: std::path::PathBuf,
    log_dir: std::path::PathBuf,
) -> trusty_common::launchd::LaunchdConfig {
    use trusty_common::launchd;

    // launchd must supervise the daemon process itself, hence `--foreground`.
    let daemon_args = vec![String::from("serve"), String::from("--foreground")];

    launchd::LaunchdConfig {
        label: String::from(LAUNCHD_LABEL),
        exe_path: exe,
        args: daemon_args,
        log_dir,
        keep_alive: launchd::KeepAlive::OnSuccess,
        // A 10 s throttle stops KeepAlive from hot-loop respawning when the
        // daemon exits quickly (e.g. single-instance guard exit 0).
        throttle_interval: 10,
        env_vars: fastembed_env_vars(),
        // fd-exhaustion fix: both soft and hard limits go to 8192, enough
        // for ~2730 palaces before EMFILE. The value is emitted on every
        // install/start so a hand-patched plist is never silently reverted.
        fd_limit: Some(launchd::LAUNCHD_FD_LIMIT),
    }
}
158
/// Produce the environment entries written into the LaunchAgent plist.
///
/// Why: the per-agent `TMPDIR` that launchd provides is a sandboxed
/// `/var/folders/.../T/` path that is **read-only** for the agent's UID.
/// fastembed derives its default model retrieval path from that `TMPDIR`,
/// so the first `TextEmbedding::try_new` call fails with
/// `EROFS (os error 30)` and the daemon never becomes ready (GH #58).
/// Pinning the cache to a writable, user-owned directory in the plist fixes
/// this for every daemon start. Two names are emitted: `FASTEMBED_CACHE_DIR`
/// is fastembed's native variable, and `FASTEMBED_CACHE_PATH` is the
/// alternative documented in our install flow and accepted by
/// `resolve_fastembed_cache_dir` (GH #62).
/// What: returns `[("FASTEMBED_CACHE_DIR", "$HOME/.cache/fastembed"),
/// ("FASTEMBED_CACHE_PATH", "$HOME/.cache/fastembed")]` with `$HOME`
/// expanded for the install-time user. When the home directory cannot be
/// resolved (very unusual) the list is empty — `resolve_fastembed_cache_dir`
/// then falls back to its own logic at daemon startup.
/// Test: `build_launchd_config_sets_fastembed_cache_dir` covers the happy
/// path for both env var names.
#[cfg(target_os = "macos")]
fn fastembed_env_vars() -> Vec<(String, String)> {
    let Some(home) = dirs::home_dir() else {
        return Vec::new();
    };
    let cache_dir = home
        .join(".cache")
        .join("fastembed")
        .to_string_lossy()
        .into_owned();
    // Same value under both names; order matches the documented contract.
    ["FASTEMBED_CACHE_DIR", "FASTEMBED_CACHE_PATH"]
        .iter()
        .map(|name| (name.to_string(), cache_dir.clone()))
        .collect()
}
190
/// Resolve the path of the currently running binary.
///
/// Why: the LaunchAgent plist has to point at a concrete executable, and
/// every `service` action builds its config from the same path.
/// What: wraps `std::env::current_exe`, converting its I/O error into an
/// `anyhow` error prefixed with "could not resolve current exe".
/// Test: covered indirectly by the `service` integration runs.
#[cfg(target_os = "macos")]
fn current_exe() -> Result<std::path::PathBuf> {
    match std::env::current_exe() {
        Ok(path) => Ok(path),
        Err(e) => Err(anyhow::anyhow!("could not resolve current exe: {e}")),
    }
}
195
/// `service install` — write the plist without loading it.
///
/// Why: operators sometimes want to inspect or hand-edit the plist before
/// launchd takes ownership. Splitting "install" from "start" gives them that
/// window without forcing a stop-start dance.
/// What: resolves the binary path and log directory, then calls
/// `LaunchdConfig::install()` which writes `~/Library/LaunchAgents/<label>.plist`
/// and creates the log directory. Also pre-creates the fastembed cache
/// directory (best-effort) and prints the log location plus the follow-up
/// `start` command. Does not call `bootstrap`.
/// Test: integration via `cargo run -p trusty-memory -- service install`.
///
/// # Errors
/// Fails when the current executable, the log directory, or the plist path
/// cannot be resolved, or when writing the plist fails.
#[cfg(target_os = "macos")]
fn service_install() -> Result<()> {
    let exe = current_exe()?;
    let log_dir = launchd_log_dir()?;
    let cfg = build_launchd_config(exe, log_dir);
    let plist_path = cfg.plist_path()?;
    cfg.install()?;
    println!(
        "{} Wrote LaunchAgent plist: {}",
        "✓".green(),
        plist_path.display()
    );
    // Best-effort: never aborts the install, only prints a status line.
    ensure_fastembed_cache_dir();
    println!(
        "  Logs:    {}\n  Start:   {}",
        cfg.log_dir.display().to_string().dimmed(),
        "trusty-memory service start".cyan(),
    );
    Ok(())
}
225
/// Ensure the fastembed cache directory exists at install time.
///
/// Why: GH #62 — the launchd plist now pins `FASTEMBED_CACHE_PATH` to
/// `$HOME/.cache/fastembed`, but if that directory does not yet exist the
/// daemon's first `TextEmbedding::try_new` will still trip over fastembed's
/// cache-creation path under launchd's restricted environment. Creating the
/// directory up-front (cheap, no network) guarantees the env var resolves
/// to a writable path on the very first daemon start. A full model pre-warm
/// is performed by `trusty-memory setup`; here we only do the minimum
/// (mkdir -p) so `service install` stays fast and side-effect-light.
/// What: best-effort `create_dir_all` against `$HOME/.cache/fastembed`.
/// Success prints a confirmation to stdout; failure prints a hint to stderr
/// and does not abort install. Does nothing when the home directory cannot
/// be resolved.
/// Test: side-effecting; covered manually via `trusty-memory service install`.
#[cfg(target_os = "macos")]
fn ensure_fastembed_cache_dir() {
    let Some(home) = dirs::home_dir() else {
        return;
    };
    let cache = home.join(".cache").join("fastembed");
    match std::fs::create_dir_all(&cache) {
        Ok(()) => println!(
            "{} fastembed cache dir ready at {}",
            "✓".green(),
            cache.display().to_string().dimmed()
        ),
        // Deliberately non-fatal: the hint goes to stderr and install continues.
        Err(e) => eprintln!(
            "  {} could not pre-create {} ({e}); daemon will retry on first request.",
            "·".dimmed(),
            cache.display()
        ),
    }
}
258
/// `service start` — install the plist (if needed) and bootstrap the agent.
///
/// Why: the common "I want it running" path should be one command, not two.
/// `install` + `bootstrap` is idempotent under the shared launchd module
/// (bootstrap calls bootout first), so calling start repeatedly is safe.
/// What: writes the plist via `install()`, makes sure the fastembed cache
/// directory named in the plist's environment exists (best-effort, the same
/// step `service install` performs), then loads the agent into the user's
/// `gui/<uid>` domain via `bootstrap()`. The agent will start immediately
/// and restart on non-zero exits per `KeepAlive::OnSuccess`.
/// Test: integration via `cargo run -p trusty-memory -- service start`.
///
/// # Errors
/// Fails when the current executable, the log directory, or the plist path
/// cannot be resolved, or when `install()` / `bootstrap()` fail.
#[cfg(target_os = "macos")]
fn service_start() -> Result<()> {
    let exe = current_exe()?;
    let log_dir = launchd_log_dir()?;
    let cfg = build_launchd_config(exe, log_dir);
    let plist_path = cfg.plist_path()?;
    cfg.install()?;
    println!(
        "{} Wrote LaunchAgent plist: {}",
        "✓".green(),
        plist_path.display()
    );
    // `start` is "install + load": create the cache dir before the daemon is
    // launched so a fresh machine that never ran `service install` still gets
    // a writable FASTEMBED_CACHE_* target on first boot. Never fails.
    ensure_fastembed_cache_dir();

    cfg.bootstrap()?;
    let domain = format!("gui/{}", trusty_common::launchd::current_uid());
    println!(
        "{} Loaded {} into {} — daemon will start automatically.",
        "✓".green(),
        LAUNCHD_LABEL,
        domain
    );
    println!(
        "  Logs:    {}\n  Stop:    {}",
        cfg.log_dir.display().to_string().dimmed(),
        "trusty-memory service stop".cyan(),
    );
    Ok(())
}
296
/// `service stop` — boot out the agent (stop and unload).
///
/// Why: operators need a friendly counterpart to `start` that does not
/// require remembering the full `launchctl bootout gui/<uid>/<label>`
/// invocation. The shared launchd module treats "not loaded" as success, so
/// calling stop on an unloaded agent is also a no-op.
/// What: builds the same config used by `start`, then calls `bootout()`.
/// Leaves the plist file in place — re-`start` will reload it.
/// Test: integration via `cargo run -p trusty-memory -- service stop`.
///
/// # Errors
/// Fails when the current executable, the log directory, or the plist path
/// cannot be resolved, or when `bootout()` fails.
#[cfg(target_os = "macos")]
fn service_stop() -> Result<()> {
    let exe = current_exe()?;
    let log_dir = launchd_log_dir()?;
    let cfg = build_launchd_config(exe, log_dir);
    cfg.bootout()?;
    println!(
        "{} Unloaded {} (plist file preserved at {}).",
        "✓".green(),
        LAUNCHD_LABEL,
        cfg.plist_path()?.display().to_string().dimmed()
    );
    Ok(())
}
320
/// `service logs` — tail the launchd stdout/stderr log files.
///
/// Why: launchd routes the daemon's stdout/stderr to plain files; a friendly
/// `tail -F` wrapper avoids forcing operators to remember the path.
/// What: resolves the log directory, spawns `tail -F <stdout> <stderr>` as a
/// child process, and waits for it to finish. Emits a hint on stderr and
/// returns `Ok` when neither file exists yet (daemon never started).
/// Test: side-effecting; covered manually via
/// `cargo run -p trusty-memory -- service logs`.
///
/// # Errors
/// Fails when the log directory cannot be resolved, when `tail` cannot be
/// launched, or when `tail` exits with a non-success status.
#[cfg(target_os = "macos")]
fn service_logs() -> Result<()> {
    let log_dir = launchd_log_dir()?;
    let stdout = log_dir.join("stdout.log");
    let stderr = log_dir.join("stderr.log");
    if !stdout.exists() && !stderr.exists() {
        eprintln!(
            "{} No logs at {} yet — start the service first ({}).",
            "·".dimmed(),
            log_dir.display(),
            "trusty-memory service start".cyan()
        );
        return Ok(());
    }
    // Both paths are passed even if only one exists; `-F` keeps retrying a
    // missing file by name.
    let status = std::process::Command::new("tail")
        .arg("-F")
        .arg(&stdout)
        .arg(&stderr)
        .status()
        .map_err(|e| anyhow::anyhow!("tail failed: {e}"))?;
    if !status.success() {
        anyhow::bail!("tail exited with {status}");
    }
    Ok(())
}
354
#[cfg(test)]
mod tests {
    use super::*;

    /// Why: every macOS test needs the same config built from the same
    /// dummy paths; one helper keeps the fixtures from drifting apart.
    /// What: calls `build_launchd_config` with fixed exe / log-dir paths.
    /// Test: pure construction, no fs side effects.
    #[cfg(target_os = "macos")]
    fn dummy_config() -> trusty_common::launchd::LaunchdConfig {
        use std::path::PathBuf;

        build_launchd_config(
            PathBuf::from("/usr/local/bin/trusty-memory"),
            PathBuf::from("/tmp/trusty-memory/logs"),
        )
    }

    /// Why: on non-macOS platforms, every `service` action must surface a
    /// clear, actionable error instead of silently succeeding or panicking.
    /// What: invokes `handle_service` with each action and asserts the Err
    /// message contains the "not supported" sentinel.
    /// Test: macOS skips this (the actions perform real `launchctl` work).
    #[cfg(not(target_os = "macos"))]
    #[test]
    fn handle_service_errors_on_unsupported_platform() {
        for action in [
            ServiceAction::Install,
            ServiceAction::Start,
            ServiceAction::Stop,
            ServiceAction::Logs,
        ] {
            let err = handle_service(&action).expect_err("must fail on non-macOS");
            let msg = err.to_string();
            assert!(
                msg.contains("not supported"),
                "expected platform error, got: {msg}"
            );
        }
    }

    /// Why: the LaunchdConfig we hand to `trusty_common::launchd` must always
    /// describe the canonical trusty-memory agent (label, args, restart
    /// policy, fd limits, throttle). Drift here corrupts every plist that
    /// the binary writes.
    /// Issue #132 specifically required that the args invoke
    /// `serve --foreground` — plain `serve` self-spawns and exits 0, which
    /// launchd interprets as "service stopped" and re-launches in a tight
    /// loop. The fd-limit and throttle assertions guard against the
    /// fd-exhaustion / zombie-herd regression (fix A).
    /// What: builds the config with dummy paths and asserts the
    /// load-bearing fields, including the `--foreground` flag, fd limit,
    /// and throttle interval.
    /// Test: pure construction, no fs side effects.
    #[cfg(target_os = "macos")]
    #[test]
    fn build_launchd_config_uses_canonical_shape() {
        use trusty_common::launchd::{KeepAlive, LAUNCHD_FD_LIMIT};

        let cfg = dummy_config();
        assert_eq!(cfg.label, LAUNCHD_LABEL);
        assert_eq!(
            cfg.args,
            vec!["serve".to_string(), "--foreground".to_string()],
            "launchd plist must invoke `serve --foreground` (issue #132) so \
             launchd supervises the daemon PID directly instead of \
             re-launching the self-spawning parent on every exit"
        );
        assert_eq!(cfg.keep_alive, KeepAlive::OnSuccess);
        assert_eq!(
            cfg.throttle_interval, 10,
            "ThrottleInterval must be 10 s to prevent KeepAlive hot-loop respawn"
        );
        // fd_limit must be the canonical ceiling so the generated plist always
        // includes SoftResourceLimits and HardResourceLimits (fd-exhaustion fix).
        assert_eq!(
            cfg.fd_limit,
            Some(LAUNCHD_FD_LIMIT),
            "fd_limit must be Some(LAUNCHD_FD_LIMIT) so generated plist raises \
             both soft and hard limits to {LAUNCHD_FD_LIMIT} (fd-exhaustion fix)"
        );
        // env_vars is allowed to be empty only on hosts without a HOME
        // (extremely rare); on developer/CI machines HOME is always set
        // and FASTEMBED_CACHE_DIR must be wired in.
        if dirs::home_dir().is_some() {
            assert!(
                cfg.env_vars.iter().any(|(k, _)| k == "FASTEMBED_CACHE_DIR"),
                "FASTEMBED_CACHE_DIR must be present in the LaunchAgent plist (GH #58)"
            );
        }
    }

    /// Why: the generated plist XML (what launchd actually reads from disk)
    /// must contain both resource-limit dicts with the canonical fd value.
    /// Asserting on `render_plist()` output catches regressions where the
    /// config struct is correct but the renderer drops the dicts.
    /// What: renders the plist with a dummy exe/log dir and checks that the
    /// SoftResourceLimits and HardResourceLimits keys appear, that a
    /// NumberOfFiles key is immediately followed by the canonical integer,
    /// and that ThrottleInterval is immediately followed by 10. Whitespace
    /// is stripped before the key/value checks so they do not depend on how
    /// the renderer indents or wraps the XML.
    /// Test: pure string generation, no fs side effects.
    #[cfg(target_os = "macos")]
    #[test]
    fn build_launchd_config_plist_includes_fd_limits_and_throttle() {
        use trusty_common::launchd::LAUNCHD_FD_LIMIT;

        let xml = dummy_config()
            .render_plist()
            .expect("render_plist must succeed");
        // Layout-insensitive view: lets us assert "key directly followed by
        // value" instead of "value appears somewhere in the document".
        let compact: String = xml.chars().filter(|c| !c.is_whitespace()).collect();

        assert!(
            xml.contains("<key>SoftResourceLimits</key>"),
            "plist must contain SoftResourceLimits to raise fd ceiling"
        );
        assert!(
            xml.contains("<key>HardResourceLimits</key>"),
            "plist must contain HardResourceLimits so soft limit is not clamped below it"
        );
        let fd_entry = format!("<key>NumberOfFiles</key><integer>{LAUNCHD_FD_LIMIT}</integer>");
        assert!(
            compact.contains(&fd_entry),
            "plist NumberOfFiles must equal {LAUNCHD_FD_LIMIT}, got xml: {xml}"
        );
        assert!(
            xml.contains("<key>ThrottleInterval</key>"),
            "plist must contain ThrottleInterval"
        );
        assert!(
            compact.contains("<key>ThrottleInterval</key><integer>10</integer>"),
            "ThrottleInterval must be 10 s"
        );
    }

    /// Why: GH #58 — launchd's read-only `TMPDIR` breaks fastembed's first
    /// model download. The plist installer is the single source of truth
    /// for the daemon's runtime environment, so the env var must be set
    /// there. Asserting on `build_launchd_config` (not just
    /// `fastembed_env_vars`) catches regressions where someone strips the
    /// env list when refactoring the config builder.
    /// What: builds the config with dummy paths and asserts the env var is
    /// present and points under `$HOME/.cache/fastembed`.
    /// Test: pure construction, no fs side effects.
    #[cfg(target_os = "macos")]
    #[test]
    fn build_launchd_config_sets_fastembed_cache_dir() {
        let cfg = dummy_config();
        if let Some(home) = dirs::home_dir() {
            let expected = home
                .join(".cache")
                .join("fastembed")
                .to_string_lossy()
                .into_owned();
            let dir_value = cfg
                .env_vars
                .iter()
                .find(|(k, _)| k == "FASTEMBED_CACHE_DIR")
                .map(|(_, v)| v.clone())
                .expect("FASTEMBED_CACHE_DIR must be present");
            assert_eq!(dir_value, expected);
            // GH #62: also assert FASTEMBED_CACHE_PATH is present and
            // points to the same path. Both names exist because fastembed
            // reads `FASTEMBED_CACHE_DIR` natively, while
            // `resolve_fastembed_cache_dir` (and our docs) prefer the
            // `FASTEMBED_CACHE_PATH` alias.
            let path_value = cfg
                .env_vars
                .iter()
                .find(|(k, _)| k == "FASTEMBED_CACHE_PATH")
                .map(|(_, v)| v.clone())
                .expect("FASTEMBED_CACHE_PATH must be present (GH #62)");
            assert_eq!(path_value, expected);
        }
    }
}
525}