trusty_memory/commands/service.rs
//! Handler for `trusty-memory service` (macOS launchd integration).
//!
//! Why: launchd is the canonical way to keep a long-lived foreground daemon
//! alive on macOS — it survives logout, restarts on crash, and integrates with
//! `launchctl` for diagnostics. Wrapping the plist mechanics in `service`
//! subcommands keeps users from having to hand-edit XML. This mirrors the
//! pattern used by `trusty-search service`, sharing the
//! [`trusty_common::launchd`] implementation so the two tools cannot drift.
//! What: macOS routes to `service_install` / `service_start` / `service_stop`
//! / `service_logs`. Non-macOS returns a "not supported" error, which the
//! binary reports before exiting 1.
//! Test: on Linux, every action returns Err with the platform message; on
//! macOS, `service install` writes the plist without loading it, `start`
//! bootstraps it, `stop` boots it out, and `logs` tails the log files.
14
15use anyhow::Result;
16use clap::Subcommand;
17#[cfg(target_os = "macos")]
18use colored::Colorize;
19
/// Subcommands for `trusty-memory service` (macOS launchd integration).
///
/// Why: the four lifecycle actions (install, start, stop, logs) are the
/// minimum surface needed to manage a launchd-backed daemon without
/// hand-editing plists or shelling out to `launchctl` directly.
/// What: a clap-derived enum dispatched by [`handle_service`].
/// Test: clap's `--help` enumerates all four; integration via
/// `cargo run -p trusty-memory -- service --help`.
// NOTE: this enum is not `#[cfg]`-gated, so it exists on every platform; only
// the handlers that `handle_service` routes to are macOS-only.
#[derive(Debug, Clone, Subcommand)]
pub enum ServiceAction {
    // NOTE: clap's derive surfaces each variant's `///` line as that
    // subcommand's `--help` description, so treat the doc comments below as
    // user-facing text rather than internal notes.
    /// Install the LaunchAgent plist (does not load it).
    Install,
    /// Install and load the LaunchAgent (start the daemon).
    Start,
    /// Unload the LaunchAgent (stop the daemon).
    Stop,
    /// Tail the launchd stdout / stderr logs.
    Logs,
}
39
/// Reverse-DNS label for the LaunchAgent.
///
/// Why: launchd identifies agents by their `Label`, which is also used as
/// the plist filename's stem (`<label>.plist`). Centralising the constant
/// keeps install / start / stop in lockstep.
/// What: `com.trusty.memory` — a reverse-DNS identifier in the same
/// `com.trusty.*` namespace as `trusty-search` (`com.trusty.trusty-search`).
/// Only compiled on macOS, alongside the launchd helpers that consume it.
/// Test: covered indirectly by `service install` integration runs.
#[cfg(target_os = "macos")]
pub const LAUNCHD_LABEL: &str = "com.trusty.memory";
50
51/// Dispatch a `trusty-memory service <action>` invocation.
52///
53/// Why: the binary's `main.rs` should not contain `#[cfg]` blocks โ it
54/// always calls this function and lets the module decide what is and isn't
55/// supported on the current platform.
56/// What: on macOS, dispatches to the per-action helper. On every other
57/// platform, returns an error with a friendly message pointing operators to
58/// their native service manager.
59/// Test: on Linux CI, asserts the Err message contains "not supported".
60pub fn handle_service(action: &ServiceAction) -> Result<()> {
61 #[cfg(target_os = "macos")]
62 {
63 match action {
64 ServiceAction::Install => service_install(),
65 ServiceAction::Start => service_start(),
66 ServiceAction::Stop => service_stop(),
67 ServiceAction::Logs => service_logs(),
68 }
69 }
70 #[cfg(not(target_os = "macos"))]
71 {
72 let _ = action;
73 anyhow::bail!(
74 "`trusty-memory service` is not supported on this platform โ \
75 use your distro's service manager (systemd, OpenRC, etc.) directly."
76 );
77 }
78}
79
/// Resolve (and create) the log directory for the launchd-managed daemon.
///
/// Why: the plist declares files for the daemon's `stdout` / `stderr`, and
/// those files need an existing parent directory. Computing the path in one
/// place keeps `install`, `start`, `stop`, and `logs` pointing at the same
/// location.
/// What: returns `<data_dir>/trusty-memory/logs`, where `<data_dir>` is
/// whatever `dirs::data_dir()` reports (`~/Library/Application Support` on
/// macOS). The directory is created with `create_dir_all` if missing.
/// Errors when the data directory cannot be resolved or the directory
/// cannot be created.
/// Test: covered indirectly by `service install` integration runs.
#[cfg(target_os = "macos")]
pub(crate) fn launchd_log_dir() -> Result<std::path::PathBuf> {
    let Some(base) = dirs::data_dir() else {
        anyhow::bail!("could not resolve user data directory");
    };
    let logs = base.join("trusty-memory").join("logs");
    if let Err(e) = std::fs::create_dir_all(&logs) {
        anyhow::bail!("create log dir {}: {e}", logs.display());
    }
    Ok(logs)
}
98
/// Build the shared `LaunchdConfig` describing the trusty-memory agent.
///
/// Why: install / start / stop all need the same plist label, log paths,
/// and argument vector. Assembling the config in one function keeps them in
/// sync and leaves XML rendering and the `launchctl` glue to the shared
/// [`trusty_common::launchd`] module.
///
/// Important — foreground mode: the args MUST be `serve --foreground`, not
/// bare `serve`. Plain `serve` self-spawns a detached child and exits 0
/// (matching `trusty-search start`'s background-mode behaviour); launchd
/// reads that as "service stopped" and re-launches the agent in a tight
/// loop, creating orphan daemon processes and breaking auto-restart on
/// reboot (issue #132). With `--foreground` the HTTP daemon stays in the
/// launchd-supervised process, so `KeepAlive` applies to the real daemon
/// PID.
///
/// Important — fd limits: macOS launchd's default soft fd ceiling for user
/// agents is 256. trusty-memory opens ~3 redb files per palace (data, KG,
/// vector index) plus sockets and log descriptors, so at ~85 palaces the
/// process hits EMFILE and every palace open call fails. The config always
/// carries `fd_limit = Some(LAUNCHD_FD_LIMIT)` (8192) so the generated plist
/// raises both `SoftResourceLimits` and `HardResourceLimits`, and the value
/// is rewritten on every `service install` / `service start`. A 10 s
/// `ThrottleInterval` keeps KeepAlive from hot-loop respawning.
///
/// What: returns a [`trusty_common::launchd::LaunchdConfig`] with label
/// [`LAUNCHD_LABEL`], the given `exe` and `log_dir`, args
/// `serve --foreground`, `KeepAlive::OnSuccess`, throttle 10, the fastembed
/// cache env vars from `fastembed_env_vars()` (so the model download avoids
/// launchd's read-only sandbox `TMPDIR`, GH #58), and the fd limit above.
/// NOTE(review): `KeepAlive::OnSuccess` is intended to avoid crash-looping
/// after a clean shutdown — confirm its exact restart semantics in
/// `trusty_common::launchd`.
/// Test: `build_launchd_config_uses_canonical_shape` asserts the
/// `--foreground` flag, fd limit, and throttle interval;
/// `build_launchd_config_sets_fastembed_cache_dir` asserts the env vars.
/// End-to-end exercised via `service install` / `service start`.
#[cfg(target_os = "macos")]
pub(crate) fn build_launchd_config(
    exe: std::path::PathBuf,
    log_dir: std::path::PathBuf,
) -> trusty_common::launchd::LaunchdConfig {
    use trusty_common::launchd::{self, LaunchdConfig};

    // Must stay `serve --foreground` (issue #132): launchd has to supervise
    // the daemon process itself, not a parent that exits immediately.
    let args = vec![String::from("serve"), String::from("--foreground")];

    // 10 s throttle prevents KeepAlive from hot-loop respawning when the
    // daemon exits quickly (e.g. single-instance guard exit 0).
    let throttle_interval = 10;

    // fd-exhaustion fix: raise both soft and hard limits to 8192 so the
    // daemon can open ~2730 palaces before hitting EMFILE. Written on every
    // install/start so a hand-patched plist is never silently reverted.
    let fd_limit = Some(launchd::LAUNCHD_FD_LIMIT);

    LaunchdConfig {
        label: String::from(LAUNCHD_LABEL),
        exe_path: exe,
        args,
        log_dir,
        keep_alive: launchd::KeepAlive::OnSuccess,
        throttle_interval,
        env_vars: fastembed_env_vars(),
        fd_limit,
    }
}
158
/// Build the env var list embedded into the LaunchAgent plist.
///
/// Why: launchd's per-agent `TMPDIR` is a sandboxed `/var/folders/.../T/`
/// path that is read-only for the agent's UID. fastembed derives its
/// default model cache from that `TMPDIR`, so the first
/// `TextEmbedding::try_new` call fails with `EROFS (os error 30)` and the
/// daemon never becomes ready (GH #58). Pinning the cache to a writable,
/// user-owned directory in the plist fixes every daemon start. Two names
/// are emitted: `FASTEMBED_CACHE_DIR` (fastembed's native variable) and
/// `FASTEMBED_CACHE_PATH` (the alias used by our install docs and accepted
/// by `resolve_fastembed_cache_dir`, GH #62).
/// What: when `dirs::home_dir()` resolves, returns
/// `[("FASTEMBED_CACHE_DIR", p), ("FASTEMBED_CACHE_PATH", p)]` in that
/// order, where `p` is `<home>/.cache/fastembed` rendered with
/// `to_string_lossy`. The path is computed in the process that builds the
/// config, i.e. for the installing user. When no home directory can be
/// resolved, returns an empty list and leaves cache resolution to the
/// daemon's own startup logic.
/// Test: `build_launchd_config_sets_fastembed_cache_dir` covers the happy
/// path for both env var names.
#[cfg(target_os = "macos")]
fn fastembed_env_vars() -> Vec<(String, String)> {
    dirs::home_dir()
        .map(|home| {
            let cache_dir = home
                .join(".cache")
                .join("fastembed")
                .to_string_lossy()
                .into_owned();
            ["FASTEMBED_CACHE_DIR", "FASTEMBED_CACHE_PATH"]
                .iter()
                .map(|name| (name.to_string(), cache_dir.clone()))
                .collect::<Vec<_>>()
        })
        .unwrap_or_default()
}
190
/// Resolve the path of the running `trusty-memory` binary.
///
/// Why: the plist must point launchd at an executable; using the binary
/// that is currently running means `service install` / `service start`
/// register whichever build the operator just invoked.
/// What: wraps `std::env::current_exe()` and converts its I/O error into an
/// `anyhow` error prefixed with "could not resolve current exe".
/// Test: covered indirectly by the `service` integration runs.
#[cfg(target_os = "macos")]
fn current_exe() -> Result<std::path::PathBuf> {
    match std::env::current_exe() {
        Ok(path) => Ok(path),
        Err(e) => Err(anyhow::anyhow!("could not resolve current exe: {e}")),
    }
}
195
/// `service install` — write the plist without loading it.
///
/// Why: operators sometimes want to inspect or hand-edit the plist before
/// launchd takes ownership. Splitting "install" from "start" gives them that
/// window without forcing a stop-start dance.
/// What: resolves the binary path and log directory, builds the shared
/// config, and calls `LaunchdConfig::install()` (expected to write
/// `~/Library/LaunchAgents/<label>.plist`; see `trusty_common::launchd`).
/// Then pre-creates the fastembed cache directory and prints the log
/// location plus the follow-up `start` command. Does not call `bootstrap`.
/// Errors if the executable path, log directory, plist path, or plist
/// write fails.
/// Test: integration via `cargo run -p trusty-memory -- service install`.
#[cfg(target_os = "macos")]
fn service_install() -> Result<()> {
    let exe = current_exe()?;
    let log_dir = launchd_log_dir()?;
    // `log_dir` is cloned because it is printed again after the config takes
    // ownership of its copy.
    let cfg = build_launchd_config(exe, log_dir.clone());
    let plist_path = cfg.plist_path()?;
    cfg.install()?;
    println!(
        "{} Wrote LaunchAgent plist: {}",
        "✓".green(),
        plist_path.display()
    );
    // Best-effort: never fails the install (see `ensure_fastembed_cache_dir`).
    ensure_fastembed_cache_dir();
    println!(
        " Logs: {}\n Start: {}",
        log_dir.display().to_string().dimmed(),
        "trusty-memory service start".cyan(),
    );
    Ok(())
}
225
/// Ensure the fastembed cache directory exists at install time.
///
/// Why: GH #62 — the launchd plist pins `FASTEMBED_CACHE_PATH` to
/// `$HOME/.cache/fastembed`, but if that directory does not yet exist the
/// daemon's first `TextEmbedding::try_new` can still trip over fastembed's
/// cache-creation path under launchd's restricted environment. Creating the
/// directory up-front (cheap, no network) means the env var resolves to a
/// writable path on the very first daemon start. A full model pre-warm is
/// left to `trusty-memory setup`; this only does the minimum (`mkdir -p`)
/// so `service install` stays fast and side-effect-light.
/// What: best-effort `create_dir_all` against `<home>/.cache/fastembed`.
/// Success prints a confirmation to stdout; failure prints a hint to stderr
/// and is otherwise ignored, so install never aborts here. Does nothing
/// when `dirs::home_dir()` cannot resolve a home directory.
/// Test: side-effecting; covered manually via `trusty-memory service install`.
#[cfg(target_os = "macos")]
fn ensure_fastembed_cache_dir() {
    let Some(home) = dirs::home_dir() else {
        return;
    };
    let cache = home.join(".cache").join("fastembed");
    match std::fs::create_dir_all(&cache) {
        Ok(()) => println!(
            "{} fastembed cache dir ready at {}",
            "✓".green(),
            cache.display().to_string().dimmed()
        ),
        // Deliberately non-fatal: the daemon can still attempt to create the
        // directory itself later.
        Err(e) => eprintln!(
            " {} could not pre-create {} ({e}); daemon will retry on first request.",
            "·".dimmed(),
            cache.display()
        ),
    }
}
258
/// `service start` — install the plist (if needed) and bootstrap the agent.
///
/// Why: the common "I want it running" path should be one command, not two.
/// The shared launchd module is described as making `install` + `bootstrap`
/// idempotent (bootstrap boots out any existing instance first), so calling
/// start repeatedly is expected to be safe — confirm in
/// `trusty_common::launchd`.
/// What: rewrites the plist via `install()`, then loads it via
/// `bootstrap()`. Prints the plist path, the `gui/<uid>` domain (built from
/// `trusty_common::launchd::current_uid()`), the log directory, and the
/// matching `stop` command. Restart behaviour afterwards is governed by the
/// config's `KeepAlive::OnSuccess` policy.
/// Errors if the executable path, log directory, plist path, plist write,
/// or bootstrap fails.
/// Test: integration via `cargo run -p trusty-memory -- service start`.
#[cfg(target_os = "macos")]
fn service_start() -> Result<()> {
    let exe = current_exe()?;
    let log_dir = launchd_log_dir()?;
    // `log_dir` is cloned because it is printed again after the config takes
    // ownership of its copy.
    let cfg = build_launchd_config(exe, log_dir.clone());
    let plist_path = cfg.plist_path()?;
    cfg.install()?;
    println!(
        "{} Wrote LaunchAgent plist: {}",
        "✓".green(),
        plist_path.display()
    );

    cfg.bootstrap()?;
    let domain = format!("gui/{}", trusty_common::launchd::current_uid());
    println!(
        "{} Loaded {} into {} — daemon will start automatically.",
        "✓".green(),
        LAUNCHD_LABEL,
        domain
    );
    println!(
        " Logs: {}\n Stop: {}",
        log_dir.display().to_string().dimmed(),
        "trusty-memory service stop".cyan(),
    );
    Ok(())
}
296
/// `service stop` — boot out the agent (stop and unload).
///
/// Why: operators need a friendly counterpart to `start` that does not
/// require remembering the full `launchctl bootout gui/<uid>/<label>`
/// invocation. The shared launchd module is described as treating "not
/// loaded" as success, so stopping an unloaded agent should be a no-op —
/// confirm in `trusty_common::launchd`.
/// What: builds the same config used by `start`, calls `bootout()`, and
/// prints the label together with the plist path. This function does not
/// remove the plist file, so a later `start` can reload it.
/// Errors if the executable path, log directory, bootout, or plist path
/// resolution fails.
/// Test: integration via `cargo run -p trusty-memory -- service stop`.
#[cfg(target_os = "macos")]
fn service_stop() -> Result<()> {
    let exe = current_exe()?;
    let log_dir = launchd_log_dir()?;
    let cfg = build_launchd_config(exe, log_dir);
    cfg.bootout()?;
    println!(
        "{} Unloaded {} (plist file preserved at {}).",
        "✓".green(),
        LAUNCHD_LABEL,
        cfg.plist_path()?.display().to_string().dimmed()
    );
    Ok(())
}
320
/// `service logs` — tail the launchd stdout/stderr log files.
///
/// Why: launchd routes the daemon's stdout/stderr to plain files; a friendly
/// `tail -F` wrapper avoids forcing operators to remember the path.
/// What: resolves the log directory and, if at least one of `stdout.log` /
/// `stderr.log` exists, runs `tail -F <stdout> <stderr>` as a child process
/// and waits for it to finish. When neither file exists (daemon never
/// started), prints a hint to stderr and returns `Ok(())` without running
/// `tail`.
/// Errors if the log directory cannot be resolved, `tail` cannot be
/// launched, or `tail` exits unsuccessfully.
/// Test: side-effecting; covered manually via
/// `cargo run -p trusty-memory -- service logs`.
#[cfg(target_os = "macos")]
fn service_logs() -> Result<()> {
    let log_dir = launchd_log_dir()?;
    let stdout = log_dir.join("stdout.log");
    let stderr = log_dir.join("stderr.log");
    if !stdout.exists() && !stderr.exists() {
        eprintln!(
            "{} No logs at {} yet — start the service first ({}).",
            "·".dimmed(),
            log_dir.display(),
            "trusty-memory service start".cyan()
        );
        return Ok(());
    }
    // Both paths are always passed, even if only one file exists so far.
    let status = std::process::Command::new("tail")
        .arg("-F")
        .arg(&stdout)
        .arg(&stderr)
        .status()
        .map_err(|e| anyhow::anyhow!("tail failed: {e}"))?;
    if !status.success() {
        anyhow::bail!("tail exited with {status}");
    }
    Ok(())
}
354
#[cfg(test)]
mod tests {
    use super::*;

    /// Build the agent config from fixed dummy paths.
    ///
    /// Why: every macOS test inspects the same canonical config; one helper
    /// keeps the fixture paths identical across tests.
    /// What: calls `build_launchd_config` with a fake executable and log
    /// directory. Touches no files.
    #[cfg(target_os = "macos")]
    fn sample_config() -> trusty_common::launchd::LaunchdConfig {
        build_launchd_config(
            std::path::PathBuf::from("/usr/local/bin/trusty-memory"),
            std::path::PathBuf::from("/tmp/trusty-memory/logs"),
        )
    }

    /// Look up one env var value in a config by name.
    ///
    /// What: returns the value of the first `env_vars` entry whose key
    /// equals `key`, or `None` when no such entry exists.
    #[cfg(target_os = "macos")]
    fn env_value<'a>(
        cfg: &'a trusty_common::launchd::LaunchdConfig,
        key: &str,
    ) -> Option<&'a str> {
        cfg.env_vars
            .iter()
            .find(|(name, _)| name == key)
            .map(|(_, value)| value.as_str())
    }

    /// Why: on non-macOS platforms, every `service` action must surface a
    /// clear, actionable error instead of silently succeeding or panicking.
    /// What: invokes `handle_service` with each action and asserts the Err
    /// message contains the "not supported" sentinel.
    /// Test: macOS skips this (the actions perform real `launchctl` work).
    #[cfg(not(target_os = "macos"))]
    #[test]
    fn handle_service_errors_on_unsupported_platform() {
        let actions = [
            ServiceAction::Install,
            ServiceAction::Start,
            ServiceAction::Stop,
            ServiceAction::Logs,
        ];
        for action in &actions {
            let msg = handle_service(action)
                .expect_err("must fail on non-macOS")
                .to_string();
            assert!(
                msg.contains("not supported"),
                "expected platform error, got: {msg}"
            );
        }
    }

    /// Why: the LaunchdConfig we hand to `trusty_common::launchd` must always
    /// describe the canonical trusty-memory agent (label, args, restart
    /// policy, fd limits, throttle). Drift here corrupts every plist that
    /// the binary writes.
    /// Issue #132 specifically required that the args invoke
    /// `serve --foreground` — plain `serve` self-spawns and exits 0, which
    /// launchd interprets as "service stopped" and re-launches in a tight
    /// loop. The fd-limit and throttle assertions guard against the
    /// fd-exhaustion / zombie-herd regression (fix A).
    /// What: builds the config with dummy paths and asserts the
    /// load-bearing fields, including the `--foreground` flag, fd limit,
    /// and throttle interval.
    /// Test: pure construction, no fs side effects.
    #[cfg(target_os = "macos")]
    #[test]
    fn build_launchd_config_uses_canonical_shape() {
        use trusty_common::launchd::{KeepAlive, LAUNCHD_FD_LIMIT};

        let cfg = sample_config();

        assert_eq!(cfg.label, LAUNCHD_LABEL);
        assert_eq!(
            cfg.args,
            vec!["serve".to_string(), "--foreground".to_string()],
            "launchd plist must invoke `serve --foreground` (issue #132) so \
             launchd supervises the daemon PID directly instead of \
             re-launching the self-spawning parent on every exit"
        );
        assert_eq!(cfg.keep_alive, KeepAlive::OnSuccess);
        assert_eq!(
            cfg.throttle_interval, 10,
            "ThrottleInterval must be 10 s to prevent KeepAlive hot-loop respawn"
        );
        // The canonical ceiling is what makes the generated plist include
        // SoftResourceLimits and HardResourceLimits (fd-exhaustion fix).
        assert_eq!(
            cfg.fd_limit,
            Some(LAUNCHD_FD_LIMIT),
            "fd_limit must be Some(LAUNCHD_FD_LIMIT) so generated plist raises \
             both soft and hard limits to {LAUNCHD_FD_LIMIT} (fd-exhaustion fix)"
        );
        // env_vars may only be empty on hosts without a resolvable home
        // directory (extremely rare); on developer/CI machines it is set and
        // FASTEMBED_CACHE_DIR must be wired in.
        if dirs::home_dir().is_some() {
            assert!(
                env_value(&cfg, "FASTEMBED_CACHE_DIR").is_some(),
                "FASTEMBED_CACHE_DIR must be present in the LaunchAgent plist (GH #58)"
            );
        }
    }

    /// Why: the generated plist XML (what launchd actually reads from disk)
    /// must contain both resource-limit dicts with the canonical fd value.
    /// Asserting on `render_plist()` output catches regressions where the
    /// config struct is correct but the renderer drops the dicts.
    /// What: renders the plist from the dummy config and checks, in order,
    /// for the SoftResourceLimits key, the HardResourceLimits key, the
    /// canonical fd `<integer>` value, the ThrottleInterval key, and an
    /// `<integer>10</integer>` entry.
    /// Test: pure string generation, no fs side effects.
    #[cfg(target_os = "macos")]
    #[test]
    fn build_launchd_config_plist_includes_fd_limits_and_throttle() {
        use trusty_common::launchd::LAUNCHD_FD_LIMIT;

        let xml = sample_config()
            .render_plist()
            .expect("render_plist must succeed");

        let fd_entry = format!("<integer>{LAUNCHD_FD_LIMIT}</integer>");
        // (needle, failure message) pairs, checked in declaration order.
        let expectations: [(&str, String); 5] = [
            (
                "<key>SoftResourceLimits</key>",
                "plist must contain SoftResourceLimits to raise fd ceiling".to_string(),
            ),
            (
                "<key>HardResourceLimits</key>",
                "plist must contain HardResourceLimits so soft limit is not clamped below it"
                    .to_string(),
            ),
            (
                fd_entry.as_str(),
                format!("plist NumberOfFiles must equal {LAUNCHD_FD_LIMIT}, got xml: {xml}"),
            ),
            (
                "<key>ThrottleInterval</key>",
                "plist must contain ThrottleInterval".to_string(),
            ),
            (
                "<integer>10</integer>",
                "ThrottleInterval must be 10 s".to_string(),
            ),
        ];
        for (needle, failure) in &expectations {
            assert!(xml.contains(*needle), "{failure}");
        }
    }

    /// Why: GH #58 — launchd's read-only `TMPDIR` breaks fastembed's first
    /// model download. The plist installer is the single source of truth
    /// for the daemon's runtime environment, so the env var must be set
    /// there. Asserting on `build_launchd_config` (not just
    /// `fastembed_env_vars`) catches regressions where someone strips the
    /// env list when refactoring the config builder.
    /// What: builds the config with dummy paths and asserts both env vars
    /// are present and equal `<home>/.cache/fastembed`. Skips the
    /// assertions when no home directory can be resolved.
    /// Test: pure construction, no fs side effects.
    #[cfg(target_os = "macos")]
    #[test]
    fn build_launchd_config_sets_fastembed_cache_dir() {
        let cfg = sample_config();

        let Some(home) = dirs::home_dir() else {
            return;
        };
        let expected = home
            .join(".cache")
            .join("fastembed")
            .to_string_lossy()
            .into_owned();

        let dir_value =
            env_value(&cfg, "FASTEMBED_CACHE_DIR").expect("FASTEMBED_CACHE_DIR must be present");
        assert_eq!(dir_value, expected);

        // GH #62: FASTEMBED_CACHE_PATH must also be present and point to the
        // same path. Both names exist because fastembed reads
        // `FASTEMBED_CACHE_DIR` natively, while `resolve_fastembed_cache_dir`
        // (and our docs) prefer the `FASTEMBED_CACHE_PATH` alias.
        let path_value = env_value(&cfg, "FASTEMBED_CACHE_PATH")
            .expect("FASTEMBED_CACHE_PATH must be present (GH #62)");
        assert_eq!(path_value, expected);
    }
}