trusty_memory/foreground.rs
1//! Supervised `serve --foreground` entry point (issue #787).
2//!
3//! Why: extracted from `lib.rs` to keep that file under the 500-line-cap
4//! ratchet budget. All three issue-#787 fixes (lock file ownership, http_addr
5//! guarantee, bind-abort-on-collision) live here so they are easy to find and
6//! test together.
7//! What: exports `bind_foreground_port` (Fix C) and `run_http_foreground`
8//! (Fix A + Fix B + Fix C combined entry point) for use by `main.rs`.
9//! Test: `bind_foreground_port_refuses_collision` (unit, real TCP bind);
10//! `daemon_lock` module tests cover the lock-file logic.
11
12use anyhow::Result;
13use std::net::SocketAddr;
14
15use crate::DEFAULT_HTTP_PORT;
16#[cfg(feature = "axum-server")]
17use crate::{commands::daemon_lock, run_http_on, AppState};
18
19/// Bind a `TcpListener` to `127.0.0.1:DEFAULT_HTTP_PORT` for the
20/// launchd/supervisor `serve --foreground` path — abort on collision.
21///
22/// Why (issue #787, Fix C): the generic `bind_dynamic_port` silently walks
23/// 7070→7071→…→7079 when port 7070 is taken, then falls back to an
24/// OS-assigned port. Under `serve --foreground` (the launchd path) this
25/// produces a hidden second daemon on a different port rather than failing
26/// loudly. The correct behavior for a supervised `serve --foreground` is to
27/// abort with a clear error when port 7070 is already bound — the existing
28/// daemon is already serving and the new instance should not start at all.
29/// The `single_instance_check` in `main.rs` catches the live-daemon case
30/// before reaching this function; the only cases that slip through are
31/// non-trusty-memory processes bound to 7070 (a genuine conflict) and race
32/// windows (two simultaneous launchd respawns) — both are better served by
33/// a loud error than a silent port-walk.
34/// What: attempts to bind exactly `127.0.0.1:DEFAULT_HTTP_PORT`. Returns
35/// `Err` with a clear human-readable message when the port is already in
36/// use (EADDRINUSE), so callers can surface it to launchd's log and the
37/// user's terminal instead of silently spawning on a different port.
38/// Test: `bind_foreground_port_refuses_collision` occupies port 7070 then
39/// asserts `bind_foreground_port` returns `Err` containing "already in use".
40pub async fn bind_foreground_port() -> Result<tokio::net::TcpListener> {
41 let addr: SocketAddr = SocketAddr::from(([127, 0, 0, 1], DEFAULT_HTTP_PORT));
42 tokio::net::TcpListener::bind(addr).await.map_err(|e| {
43 if e.kind() == std::io::ErrorKind::AddrInUse {
44 anyhow::anyhow!(
45 "port {} is already in use — another trusty-memory daemon is \
46 likely running. Check with `trusty-memory port` or \
47 `lsof -nP -iTCP:{} -sTCP:LISTEN`. If the existing daemon is \
48 healthy, this instance will exit 0 (expected under launchd \
49 KeepAlive). If the existing daemon is stale, run \
50 `trusty-memory stop` first.",
51 DEFAULT_HTTP_PORT,
52 DEFAULT_HTTP_PORT
53 )
54 } else {
55 anyhow::anyhow!("bind {}:{}: {e}", addr.ip(), DEFAULT_HTTP_PORT)
56 }
57 })
58}
59
60/// The canonical `serve --foreground` entry point used by launchd and systemd
61/// supervisors (issue #787).
62///
63/// Why (issue #787): previously `serve --foreground` shared the same
64/// `run_http_dynamic` path used by ad-hoc CLI invocations. That path
65/// silently port-walked (7070→7071→…→7079→OS-assigned) on bind collision,
66/// producing hidden second instances that never appeared in the `http_addr`
67/// discovery file at the expected port. This function replaces that path
68/// for the supervised case with three explicit guarantees:
69///
70/// 1. **Lock file ownership** (Fix A): writes `daemon.lock` containing the
71/// current PID before binding. The RAII guard removes the file on any
72/// exit (graceful shutdown, panic, launchd SIGTERM). `start` and the
73/// single-instance guard read this file as a second detection layer when
74/// `http_addr` is absent or stale.
75///
76/// 2. **http_addr written on bind** (Fix B): `run_http_on` writes both the
77/// OS-standard `http_addr` file and the legacy dotfile path
78/// (`~/.trusty-memory/http_addr`) immediately after binding, before
79/// accepting the first request. Both files are removed on clean shutdown.
80/// This ensures `trusty-memory port` and the MCP bridge always find the
81/// running daemon.
82///
83/// 3. **Abort on port collision** (Fix C): uses `bind_foreground_port`
84/// (binds exactly port 7070, returns `Err` on `EADDRINUSE`) instead of
85/// the port-walking `bind_dynamic_port`. If 7070 is already taken the
86/// function returns `Err` with a clear message; the caller (`main.rs`)
87/// exits non-zero, launchd logs the error, applies `ThrottleInterval`,
88/// and the single-instance guard prevents a respawn storm.
89///
90/// What: acquires the daemon lock, binds `127.0.0.1:7070` (aborts on
91/// collision), then runs `run_http_on` which writes the addr file and
92/// serves until graceful shutdown. The lock guard is dropped after
93/// `run_http_on` returns, removing `daemon.lock` best-effort.
94///
95/// Test: `bind_foreground_port_refuses_collision` (unit), plus the
96/// integration path `trusty-memory service start` followed by a second
97/// `trusty-memory serve --foreground` which should exit immediately with
98/// the "already in use" error.
99#[cfg(feature = "axum-server")]
100pub async fn run_http_foreground(state: AppState) -> Result<()> {
101 // Fix A (issue #787): acquire and own the daemon PID lock file for the
102 // duration of this daemon instance. A stale lock (dead PID) is reclaimed
103 // automatically; a live lock causes an `Err` that propagates to `main`,
104 // which exits non-zero so launchd logs the message and applies its
105 // ThrottleInterval instead of hot-looping. The RAII guard removes the
106 // file on any exit path (clean, panic, SIGTERM→graceful-shutdown).
107 let lock_path = daemon_lock::lock_file_path();
108 let _lock_guard: Option<daemon_lock::DaemonLock> = match lock_path {
109 Some(ref p) => match daemon_lock::acquire_lock(p) {
110 Ok(g) => {
111 tracing::info!(
112 "daemon lock acquired at {} (pid {})",
113 p.display(),
114 std::process::id()
115 );
116 Some(g)
117 }
118 Err(e) => {
119 // Lock held by a live process: this is a real duplicate-start.
120 // Exit non-zero so launchd records the error in its log and
121 // respects ThrottleInterval. The single-instance guard in
122 // main.rs handles the healthy-daemon case (exit 0); this
123 // branch handles the edge case where the addr probe passed
124 // but the lock is still contested.
125 return Err(e.context(
126 "serve --foreground refused to start: daemon lock held by live process",
127 ));
128 }
129 },
130 None => {
131 // No data dir resolvable (container, no $HOME). Log and continue
132 // without a lock — discovery files also won't work in this env.
133 tracing::warn!(
134 "could not resolve data dir for daemon lock — \
135 starting without PID lock (containerised / no $HOME)"
136 );
137 None
138 }
139 };
140
141 // Fix C (issue #787): bind exactly port 7070, abort on collision.
142 // `bind_foreground_port` returns `Err(EADDRINUSE)` when 7070 is taken.
143 // Under launchd, the single-instance guard already exits 0 before we
144 // reach this point when a healthy daemon is on 7070; `Err` here means a
145 // non-trusty-memory process owns the port, which is a genuine conflict.
146 let listener = bind_foreground_port().await?;
147
148 // Fix B (issue #787): `run_http_on` writes `http_addr` (and the dotfile
149 // path) immediately after binding, before accepting the first request, so
150 // `trusty-memory port` and the MCP bridge can locate this daemon
151 // immediately. Both files are removed on graceful shutdown.
152 run_http_on(state, listener).await
153}
154
155#[cfg(test)]
156mod tests {
157 use super::*;
158
159 /// Why (issue #787, Fix C): `bind_foreground_port` must return `Err`
160 /// when port 7070 is already in use rather than silently walking to
161 /// the next free port. This is the key guard against a launchd
162 /// respawn colliding with the live daemon and producing an orphan on
163 /// port 7071+. The single-instance guard catches the healthy-daemon
164 /// case (exits 0) before this function is reached; this test covers
165 /// the "port taken by a non-trusty-memory process" scenario.
166 /// What: occupies `127.0.0.1:DEFAULT_HTTP_PORT`, then asserts that
167 /// `bind_foreground_port` returns `Err` containing "already in use".
168 /// Test: itself (real TCP bind, no daemon spawned).
169 #[tokio::test]
170 async fn bind_foreground_port_refuses_collision() {
171 use std::net::SocketAddr;
172 // Occupy the default port with a plain TcpListener (not trusty-memory).
173 let addr: SocketAddr = SocketAddr::from(([127, 0, 0, 1], DEFAULT_HTTP_PORT));
174 let Ok(_holder) = tokio::net::TcpListener::bind(addr).await else {
175 // Port already taken by something else on the CI host — skip the
176 // test rather than fail spuriously.
177 return;
178 };
179 // Now `bind_foreground_port` must refuse.
180 let result = bind_foreground_port().await;
181 assert!(
182 result.is_err(),
183 "bind_foreground_port must return Err when port {DEFAULT_HTTP_PORT} \
184 is already bound; got Ok"
185 );
186 let msg = format!("{}", result.unwrap_err());
187 assert!(
188 msg.contains("already in use"),
189 "error message must mention 'already in use'; got: {msg}"
190 );
191 }
192}