// zero_commands/supervisor.rs

//! Supervisor + Auto-mode dispatch surfaces.
//!
//! The `zero-commands` crate does not know how the headless
//! supervisor or the engine's Auto-mode flag are actually
//! implemented; it only needs to know there is a typed seam for
//! **asking** for a specific action. The adapters in
//! `crates/zero/src/main.rs` (for the engine-facing [`AutoSource`])
//! and in `zero-headless` (for the daemon-facing
//! [`SupervisorSource`]) turn those asks into concrete work —
//! an HTTP call, a launchd/systemd spawn, a Unix-socket probe.
//!
//! Keeping the traits here (rather than in an adapter crate)
//! means the dispatcher, the tests, and the TUI adapters all
//! agree on the wire shape of the "request" and the "reply" by
//! construction. ADR-006 and M2_PLAN §5 are the source of
//! truth for the verbs.
17
use std::error::Error;
use std::fmt;
use std::time::Duration;
21
/// A command issued to the engine's Auto-mode switch.
///
/// `Status` is read-only — the adapter reports the current mode
/// without mutating engine state. `On` / `Off` are the mutating
/// verbs; the friction ladder has already gated `On` by the time
/// the adapter sees the request (Phase-2: `On` is Increases, `Off`
/// / `Status` are Neutral — see `Command::risk`).
///
/// Distinct from [`crate::command::AutoAction`] (the user-typed
/// subcommand which also carries `Missing` / `Unknown` for usage
/// hints) — this enum is the *resolved* adapter request, so it
/// only carries verbs the adapter can act on.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AutoRequest {
    /// Mutating verb: ask the adapter to switch Auto-mode on.
    On,
    /// Mutating verb: ask the adapter to switch Auto-mode off.
    Off,
    /// Read-only verb: report the current mode without changing it.
    Status,
}
40
41/// Reply from an [`AutoSource`] call. `mode` is the effective mode
42/// after the adapter acted (so `On` returns `AutoMode::On` on
43/// success, `AutoMode::Off` on adapter-side refusal), and
44/// `changed` is `true` when the call flipped the mode. The
45/// dispatcher renders different lines for the two cases so the
46/// operator is never left guessing whether the toggle actually
47/// did something.
48#[derive(Debug, Clone, Copy, PartialEq, Eq)]
49pub struct AutoReply {
50    pub mode: AutoMode,
51    pub changed: bool,
52}
53
/// Observed Auto-mode state. Mirrors the engine's two-state
/// switch; no `Paused` / `Transitioning` intermediate — the
/// engine either accepts Plan verdicts or it does not.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AutoMode {
    /// Auto-mode is switched on.
    On,
    /// Auto-mode is switched off.
    Off,
}

impl AutoMode {
    /// Lowercase label for this mode: `"on"` or `"off"`.
    ///
    /// `const`, so the label can also be used in constant contexts.
    #[must_use]
    pub const fn as_str(self) -> &'static str {
        match self {
            Self::On => "on",
            Self::Off => "off",
        }
    }
}
72
/// A command issued to the operator-local supervisor daemon.
///
/// `Start` asks the adapter to spawn the daemon. It is idempotent:
/// a second `Start` on an already-running daemon is a no-op, and
/// the dispatcher surfaces an "already running" line. `Stop` asks
/// the adapter to signal the daemon to exit and tear down the
/// listener socket. `Status` reports whether the daemon is alive
/// and — if so — what pid + socket path it is using.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SupervisorAction {
    /// Spawn the daemon; a no-op when it is already running.
    Start,
    /// Signal the daemon to exit and tear down its listener socket.
    Stop,
    /// Report whether the daemon is alive, with pid + socket if so.
    Status,
}
87
88/// Reply from a [`SupervisorSource`] call. `state` is the
89/// observed state after the adapter acted. `socket` is the
90/// daemon's Unix socket path when the daemon is running. Production
91/// paths are operator-local; tests stub this to any path.
92#[derive(Debug, Clone, PartialEq, Eq)]
93pub struct SupervisorReply {
94    pub state: SupervisorState,
95    pub socket: Option<String>,
96    pub pid: Option<u32>,
97    /// `true` when the call changed daemon state (started a
98    /// stopped daemon, stopped a running one). Lets the dispatch
99    /// layer render "headless: started" vs "already running" off
100    /// a single reply shape.
101    pub changed: bool,
102    /// Monotonic uptime when the daemon is running; `None`
103    /// otherwise. Rendered in `/headless status` lines.
104    pub uptime: Option<Duration>,
105}
106
/// Observed daemon lifecycle state.
///
/// `Running` / `Stopped` are the steady states. `Failed` carries
/// a short free-form reason so a `/headless status` line can
/// distinguish a clean stop from a crash — silent conflation would
/// hide the 2 AM case where the daemon died in the night.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SupervisorState {
    /// Steady state: the daemon is alive.
    Running,
    /// Steady state: the daemon is not running after a clean stop.
    Stopped,
    /// The daemon crashed; the payload is a short free-form reason.
    Failed(String),
}
119
/// Errors an adapter can return.
///
/// Typed (rather than `Box<dyn Error>`) so the dispatcher can
/// choose different copy for each class without string-matching
/// on message bodies.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SupervisorError {
    /// Adapter is not configured for this invocation (no daemon
    /// binary installed, or `--no-headless` flag set).
    Unavailable(String),
    /// Transport-level failure — socket gone, permission denied,
    /// pipe closed mid-call.
    Io(String),
    /// The daemon refused the request (e.g. asked to stop while
    /// already stopping). Rendered as a warn line, not an alert —
    /// the call was understood, just not honored.
    Refused(String),
}

/// Renders each class as a fixed prefix followed by the adapter's
/// detail string, e.g. `supervisor refused: <detail>`.
impl fmt::Display for SupervisorError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::Unavailable(s) => write!(f, "supervisor unavailable: {s}"),
            Self::Io(s) => write!(f, "supervisor IO error: {s}"),
            Self::Refused(s) => write!(f, "supervisor refused: {s}"),
        }
    }
}

// No wrapped source error: every variant carries only a message string.
impl Error for SupervisorError {}
150
151/// Dispatcher-side handle to the operator-local supervisor.
152///
153/// Implemented by the `zero-headless` adapter (production), by
154/// `crates/zero/src/main.rs` when stubbed (current-M2 CLI has
155/// no daemon binary yet), and by test scaffolding. When no
156/// adapter is attached to [`crate::DispatchContext`] the
157/// dispatcher emits a single "headless supervisor unavailable"
158/// alert rather than hanging — same honesty contract as
159/// [`crate::SessionSource`] on `--no-persist`.
160pub trait SupervisorSource: Send + Sync + 'static {
161    /// Issue an action to the supervisor.
162    ///
163    /// # Errors
164    /// Propagates adapter-specific failures — see
165    /// [`SupervisorError`].
166    fn act(&self, action: SupervisorAction) -> Result<SupervisorReply, SupervisorError>;
167
168    /// Tear down the daemon's listener socket as part of a
169    /// `/kill`. Idempotent: when the daemon is already stopped
170    /// this returns `Ok(false)` and the dispatch layer treats
171    /// that as "no tear-down needed". When it did tear one
172    /// down, returns `Ok(true)` so the `/kill` line can tag
173    /// the compound behavior.
174    ///
175    /// # Errors
176    /// Propagates adapter-specific failures.
177    fn tear_down_socket(&self) -> Result<bool, SupervisorError>;
178}
179
180/// Dispatcher-side handle to the engine's Auto-mode switch.
181///
182/// Production impl lives in the `zero-engine-client`-aware
183/// adapter in `crates/zero/src/main.rs`. Tests use
184/// [`MockAutoSource`]. When no adapter is attached the
185/// dispatcher surfaces "auto mode unavailable" rather than
186/// pretending — same pattern as every other optional source on
187/// [`crate::DispatchContext`].
188pub trait AutoSource: Send + Sync + 'static {
189    /// Issue an Auto-mode action.
190    ///
191    /// # Errors
192    /// Same taxonomy as [`SupervisorError`] (transport, refusal,
193    /// unavailability) — re-using the type avoids a parallel
194    /// error enum for an isomorphic surface.
195    fn act(&self, action: AutoRequest) -> Result<AutoReply, SupervisorError>;
196}
197
198/// In-memory [`AutoSource`] used by tests and offline paths.
199/// Flips the stored mode on `On` / `Off`, returns it on
200/// `Status`. `changed` is computed by comparing the requested
201/// action against the current mode, matching the production
202/// adapter's contract.
203#[derive(Debug)]
204pub struct MockAutoSource {
205    mode: std::sync::Mutex<AutoMode>,
206}
207
208impl MockAutoSource {
209    #[must_use]
210    pub fn new(initial: AutoMode) -> Self {
211        Self {
212            mode: std::sync::Mutex::new(initial),
213        }
214    }
215
216    /// Current mode. Handy for assertions.
217    #[must_use]
218    pub fn current(&self) -> AutoMode {
219        *self
220            .mode
221            .lock()
222            .unwrap_or_else(std::sync::PoisonError::into_inner)
223    }
224}
225
226impl Default for MockAutoSource {
227    fn default() -> Self {
228        Self::new(AutoMode::Off)
229    }
230}
231
232impl AutoSource for MockAutoSource {
233    fn act(&self, action: AutoRequest) -> Result<AutoReply, SupervisorError> {
234        let mut guard = self
235            .mode
236            .lock()
237            .unwrap_or_else(std::sync::PoisonError::into_inner);
238        let prior = *guard;
239        let (mode, changed) = match action {
240            AutoRequest::On => (AutoMode::On, prior != AutoMode::On),
241            AutoRequest::Off => (AutoMode::Off, prior != AutoMode::Off),
242            AutoRequest::Status => (prior, false),
243        };
244        *guard = mode;
245        Ok(AutoReply { mode, changed })
246    }
247}
248
/// In-memory [`SupervisorSource`] used by tests and the M2
/// CLI's own "no daemon yet" path. Tracks daemon state as a
/// boolean and reports a stubbed socket path on `Start` /
/// `Status` so the dispatcher copy has something concrete to
/// print.
#[derive(Debug)]
pub struct MockSupervisorSource {
    /// All mutable mock state, behind a mutex because the trait
    /// methods take `&self`.
    inner: std::sync::Mutex<MockSupervisorInner>,
}

/// Mutable state of the simulated daemon.
#[derive(Debug)]
struct MockSupervisorInner {
    /// Whether the simulated daemon is currently running.
    running: bool,
    /// Stubbed socket path reported while running.
    socket: String,
    /// Stubbed pid reported while running.
    pid: u32,
    /// Reference instant that reported uptime is measured from.
    started_at: std::time::Instant,
    /// Simulate the daemon having torn itself down on a prior
    /// `/kill`. Purely for test ergonomics.
    socket_torn_down: bool,
}

impl MockSupervisorSource {
    /// Create a mock whose daemon starts out running or stopped.
    ///
    /// The socket path (`<operator-socket>`) and pid (`4242`) are
    /// fixed placeholders; the uptime clock starts at construction.
    #[must_use]
    pub fn new(running: bool) -> Self {
        Self {
            inner: std::sync::Mutex::new(MockSupervisorInner {
                running,
                socket: "<operator-socket>".to_owned(),
                pid: 4242,
                started_at: std::time::Instant::now(),
                socket_torn_down: false,
            }),
        }
    }

    /// Observed running-state. Handy for assertions.
    #[must_use]
    pub fn is_running(&self) -> bool {
        // A poisoned lock is recovered rather than propagated.
        self.inner
            .lock()
            .unwrap_or_else(std::sync::PoisonError::into_inner)
            .running
    }

    /// Did any prior call tear down the socket? Lets
    /// `/kill`-compound tests assert the behavior happened
    /// without exposing internal state on every reply.
    #[must_use]
    pub fn socket_torn_down(&self) -> bool {
        self.inner
            .lock()
            .unwrap_or_else(std::sync::PoisonError::into_inner)
            .socket_torn_down
    }
}

/// The default mock starts with the daemon stopped.
impl Default for MockSupervisorSource {
    fn default() -> Self {
        Self::new(false)
    }
}
310
311impl SupervisorSource for MockSupervisorSource {
312    fn act(&self, action: SupervisorAction) -> Result<SupervisorReply, SupervisorError> {
313        let mut inner = self
314            .inner
315            .lock()
316            .unwrap_or_else(std::sync::PoisonError::into_inner);
317        match action {
318            SupervisorAction::Start => {
319                let changed = !inner.running;
320                if changed {
321                    inner.running = true;
322                    inner.started_at = std::time::Instant::now();
323                    inner.socket_torn_down = false;
324                }
325                Ok(SupervisorReply {
326                    state: SupervisorState::Running,
327                    socket: Some(inner.socket.clone()),
328                    pid: Some(inner.pid),
329                    changed,
330                    uptime: Some(inner.started_at.elapsed()),
331                })
332            }
333            SupervisorAction::Stop => {
334                let changed = inner.running;
335                inner.running = false;
336                Ok(SupervisorReply {
337                    state: SupervisorState::Stopped,
338                    socket: None,
339                    pid: None,
340                    changed,
341                    uptime: None,
342                })
343            }
344            SupervisorAction::Status => {
345                if inner.running {
346                    Ok(SupervisorReply {
347                        state: SupervisorState::Running,
348                        socket: Some(inner.socket.clone()),
349                        pid: Some(inner.pid),
350                        changed: false,
351                        uptime: Some(inner.started_at.elapsed()),
352                    })
353                } else {
354                    Ok(SupervisorReply {
355                        state: SupervisorState::Stopped,
356                        socket: None,
357                        pid: None,
358                        changed: false,
359                        uptime: None,
360                    })
361                }
362            }
363        }
364    }
365
366    fn tear_down_socket(&self) -> Result<bool, SupervisorError> {
367        let mut inner = self
368            .inner
369            .lock()
370            .unwrap_or_else(std::sync::PoisonError::into_inner);
371        if inner.running {
372            inner.running = false;
373            inner.socket_torn_down = true;
374            Ok(true)
375        } else {
376            // Already stopped — nothing to tear down. The compound
377            // `/kill` line still renders; it just omits the
378            // headless tag.
379            Ok(false)
380        }
381    }
382}
383
// Unit tests for the in-memory mocks and the error copy. Each test
// drives a mock through the trait surface and checks the reply plus
// the mock's observable state.
#[cfg(test)]
mod tests {
    use super::{
        AutoMode, AutoRequest, AutoSource, MockAutoSource, MockSupervisorSource, SupervisorAction,
        SupervisorError, SupervisorSource, SupervisorState,
    };

    #[test]
    fn mock_auto_flips_on_then_is_idempotent() {
        let src = MockAutoSource::new(AutoMode::Off);
        let first = src.act(AutoRequest::On).unwrap();
        assert!(first.changed);
        assert_eq!(first.mode, AutoMode::On);
        let again = src.act(AutoRequest::On).unwrap();
        assert!(!again.changed);
        assert_eq!(again.mode, AutoMode::On);
    }

    #[test]
    fn mock_auto_flips_off_then_is_idempotent() {
        let src = MockAutoSource::new(AutoMode::On);
        let first = src.act(AutoRequest::Off).unwrap();
        assert!(first.changed);
        assert_eq!(first.mode, AutoMode::Off);
        let again = src.act(AutoRequest::Off).unwrap();
        assert!(!again.changed);
        assert_eq!(again.mode, AutoMode::Off);
        assert_eq!(src.current(), AutoMode::Off);
    }

    #[test]
    fn mock_auto_status_is_pure() {
        let src = MockAutoSource::new(AutoMode::On);
        let reply = src.act(AutoRequest::Status).unwrap();
        assert!(!reply.changed);
        assert_eq!(reply.mode, AutoMode::On);
        assert_eq!(src.current(), AutoMode::On);
    }

    #[test]
    fn mock_supervisor_start_then_status_reports_running() {
        let src = MockSupervisorSource::new(false);
        let started = src.act(SupervisorAction::Start).unwrap();
        assert!(started.changed);
        assert_eq!(started.state, SupervisorState::Running);
        let status = src.act(SupervisorAction::Status).unwrap();
        assert!(!status.changed);
        assert_eq!(status.state, SupervisorState::Running);
        assert_eq!(status.socket.as_deref(), Some("<operator-socket>"));
    }

    #[test]
    fn mock_supervisor_start_is_a_no_op_when_running() {
        let src = MockSupervisorSource::new(true);
        let reply = src.act(SupervisorAction::Start).unwrap();
        assert!(!reply.changed);
        assert_eq!(reply.state, SupervisorState::Running);
        assert!(src.is_running());
    }

    #[test]
    fn mock_supervisor_stop_clears_runtime_details() {
        let src = MockSupervisorSource::new(true);
        let stopped = src.act(SupervisorAction::Stop).unwrap();
        assert!(stopped.changed);
        assert_eq!(stopped.state, SupervisorState::Stopped);
        assert_eq!(stopped.socket, None);
        assert_eq!(stopped.pid, None);
        assert_eq!(stopped.uptime, None);
        assert!(!src.is_running());
        // Second stop is a no-op.
        let again = src.act(SupervisorAction::Stop).unwrap();
        assert!(!again.changed);
        assert_eq!(again.state, SupervisorState::Stopped);
    }

    #[test]
    fn mock_supervisor_tear_down_only_when_running() {
        let src = MockSupervisorSource::new(true);
        assert!(src.tear_down_socket().unwrap());
        assert!(!src.is_running());
        assert!(src.socket_torn_down());
        // Second call is a no-op.
        assert!(!src.tear_down_socket().unwrap());
    }

    #[test]
    fn mock_supervisor_restart_clears_tear_down_marker() {
        let src = MockSupervisorSource::new(true);
        assert!(src.tear_down_socket().unwrap());
        let restarted = src.act(SupervisorAction::Start).unwrap();
        assert!(restarted.changed);
        assert!(src.is_running());
        assert!(!src.socket_torn_down());
    }

    #[test]
    fn supervisor_error_display_names_the_class() {
        assert_eq!(
            SupervisorError::Unavailable("no daemon".to_owned()).to_string(),
            "supervisor unavailable: no daemon"
        );
        assert_eq!(
            SupervisorError::Io("pipe closed".to_owned()).to_string(),
            "supervisor IO error: pipe closed"
        );
        assert_eq!(
            SupervisorError::Refused("stopping".to_owned()).to_string(),
            "supervisor refused: stopping"
        );
    }
}