babysit 0.8.2

Wrap a shell command in a PTY and expose it to external AI agents (Claude / Codex) via subcommands
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
use crate::attach;
use crate::control::{self, Handle, LoopMessage};
use crate::pane::{ExitInfo, OutputHub, Pane};
use crate::paths;
use crate::session::{self, Meta, State, Status};
use anyhow::{Context, Result, anyhow};
use chrono::Utc;
use std::io::{IsTerminal, Write};
use std::sync::Arc;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::time::Duration;
use tokio::sync::{mpsc, watch};

/// Entry point for `babysit run` / `babysit -- …` / `babysit -d -- …`.
///
/// Architecture (tmux-style): the wrapped command always runs under a
/// headless *worker* process that owns the PTY, the control socket, and the
/// output fan-out. Foreground terminals are just *clients* attached over the
/// socket. `run` spawns the worker and (unless `-d`) attaches to it; `-d`
/// spawns the worker and returns immediately.
#[allow(clippy::too_many_arguments)] // top-level CLI entry; each arg is a distinct flag
pub async fn run(
    cmd: Vec<String>,
    id: Option<String>,
    detach: bool,
    detached_id: Option<String>,
    no_tty: bool,
    timeout: Option<String>,
    idle_timeout: Option<String>,
    size: Option<String>,
    json: bool,
) -> Result<i32> {
    // Parse the inputs up front so a bad value errors before we spawn.
    // Use parse_timeout (not parse_duration) so `0`/`none`/`off`/`never` mean
    // "no timeout" here too, consistent with wait/expect/wait-idle — otherwise
    // `--timeout 0s` would auto-kill the command immediately.
    let timeout = timeout.as_deref().map(parse_timeout).transpose()?.flatten();
    let idle_timeout = idle_timeout
        .as_deref()
        .map(parse_timeout)
        .transpose()?
        .flatten();
    let size = size.as_deref().map(parse_size).transpose()?;

    // We are the detached worker (re-exec'd with --detached-id): run the
    // headless server loop and never come back until the command exits.
    if let Some(worker_id) = detached_id {
        serve_worker(cmd, worker_id, !no_tty, timeout, idle_timeout, size).await?;
        return Ok(0);
    }

    // Parent: choose the id, announce it, spawn the worker.
    let session_id = session::make_id(id).await?;
    if json {
        // Machine-readable: an agent captures `.id` without scraping prose.
        println!("{}", serde_json::json!({ "id": session_id }));
        let _ = std::io::stdout().flush();
    } else {
        print_banner(&session_id, &cmd.join(" "));
    }
    spawn_worker_process(&cmd, &session_id, no_tty, timeout, idle_timeout, size)?;

    if detach {
        return Ok(0);
    }
    // Attached run: stream the session until it exits or we detach. Use the
    // id directly (skip resolution) since the worker may not have written the
    // session dir yet — connect_retry waits for its socket.
    attach::attach_to(session_id).await
}

/// The headless worker: owns the PTY + control socket, fans output out to
/// attached clients, and supervises restarts until the command exits.
async fn serve_worker(
    cmd: Vec<String>,
    id: String,
    tty: bool,
    timeout: Option<Duration>,
    idle_timeout: Option<Duration>,
    size: Option<(u16, u16)>,
) -> Result<()> {
    let meta = Meta {
        id: id.clone(),
        cmd: cmd.clone(),
        babysit_pid: std::process::id(),
        started_at: Utc::now(),
    };
    session::write_meta(&meta).await?;
    session::write_status(&id, &Status::starting()).await?;

    // No terminal here (stdio is /dev/null); start at the requested size or a
    // sane default. Attached clients send their real size via a resize frame.
    let (cols, rows) = size.unwrap_or((80, 24));

    let log_path = paths::output_log_path(&id)?;
    let env = vec![("BABYSIT_SESSION_ID".into(), id.clone())];
    let hub = OutputHub::new();
    let pane = match Pane::spawn(&cmd, rows, cols, &env, Some(&log_path), hub.clone(), tty) {
        Ok(p) => Arc::new(p),
        Err(e) => {
            // Don't leave the session stuck in `starting` forever.
            let _ = session::write_status(
                &id,
                &Status {
                    state: State::Exited,
                    child_pid: None,
                    exit_code: None,
                    last_change: Utc::now(),
                },
            )
            .await;
            return Err(e);
        }
    };

    session::write_status(
        &id,
        &Status {
            state: State::Running,
            child_pid: pane.pid,
            exit_code: None,
            last_change: Utc::now(),
        },
    )
    .await?;

    let (action_tx, mut action_rx) = mpsc::unbounded_channel::<LoopMessage>();
    let (exit_tx, exit_rx) = watch::channel::<Option<ExitInfo>>(None);
    let (detach_tx, _detach_rx0) = watch::channel::<u64>(0);
    let detach_tx = Arc::new(detach_tx);
    let attached = Arc::new(AtomicUsize::new(0));
    let handle = Handle::new(
        id.clone(),
        pane.clone(),
        action_tx,
        hub.clone(),
        exit_rx,
        detach_tx,
        attached.clone(),
    );
    control::serve(handle.clone()).await?;

    let mut current_pane = pane;
    let info: Option<ExitInfo>;
    // Optional auto-kill deadline. Fires once; after that the branch is
    // disabled so we don't busy-loop re-killing.
    let timeout_at = timeout.map(|d| tokio::time::Instant::now() + d);
    let mut timed_out = false;

    // Optional inactivity watchdog: poll the pane's idle time and kill once it
    // exceeds the limit. Polled (rather than event-driven) since output
    // arrives on a blocking reader thread.
    let idle_limit_ms = idle_timeout.map(|d| d.as_millis() as u64);
    let mut idle_tick = idle_limit_ms.map(|_| tokio::time::interval(Duration::from_millis(500)));
    let mut idle_killed = false;

    loop {
        let exit_notify = current_pane.exit_notify.clone();
        tokio::select! {
            _ = async {
                match timeout_at {
                    Some(t) => tokio::time::sleep_until(t).await,
                    None => std::future::pending::<()>().await,
                }
            }, if !timed_out => {
                timed_out = true;
                current_pane.kill();
            }
            _ = async {
                match idle_tick.as_mut() {
                    Some(t) => { t.tick().await; }
                    None => std::future::pending::<()>().await,
                }
            }, if !idle_killed => {
                if let Some(limit) = idle_limit_ms
                    && current_pane.idle_ms() >= limit
                {
                    idle_killed = true;
                    current_pane.kill();
                }
            }
            Some(msg) = action_rx.recv() => match msg {
                LoopMessage::Restart => {
                    current_pane.kill();
                    current_pane.exit_notify.notified().await;
                    let new_pane = Arc::new(Pane::spawn(&cmd, rows, cols, &env, Some(&log_path), hub.clone(), tty)?);
                    handle.replace_cmd_pane(new_pane.clone()).await;
                    session::write_status(&id, &Status {
                        state: State::Running,
                        child_pid: new_pane.pid,
                        exit_code: None,
                        last_change: Utc::now(),
                    }).await?;
                    current_pane = new_pane;
                }
            },
            _ = exit_notify.notified() => {
                info = current_pane.exit_info();
                let signaled = info.map(|i| i.signaled).unwrap_or(true);
                let state = if signaled { State::Killed } else { State::Exited };
                session::write_status(&id, &Status {
                    state,
                    child_pid: None,
                    exit_code: info.and_then(|i| i.code),
                    last_change: Utc::now(),
                }).await?;
                break;
            }
        }
    }

    // Let the reader thread drain the final PTY output to the log and to any
    // attached clients' queues (bounded so lingering PTY holders can't wedge
    // shutdown), then tell attached clients the exit code.
    let _ = tokio::time::timeout(
        std::time::Duration::from_millis(500),
        current_pane.reader_done.notified(),
    )
    .await;
    let _ = exit_tx.send(Some(info.unwrap_or(ExitInfo {
        code: None,
        signaled: true,
    })));

    // Wait (bounded) for attached clients to flush the remaining output and
    // the exit frame and disconnect, so the live view isn't truncated. The
    // on-disk log already has everything regardless.
    let deadline = std::time::Instant::now() + std::time::Duration::from_secs(2);
    while attached.load(Ordering::SeqCst) > 0 && std::time::Instant::now() < deadline {
        tokio::time::sleep(std::time::Duration::from_millis(20)).await;
    }
    control::cleanup(&id);
    Ok(())
}

/// Announce a new session on stdout: a headline carrying the id and the
/// command line, followed by ready-to-paste `log` and `attach` commands.
/// The id is wrapped in colour escape codes only when stdout is a terminal.
fn print_banner(id: &str, cmd_title: &str) {
    // Escape codes are emitted for a real terminal only, so piped or
    // redirected output stays free of control sequences.
    let highlight = std::io::stdout().is_terminal();
    let on = if highlight { "\x1b[1;36m" } else { "" };
    let off = if highlight { "\x1b[0m" } else { "" };
    let shown_id = format!("{on}{id}{off}");

    println!("babysit session {shown_id}: {cmd_title}");
    println!("  babysit log -s {shown_id} --tail 200");
    println!("  babysit attach -s {shown_id}");
    // Best effort: a failed flush is deliberately ignored.
    let _ = std::io::stdout().flush();
}

/// Re-exec babysit as a detached worker that supervises `cmd` in the
/// background. The worker gets its own session (setsid) so it survives the
/// parent and the user's shell exiting, and its stdio is detached to
/// /dev/null (output is captured to the log and fanned out to attached
/// clients). The chosen `id` is handed down via --detached-id.
fn spawn_worker_process(
    cmd: &[String],
    id: &str,
    no_tty: bool,
    timeout: Option<Duration>,
    idle_timeout: Option<Duration>,
    size: Option<(u16, u16)>,
) -> Result<()> {
    use std::process::{Command, Stdio};

    let exe = std::env::current_exe().context("locating the babysit executable")?;
    let mut command = Command::new(exe);
    command.arg("run").arg("--detached-id").arg(id);
    if no_tty {
        command.arg("--no-tty");
    }
    if let Some(d) = timeout {
        // Pass milliseconds so a sub-second timeout isn't truncated to 0s when
        // re-exec'd into the worker.
        command.arg("--timeout").arg(format!("{}ms", d.as_millis()));
    }
    if let Some(d) = idle_timeout {
        command
            .arg("--idle-timeout")
            .arg(format!("{}ms", d.as_millis()));
    }
    if let Some((c, r)) = size {
        command.arg("--size").arg(format!("{c}x{r}"));
    }
    command.arg("--").args(cmd);
    command
        .stdin(Stdio::null())
        .stdout(Stdio::null())
        .stderr(Stdio::null());

    #[cfg(unix)]
    {
        use std::os::unix::process::CommandExt;
        // New session: detach from the controlling terminal and the parent's
        // process group so the worker isn't killed when the shell exits or
        // sends Ctrl-C to the foreground group.
        unsafe {
            command.pre_exec(|| {
                nix::unistd::setsid().map_err(|e| std::io::Error::from_raw_os_error(e as i32))?;
                Ok(())
            });
        }
    }

    command
        .spawn()
        .context("spawning detached babysit worker")?;
    Ok(())
}

/// Turn a `--timeout` argument into an optional deadline.
///
/// Surrounding whitespace is ignored. The empty string and the words `none`,
/// `off` and `never` (in any ASCII case) mean "no timeout" and yield `None`.
/// Anything else is handed to [`parse_duration`]; a result of zero — `0`,
/// `0s`, `0ms`, … — is likewise reported as `None`.
///
/// # Errors
/// Returns the [`parse_duration`] error for any other unparsable input.
pub fn parse_timeout(s: &str) -> Result<Option<Duration>> {
    const NO_TIMEOUT_WORDS: [&str; 3] = ["none", "off", "never"];

    let raw = s.trim();
    let disabled = raw.is_empty()
        || NO_TIMEOUT_WORDS
            .iter()
            .any(|word| raw.eq_ignore_ascii_case(word));
    if disabled {
        return Ok(None);
    }

    let parsed = parse_duration(raw)?;
    Ok(Some(parsed).filter(|d| !d.is_zero()))
}

/// Parse a human duration like `500ms`, `30s`, `10m`, `2h`, `1d`, or a bare
/// number of seconds, into a `Duration`.
///
/// Surrounding whitespace is ignored, and so is whitespace between the number
/// and its unit. Unit letters are accepted in lower or upper case (`ms` only
/// as `ms` or `MS`). The numeric part must be a non-negative integer.
///
/// # Errors
/// Returns an error when the input is empty, when the numeric part is not a
/// valid `u64`, or when the value converted to seconds does not fit in a
/// `u64` (e.g. an absurdly large number of days).
pub fn parse_duration(s: &str) -> Result<Duration> {
    let s = s.trim();
    if s.is_empty() {
        return Err(anyhow!("empty duration"));
    }
    let invalid = || anyhow!("invalid duration `{s}` (use e.g. 500ms, 30s, 10m, 2h)");

    // Milliseconds first, since `ms` ends in `s` and would otherwise be read
    // as seconds.
    if let Some(num) = s.strip_suffix("ms").or_else(|| s.strip_suffix("MS")) {
        let n: u64 = num.trim().parse().map_err(|_| invalid())?;
        return Ok(Duration::from_millis(n));
    }

    // A recognised unit is a single ASCII byte, so cutting one byte off the
    // end always lands on a character boundary.
    let without_unit = &s[..s.len() - 1];
    let (num, unit_secs): (&str, u64) = match s.as_bytes().last() {
        Some(b's' | b'S') => (without_unit, 1),
        Some(b'm' | b'M') => (without_unit, 60),
        Some(b'h' | b'H') => (without_unit, 3600),
        Some(b'd' | b'D') => (without_unit, 86400),
        // No unit letter: treat the whole string as a count of seconds.
        _ => (s, 1),
    };
    let n: u64 = num.trim().parse().map_err(|_| invalid())?;

    // Checked multiply: an unchecked `n * unit_secs` panics in debug builds
    // and wraps in release builds, turning a huge request into a tiny timeout.
    let secs = n
        .checked_mul(unit_secs)
        .ok_or_else(|| anyhow!("duration `{s}` is too large"))?;
    Ok(Duration::from_secs(secs))
}

/// Read a terminal geometry written as `COLSxROWS` (for example `120x40`)
/// and return it as `(cols, rows)`.
///
/// The separator may be `x` or `X`, and whitespace around either number is
/// ignored.
///
/// # Errors
/// Fails when the separator is missing, when either side is not a valid
/// `u16`, or when either dimension is zero.
pub fn parse_size(s: &str) -> Result<(u16, u16)> {
    let Some((cols_raw, rows_raw)) = s.split_once(['x', 'X']) else {
        return Err(anyhow!("invalid size `{s}` (use COLSxROWS, e.g. 120x40)"));
    };

    let cols = cols_raw
        .trim()
        .parse::<u16>()
        .map_err(|_| anyhow!("invalid columns in `{s}`"))?;
    let rows = rows_raw
        .trim()
        .parse::<u16>()
        .map_err(|_| anyhow!("invalid rows in `{s}`"))?;

    match (cols, rows) {
        // Neither dimension may be zero.
        (0, _) | (_, 0) => Err(anyhow!("size must be non-zero (got `{s}`)")),
        dims => Ok(dims),
    }
}

#[cfg(test)]
mod tests {
    use super::{parse_duration, parse_size, parse_timeout};
    use std::time::Duration;

    /// `0`, zero durations, the word forms and the empty string all disable
    /// the deadline; real durations pass through and garbage is rejected.
    #[test]
    fn timeout_sentinels_mean_infinite() {
        for sentinel in ["0", "0s", "none", "OFF", "never", ""] {
            assert_eq!(parse_timeout(sentinel).unwrap(), None);
        }
        let thirty = Duration::from_secs(30);
        assert_eq!(parse_timeout("30s").unwrap(), Some(thirty));
        assert!(parse_timeout("abc").is_err());
    }

    /// Every unit suffix, plus a bare number read as seconds.
    #[test]
    fn parses_units_and_bare_seconds() {
        let cases = [
            ("30s", 30),
            ("10m", 600),
            ("2h", 7200),
            ("1d", 86400),
            ("45", 45),
        ];
        for (input, secs) in cases {
            assert_eq!(parse_duration(input).unwrap(), Duration::from_secs(secs));
        }
    }

    /// Empty input, non-numeric input and unknown units are errors.
    #[test]
    fn rejects_garbage() {
        for input in ["", "abc", "10x"] {
            assert!(parse_duration(input).is_err());
        }
    }

    /// The `ms` suffix is honoured and never mistaken for plain `s`.
    #[test]
    fn parses_milliseconds() {
        let half_second = parse_duration("500ms").unwrap();
        assert_eq!(half_second, Duration::from_millis(500));
        assert_eq!(parse_duration("0ms").unwrap(), Duration::from_millis(0));
        // `ms` must win over the `s`-suffix branch.
        assert_ne!(half_second, Duration::from_secs(500));
        assert!(parse_duration("ms").is_err());
    }

    /// Both separator cases are accepted; incomplete, zero and non-numeric
    /// geometries are rejected.
    #[test]
    fn parses_size() {
        assert_eq!(parse_size("120x40").unwrap(), (120, 40));
        assert_eq!(parse_size("80X24").unwrap(), (80, 24));
        for input in ["120", "0x10", "axb"] {
            assert!(parse_size(input).is_err());
        }
    }
}