pitboss 0.3.0

CLI that orchestrates coding agents (Claude Code and others) through a phased implementation plan, with automatic test/commit loops and a TUI dashboard
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
//! Agent abstraction — the single pluggable surface for every role.
//!
//! Phase 7 nails the trait shape down once. Concrete implementations
//! ([`dry_run::DryRunAgent`] for tests, [`claude_code::ClaudeCodeAgent`] for
//! production in phase 8) plug into the same [`Agent::run`] contract and the
//! runner driving them stays identical.
//!
//! ## Shape
//!
//! - [`AgentRequest`] is the per-dispatch input. Composed once by the runner
//!   from `config.toml`, the active phase, and the prompt template.
//! - [`AgentEvent`] is streamed on the caller-supplied
//!   [`tokio::sync::mpsc::Sender`] while the agent runs. Events are best-effort
//!   — if the receiver is dropped, the agent keeps running and continues to
//!   write to its log file.
//! - [`AgentOutcome`] is the terminal value. [`StopReason`] tells the runner
//!   which terminator fired (natural exit, timeout, cancel, internal error).
//!
//! Implementations **must** honor both the supplied `cancel`
//! [`tokio_util::sync::CancellationToken`] and `req.timeout`.

pub mod aider;
pub mod backend;
pub mod claude_code;
pub mod codex;
pub mod dry_run;
pub mod gemini;
pub mod subprocess;

use std::collections::HashMap;
use std::path::PathBuf;
use std::time::Duration;

use anyhow::Result;
use async_trait::async_trait;
use serde::{Deserialize, Serialize};
use tokio::sync::mpsc;
use tokio_util::sync::CancellationToken;

use crate::state::TokenUsage;

pub use subprocess::{run_logged, run_logged_with_stdin, SubprocessOutcome};

/// Which agent role is being dispatched.
///
/// Round-trips through serde as the lowercase string used in `config.toml`'s
/// `[models]` keys, so a single source of truth covers config and runtime.
/// [`Role::as_str`] and the [`std::fmt::Display`] impl produce the same
/// lowercase spellings (`"planner"`, `"implementer"`, `"auditor"`, `"fixer"`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum Role {
    /// `pitboss plan` — generates a fresh `plan.md` from a goal.
    Planner,
    /// Per-phase implementation pass — the bulk of token spend.
    Implementer,
    /// Post-phase, pre-commit audit pass.
    Auditor,
    /// Test-failure fix-up pass; bounded by `retries.fixer_max_attempts`.
    Fixer,
}

impl Role {
    /// String name matching the `config.toml` `[models]` key. Stable.
    pub fn as_str(self) -> &'static str {
        match self {
            Role::Planner => "planner",
            Role::Implementer => "implementer",
            Role::Auditor => "auditor",
            Role::Fixer => "fixer",
        }
    }
}

impl std::fmt::Display for Role {
    /// Formats the role as its [`Role::as_str`] name.
    ///
    /// Goes through [`std::fmt::Formatter::pad`] rather than `write_str` so
    /// that width, fill, alignment, and precision flags (e.g. `{:<12}` when
    /// laying out columns) are honored. With a plain `{}` the output is
    /// exactly `as_str()`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.pad(self.as_str())
    }
}

/// Everything the runner hands an agent to dispatch it once.
///
/// Passed by value as the `req` argument of [`Agent::run`]; one request
/// describes one dispatch.
#[derive(Debug, Clone)]
pub struct AgentRequest {
    /// Which role this dispatch fills.
    pub role: Role,
    /// Model identifier passed verbatim to the underlying agent (e.g., the
    /// `--model` flag of the `claude` CLI). Validation is the agent's job.
    pub model: String,
    /// System prompt template, fully substituted.
    pub system_prompt: String,
    /// User prompt body, fully substituted.
    pub user_prompt: String,
    /// Working directory the agent should operate in.
    pub workdir: PathBuf,
    /// Per-attempt log file the agent must tee its output into. The agent
    /// creates this file (and any parent dirs) if it does not exist.
    pub log_path: PathBuf,
    /// Hard wall-clock cap. If the agent is still running when this elapses
    /// the impl must terminate it and return [`StopReason::Timeout`].
    pub timeout: Duration,
    /// Extra environment variables the agent must apply to any subprocess it
    /// spawns, keyed by variable name. The grind runner uses this to surface
    /// `PITBOSS_RUN_ID`, `PITBOSS_PROMPT_NAME`, `PITBOSS_SUMMARY_FILE`,
    /// `PITBOSS_SCRATCHPAD`, and `PITBOSS_SESSION_SEQ` to the dispatched
    /// agent. The phased `play` runner leaves this empty.
    pub env: HashMap<String, String>,
}

/// Streaming events emitted while an agent runs.
///
/// Delivered over the `mpsc::Sender<AgentEvent>` handed to [`Agent::run`].
/// Implementations are responsible for ordering and channel delivery. Sends
/// are best-effort: a closed receiver does not abort the run.
#[derive(Debug, Clone)]
pub enum AgentEvent {
    /// One line of standard output from the underlying process.
    Stdout(String),
    /// One line of standard error from the underlying process.
    Stderr(String),
    /// Incremental token usage update — runner sums these into the running
    /// [`TokenUsage`] total for the active role.
    TokenDelta(TokenUsage),
    /// Tool invocation announced by the agent (used by the dashboard/logger).
    ToolUse(String),
}

/// Final result of a single agent dispatch.
///
/// This is the `Ok` payload of [`Agent::run`]; inspect [`Self::stop_reason`]
/// before trusting [`Self::exit_code`].
#[derive(Debug, Clone)]
pub struct AgentOutcome {
    /// Underlying process exit code. `-1` for non-process outcomes
    /// (timeout, cancel, internal errors).
    pub exit_code: i32,
    /// Why the agent stopped.
    pub stop_reason: StopReason,
    /// Total token usage observed across the run, attributable to
    /// `req.role`. `by_role` may be left empty by impls that only know
    /// totals; the runner re-keys before persisting into [`crate::state::RunState`].
    pub tokens: TokenUsage,
    /// Echo of the request's `log_path`, returned for convenience so callers
    /// don't have to plumb the request through to where the log is consumed.
    pub log_path: PathBuf,
}

/// Why an agent stopped running.
///
/// Equality is derived, so two [`StopReason::Error`] values compare equal
/// only when their messages match.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum StopReason {
    /// Agent ran to natural completion. `exit_code` may still be non-zero.
    Completed,
    /// Agent exceeded `AgentRequest::timeout` and was terminated.
    Timeout,
    /// Caller's [`CancellationToken`] was triggered and the agent terminated.
    Cancelled,
    // NOTE(review): "failed to spawn" is listed here, while the `Agent` trait
    // docs route "couldn't spawn subprocess at all" through `Err(_)`. Confirm
    // which spawn failures belong in each channel and align the two docs.
    /// Internal error preventing normal completion (failed to spawn, agent
    /// protocol parse error, etc.). Carries a human-readable message.
    Error(String),
}

/// Single pluggable abstraction for every agent role.
///
/// Implementations must:
/// 1. Stream `AgentEvent`s on `events` while running (best-effort sends).
/// 2. Honor `cancel` — once `CancellationToken::cancelled()` resolves, stop.
/// 3. Honor `req.timeout` — internal wall clock, not the runner's job.
/// 4. Return an [`AgentOutcome`] with a [`StopReason`] reflecting which
///    terminator fired. Internal errors return `Ok(outcome)` with
///    [`StopReason::Error`] rather than `Err(_)`; the `Err` channel is for
///    setup failures (couldn't open log file, couldn't spawn subprocess at
///    all, etc.).
// NOTE(review): the `StopReason::Error` docs also list "failed to spawn" as an
// example; confirm which spawn failures map to `Err(_)` versus
// `StopReason::Error` and align the wording.
#[async_trait]
pub trait Agent: Send + Sync {
    /// Short identifier for log lines (e.g., `"claude-code"`, `"dry-run"`).
    fn name(&self) -> &str;

    /// Run the agent to completion (or until cancelled / timed out).
    ///
    /// - `req`: the per-dispatch input (role, model, prompts, workdir, log
    ///   path, timeout, extra environment).
    /// - `events`: channel on which [`AgentEvent`]s are streamed while the
    ///   agent runs.
    /// - `cancel`: token the implementation must observe to stop early.
    ///
    /// Returns the terminal [`AgentOutcome`] on `Ok`; see the trait-level
    /// rules for when `Err` is used instead.
    async fn run(
        &self,
        req: AgentRequest,
        events: mpsc::Sender<AgentEvent>,
        cancel: CancellationToken,
    ) -> Result<AgentOutcome>;
}

/// Forwarding impl: a boxed agent (including `Box<dyn Agent + Send + Sync>`)
/// is itself an [`Agent`].
///
/// This lets [`build_agent`] hand back a heap-allocated trait object that
/// still satisfies the generic bounds used by the runner and CLI helpers
/// (`Runner::new<A: Agent + 'static>`, `run_with_agent<A: Agent>`), so call
/// sites never need to name the concrete backend type. Both methods simply
/// delegate to the boxed value.
#[async_trait]
impl<A> Agent for Box<A>
where
    A: Agent + ?Sized,
{
    fn name(&self) -> &str {
        A::name(&**self)
    }

    async fn run(
        &self,
        req: AgentRequest,
        events: mpsc::Sender<AgentEvent>,
        cancel: CancellationToken,
    ) -> Result<AgentOutcome> {
        A::run(&**self, req, events, cancel).await
    }
}

/// Builds the boxed [`Agent`] selected by the `[agent] backend` key of
/// `config.toml`.
///
/// When `backend` is not set, [`backend::BackendKind::default`] is used
/// (Claude Code), so workspaces without an `[agent]` section keep today's
/// behavior. When it is set, the string is parsed through
/// [`backend::BackendKind`]'s [`std::str::FromStr`] impl and an unknown name
/// surfaces that parse error.
///
/// Every backend (`claude_code`, `codex`, `aider`, `gemini`) is assembled the
/// same way from its `[agent.<backend>]` sub-table: start from the configured
/// binary (or the adapter's default), then apply `extra_args` when non-empty,
/// then the `model` override when present. `claude_code` additionally applies
/// `permission_mode` and `codex` applies `approval_policy`.
pub fn build_agent(cfg: &crate::config::Config) -> Result<Box<dyn Agent + Send + Sync>> {
    let kind = if let Some(name) = cfg.agent.backend.as_deref() {
        name.parse::<backend::BackendKind>()?
    } else {
        backend::BackendKind::default()
    };

    let boxed: Box<dyn Agent + Send + Sync> = match kind {
        backend::BackendKind::ClaudeCode => {
            let table = &cfg.agent.claude_code;
            let agent = if let Some(path) = table.binary.as_ref() {
                claude_code::ClaudeCodeAgent::with_binary(path)
            } else {
                claude_code::ClaudeCodeAgent::new()
            };
            let agent = if table.extra_args.is_empty() {
                agent
            } else {
                agent.with_extra_args(table.extra_args.clone())
            };
            let agent = match table.model.as_deref() {
                Some(model) => agent.with_model_override(model),
                None => agent,
            };
            let agent = match table.permission_mode.as_deref() {
                Some(mode) => agent.with_permission_mode(mode),
                None => agent,
            };
            Box::new(agent)
        }
        backend::BackendKind::Codex => {
            let table = &cfg.agent.codex;
            let agent = if let Some(path) = table.binary.as_ref() {
                codex::CodexAgent::with_binary(path)
            } else {
                codex::CodexAgent::new()
            };
            let agent = if table.extra_args.is_empty() {
                agent
            } else {
                agent.with_extra_args(table.extra_args.clone())
            };
            let agent = match table.model.as_deref() {
                Some(model) => agent.with_model_override(model),
                None => agent,
            };
            let agent = match table.approval_policy.as_deref() {
                Some(policy) => agent.with_approval_policy(policy),
                None => agent,
            };
            Box::new(agent)
        }
        backend::BackendKind::Aider => {
            let table = &cfg.agent.aider;
            let agent = if let Some(path) = table.binary.as_ref() {
                aider::AiderAgent::with_binary(path)
            } else {
                aider::AiderAgent::new()
            };
            let agent = if table.extra_args.is_empty() {
                agent
            } else {
                agent.with_extra_args(table.extra_args.clone())
            };
            let agent = match table.model.as_deref() {
                Some(model) => agent.with_model_override(model),
                None => agent,
            };
            Box::new(agent)
        }
        backend::BackendKind::Gemini => {
            let table = &cfg.agent.gemini;
            let agent = if let Some(path) = table.binary.as_ref() {
                gemini::GeminiAgent::with_binary(path)
            } else {
                gemini::GeminiAgent::new()
            };
            let agent = if table.extra_args.is_empty() {
                agent
            } else {
                agent.with_extra_args(table.extra_args.clone())
            };
            let agent = match table.model.as_deref() {
                Some(model) => agent.with_model_override(model),
                None => agent,
            };
            Box::new(agent)
        }
    };

    Ok(boxed)
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn role_as_str_matches_config_keys() {
        // `as_str` is documented as matching the `[models]` keys in
        // `config.toml`; pin every spelling.
        assert_eq!(Role::Planner.as_str(), "planner");
        assert_eq!(Role::Implementer.as_str(), "implementer");
        assert_eq!(Role::Auditor.as_str(), "auditor");
        assert_eq!(Role::Fixer.as_str(), "fixer");
    }

    #[test]
    fn role_serde_round_trips_through_lowercase_string() {
        // `#[serde(rename_all = "lowercase")]` must serialize to the same
        // spelling `as_str` returns and deserialize back to the variant.
        let json = serde_json::to_string(&Role::Implementer).unwrap();
        assert_eq!(json, "\"implementer\"");
        let back: Role = serde_json::from_str(&json).unwrap();
        assert_eq!(back, Role::Implementer);
    }

    #[test]
    fn stop_reason_equality_compares_error_payload() {
        // Derived `PartialEq`: unit variants compare by variant, and
        // `Error` additionally compares its message — it is NOT ignored.
        assert_eq!(StopReason::Completed, StopReason::Completed);
        assert_ne!(StopReason::Completed, StopReason::Timeout);
        assert_eq!(StopReason::Error("x".into()), StopReason::Error("x".into()));
        assert_ne!(StopReason::Error("x".into()), StopReason::Error("y".into()));
    }

    #[test]
    fn build_agent_defaults_to_claude_code_when_unspecified() {
        // No `[agent] backend` set: the default backend must be Claude Code.
        let cfg = crate::config::Config::default();
        match build_agent(&cfg) {
            Ok(agent) => assert_eq!(agent.name(), "claude-code"),
            Err(e) => panic!("default config must build the claude_code agent: {e:#}"),
        }
    }

    #[test]
    fn build_agent_dispatches_explicit_claude_code() {
        // Naming the default backend explicitly must behave like omitting it.
        let mut cfg = crate::config::Config::default();
        cfg.agent.backend = Some("claude_code".to_string());
        match build_agent(&cfg) {
            Ok(agent) => assert_eq!(agent.name(), "claude-code"),
            Err(e) => panic!("explicit claude_code must build: {e:#}"),
        }
    }

    #[test]
    fn build_agent_has_no_pending_backends() {
        // Every config-facing backend name must parse and construct an
        // adapter — pitboss shipped its full backend trio in phases 02–04
        // (codex, aider, gemini) on top of the default claude_code.
        //
        // The `match` in `build_agent` has no wildcard arm, so a new
        // `BackendKind` variant without a factory arm already fails to
        // compile. This list is hard-coded and will NOT pick up new variants
        // by itself: extend it whenever a backend is added.
        for name in ["claude_code", "codex", "aider", "gemini"] {
            let mut cfg = crate::config::Config::default();
            cfg.agent.backend = Some(name.to_string());
            if let Err(e) = build_agent(&cfg) {
                panic!("backend {name} must build a concrete agent: {e:#}");
            }
        }
    }

    #[test]
    fn build_agent_dispatches_explicit_codex() {
        // Phase 02 acceptance: setting `[agent] backend = "codex"` must build
        // the CodexAgent adapter rather than the default Claude Code one.
        // `Box<dyn Agent>` hides the concrete type, so we verify via
        // `Agent::name`, which is the same surface the runner logs use.
        let mut cfg = crate::config::Config::default();
        cfg.agent.backend = Some("codex".to_string());
        match build_agent(&cfg) {
            Ok(agent) => assert_eq!(agent.name(), "codex"),
            Err(e) => panic!("explicit codex must build: {e:#}"),
        }
    }

    #[test]
    fn build_agent_dispatches_explicit_aider() {
        // Phase 03 acceptance: setting `[agent] backend = "aider"` must build
        // the AiderAgent adapter rather than the default Claude Code one or
        // erroring out as a not-yet-implemented backend.
        let mut cfg = crate::config::Config::default();
        cfg.agent.backend = Some("aider".to_string());
        match build_agent(&cfg) {
            Ok(agent) => assert_eq!(agent.name(), "aider"),
            Err(e) => panic!("explicit aider must build: {e:#}"),
        }
    }

    #[test]
    fn build_agent_dispatches_explicit_gemini() {
        // Phase 04 acceptance: setting `[agent] backend = "gemini"` must build
        // the GeminiAgent adapter rather than the default Claude Code one or
        // erroring out as a not-yet-implemented backend.
        let mut cfg = crate::config::Config::default();
        cfg.agent.backend = Some("gemini".to_string());
        match build_agent(&cfg) {
            Ok(agent) => assert_eq!(agent.name(), "gemini"),
            Err(e) => panic!("explicit gemini must build: {e:#}"),
        }
    }

    #[test]
    fn build_agent_gemini_honors_overrides() {
        // The `[agent.gemini]` table must reach the constructed agent so tests
        // (and real installs in non-standard locations) can point at a stub
        // script and apply per-backend `extra_args` / `model`.
        let mut cfg = crate::config::Config::default();
        cfg.agent.backend = Some("gemini".to_string());
        cfg.agent.gemini.binary = Some(std::path::PathBuf::from("/tmp/fake-gemini"));
        cfg.agent.gemini.extra_args = vec!["--include-directories".into(), "src".into()];
        cfg.agent.gemini.model = Some("gemini-2.5-flash".into());
        match build_agent(&cfg) {
            Ok(agent) => assert_eq!(agent.name(), "gemini"),
            Err(e) => panic!("gemini with overrides must build: {e:#}"),
        }
    }

    #[test]
    fn build_agent_aider_honors_overrides() {
        // The `[agent.aider]` table must reach the constructed agent so
        // tests (and real installs in non-standard locations) can point at a
        // stub script and apply per-backend `extra_args` / `model`.
        let mut cfg = crate::config::Config::default();
        cfg.agent.backend = Some("aider".to_string());
        cfg.agent.aider.binary = Some(std::path::PathBuf::from("/tmp/fake-aider"));
        cfg.agent.aider.extra_args = vec!["--no-auto-commits".into()];
        cfg.agent.aider.model = Some("anthropic/sonnet-4.5".into());
        match build_agent(&cfg) {
            Ok(agent) => assert_eq!(agent.name(), "aider"),
            Err(e) => panic!("aider with overrides must build: {e:#}"),
        }
    }

    #[test]
    fn build_agent_claude_code_honors_overrides() {
        // The `[agent.claude_code]` table must reach the constructed agent
        // so a workspace-pinned binary, model, or extra args actually changes
        // dispatch behavior. Without this wiring the TUI header would show an
        // override model the backend never uses.
        let mut cfg = crate::config::Config::default();
        cfg.agent.backend = Some("claude_code".to_string());
        cfg.agent.claude_code.binary = Some(std::path::PathBuf::from("/tmp/fake-claude"));
        cfg.agent.claude_code.extra_args = vec!["--max-turns".into(), "50".into()];
        cfg.agent.claude_code.model = Some("claude-opus-4-7".into());
        match build_agent(&cfg) {
            Ok(agent) => assert_eq!(agent.name(), "claude-code"),
            Err(e) => panic!("claude_code with overrides must build: {e:#}"),
        }
    }

    #[test]
    fn build_agent_codex_honors_binary_override() {
        // The `[agent.codex] binary = "..."` override must reach the
        // constructed agent so tests (and real installs in non-standard
        // locations) can point at a stub script. The dispatch path doesn't
        // spawn the binary, so an obviously-fake path is fine here.
        let mut cfg = crate::config::Config::default();
        cfg.agent.backend = Some("codex".to_string());
        cfg.agent.codex.binary = Some(std::path::PathBuf::from("/tmp/fake-codex"));
        cfg.agent.codex.extra_args = vec!["--quiet".into()];
        cfg.agent.codex.model = Some("gpt-5-codex".into());
        match build_agent(&cfg) {
            Ok(agent) => assert_eq!(agent.name(), "codex"),
            Err(e) => panic!("codex with overrides must build: {e:#}"),
        }
    }

    #[test]
    fn build_agent_rejects_unknown_backend() {
        // An unrecognized backend string must fail, and the error should echo
        // the offending value so the user can spot the typo. `Box<dyn Agent>`
        // is not `Debug`, hence the explicit `match` instead of `unwrap_err`.
        let mut cfg = crate::config::Config::default();
        cfg.agent.backend = Some("ollama".into());
        match build_agent(&cfg) {
            Ok(_) => panic!("unknown backend must not build"),
            Err(e) => {
                let msg = format!("{e:#}");
                assert!(
                    msg.contains("ollama"),
                    "expected unknown-backend error to echo the input, got: {msg}"
                );
            }
        }
    }
}