defect-cli 0.1.0-alpha.4

defect: a highly configurable, ACP-native, resource-frugal headless general-purpose agent CLI.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
//! Single-turn unattended mode —— `defect --message <prompt>`.
//!
//! Purpose: in CI / scripts, "run one prompt, produce a result, exit by
//! success/failure". On **equal footing** with the interactive REPL and the
//! ACP server; the three only share the [`AgentCore`] kernel:
//!
//! - Does **not** reuse the REPL's [`crate::repl`] rendering (that stack carries
//!   ANSI / line editing, bound to the `repl` feature's crossterm/owo-colors).
//!   This module ships its own minimal, ANSI-free event projection, so under
//!   `--no-default-features --features oneshot` it builds a slim CI binary with
//!   no TUI dependencies.
//! - Connects in-process directly to `AgentCore` (like the REPL), bypassing the
//!   wire —— CI runs its own agent and does not need ACP's cross-process
//!   generality.
//!
//! ## Output contract: stdout = agent content, stderr = framework logs
//!
//! **All agent content** (assistant body / thinking / tool calls) goes to
//! **stdout** as a single stream in event order; framework-level diagnostics
//! (denial warnings, turn errors, unreached goals) go through `tracing` —— and
//! `tracing` is uniformly written to **stderr** by `defect_obs::init_tracing`.
//! So `2>/dev/null` cleanly filters out framework noise while preserving the
//! agent's complete work record; the two streams no longer share a cursor or
//! run together.
//!
//! ## Exit codes (CI's lifeline for judging success/failure)
//!
//! Priority high to low: `TurnError`(1) > `Refusal`(3) > `MaxTokens`(2) >
//! `MaxTurnRequests`(7) > `Cancelled`(5) > unattended denial (`denied`, 4) >
//! goal unreached (6) > `EndTurn`(0). `MaxTokens` (a single response truncated
//! by the output limit) and `MaxTurnRequests` (the per-turn call budget
//! exhausted) are distinct conditions and carry distinct codes. See
//! `ExitOutcome`.
//!
//! ## Non-interactive permissions
//!
//! The caller (`bin/cli.rs`) is responsible for wrapping the session's policy in
//! [`defect_agent::policy::NonInteractivePolicy`], so that `Ask` degrades to
//! `Deny` and it does not hang waiting for input in a TTY-less environment. This
//! module listens for `PolicyDecision::Deny` in the event stream: once one
//! occurs, it logs a warning via `tracing` (→ stderr) and sets the `denied`
//! flag, so even if the turn ends normally with `EndTurn` it exits with a
//! non-zero code —— fail loud, letting CI know "an operation was denied, this
//! result is not trustworthy".

use std::collections::HashMap;
use std::path::PathBuf;
use std::process::ExitCode;
use std::sync::Arc;

use agent_client_protocol_schema::{ContentBlock, SessionId, StopReason, TextContent, ToolCallId};
use defect_agent::event::AgentEvent;
use defect_agent::policy::PolicyDecision;
use defect_agent::session::{AgentCore, TurnError};
use futures::{FutureExt, StreamExt};
use tokio::io::{AsyncWriteExt, Stdout};

use crate::args::OutputFormat;
use crate::session_open::{LocalSessionOpts, open_local_session};

/// Runs a single-turn prompt and returns the process exit code.
///
/// Flow: resolve the prompt (literal or stdin), open a local session, start
/// the turn, and pump the session's event stream into an [`EventSink`] until
/// the turn future resolves. Events that are already buffered when the turn
/// ends are drained before the exit code is computed.
///
/// Only when `track_denied = true` (the caller wrapped `NonInteractivePolicy`)
/// is `PolicyDecision::Deny` treated as an "unattended gap" that affects the
/// exit code. Modes like `deny-all`, where the user knowingly expects
/// denials, should pass `false` to avoid spuriously reporting non-zero.
///
/// When `goal` is `Some` (`--goal` mode) and the goal is still unreached
/// after the turn ends (the agent stopped or ran out of turns without ever
/// calling `goal_done`), a warning is logged and, if the turn otherwise ended
/// successfully with no tracked denial, the process exits with the
/// `ExitOutcome::GoalUnreached` code (6). This prevents CI from mistaking
/// "ran out of turns without reaching the goal" for success.
///
/// # Errors
///
/// Session open failure, stdin read failure, event serialization failure
/// (JSON format), or stdout write/flush failure. A failed *turn* is not an
/// `Err` here: it is reported through the returned exit code.
// The parameters mirror independent CLI options; bundling them into a struct
// would only move the arity to the single call site.
#[allow(clippy::too_many_arguments)]
pub async fn run(
    agent: Arc<dyn AgentCore>,
    cwd: PathBuf,
    message: String,
    format: OutputFormat,
    resume: Option<SessionId>,
    track_denied: bool,
    goal: Option<Arc<defect_agent::session::GoalState>>,
    shell_output_max_bytes: usize,
) -> anyhow::Result<ExitCode> {
    let prompt = resolve_prompt(message).await?;

    let mut out = tokio::io::stdout();

    let session = open_local_session(
        &agent,
        &cwd,
        LocalSessionOpts {
            resume,
            shell_output_max_bytes,
        },
    )
    .await?;

    // Subscribe once outside the loop, draining across this turn (including the
    // driver's autonomous turn continuations), mirroring the interactive
    // REPL / ACP event pump. The turn future only yields the final StopReason;
    // this turn's content is pushed through the event stream.
    let mut events = session.subscribe();
    let mut sink = EventSink::new(format, track_denied);

    let prompt_blocks = vec![ContentBlock::Text(TextContent::new(prompt))];
    let turn = session.run_turn(prompt_blocks);
    tokio::pin!(turn);

    // Becomes false once the event stream reports end-of-stream. Without this
    // guard a terminated stream would resolve to `None` on every iteration
    // and, because the select is biased towards events, the turn future would
    // never be polled again (a busy loop that never exits).
    let mut events_open = true;

    let result = loop {
        tokio::select! {
            // biased + drain events first: the turn future and the event stream
            // may become ready at the same time (turn finishes fast, trailing
            // events already in the buffer). Poll events first to avoid dropping
            // any rendering.
            biased;
            ev = events.next(), if events_open => {
                match ev {
                    Some(ev) => sink.emit(&mut out, ev).await?,
                    // Stream finished: disable this branch and keep waiting on
                    // the turn alone.
                    None => events_open = false,
                }
            }
            r = &mut turn => break r,
        }
    };

    // Turn has ended, but the buffer may still hold just-sent, not-yet-polled
    // trailing events: drain everything that is immediately ready. A stream
    // that already reported end-of-stream is not polled again.
    while events_open {
        match events.next().now_or_never() {
            Some(Some(ev)) => sink.emit(&mut out, ev).await?,
            // Either nothing is ready right now or the stream just ended.
            _ => break,
        }
    }

    // goal mode: the turn is over but the goal is unreached (turns exhausted
    // without ever calling goal_done) → GoalUnreached, unless a
    // higher-priority outcome applies.
    let goal_unreached = goal.as_ref().is_some_and(|g| !g.is_reached());
    if goal_unreached {
        tracing::warn!(
            "goal not reached: the agent stopped (or ran out of turns) without calling `goal_done`"
        );
    }
    let outcome = ExitOutcome::from(&result, sink.denied, goal_unreached);
    sink.finish(&mut out, &result, &outcome).await?;
    out.flush().await?;
    Ok(outcome.code())
}

/// Decides where the prompt text comes from.
///
/// - `"-"` always means "read the whole of stdin".
/// - An empty message means "read stdin" only when stdin is not a terminal
///   (i.e. it is piped or redirected).
/// - Anything else is returned unchanged as the literal prompt.
///
/// # Errors
///
/// Propagates the I/O error if reading stdin to end fails (including invalid
/// UTF-8, since the data is read into a `String`).
async fn resolve_prompt(message: String) -> anyhow::Result<String> {
    use std::io::IsTerminal;
    use tokio::io::AsyncReadExt;

    let wants_stdin = match message.as_str() {
        "-" => true,
        "" => !std::io::stdin().is_terminal(),
        _ => false,
    };
    if !wants_stdin {
        return Ok(message);
    }

    let mut piped = String::new();
    tokio::io::stdin().read_to_string(&mut piped).await?;
    Ok(piped)
}

/// Terminal classification of a one-shot run, reduced from the turn result
/// plus the "denied" and "goal unreached" flags. Each variant maps to one
/// process exit code (listed per variant; see `raw`).
enum ExitOutcome {
    /// Exit code 0: the turn ended normally with nothing to report.
    Success,
    /// Exit code 1: the turn itself failed with a `TurnError`.
    Error,
    /// Exit code 2: a single LLM response was truncated by the output
    /// `max_tokens` limit.
    MaxTokens,
    /// Exit code 3: the turn stopped with a refusal.
    Refusal,
    /// Exit code 4: the turn ended normally, but a tracked policy denial
    /// occurred along the way.
    Denied,
    /// Exit code 5: the turn was cancelled.
    Cancelled,
    /// Exit code 6, goal mode: the turn ended normally but the goal is
    /// unreached (turns exhausted / model gave up).
    GoalUnreached,
    /// Exit code 7: the per-turn LLM-call budget (`request_limit`) was
    /// exhausted.
    MaxRequests,
}

impl ExitOutcome {
    /// Classifies a finished turn.
    ///
    /// A turn error always wins. The explicit abnormal stop reasons
    /// (`Refusal`, `MaxTokens`, `MaxTurnRequests`, `Cancelled`) map to their
    /// own outcomes regardless of the flags. Every other stop reason is
    /// treated as success-class and is downgraded first by `denied`, then by
    /// `goal_unreached`.
    fn from(result: &Result<StopReason, TurnError>, denied: bool, goal_unreached: bool) -> Self {
        let Ok(stop) = result else {
            return Self::Error;
        };
        match stop {
            StopReason::Refusal => Self::Refusal,
            StopReason::MaxTokens => Self::MaxTokens,
            StopReason::MaxTurnRequests => Self::MaxRequests,
            StopReason::Cancelled => Self::Cancelled,
            // EndTurn (and any future success-class terminal state):
            // denied > goal unreached > success.
            _ if denied => Self::Denied,
            _ if goal_unreached => Self::GoalUnreached,
            _ => Self::Success,
        }
    }

    /// The numeric process exit status for this outcome (0 means success).
    fn raw(&self) -> u8 {
        match self {
            Self::Success => 0,
            Self::Error => 1,
            Self::MaxTokens => 2,
            Self::Refusal => 3,
            Self::Denied => 4,
            Self::Cancelled => 5,
            Self::GoalUnreached => 6,
            Self::MaxRequests => 7,
        }
    }

    /// The same status as [`Self::raw`], wrapped as a [`ExitCode`] for
    /// returning from `main`.
    fn code(&self) -> ExitCode {
        self.raw().into()
    }
}

/// Event projector: renders the [`AgentEvent`] stream to stdout according to
/// the [`OutputFormat`] (NDJSON, plain text, or nothing for quiet).
///
/// The sink itself only writes to the `Stdout` handle it is given; denial
/// warnings and turn errors are reported through `tracing` instead. It also
/// carries the small amount of state needed across events: the denial flag,
/// tool labels, and the text-mode line/thinking-block tracking.
struct EventSink {
    /// Output format selected by the caller; decides how `emit` and `finish`
    /// render.
    format: OutputFormat,
    /// Whether `PolicyDecision::Deny` events should be tracked (set `denied`
    /// and log a warning). When `false`, denials are not tracked.
    track_denied: bool,
    /// Whether an unattended denial has occurred.
    denied: bool,
    /// `ToolCallId → tool label` (the call's title if present, else the tool
    /// name), used to report which tool was involved on a
    /// `PolicyDecision::Deny`.
    tool_names: HashMap<ToolCallId, String>,
    /// In text format: whether stdout is still mid-line (the last thing written
    /// was not a `\n`). In goal mode it inserts newlines between multi-turn
    /// assistant outputs, avoiding "previous tail + next head" running together
    /// on one line.
    mid_line: bool,
    /// In text format: whether we are currently inside a thinking block. A
    /// thinking block's multiple deltas share one `[thinking] ` prefix (printed
    /// only on the first delta), so consecutive deltas merge into one block.
    in_thought: bool,
}

impl EventSink {
    /// Creates a sink in its initial state: no denial seen, no known tool
    /// labels, cursor at the start of a line, not inside a thinking block.
    fn new(format: OutputFormat, track_denied: bool) -> Self {
        let tool_names = HashMap::new();
        Self {
            format,
            track_denied,
            tool_names,
            denied: false,
            in_thought: false,
            mid_line: false,
        }
    }

    /// Handles one event: updates bookkeeping (tool labels, denial flag) and
    /// then renders the event in the configured output format.
    ///
    /// # Errors
    ///
    /// Propagates serialization or stdout write failures from the
    /// format-specific renderer.
    async fn emit(&mut self, out: &mut Stdout, event: AgentEvent) -> anyhow::Result<()> {
        match &event {
            // Remember a label for every tool call, whatever the format, so a
            // later denial can name the tool.
            AgentEvent::ToolCallStarted { id, name, fields } => {
                let label = fields.title.as_ref().unwrap_or(name).clone();
                self.tool_names.insert(id.clone(), label);
            }
            // Unattended denial: raise the flag (fail loud) and report it as
            // a framework-level diagnostic through tracing.
            AgentEvent::PolicyDecision {
                id,
                decision: PolicyDecision::Deny,
            } if self.track_denied => {
                self.denied = true;
                let tool = self.tool_names.get(id).map_or("<unknown>", String::as_str);
                tracing::warn!(
                    tool = %tool,
                    "tool denied: no operator present to approve (non-interactive)"
                );
            }
            _ => {}
        }

        match self.format {
            OutputFormat::Quiet => Ok(()),
            OutputFormat::Text => self.emit_text(out, &event).await,
            OutputFormat::Json => self.emit_json(out, &event).await,
        }
    }

    /// NDJSON rendering: serializes the event with `serde_json` and writes it
    /// as one newline-terminated line. (`LlmCallStarted.request` is expected
    /// to be `#[serde(skip)]` on the `defect_agent` side, so it never enters
    /// the JSON.)
    ///
    /// # Errors
    ///
    /// Serialization failure or stdout write failure.
    async fn emit_json(&self, out: &mut Stdout, event: &AgentEvent) -> anyhow::Result<()> {
        let mut record = serde_json::to_string(event)?;
        record.push('\n');
        write_raw(out, &record).await
    }

    /// Plain-text rendering: assistant body, thinking, and tool-call lines are
    /// all written to stdout as one stream, in event order. Other event kinds
    /// produce no text output.
    ///
    /// Line boundaries: assistant body frequently lacks a trailing newline,
    /// yet the next thing written may be a thinking block, a tool line, or a
    /// new generation. Whenever output switches to one of those while stdout
    /// is mid-line, a `\n` is inserted first so segments never share a line.
    /// Thinking deltas are merged under a single `[thinking] ` prefix.
    ///
    /// # Errors
    ///
    /// Propagates stdout write/flush failures.
    async fn emit_text(&mut self, out: &mut Stdout, event: &AgentEvent) -> anyhow::Result<()> {
        match event {
            // Generation boundary or end of turn: close any open thinking
            // block and make sure the cursor is back at a line start.
            AgentEvent::LlmCallStarted { .. } | AgentEvent::TurnEnded { .. } => {
                self.end_thought(out).await?;
                self.break_line(out).await?;
            }
            AgentEvent::AssistantText { content } => {
                // Non-text blocks and empty deltas are ignored entirely.
                let Some(body) = block_text(content).filter(|t| !t.is_empty()) else {
                    return Ok(());
                };
                self.end_thought(out).await?;
                write_raw(out, &body).await?;
                out.flush().await?;
                self.mid_line = !body.ends_with('\n');
            }
            AgentEvent::AssistantThought { content } => {
                let Some(thought) = block_text(content).filter(|t| !t.is_empty()) else {
                    return Ok(());
                };
                // Only the first delta of a thinking block gets the prefix;
                // later deltas are appended raw so they read as one block.
                if !self.in_thought {
                    self.break_line(out).await?;
                    write(out, "[thinking] ").await?;
                    self.in_thought = true;
                }
                write_raw(out, &thought).await?;
                out.flush().await?;
                self.mid_line = !thought.ends_with('\n');
            }
            AgentEvent::ToolCallStarted { name, fields, .. } => {
                self.end_thought(out).await?;
                self.break_line(out).await?;
                // Prefer the call's title; fall back to the tool name.
                let label = fields.title.as_ref().unwrap_or(name);
                write(out, &format!("[tool] {label}\n")).await?;
                out.flush().await?;
            }
            _ => {}
        }
        Ok(())
    }

    /// Closes the current thinking block, if one is open: clears the
    /// `in_thought` flag and terminates the line when stdout is mid-line.
    /// Invoked before body text, a tool line, a new generation, or turn end,
    /// so a thinking block always occupies whole lines. A no-op otherwise.
    async fn end_thought(&mut self, out: &mut Stdout) -> anyhow::Result<()> {
        // `take` reads the flag and resets it to `false` in one step.
        if std::mem::take(&mut self.in_thought) {
            self.break_line(out).await?;
        }
        Ok(())
    }

    /// Terminates the current line when stdout is mid-line: writes `\n`,
    /// flushes, and marks the cursor as being at a line start. Does nothing
    /// when already at a line start, so no blank lines are introduced. Used
    /// ahead of thinking / tool lines and new generation segments to keep
    /// each piece of content on its own lines.
    async fn break_line(&mut self, out: &mut Stdout) -> anyhow::Result<()> {
        if !self.mid_line {
            return Ok(());
        }
        write_raw(out, "\n").await?;
        out.flush().await?;
        self.mid_line = false;
        Ok(())
    }

    /// Wrap-up output after the turn ends. Framework-level diagnostics (turn
    /// errors) go through tracing (→ stderr).
    async fn finish(
        &self,
        out: &mut Stdout,
        result: &Result<StopReason, TurnError>,
        outcome: &ExitOutcome,
    ) -> anyhow::Result<()> {
        if let Err(e) = result {
            tracing::error!(error = %e, "turn error");
        }
        match self.format {
            OutputFormat::Text => {
                // When the streamed assistant body has no trailing newline,
                // insert one to avoid running into the following shell prompt;
                // if already at line start (mid_line=false) skip it, to not emit
                // an extra blank line.
                if self.mid_line {
                    write_raw(out, "\n").await?;
                }
            }
            OutputFormat::Json => {
                // Final summary line: terminal state + exit-code semantics.
                let summary = serde_json::json!({
                    "type": "oneshot_result",
                    "stop_reason": result.as_ref().ok().map(|r| format!("{r:?}")),
                    "error": result.as_ref().err().map(|e| e.to_string()),
                    "denied": self.denied,
                    "exit_code": outcome.raw(),
                });
                write_raw(out, &summary.to_string()).await?;
                write_raw(out, "\n").await?;
            }
            OutputFormat::Quiet => {}
        }
        Ok(())
    }
}

/// Returns an owned copy of the text carried by a [`ContentBlock`], or `None`
/// for every non-text block kind.
fn block_text(block: &ContentBlock) -> Option<String> {
    if let ContentBlock::Text(text_block) = block {
        Some(text_block.text.clone())
    } else {
        None
    }
}

/// Writes a string to any async writer.
async fn write<W>(out: &mut W, s: &str) -> anyhow::Result<()>
where
    W: AsyncWriteExt + Unpin,
{
    out.write_all(s.as_bytes()).await?;
    Ok(())
}

/// Same operation as `write`; the separate name marks call sites where the
/// text is agent content passed through verbatim, with no decoration added.
///
/// # Errors
///
/// Returns the underlying I/O error, converted to `anyhow::Error`, if the
/// write fails.
async fn write_raw<W>(out: &mut W, s: &str) -> anyhow::Result<()>
where
    W: AsyncWriteExt + Unpin,
{
    out.write_all(s.as_bytes()).await?;
    Ok(())
}