ta_changeset/
supervisor_review.rs

1// supervisor_review.rs — AI-powered supervisor that reviews staged changes against goal alignment and constitution.
2
3use std::io::Read;
4use std::path::Path;
5use std::time::Instant;
6
7use serde::{Deserialize, Serialize};
8
9/// Verdict from the supervisor agent review.
10#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
11#[serde(rename_all = "lowercase")]
12pub enum SupervisorVerdict {
13    /// Changes are aligned with the goal and constitution.
14    Pass,
15    /// Minor concerns but not blocking — shown in draft view with yellow.
16    Warn,
17    /// Significant alignment or constitution violation — can block approval.
18    Block,
19}
20
21impl std::fmt::Display for SupervisorVerdict {
22    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
23        match self {
24            Self::Pass => write!(f, "pass"),
25            Self::Warn => write!(f, "warn"),
26            Self::Block => write!(f, "block"),
27        }
28    }
29}
30
31#[allow(clippy::derivable_impls)]
32impl Default for SupervisorVerdict {
33    fn default() -> Self {
34        Self::Warn
35    }
36}
37
38/// The result of an AI supervisor reviewing staged changes.
39/// Embedded in `DraftPackage.supervisor_review`.
40#[derive(Debug, Clone, Serialize, Deserialize)]
41pub struct SupervisorReview {
42    /// Overall verdict: pass, warn, or block.
43    pub verdict: SupervisorVerdict,
44    /// Whether changes stayed within the goal's declared scope.
45    pub scope_ok: bool,
46    /// Specific findings from the review (concerns, observations).
47    pub findings: Vec<String>,
48    /// One-sentence summary of the review.
49    pub summary: String,
50    /// Which supervisor produced this review ("builtin", "claude-code", "codex", etc.).
51    pub agent: String,
52    /// How long the supervisor took in seconds.
53    pub duration_secs: f32,
54}
55
/// Runtime settings for a supervisor run (derived from WorkflowConfig).
#[derive(Debug, Clone)]
pub struct SupervisorRunConfig {
    /// Whether the supervisor is enabled.
    pub enabled: bool,
    /// Which agent to run: "builtin", "claude-code", "codex", "ollama", or the name
    /// of an agent manifest.
    pub agent: String,
    /// Policy for a `Block` verdict: "warn" only displays it, "block" refuses approval.
    pub verdict_on_block: String,
    /// Location of the project constitution file, if any.
    pub constitution_path: Option<std::path::PathBuf>,
    /// When set, a missing constitution is not treated as a failure.
    pub skip_if_no_constitution: bool,
    /// Number of seconds without any token after which the supervisor is killed
    /// (default 30).
    ///
    /// This supersedes the wall-clock `timeout_secs`: a supervisor that keeps
    /// streaming output for a large diff is left alone; only a process that has
    /// produced nothing for `heartbeat_stale_secs` is terminated.
    pub heartbeat_stale_secs: u64,
    /// Deprecated in favour of `heartbeat_stale_secs`. Still accepted for backward
    /// compatibility; mapped to `heartbeat_stale_secs` at construction time with a
    /// deprecation warning.
    pub timeout_secs: u64,
    /// Name of an environment variable to check for before spawning the agent
    /// (a pre-flight UX check). If set, TA verifies the variable exists and prints
    /// an actionable message when it does not. TA never passes the value along —
    /// the agent binary reads the variable itself.
    pub api_key_env: Option<String>,
    /// Staging directory; required for manifest-based custom agents.
    pub staging_path: Option<std::path::PathBuf>,
    /// File touched after each token chunk as a heartbeat. With `None`, heartbeat
    /// writes are skipped (e.g. in tests, or when no workspace dir is available).
    pub heartbeat_path: Option<std::path::PathBuf>,
    /// Name of an agent profile from workflow.toml `[agent_profiles]`, if any.
    /// When present, `agent` acts only as a fallback: the profile's `framework`
    /// drives dispatch and its `model` is forwarded to the agent binary when
    /// applicable.
    pub agent_profile: Option<String>,
    /// Model resolved from `agent_profile` (if any); handed to the agent CLI via
    /// the `--model` flag.
    pub resolved_model: Option<String>,
    /// Whether session hooks may fire in the supervisor subprocess. Default: false.
    ///
    /// With `false`, `CLAUDE_CODE_DISABLE_HOOKS=1` is set so `SessionStart` and
    /// other hooks cannot write JSON to stdout ahead of the supervisor's content.
    /// Use `true` only when a custom hook has to run during supervisor invocations.
    pub enable_hooks: bool,
}
100
101/// Raw LLM response structure (expected JSON from the supervisor prompt).
102#[derive(Deserialize, Debug)]
103struct LlmSupervisorResponse {
104    verdict: Option<String>,
105    scope_ok: Option<bool>,
106    findings: Option<Vec<String>>,
107    summary: Option<String>,
108}
109
110/// Unified supervisor agent dispatcher.
111///
112/// Dispatches on `config.agent`:
113/// - `"builtin"` | `"claude-code"` → spawn `claude --print --verbose --output-format stream-json`
114///   (delegates auth entirely to the `claude` binary — supports subscription OAuth, API key, etc.)
115/// - `"codex"` → spawn `codex --approval-mode full-auto --quiet`
116/// - `"ollama"` → spawn `ta agent run ollama --headless`
117/// - any other string → look up `.ta/agents/<name>.toml` manifest in `config.staging_path`
118///
119/// Falls back to `SupervisorVerdict::Warn` on any failure — never blocks a draft build.
120pub fn invoke_supervisor_agent(
121    objective: &str,
122    changed_files: &[String],
123    constitution_text: Option<&str>,
124    config: &SupervisorRunConfig,
125) -> SupervisorReview {
126    let started = Instant::now();
127    let prompt = build_supervisor_prompt(objective, changed_files, constitution_text);
128
129    // Pre-flight check: verify api_key_env exists before spawning agent.
130    if let Some(ref env_var) = config.api_key_env {
131        if std::env::var(env_var).is_err() {
132            let msg = format!(
133                "Supervisor agent '{}' requires {} — set it or change [supervisor] agent in workflow.toml.",
134                config.agent, env_var
135            );
136            tracing::warn!("{}", msg);
137            return fallback_supervisor_review(&config.agent, &msg, 0.0);
138        }
139    }
140
141    let result = match config.agent.as_str() {
142        "builtin" | "claude-code" => invoke_claude_cli_supervisor(&prompt, config),
143        "codex" => invoke_codex_supervisor(&prompt, config),
144        "ollama" => invoke_ollama_supervisor(&prompt, config),
145        other => {
146            if let Some(ref staging) = config.staging_path {
147                run_manifest_supervisor(staging, other, objective, changed_files, config, started)
148            } else {
149                Err(anyhow::anyhow!(
150                    "Custom agent '{}' requires staging_path to be set in SupervisorRunConfig",
151                    other
152                ))
153            }
154        }
155    };
156
157    let duration_secs = started.elapsed().as_secs_f32();
158
159    match result {
160        Ok(mut review) => {
161            review.duration_secs = duration_secs;
162            review
163        }
164        Err(e) => {
165            tracing::warn!(
166                error = %e,
167                agent = %config.agent,
168                "Supervisor agent failed — falling back to warn verdict"
169            );
170            fallback_supervisor_review(&config.agent, &e.to_string(), duration_secs)
171        }
172    }
173}
174
175/// Invoke the `claude` CLI in headless stream-json mode.
176///
177/// Uses `claude --print --verbose --output-format stream-json <prompt>`. Auth is handled
178/// entirely by the `claude` binary (subscription OAuth, API key from env or config).
179/// `--verbose` is required when combining `--print` with `--output-format stream-json`.
180fn invoke_claude_cli_supervisor(
181    prompt: &str,
182    config: &SupervisorRunConfig,
183) -> anyhow::Result<SupervisorReview> {
184    let staging = config.staging_path.as_deref();
185
186    // When --allowedTools is passed, the claude CLI does not accept the prompt as a
187    // positional argument — it must come via stdin. Always use stdin for consistency.
188    let mut args_owned: Vec<String> = vec![
189        "--print".into(),
190        "--verbose".into(),
191        "--output-format".into(),
192        "stream-json".into(),
193        "--allowedTools".into(),
194        "Read(*),Grep(*),Glob(*)".into(),
195    ];
196
197    if let Some(ref model) = config.resolved_model {
198        args_owned.push("--model".into());
199        args_owned.push(model.clone());
200    }
201
202    // Prompt goes via stdin (not positional) — required when --allowedTools is present.
203    let args_refs: Vec<&str> = args_owned.iter().map(|s| s.as_str()).collect();
204
205    let disable_hooks_env: &[(&str, &str)] = if config.enable_hooks {
206        &[]
207    } else {
208        &[("CLAUDE_CODE_DISABLE_HOOKS", "1")]
209    };
210
211    let stdout = spawn_with_heartbeat_monitor(
212        "claude",
213        &args_refs,
214        config.heartbeat_stale_secs,
215        config.heartbeat_path.as_deref(),
216        "Claude Code CLI",
217        staging,
218        disable_hooks_env,
219        Some(prompt),
220    )?;
221
222    let text = extract_claude_stream_json_text(&stdout);
223    let mut review = parse_supervisor_response_or_text(&text, "claude-code");
224    apply_hedging_quality_gate(&mut review);
225    Ok(review)
226}
227
228/// Invoke the `codex` CLI in headless mode.
229///
230/// Uses `codex --approval-mode full-auto --quiet <prompt>`.
231/// Codex outputs plain text; we wrap it as summary and attempt JSON extraction.
232fn invoke_codex_supervisor(
233    prompt: &str,
234    config: &SupervisorRunConfig,
235) -> anyhow::Result<SupervisorReview> {
236    let staging = config.staging_path.as_deref();
237    let disable_hooks_env: &[(&str, &str)] = if config.enable_hooks {
238        &[]
239    } else {
240        &[("CLAUDE_CODE_DISABLE_HOOKS", "1")]
241    };
242    let stdout = spawn_with_heartbeat_monitor(
243        "codex",
244        &["--approval-mode", "full-auto", "--quiet", prompt],
245        config.heartbeat_stale_secs,
246        config.heartbeat_path.as_deref(),
247        "Codex CLI",
248        staging,
249        disable_hooks_env,
250        None,
251    )?;
252
253    let mut review = parse_supervisor_response_or_text(&stdout, "codex");
254    apply_hedging_quality_gate(&mut review);
255    Ok(review)
256}
257
258/// Invoke the ollama agent via `ta agent run ollama --headless`.
259fn invoke_ollama_supervisor(
260    prompt: &str,
261    config: &SupervisorRunConfig,
262) -> anyhow::Result<SupervisorReview> {
263    let staging = config.staging_path.as_deref();
264    let disable_hooks_env: &[(&str, &str)] = if config.enable_hooks {
265        &[]
266    } else {
267        &[("CLAUDE_CODE_DISABLE_HOOKS", "1")]
268    };
269    let stdout = spawn_with_heartbeat_monitor(
270        "ta",
271        &[
272            "agent",
273            "run",
274            "ollama",
275            "--headless",
276            "--tools",
277            "read,grep,glob",
278            "--prompt",
279            prompt,
280        ],
281        config.heartbeat_stale_secs,
282        config.heartbeat_path.as_deref(),
283        "ta-agent-ollama",
284        staging,
285        disable_hooks_env,
286        None,
287    )?;
288
289    let mut review = parse_supervisor_response_or_text(&stdout, "ollama");
290    apply_hedging_quality_gate(&mut review);
291    Ok(review)
292}
293
294/// Check whether a stdout line is a Claude Code hook JSON event.
295///
296/// Hook events look like `{"type":"system","subtype":"hook_started",...}`. They arrive
297/// on stdout before any supervisor content when hooks fire (e.g., `SessionStart`). We
298/// discard these silently — they are not supervisor tokens and must not reset the stall
299/// watchdog timer.
300fn is_hook_json_line(line: &str) -> bool {
301    let trimmed = line.trim();
302    // Quick pre-check before JSON parse for performance.
303    if !trimmed.starts_with('{') || !trimmed.contains("\"type\"") {
304        return false;
305    }
306    if let Ok(val) = serde_json::from_str::<serde_json::Value>(trimmed) {
307        val.get("type").and_then(|t| t.as_str()) == Some("system")
308    } else {
309        false
310    }
311}
312
313/// Spawn a process and stream its stdout, writing a heartbeat file after each line received.
314///
315/// A reader thread collects stdout lines and sends them via a channel. The main thread
316/// polls with a short timeout and kills the child if no output has arrived for
317/// `stale_secs`. This replaces the wall-clock `spawn_with_timeout` approach: actively
318/// streaming supervisors are never killed, only truly stalled ones (no tokens for
319/// `stale_secs`).
320///
321/// `heartbeat_path` is optional; when `None`, heartbeat writes are skipped (e.g. in
322/// tests or when no workspace dir is available).
323///
324/// `extra_env` is a slice of `(key, value)` pairs injected into the subprocess environment.
325/// Pass `&[("CLAUDE_CODE_DISABLE_HOOKS", "1")]` to suppress hook stdout pollution from
326/// Claude Code session hooks.
327///
328/// Lines that match `is_hook_json_line` (i.e., `{"type":"system",...}`) are discarded
329/// silently — they do not count as heartbeat tokens and are not included in the output.
330#[allow(clippy::too_many_arguments)]
331fn spawn_with_heartbeat_monitor(
332    program: &str,
333    args: &[&str],
334    stale_secs: u64,
335    heartbeat_path: Option<&std::path::Path>,
336    label: &str,
337    current_dir: Option<&std::path::Path>,
338    extra_env: &[(&str, &str)],
339    stdin_input: Option<&str>,
340) -> anyhow::Result<String> {
341    use std::io::BufRead;
342    use std::sync::mpsc;
343
344    let mut cmd = std::process::Command::new(program);
345    cmd.args(args);
346    if let Some(dir) = current_dir {
347        cmd.current_dir(dir);
348    }
349    for (k, v) in extra_env {
350        cmd.env(k, v);
351    }
352    let stdin_stdio = if stdin_input.is_some() {
353        std::process::Stdio::piped()
354    } else {
355        std::process::Stdio::null()
356    };
357    let mut child = cmd
358        .stdin(stdin_stdio)
359        .stdout(std::process::Stdio::piped())
360        .stderr(std::process::Stdio::piped())
361        .spawn()
362        .map_err(|e| {
363            anyhow::anyhow!(
364                "Failed to spawn '{}': {} — is {} installed and on PATH?",
365                program,
366                e,
367                label
368            )
369        })?;
370
371    // Write initial heartbeat immediately so mtime is set from the moment of spawn.
372    if let Some(hb) = heartbeat_path {
373        let _ = std::fs::write(hb, b"");
374    }
375
376    // Write stdin input in a background thread to avoid deadlock: if the child fills its
377    // stdout buffer waiting for us to read while we're blocking on stdin write, we deadlock.
378    if let Some(input) = stdin_input {
379        if let Some(mut stdin_pipe) = child.stdin.take() {
380            let input_owned = input.to_string();
381            std::thread::spawn(move || {
382                use std::io::Write;
383                let _ = stdin_pipe.write_all(input_owned.as_bytes());
384                // stdin_pipe drops here, sending EOF to the child.
385            });
386        }
387    }
388
389    // Spawn reader thread: reads stdout lines and sends them via channel.
390    // Sends `None` on EOF.
391    let (line_tx, line_rx) = mpsc::channel::<Option<String>>();
392    let stdout = child.stdout.take();
393    let reader_handle = std::thread::spawn(move || {
394        if let Some(stdout) = stdout {
395            let reader = std::io::BufReader::new(stdout);
396            for line in reader.lines() {
397                match line {
398                    Ok(l) => {
399                        if line_tx.send(Some(l)).is_err() {
400                            break;
401                        }
402                    }
403                    Err(_) => break,
404                }
405            }
406        }
407        let _ = line_tx.send(None);
408    });
409
410    // Main loop: poll for lines, update heartbeat, check for stall.
411    let poll_interval = std::time::Duration::from_millis(100);
412    let stale_duration = std::time::Duration::from_secs(stale_secs);
413    let mut stdout_str = String::new();
414    let mut partial_output = String::new();
415    let mut last_token = std::time::Instant::now();
416    let mut eof = false;
417
418    while !eof {
419        match line_rx.recv_timeout(poll_interval) {
420            Ok(Some(line)) => {
421                // Discard hook JSON system events — they are not supervisor tokens.
422                // Hook lines like {"type":"system","subtype":"hook_started",...} appear
423                // on stdout before any real content when SessionStart hooks fire.
424                // Counting them as heartbeat tokens causes a false stall: the watchdog
425                // resets once on the hook line, then waits 30s for real content.
426                if is_hook_json_line(&line) {
427                    continue;
428                }
429                last_token = std::time::Instant::now();
430                stdout_str.push_str(&line);
431                stdout_str.push('\n');
432                // Accumulate partial output for stall error messages (cap at 200 chars).
433                if partial_output.len() < 200 {
434                    partial_output.push_str(&line);
435                    partial_output.push('\n');
436                }
437                // Update heartbeat on each received line.
438                if let Some(hb) = heartbeat_path {
439                    let _ = std::fs::write(hb, b"");
440                }
441            }
442            Ok(None) => {
443                eof = true; // Reader sent EOF sentinel.
444            }
445            Err(mpsc::RecvTimeoutError::Timeout) => {
446                // No output received in poll_interval — check for stall.
447                if last_token.elapsed() >= stale_duration {
448                    let _ = child.kill();
449                    let _ = reader_handle.join();
450                    if let Some(hb) = heartbeat_path {
451                        let _ = std::fs::remove_file(hb);
452                    }
453                    let partial = partial_output.trim().to_string();
454                    anyhow::bail!(
455                        "Supervisor stalled — no tokens received for {}s. Findings so far: {}",
456                        stale_secs,
457                        partial
458                    );
459                }
460            }
461            Err(mpsc::RecvTimeoutError::Disconnected) => {
462                eof = true;
463            }
464        }
465    }
466
467    let _ = reader_handle.join();
468
469    // Wait for child to exit.
470    let status = child.wait()?;
471
472    // Clean up heartbeat sentinel on completion.
473    if let Some(hb) = heartbeat_path {
474        let _ = std::fs::remove_file(hb);
475    }
476
477    if !status.success() && stdout_str.trim().is_empty() {
478        let mut stderr = String::new();
479        if let Some(mut err) = child.stderr.take() {
480            let _ = err.read_to_string(&mut stderr);
481        }
482        anyhow::bail!(
483            "{} exited with status {}: {}",
484            label,
485            status,
486            &stderr[..stderr.len().min(200)]
487        );
488    }
489
490    Ok(stdout_str)
491}
492
493/// Extract the final text content from Claude CLI's stream-json output.
494///
495/// Claude CLI with `--output-format stream-json` emits newline-delimited JSON events.
496/// We look for the `result` event (type = "result") and extract its text.
497/// Falls back to scanning for `assistant` message content.
498fn extract_claude_stream_json_text(stdout: &str) -> String {
499    // Scan in reverse for the last result event.
500    for line in stdout.lines().rev() {
501        let line = line.trim();
502        if line.is_empty() {
503            continue;
504        }
505        let Ok(val) = serde_json::from_str::<serde_json::Value>(line) else {
506            continue;
507        };
508        if val.get("type").and_then(|t| t.as_str()) == Some("result") {
509            // `result` field contains the final text in most CLI versions.
510            if let Some(text) = val.get("result").and_then(|r| r.as_str()) {
511                if !text.trim().is_empty() {
512                    return text.to_string();
513                }
514            }
515            // Some versions embed content in a `content` array.
516            if let Some(content) = val.get("content") {
517                let text = extract_content_text(content);
518                if !text.is_empty() {
519                    return text;
520                }
521            }
522        }
523    }
524
525    // Fallback: pick the last non-empty assistant text block.
526    for line in stdout.lines().rev() {
527        let line = line.trim();
528        if line.is_empty() {
529            continue;
530        }
531        let Ok(val) = serde_json::from_str::<serde_json::Value>(line) else {
532            continue;
533        };
534        if val.get("type").and_then(|t| t.as_str()) == Some("assistant") {
535            if let Some(content) = val.get("message").and_then(|m| m.get("content")) {
536                let text = extract_content_text(content);
537                if !text.is_empty() {
538                    return text;
539                }
540            }
541        }
542    }
543
544    // Last resort: return raw stdout (may contain JSON on one line).
545    stdout.to_string()
546}
547
548/// Extract plain text from a JSON content value (array of blocks or string).
549fn extract_content_text(content: &serde_json::Value) -> String {
550    if let Some(arr) = content.as_array() {
551        arr.iter()
552            .filter_map(|item| {
553                if item.get("type").and_then(|t| t.as_str()) == Some("text") {
554                    item.get("text")
555                        .and_then(|t| t.as_str())
556                        .map(|s| s.to_string())
557                } else {
558                    None
559                }
560            })
561            .collect::<Vec<_>>()
562            .join("")
563    } else {
564        content.as_str().unwrap_or("").to_string()
565    }
566}
567
568/// Parse supervisor response text, falling back to a warn verdict with the text as summary.
569///
570/// Tries structured JSON parsing first (with `extract_json`). If that fails, wraps
571/// the full text as `summary` with `verdict: warn` — so plain-text responses from
572/// agents that don't follow the JSON format still produce a useful review.
573fn parse_supervisor_response_or_text(text: &str, agent: &str) -> SupervisorReview {
574    if let Ok(review) = parse_supervisor_response(text) {
575        return SupervisorReview {
576            agent: agent.to_string(),
577            ..review
578        };
579    }
580    // Non-JSON response: treat full text as summary with warn.
581    let summary = if text.len() > 300 {
582        format!("{}…", &text[..300])
583    } else if text.trim().is_empty() {
584        format!("Supervisor agent '{}' returned empty response.", agent)
585    } else {
586        text.trim().to_string()
587    };
588    SupervisorReview {
589        verdict: SupervisorVerdict::Warn,
590        scope_ok: true,
591        findings: vec![],
592        summary,
593        agent: agent.to_string(),
594        duration_secs: 0.0,
595    }
596}
597
598/// Run a manifest-based custom supervisor agent.
599///
600/// Reads `.ta/agents/<name>.toml`, writes `.ta/supervisor_input.json`, spawns the
601/// command, waits for `.ta/supervisor_result.json` to be written by the agent,
602/// and parses the result. Falls back to warn on any failure.
603fn run_manifest_supervisor(
604    staging_path: &Path,
605    agent_name: &str,
606    objective: &str,
607    changed_files: &[String],
608    config: &SupervisorRunConfig,
609    started: Instant,
610) -> anyhow::Result<SupervisorReview> {
611    // Write input context for the custom agent.
612    let input = serde_json::json!({
613        "objective": objective,
614        "changed_files": changed_files,
615        "instruction": "Read the changed files using your available tools before forming each finding. \
616                        Cite file:line in every finding that references code. \
617                        Never write 'cannot be verified without viewing files' — view the files first.",
618    });
619    let input_path = staging_path.join(".ta/supervisor_input.json");
620    if let Err(e) = std::fs::write(
621        &input_path,
622        serde_json::to_string_pretty(&input).unwrap_or_default(),
623    ) {
624        tracing::warn!(error = %e, "Failed to write supervisor input file");
625    }
626
627    // Look up agent manifest.
628    let agent_manifest = staging_path
629        .join(".ta/agents")
630        .join(format!("{}.toml", agent_name));
631    if !agent_manifest.exists() {
632        anyhow::bail!(
633            "Custom supervisor agent '{}' manifest not found at .ta/agents/{}.toml",
634            agent_name,
635            agent_name
636        );
637    }
638
639    // Clear any stale result file.
640    let result_path = staging_path.join(".ta/supervisor_result.json");
641    let _ = std::fs::remove_file(&result_path);
642
643    // Read command from agent manifest.
644    let manifest_content = std::fs::read_to_string(&agent_manifest)
645        .map_err(|e| anyhow::anyhow!("Failed to read agent manifest: {}", e))?;
646    let manifest: toml::Value = toml::from_str(&manifest_content)
647        .map_err(|e| anyhow::anyhow!("Failed to parse agent manifest: {}", e))?;
648    let cmd_str = manifest
649        .get("agent")
650        .and_then(|a| a.get("command"))
651        .and_then(|c| c.as_str())
652        .unwrap_or("");
653    if cmd_str.is_empty() {
654        anyhow::bail!(
655            "Agent manifest '{}' missing [agent] command field",
656            agent_name
657        );
658    }
659
660    let parts: Vec<&str> = cmd_str.split_whitespace().collect();
661    let mut spawn_cmd = std::process::Command::new(parts[0]);
662    spawn_cmd
663        .args(&parts[1..])
664        .current_dir(staging_path)
665        .env("TA_SUPERVISOR_INPUT", input_path.to_str().unwrap_or(""))
666        .env("TA_SUPERVISOR_OUTPUT", result_path.to_str().unwrap_or(""));
667    if !config.enable_hooks {
668        spawn_cmd.env("CLAUDE_CODE_DISABLE_HOOKS", "1");
669    }
670    let mut child = spawn_cmd
671        .spawn()
672        .map_err(|e| anyhow::anyhow!("Failed to spawn custom agent '{}': {}", agent_name, e))?;
673
674    // Write initial heartbeat for manifest agent.
675    if let Some(ref hb) = config.heartbeat_path {
676        let _ = std::fs::write(hb, b"");
677    }
678
679    // Manifest agents write a result file rather than streaming stdout, so we poll
680    // for the result file and update the heartbeat on each poll tick.
681    let stale_secs = config.heartbeat_stale_secs;
682    let mut last_result_size: u64 = 0;
683    let mut last_progress = std::time::Instant::now();
684    loop {
685        match child.try_wait() {
686            Ok(Some(_)) => break,
687            Ok(None) => {
688                // Update heartbeat if the result file has grown (agent is making progress).
689                let current_size = std::fs::metadata(&result_path)
690                    .map(|m| m.len())
691                    .unwrap_or(0);
692                if current_size > last_result_size {
693                    last_result_size = current_size;
694                    last_progress = std::time::Instant::now();
695                    if let Some(ref hb) = config.heartbeat_path {
696                        let _ = std::fs::write(hb, b"");
697                    }
698                }
699                if last_progress.elapsed().as_secs() >= stale_secs {
700                    let _ = child.kill();
701                    // Clean up heartbeat.
702                    if let Some(ref hb) = config.heartbeat_path {
703                        let _ = std::fs::remove_file(hb);
704                    }
705                    anyhow::bail!(
706                        "Custom agent '{}' stalled — no progress for {}s",
707                        agent_name,
708                        stale_secs
709                    );
710                }
711                std::thread::sleep(std::time::Duration::from_millis(500));
712            }
713            Err(e) => {
714                anyhow::bail!("Error waiting for custom agent '{}': {}", agent_name, e);
715            }
716        }
717    }
718
719    // Clean up heartbeat on completion.
720    if let Some(ref hb) = config.heartbeat_path {
721        let _ = std::fs::remove_file(hb);
722    }
723
724    let content = std::fs::read_to_string(&result_path).map_err(|e| {
725        anyhow::anyhow!(
726            "Custom agent '{}' did not write result file (.ta/supervisor_result.json): {}",
727            agent_name,
728            e
729        )
730    })?;
731
732    let mut review: SupervisorReview = serde_json::from_str(&content).map_err(|e| {
733        anyhow::anyhow!(
734            "Failed to parse result JSON from custom agent '{}': {}",
735            agent_name,
736            e
737        )
738    })?;
739    review.agent = agent_name.to_string();
740    review.duration_secs = started.elapsed().as_secs_f32();
741    Ok(review)
742}
743
744/// Create a fallback `SupervisorReview` with `Warn` verdict for any failure path.
745pub fn fallback_supervisor_review(
746    agent: &str,
747    reason: &str,
748    duration_secs: f32,
749) -> SupervisorReview {
750    SupervisorReview {
751        verdict: SupervisorVerdict::Warn,
752        scope_ok: true,
753        findings: vec![format!("Supervisor review incomplete: {}", reason)],
754        summary: "Supervisor could not complete review (fallback to warn).".to_string(),
755        agent: agent.to_string(),
756        duration_secs,
757    }
758}
759
/// Build the supervisor prompt.
///
/// The prompt contains the goal objective, a bulleted list of changed files (or a
/// "no files changed" placeholder), the project constitution, the review questions, and
/// the JSON response format the supervisor must follow.
///
/// # Arguments
/// * `objective` — the goal the reviewed work was meant to achieve.
/// * `changed_files` — paths of the files that were changed.
/// * `constitution_text` — constitution contents. `None` or blank text produces a
///   "not available" note instead. At most the first 3000 bytes are included; the cut is
///   moved back to a UTF-8 character boundary when necessary.
///
/// # Returns
/// The complete prompt text.
pub fn build_supervisor_prompt(
    objective: &str,
    changed_files: &[String],
    constitution_text: Option<&str>,
) -> String {
    /// Maximum number of constitution bytes embedded in the prompt.
    const MAX_CONSTITUTION_BYTES: usize = 3000;

    let files_list = if changed_files.is_empty() {
        "  (no files changed)".to_string()
    } else {
        changed_files
            .iter()
            .map(|f| format!("  - {}", f))
            .collect::<Vec<_>>()
            .join("\n")
    };

    let constitution_section = match constitution_text {
        Some(text) if !text.trim().is_empty() => {
            // Slicing at a fixed byte offset panics when that offset falls inside a
            // multi-byte character, so back off to the nearest boundary (offset 0 is
            // always a boundary, which bounds the loop).
            let mut cut = text.len().min(MAX_CONSTITUTION_BYTES);
            while !text.is_char_boundary(cut) {
                cut -= 1;
            }
            format!("\n\nProject Constitution:\n```\n{}\n```", &text[..cut])
        }
        _ => "\n\nProject Constitution: (not available — skip constitution check)".to_string(),
    };

    format!(
        r#"You are a supervisor reviewing an AI agent's work for goal alignment and constitution compliance.

Goal Objective:
{objective}

Changed Files:
{files_list}{constitution_section}

Read the files listed above using your Read/Grep/Glob tools before forming each finding.
Cite `file:line` in every finding that references code.
Never write 'cannot be verified without viewing files' — view the files first.

Review the changes and answer:
1. Did the agent stay within the goal scope? (Only files directly needed for the objective should be modified)
2. Are any changes surprising, potentially harmful, or out of scope?
3. Does the work appear to satisfy the objective?
4. If a constitution was provided, does the work comply with it?

Respond with ONLY a JSON object (no markdown, no explanation):
{{
  "verdict": "pass" | "warn" | "block",
  "scope_ok": true | false,
  "findings": ["finding 1", "finding 2"],
  "summary": "One sentence summary"
}}

Use:
- "pass": Changes align well with the objective and constitution
- "warn": Minor concerns (e.g., one extra file touched, or minor scope drift)
- "block": Significant concerns (e.g., unrelated system files modified, or clear constitution violation)

Keep findings concise (1-2 sentences each, max 5 findings)."#
    )
}
819
/// Hedging phrases that indicate the supervisor did not read the staged files.
///
/// Every entry is lowercase: `apply_hedging_quality_gate` lowercases each finding and then
/// does a plain substring match against this list, so new entries must be lowercase too.
const HEDGING_PHRASES: &[&str] = &[
    "cannot be verified",
    "unable to confirm",
    "without viewing",
    "depends on implementation",
    "cannot verify",
    "unable to verify",
    "not possible to confirm",
];
830
831/// Scan findings for hedging phrases that indicate the supervisor failed to read files.
832///
833/// Returns `true` if any finding contains a hedging phrase, and mutates the findings
834/// to append a meta-finding explaining what happened.
835pub(crate) fn apply_hedging_quality_gate(review: &mut SupervisorReview) -> bool {
836    let mut hedged = false;
837    for finding in &review.findings {
838        let lower = finding.to_lowercase();
839        if HEDGING_PHRASES.iter().any(|p| lower.contains(p)) {
840            hedged = true;
841            break;
842        }
843    }
844    if hedged {
845        if review.verdict == SupervisorVerdict::Pass {
846            review.verdict = SupervisorVerdict::Warn;
847        }
848        review.findings.push(
849            "Supervisor produced unverified finding — staging access may be missing or supervisor did not read the file.".to_string()
850        );
851    }
852    hedged
853}
854
855fn parse_supervisor_response(text: &str) -> anyhow::Result<SupervisorReview> {
856    // Try to extract JSON from the text (handle potential markdown wrapping).
857    let json_str = extract_json(text);
858
859    let parsed: LlmSupervisorResponse = serde_json::from_str(json_str).map_err(|e| {
860        anyhow::anyhow!(
861            "Failed to parse supervisor JSON: {} — response: {}",
862            e,
863            &text[..text.len().min(300)]
864        )
865    })?;
866
867    let verdict = match parsed.verdict.as_deref() {
868        Some("pass") => SupervisorVerdict::Pass,
869        Some("block") => SupervisorVerdict::Block,
870        _ => SupervisorVerdict::Warn, // Default to warn for unknown values
871    };
872
873    Ok(SupervisorReview {
874        verdict,
875        scope_ok: parsed.scope_ok.unwrap_or(true),
876        findings: parsed.findings.unwrap_or_default(),
877        summary: parsed
878            .summary
879            .unwrap_or_else(|| "No summary provided.".to_string()),
880        agent: "builtin".to_string(),
881        duration_secs: 0.0, // Will be overwritten by caller
882    })
883}
884
/// Extract a JSON object from text that might contain markdown or prose.
///
/// Strategies, first match wins:
/// 1. the contents of a fenced block opened with three backticks followed by `json`,
/// 2. the contents of the first fenced block opened with three backticks,
/// 3. the span from the first `{` to the last `}` of the whole text,
/// 4. the trimmed input.
///
/// Fenced contents are narrowed to their outermost `{ ... }` span when one exists, so a
/// language tag that strategy 1 does not recognise (for example an upper-case `JSON`) is
/// not returned as part of the payload. Fenced contents without braces are returned
/// trimmed, unchanged.
fn extract_json(text: &str) -> &str {
    // Span from the first `{` to the last `}` of `s`, if they appear in that order.
    fn object_span(s: &str) -> Option<&str> {
        let open = s.find('{')?;
        let close = s.rfind('}')?;
        (close > open).then(|| &s[open..=close])
    }

    // The specific fence is tried before the generic one; both markers are ASCII, so
    // skipping `fence.len()` bytes always lands on a character boundary.
    for fence in ["```json", "```"] {
        if let Some(start) = text.find(fence) {
            let after = &text[start + fence.len()..];
            if let Some(end) = after.find("```") {
                let inner = after[..end].trim();
                return object_span(inner).unwrap_or(inner);
            }
        }
    }

    // No complete fence: look for { ... } directly, else hand back the trimmed text.
    object_span(text).unwrap_or_else(|| text.trim())
}
911
912/// Load the constitution text from the configured path or common fallback locations.
913pub fn load_constitution(staging_path: &Path, config: &SupervisorRunConfig) -> Option<String> {
914    // Try the configured path first.
915    if let Some(ref path) = config.constitution_path {
916        let full = staging_path.join(path);
917        if full.exists() {
918            return std::fs::read_to_string(&full).ok();
919        }
920    }
921    // Fallback: .ta/constitution.yaml (preferred for YAML-formatted rule sets)
922    let yaml_path = staging_path.join(".ta/constitution.yaml");
923    if yaml_path.exists() {
924        return std::fs::read_to_string(&yaml_path).ok();
925    }
926    // Fallback: .ta/constitution.toml
927    let toml_path = staging_path.join(".ta/constitution.toml");
928    if toml_path.exists() {
929        return std::fs::read_to_string(&toml_path).ok();
930    }
931    // Fallback: docs/TA-CONSTITUTION.md
932    let md_path = staging_path.join("docs/TA-CONSTITUTION.md");
933    if md_path.exists() {
934        return std::fs::read_to_string(&md_path).ok();
935    }
936    None
937}
938
939#[cfg(test)]
940mod tests {
941    use super::*;
942
    /// Mutex to serialize tests that mutate the global PATH environment variable.
    /// Tests that create mock `claude` binaries and prepend a temp dir to PATH must
    /// acquire this lock to prevent parallel races where the wrong mock binary is found.
    ///
    /// The guard has to stay alive until PATH has been restored. A test that panics while
    /// holding the guard poisons the mutex, after which a plain `lock().unwrap()` panics too.
    #[cfg(unix)]
    static PATH_MUTEX: std::sync::Mutex<()> = std::sync::Mutex::new(());
948
949    #[test]
950    fn test_build_supervisor_prompt_includes_objective() {
951        let prompt = build_supervisor_prompt(
952            "Add JWT authentication to the API",
953            &["src/auth.rs".to_string(), "src/middleware.rs".to_string()],
954            None,
955        );
956        assert!(prompt.contains("Add JWT authentication to the API"));
957        assert!(prompt.contains("src/auth.rs"));
958        assert!(prompt.contains("src/middleware.rs"));
959    }
960
961    #[test]
962    fn test_build_supervisor_prompt_includes_constitution() {
963        let prompt = build_supervisor_prompt(
964            "Fix bug in parser",
965            &["src/parser.rs".to_string()],
966            Some("Never modify production database directly."),
967        );
968        assert!(prompt.contains("Never modify production database directly."));
969    }
970
971    #[test]
972    fn test_build_supervisor_prompt_no_constitution() {
973        let prompt = build_supervisor_prompt("Fix bug", &["src/foo.rs".to_string()], None);
974        assert!(prompt.contains("not available — skip constitution check"));
975    }
976
977    #[test]
978    fn test_build_supervisor_prompt_empty_files() {
979        let prompt = build_supervisor_prompt("Fix bug", &[], None);
980        assert!(prompt.contains("no files changed"));
981    }
982
983    #[test]
984    fn test_supervisor_verdict_display() {
985        assert_eq!(SupervisorVerdict::Pass.to_string(), "pass");
986        assert_eq!(SupervisorVerdict::Warn.to_string(), "warn");
987        assert_eq!(SupervisorVerdict::Block.to_string(), "block");
988    }
989
990    #[test]
991    fn test_supervisor_verdict_serde() {
992        let v: SupervisorVerdict = serde_json::from_str("\"pass\"").unwrap();
993        assert_eq!(v, SupervisorVerdict::Pass);
994        let v: SupervisorVerdict = serde_json::from_str("\"block\"").unwrap();
995        assert_eq!(v, SupervisorVerdict::Block);
996        let v: SupervisorVerdict = serde_json::from_str("\"warn\"").unwrap();
997        assert_eq!(v, SupervisorVerdict::Warn);
998    }
999
1000    #[test]
1001    fn test_parse_supervisor_response_pass() {
1002        let json =
1003            r#"{"verdict": "pass", "scope_ok": true, "findings": [], "summary": "All good."}"#;
1004        let review = parse_supervisor_response(json).unwrap();
1005        assert_eq!(review.verdict, SupervisorVerdict::Pass);
1006        assert!(review.scope_ok);
1007        assert_eq!(review.summary, "All good.");
1008        assert!(review.findings.is_empty());
1009    }
1010
1011    #[test]
1012    fn test_parse_supervisor_response_with_findings() {
1013        let json = r#"{"verdict": "warn", "scope_ok": false, "findings": ["Extra file modified", "Consider removing debug code"], "summary": "Minor concerns."}"#;
1014        let review = parse_supervisor_response(json).unwrap();
1015        assert_eq!(review.verdict, SupervisorVerdict::Warn);
1016        assert!(!review.scope_ok);
1017        assert_eq!(review.findings.len(), 2);
1018    }
1019
1020    #[test]
1021    fn test_parse_supervisor_response_markdown_wrapped() {
1022        let text = "Here is the review:\n```json\n{\"verdict\": \"pass\", \"scope_ok\": true, \"findings\": [], \"summary\": \"LGTM.\"}\n```";
1023        let review = parse_supervisor_response(text).unwrap();
1024        assert_eq!(review.verdict, SupervisorVerdict::Pass);
1025    }
1026
1027    #[test]
1028    fn test_parse_supervisor_response_unknown_verdict_falls_back_to_warn() {
1029        let json =
1030            r#"{"verdict": "unclear", "scope_ok": true, "findings": [], "summary": "Not sure."}"#;
1031        let review = parse_supervisor_response(json).unwrap();
1032        assert_eq!(review.verdict, SupervisorVerdict::Warn);
1033    }
1034
1035    #[test]
1036    fn test_parse_supervisor_response_block() {
1037        let json = r#"{"verdict": "block", "scope_ok": false, "findings": ["Modified unrelated system files"], "summary": "Significant scope violation."}"#;
1038        let review = parse_supervisor_response(json).unwrap();
1039        assert_eq!(review.verdict, SupervisorVerdict::Block);
1040        assert!(!review.scope_ok);
1041    }
1042
1043    #[test]
1044    fn test_extract_json_backtick_block() {
1045        let text = "Some prose\n```json\n{\"key\": \"value\"}\n```\nMore prose";
1046        let extracted = extract_json(text);
1047        assert_eq!(extracted, "{\"key\": \"value\"}");
1048    }
1049
1050    #[test]
1051    fn test_extract_json_plain() {
1052        let text = "{\"verdict\": \"pass\"}";
1053        let extracted = extract_json(text);
1054        assert_eq!(extracted, "{\"verdict\": \"pass\"}");
1055    }
1056
1057    #[test]
1058    fn test_fallback_supervisor_review_structure() {
1059        // Validates the structure of the fallback review returned when supervisor fails.
1060        let fallback = fallback_supervisor_review("builtin", "ANTHROPIC_API_KEY not set", 0.001);
1061        assert_eq!(fallback.verdict, SupervisorVerdict::Warn);
1062        assert!(fallback.scope_ok);
1063        assert!(!fallback.findings.is_empty());
1064        assert_eq!(fallback.agent, "builtin");
1065    }
1066
1067    #[test]
1068    fn test_extract_claude_stream_json_result_event() {
1069        // Stream-json with a result event containing the verdict JSON.
1070        let stream = r#"{"type":"system","subtype":"init"}
1071{"type":"assistant","message":{"content":[{"type":"text","text":"Analyzing..."}]}}
1072{"type":"result","subtype":"success","result":"{\"verdict\":\"pass\",\"scope_ok\":true,\"findings\":[],\"summary\":\"All good.\"}"}
1073"#;
1074        let text = extract_claude_stream_json_text(stream);
1075        assert!(text.contains("verdict"));
1076        assert!(text.contains("pass"));
1077    }
1078
1079    #[test]
1080    fn test_extract_claude_stream_json_fallback_to_assistant() {
1081        // No result event — should fall back to assistant message content.
1082        let stream = r#"{"type":"system","subtype":"init"}
1083{"type":"assistant","message":{"content":[{"type":"text","text":"{\"verdict\":\"warn\",\"scope_ok\":true,\"findings\":[],\"summary\":\"Minor issue.\"}"}]}}
1084"#;
1085        let text = extract_claude_stream_json_text(stream);
1086        assert!(text.contains("verdict"));
1087    }
1088
1089    #[test]
1090    fn test_parse_supervisor_response_or_text_plain_text() {
1091        // Plain text fallback: no JSON → warn verdict with text as summary.
1092        let text = "The changes look fine overall but one extra file was touched.";
1093        let review = parse_supervisor_response_or_text(text, "codex");
1094        assert_eq!(review.verdict, SupervisorVerdict::Warn);
1095        assert_eq!(review.agent, "codex");
1096        assert!(review.summary.contains("extra file"));
1097    }
1098
1099    #[test]
1100    fn test_parse_supervisor_response_or_text_structured_json() {
1101        let text = r#"{"verdict": "pass", "scope_ok": true, "findings": [], "summary": "LGTM."}"#;
1102        let review = parse_supervisor_response_or_text(text, "claude-code");
1103        assert_eq!(review.verdict, SupervisorVerdict::Pass);
1104        assert_eq!(review.agent, "claude-code");
1105    }
1106
1107    #[test]
1108    fn test_invoke_supervisor_agent_api_key_preflight_fails() {
1109        // When api_key_env is set and the var is missing, should fall back to warn immediately.
1110        let config = SupervisorRunConfig {
1111            enabled: true,
1112            agent: "codex".to_string(),
1113            verdict_on_block: "warn".to_string(),
1114            constitution_path: None,
1115            skip_if_no_constitution: true,
1116            heartbeat_stale_secs: 30,
1117            timeout_secs: 30,
1118            api_key_env: Some("TA_TEST_MISSING_KEY_XYZ_SUPERVISOR".to_string()),
1119            staging_path: None,
1120            heartbeat_path: None,
1121            agent_profile: None,
1122            resolved_model: None,
1123            enable_hooks: false,
1124        };
1125        // Ensure the env var is not set.
1126        std::env::remove_var("TA_TEST_MISSING_KEY_XYZ_SUPERVISOR");
1127        let review = invoke_supervisor_agent("test objective", &[], None, &config);
1128        assert_eq!(review.verdict, SupervisorVerdict::Warn);
1129        assert!(review.findings[0].contains("TA_TEST_MISSING_KEY_XYZ_SUPERVISOR"));
1130    }
1131
1132    #[test]
1133    fn test_heartbeat_written_per_chunk() {
1134        use tempfile::tempdir;
1135        // Use `echo` to produce output — available on all Unix-like systems.
1136        let dir = tempdir().unwrap();
1137        let hb_path = dir.path().join("supervisor.heartbeat");
1138
1139        // Ensure the heartbeat file doesn't exist before the call.
1140        assert!(!hb_path.exists());
1141
1142        // spawn_with_heartbeat_monitor with `echo` — produces one line of output.
1143        let result = spawn_with_heartbeat_monitor(
1144            "echo",
1145            &["heartbeat_test"],
1146            30, // stale_secs — won't trigger for a fast echo
1147            Some(hb_path.as_path()),
1148            "echo",
1149            None,
1150            &[],
1151            None,
1152        );
1153        // echo exits 0 so result is Ok.
1154        assert!(result.is_ok(), "echo should succeed: {:?}", result);
1155        let stdout = result.unwrap();
1156        assert!(stdout.contains("heartbeat_test"));
1157        // Heartbeat file is cleaned up on completion.
1158        assert!(
1159            !hb_path.exists(),
1160            "heartbeat file should be removed after completion"
1161        );
1162    }
1163
1164    #[test]
1165    fn test_monitor_kills_stalled_process() {
1166        use tempfile::tempdir;
1167        let dir = tempdir().unwrap();
1168        let hb_path = dir.path().join("supervisor_stall.heartbeat");
1169
1170        // Use `sleep 60` to simulate a process that produces no output.
1171        // stale_secs = 1 so it should be killed almost immediately.
1172        let result = spawn_with_heartbeat_monitor(
1173            "sleep",
1174            &["60"],
1175            1, // stale_secs — kill after 1s of no output
1176            Some(hb_path.as_path()),
1177            "sleep",
1178            None,
1179            &[],
1180            None,
1181        );
1182        assert!(result.is_err(), "stalled process should be killed");
1183        let err = result.unwrap_err().to_string();
1184        assert!(
1185            err.contains("stalled") || err.contains("no tokens"),
1186            "error should mention stall: {}",
1187            err
1188        );
1189        // Heartbeat file should be cleaned up.
1190        assert!(
1191            !hb_path.exists(),
1192            "heartbeat file should be removed after stall"
1193        );
1194    }
1195
1196    #[test]
1197    fn test_active_streaming_not_killed() {
1198        // A process that produces output frequently should NOT be killed.
1199        // We use a shell command to print multiple lines with small delays.
1200        // stale_secs = 5 (generous), process completes fast.
1201        let result = spawn_with_heartbeat_monitor(
1202            "sh",
1203            &["-c", "echo line1 && echo line2 && echo line3"],
1204            5,
1205            None, // no heartbeat file needed
1206            "sh",
1207            None,
1208            &[],
1209            None,
1210        );
1211        assert!(
1212            result.is_ok(),
1213            "fast-completing process should not be killed: {:?}",
1214            result
1215        );
1216        let stdout = result.unwrap();
1217        assert!(stdout.contains("line1"));
1218        assert!(stdout.contains("line3"));
1219    }
1220
1221    #[test]
1222    fn test_timeout_secs_field_preserved() {
1223        // timeout_secs is preserved for backward compat — verify it doesn't break construction.
1224        let config = SupervisorRunConfig {
1225            enabled: true,
1226            agent: "builtin".to_string(),
1227            verdict_on_block: "warn".to_string(),
1228            constitution_path: None,
1229            skip_if_no_constitution: true,
1230            heartbeat_stale_secs: 30,
1231            timeout_secs: 120, // deprecated alias — must still be accepted
1232            api_key_env: None,
1233            staging_path: None,
1234            heartbeat_path: None,
1235            agent_profile: None,
1236            resolved_model: None,
1237            enable_hooks: false,
1238        };
1239        assert_eq!(config.heartbeat_stale_secs, 30);
1240        assert_eq!(config.timeout_secs, 120);
1241    }
1242
1243    #[test]
1244    fn test_stall_message_includes_partial_output() {
1245        use tempfile::tempdir;
1246        let dir = tempdir().unwrap();
1247        let hb_path = dir.path().join("stall_partial.heartbeat");
1248
1249        // Use a shell command: print some output then hang.
1250        // stale_secs = 1.
1251        let result = spawn_with_heartbeat_monitor(
1252            "sh",
1253            &["-c", "echo partial_finding && sleep 60"],
1254            1,
1255            Some(hb_path.as_path()),
1256            "sh",
1257            None,
1258            &[],
1259            None,
1260        );
1261        assert!(result.is_err());
1262        let err = result.unwrap_err().to_string();
1263        // The stall message should include the partial output captured before stall.
1264        assert!(
1265            err.contains("partial_finding") || err.contains("Findings so far"),
1266            "stall error should include partial output: {}",
1267            err
1268        );
1269    }
1270
1271    #[test]
1272    fn test_invoke_supervisor_agent_custom_agent_no_staging_path() {
1273        // Custom agent with no staging_path → fallback to warn.
1274        let config = SupervisorRunConfig {
1275            enabled: true,
1276            agent: "my-custom-reviewer".to_string(),
1277            verdict_on_block: "warn".to_string(),
1278            constitution_path: None,
1279            skip_if_no_constitution: true,
1280            heartbeat_stale_secs: 30,
1281            timeout_secs: 30,
1282            api_key_env: None,
1283            staging_path: None,
1284            heartbeat_path: None,
1285            agent_profile: None,
1286            resolved_model: None,
1287            enable_hooks: false,
1288        };
1289        let review = invoke_supervisor_agent("test objective", &[], None, &config);
1290        assert_eq!(review.verdict, SupervisorVerdict::Warn);
1291    }
1292
1293    #[test]
1294    fn test_fallback_review_no_api_key_message() {
1295        // Structure test: fallback review should reference the missing env var.
1296        let config = SupervisorRunConfig {
1297            enabled: true,
1298            agent: "codex".to_string(),
1299            verdict_on_block: "warn".to_string(),
1300            constitution_path: None,
1301            skip_if_no_constitution: true,
1302            heartbeat_stale_secs: 30,
1303            timeout_secs: 30,
1304            api_key_env: Some("OPENAI_API_KEY".to_string()),
1305            staging_path: None,
1306            heartbeat_path: None,
1307            agent_profile: None,
1308            resolved_model: None,
1309            enable_hooks: false,
1310        };
1311        std::env::remove_var("OPENAI_API_KEY");
1312        let review = invoke_supervisor_agent("objective", &[], None, &config);
1313        assert_eq!(review.verdict, SupervisorVerdict::Warn);
1314        assert!(
1315            review.findings[0].contains("OPENAI_API_KEY"),
1316            "finding should mention the missing env var"
1317        );
1318    }
1319
1320    /// Verify that the claude CLI invocation includes `--verbose`.
1321    ///
1322    /// Creates a mock `claude` script on PATH that exits with an error if `--verbose` is absent
1323    /// and emits a plain-text JSON pass verdict if `--verbose` is present.  The supervisor
1324    /// picks up the JSON via its raw-stdout fallback path.  The test fails if the verdict is
1325    /// not `pass`, which would mean `--verbose` was dropped (mock exits 1, returns fallback Warn).
1326    #[test]
1327    #[cfg(unix)]
1328    fn test_claude_cli_supervisor_passes_verbose_flag() {
1329        use std::io::Write;
1330        use std::os::unix::fs::PermissionsExt;
1331
1332        let tmp = tempfile::tempdir().unwrap();
1333        let claude_path = tmp.path().join("claude");
1334        {
1335            let mut f = std::fs::File::create(&claude_path).unwrap();
1336            // The script checks that --verbose and --allowedTools are in $@ and emits a plain JSON verdict.
1337            // Using echo so there are no printf escaping issues with nested JSON.
1338            f.write_all(
1339                b"#!/bin/sh\n\
1340                  found_verbose=''\n\
1341                  found_tools=''\n\
1342                  for arg in \"$@\"; do\n\
1343                    [ \"$arg\" = \"--verbose\" ] && found_verbose=1\n\
1344                    [ \"$arg\" = \"--allowedTools\" ] && found_tools=1\n\
1345                  done\n\
1346                  if [ -z \"$found_verbose\" ]; then echo 'Error: --verbose missing' >&2; exit 1; fi\n\
1347                  if [ -z \"$found_tools\" ]; then echo 'Error: --allowedTools missing' >&2; exit 1; fi\n\
1348                  echo '{\"verdict\":\"pass\",\"scope_ok\":true,\"findings\":[],\"summary\":\"ok\"}'\n",
1349            )
1350            .unwrap();
1351        }
1352        let mut perms = std::fs::metadata(&claude_path).unwrap().permissions();
1353        perms.set_mode(0o755);
1354        std::fs::set_permissions(&claude_path, perms).unwrap();
1355
1356        let _lock = PATH_MUTEX.lock().unwrap();
1357        let old_path = std::env::var("PATH").unwrap_or_default();
1358        // Prepend temp dir so our mock `claude` takes precedence.
1359        std::env::set_var("PATH", format!("{}:{}", tmp.path().display(), old_path));
1360
1361        let config = SupervisorRunConfig {
1362            enabled: true,
1363            agent: "builtin".to_string(),
1364            verdict_on_block: "warn".to_string(),
1365            constitution_path: None,
1366            skip_if_no_constitution: true,
1367            heartbeat_stale_secs: 10,
1368            timeout_secs: 10,
1369            api_key_env: None,
1370            staging_path: None,
1371            heartbeat_path: None,
1372            agent_profile: None,
1373            resolved_model: None,
1374            enable_hooks: false,
1375        };
1376
1377        let review = invoke_supervisor_agent("test objective", &[], None, &config);
1378
1379        // Restore PATH before any assertions that might panic.
1380        std::env::set_var("PATH", old_path);
1381
1382        assert_eq!(
1383            review.verdict,
1384            SupervisorVerdict::Pass,
1385            "Supervisor must pass --verbose to claude CLI; got findings: {:?}",
1386            review.findings
1387        );
1388    }
1389
1390    #[test]
1391    fn test_build_supervisor_prompt_includes_file_inspection_instruction() {
1392        let prompt =
1393            build_supervisor_prompt("Add JWT authentication", &["src/auth.rs".to_string()], None);
1394        assert!(
1395            prompt.contains("Read"),
1396            "prompt must instruct supervisor to read files"
1397        );
1398        assert!(
1399            prompt.contains("file:line") || prompt.contains("file:"),
1400            "prompt must require file:line citations"
1401        );
1402        assert!(
1403            prompt.contains("cannot be verified") || prompt.contains("Never write"),
1404            "prompt must ban hedging phrases"
1405        );
1406    }
1407
1408    #[test]
1409    fn test_hedging_quality_gate_fires_on_hedging_phrase() {
1410        let mut review = SupervisorReview {
1411            verdict: SupervisorVerdict::Pass,
1412            scope_ok: true,
1413            findings: vec![
1414                "This change cannot be verified without viewing the actual file contents."
1415                    .to_string(),
1416            ],
1417            summary: "Looks fine.".to_string(),
1418            agent: "claude-code".to_string(),
1419            duration_secs: 0.0,
1420        };
1421        let fired = apply_hedging_quality_gate(&mut review);
1422        assert!(fired, "quality gate should fire on 'cannot be verified'");
1423        assert_eq!(
1424            review.verdict,
1425            SupervisorVerdict::Warn,
1426            "verdict should be upgraded to Warn"
1427        );
1428        assert!(
1429            review
1430                .findings
1431                .last()
1432                .unwrap()
1433                .contains("Supervisor produced unverified finding"),
1434            "meta-finding should be appended"
1435        );
1436    }
1437
1438    #[test]
1439    fn test_hedging_quality_gate_no_fire_on_clean_findings() {
1440        let mut review = SupervisorReview {
1441            verdict: SupervisorVerdict::Pass,
1442            scope_ok: true,
1443            findings: vec![
1444                "src/auth.rs:42: JWT secret is not rotated — consider adding rotation logic."
1445                    .to_string(),
1446            ],
1447            summary: "One finding.".to_string(),
1448            agent: "claude-code".to_string(),
1449            duration_secs: 0.0,
1450        };
1451        let fired = apply_hedging_quality_gate(&mut review);
1452        assert!(
1453            !fired,
1454            "quality gate should not fire on clean file:line findings"
1455        );
1456        assert_eq!(review.verdict, SupervisorVerdict::Pass);
1457    }
1458
1459    #[test]
1460    fn test_hedging_quality_gate_preserves_block_verdict() {
1461        let mut review = SupervisorReview {
1462            verdict: SupervisorVerdict::Block,
1463            scope_ok: false,
1464            findings: vec![
1465                "Unable to confirm whether the migration is reversible without viewing migration files.".to_string(),
1466            ],
1467            summary: "Block.".to_string(),
1468            agent: "claude-code".to_string(),
1469            duration_secs: 0.0,
1470        };
1471        apply_hedging_quality_gate(&mut review);
1472        // Block should not be downgraded — only Pass is upgraded to Warn
1473        assert_eq!(
1474            review.verdict,
1475            SupervisorVerdict::Block,
1476            "Block verdict must not be changed"
1477        );
1478    }
1479
1480    #[test]
1481    fn test_supervisor_run_config_agent_profile_field() {
1482        let config = SupervisorRunConfig {
1483            enabled: true,
1484            agent: "builtin".to_string(),
1485            verdict_on_block: "warn".to_string(),
1486            constitution_path: None,
1487            skip_if_no_constitution: true,
1488            heartbeat_stale_secs: 30,
1489            timeout_secs: 30,
1490            api_key_env: None,
1491            staging_path: None,
1492            heartbeat_path: None,
1493            agent_profile: Some("supervisor".to_string()),
1494            resolved_model: Some("claude-sonnet-4-6".to_string()),
1495            enable_hooks: false,
1496        };
1497        assert_eq!(config.agent_profile.as_deref(), Some("supervisor"));
1498        assert_eq!(config.resolved_model.as_deref(), Some("claude-sonnet-4-6"));
1499    }
1500
1501    #[cfg(unix)]
1502    #[test]
1503    fn test_claude_supervisor_sets_current_dir_in_staging() {
1504        use std::io::Write;
1505        use std::os::unix::fs::PermissionsExt;
1506        // Create a staging dir with a sentinel file so we can verify cwd.
1507        let staging = tempfile::tempdir().unwrap();
1508        let sentinel = staging.path().join("STAGING_SENTINEL.txt");
1509        std::fs::write(&sentinel, b"yes").unwrap();
1510
1511        // Create a mock `claude` that checks if STAGING_SENTINEL.txt exists in its cwd.
1512        let bin_dir = tempfile::tempdir().unwrap();
1513        let claude_path = bin_dir.path().join("claude");
1514        {
1515            let mut f = std::fs::File::create(&claude_path).unwrap();
1516            f.write_all(
1517                b"#!/bin/sh\n\
1518                  if [ ! -f STAGING_SENTINEL.txt ]; then\n\
1519                    echo 'Error: not running in staging dir' >&2; exit 1\n\
1520                  fi\n\
1521                  echo '{\"verdict\":\"pass\",\"scope_ok\":true,\"findings\":[],\"summary\":\"staging ok\"}'\n",
1522            )
1523            .unwrap();
1524        }
1525        let mut perms = std::fs::metadata(&claude_path).unwrap().permissions();
1526        perms.set_mode(0o755);
1527        std::fs::set_permissions(&claude_path, perms).unwrap();
1528
1529        let _lock = PATH_MUTEX.lock().unwrap();
1530        let old_path = std::env::var("PATH").unwrap_or_default();
1531        std::env::set_var("PATH", format!("{}:{}", bin_dir.path().display(), old_path));
1532
1533        let config = SupervisorRunConfig {
1534            enabled: true,
1535            agent: "builtin".to_string(),
1536            verdict_on_block: "warn".to_string(),
1537            constitution_path: None,
1538            skip_if_no_constitution: true,
1539            heartbeat_stale_secs: 10,
1540            timeout_secs: 10,
1541            api_key_env: None,
1542            staging_path: Some(staging.path().to_path_buf()),
1543            heartbeat_path: None,
1544            agent_profile: None,
1545            resolved_model: None,
1546            enable_hooks: false,
1547        };
1548
1549        let review = invoke_supervisor_agent("test objective", &[], None, &config);
1550        std::env::set_var("PATH", old_path);
1551
1552        assert_eq!(
1553            review.verdict,
1554            SupervisorVerdict::Pass,
1555            "Supervisor must run in staging dir; got findings: {:?}",
1556            review.findings
1557        );
1558    }
1559
1560    // ── v0.15.14.6 — Supervisor Hook JSON Filtering ───────────────────────
1561
1562    #[test]
1563    fn test_is_hook_json_line_detects_system_type() {
1564        // SessionStart hook JSON that fires before supervisor content.
1565        let hook_line = r#"{"type":"system","subtype":"hook_started","hook_name":"SessionStart"}"#;
1566        assert!(
1567            is_hook_json_line(hook_line),
1568            "SessionStart hook JSON must be detected"
1569        );
1570    }
1571
1572    #[test]
1573    fn test_is_hook_json_line_ignores_non_system_type() {
1574        // Real supervisor content should NOT be filtered.
1575        let result_line =
1576            r#"{"type":"result","subtype":"success","result":"{\"verdict\":\"pass\"}"}"#;
1577        assert!(
1578            !is_hook_json_line(result_line),
1579            "result event must not be filtered"
1580        );
1581
1582        let assistant_line =
1583            r#"{"type":"assistant","message":{"content":[{"type":"text","text":"hi"}]}}"#;
1584        assert!(
1585            !is_hook_json_line(assistant_line),
1586            "assistant event must not be filtered"
1587        );
1588    }
1589
1590    #[test]
1591    fn test_is_hook_json_line_ignores_plain_text() {
1592        assert!(!is_hook_json_line("some plain output"));
1593        assert!(!is_hook_json_line(""));
1594        assert!(!is_hook_json_line("not json at all"));
1595    }
1596
1597    #[test]
1598    fn test_is_hook_json_line_ignores_non_json_braces() {
1599        // A line that starts with { but is not valid JSON should not crash.
1600        assert!(!is_hook_json_line("{not valid json}"));
1601    }
1602
1603    /// Hook JSON lines must NOT be counted as heartbeat tokens and must NOT appear in
1604    /// the returned stdout string.
1605    #[cfg(unix)]
1606    #[test]
1607    fn test_hook_json_line_filtered_from_output() {
1608        // Emit a hook JSON line followed by a real content line.
1609        // The monitor should filter the hook line and return only the content line.
1610        let hook_json = r#"{"type":"system","subtype":"hook_started","hook_name":"SessionStart"}"#;
1611        let real_content = r#"{"type":"result","result":"done"}"#;
1612        let script = format!("echo '{}' && echo '{}'", hook_json, real_content);
1613
1614        let result =
1615            spawn_with_heartbeat_monitor("sh", &["-c", &script], 5, None, "sh", None, &[], None);
1616        assert!(result.is_ok(), "process should succeed: {:?}", result);
1617        let stdout = result.unwrap();
1618        // Hook line must be excluded from output.
1619        assert!(
1620            !stdout.contains("hook_started"),
1621            "hook JSON must not appear in output: {}",
1622            stdout
1623        );
1624        // Real content must be present.
1625        assert!(
1626            stdout.contains("result"),
1627            "real content must be in output: {}",
1628            stdout
1629        );
1630    }
1631
1632    /// A stream consisting only of hook JSON lines should still trigger a stall — the
1633    /// stall timer must NOT be reset by hook lines.
1634    #[cfg(unix)]
1635    #[test]
1636    fn test_only_hook_json_lines_triggers_stall() {
1637        // Print a hook JSON line then hang for 60s.
1638        // stale_secs=1 — should stall because the hook line is filtered.
1639        let hook_json = r#"{"type":"system","subtype":"hook_started","hook_name":"SessionStart"}"#;
1640        let script = format!("echo '{}' && sleep 60", hook_json);
1641
1642        let result = spawn_with_heartbeat_monitor(
1643            "sh",
1644            &["-c", &script],
1645            1, // stale_secs — very short so the test is fast
1646            None,
1647            "sh",
1648            None,
1649            &[],
1650            None,
1651        );
1652        assert!(
1653            result.is_err(),
1654            "stream of only hook JSON should trigger stall"
1655        );
1656        let err = result.unwrap_err().to_string();
1657        assert!(
1658            err.contains("stalled") || err.contains("no tokens"),
1659            "stall error expected: {}",
1660            err
1661        );
1662    }
1663
1664    /// CLAUDE_CODE_DISABLE_HOOKS env var must be set in the supervisor subprocess env
1665    /// when enable_hooks is false.
1666    #[cfg(unix)]
1667    #[test]
1668    fn test_disable_hooks_env_var_set_when_enable_hooks_false() {
1669        use std::io::Write;
1670        use std::os::unix::fs::PermissionsExt;
1671
1672        let tmp = tempfile::tempdir().unwrap();
1673        let claude_path = tmp.path().join("claude");
1674        {
1675            let mut f = std::fs::File::create(&claude_path).unwrap();
1676            // Script checks that CLAUDE_CODE_DISABLE_HOOKS=1 is set.
1677            f.write_all(
1678                b"#!/bin/sh\n\
1679                  if [ \"$CLAUDE_CODE_DISABLE_HOOKS\" = \"1\" ]; then\n\
1680                    echo '{\"verdict\":\"pass\",\"scope_ok\":true,\"findings\":[],\"summary\":\"hooks disabled\"}'\n\
1681                  else\n\
1682                    echo '{\"verdict\":\"block\",\"scope_ok\":false,\"findings\":[\"CLAUDE_CODE_DISABLE_HOOKS not set\"],\"summary\":\"hooks not disabled\"}'\n\
1683                  fi\n",
1684            )
1685            .unwrap();
1686        }
1687        let mut perms = std::fs::metadata(&claude_path).unwrap().permissions();
1688        perms.set_mode(0o755);
1689        std::fs::set_permissions(&claude_path, perms).unwrap();
1690
1691        let _lock = PATH_MUTEX.lock().unwrap();
1692        let old_path = std::env::var("PATH").unwrap_or_default();
1693        std::env::set_var("PATH", format!("{}:{}", tmp.path().display(), old_path));
1694
1695        let config = SupervisorRunConfig {
1696            enabled: true,
1697            agent: "builtin".to_string(),
1698            verdict_on_block: "warn".to_string(),
1699            constitution_path: None,
1700            skip_if_no_constitution: true,
1701            heartbeat_stale_secs: 10,
1702            timeout_secs: 10,
1703            api_key_env: None,
1704            staging_path: None,
1705            heartbeat_path: None,
1706            agent_profile: None,
1707            resolved_model: None,
1708            enable_hooks: false, // hooks should be suppressed
1709        };
1710
1711        let review = invoke_supervisor_agent("test objective", &[], None, &config);
1712        std::env::set_var("PATH", old_path);
1713
1714        assert_eq!(
1715            review.verdict,
1716            SupervisorVerdict::Pass,
1717            "CLAUDE_CODE_DISABLE_HOOKS=1 must be set when enable_hooks=false; got: {:?}",
1718            review.findings
1719        );
1720    }
1721
1722    /// When enable_hooks=true, CLAUDE_CODE_DISABLE_HOOKS must NOT be set.
1723    #[cfg(unix)]
1724    #[test]
1725    fn test_enable_hooks_true_does_not_set_disable_env() {
1726        use std::io::Write;
1727        use std::os::unix::fs::PermissionsExt;
1728
1729        let tmp = tempfile::tempdir().unwrap();
1730        let claude_path = tmp.path().join("claude");
1731        {
1732            let mut f = std::fs::File::create(&claude_path).unwrap();
1733            // Script checks that CLAUDE_CODE_DISABLE_HOOKS is NOT set to "1".
1734            f.write_all(
1735                b"#!/bin/sh\n\
1736                  if [ \"$CLAUDE_CODE_DISABLE_HOOKS\" = \"1\" ]; then\n\
1737                    echo '{\"verdict\":\"block\",\"scope_ok\":false,\"findings\":[\"CLAUDE_CODE_DISABLE_HOOKS was set unexpectedly\"],\"summary\":\"fail\"}'\n\
1738                  else\n\
1739                    echo '{\"verdict\":\"pass\",\"scope_ok\":true,\"findings\":[],\"summary\":\"hooks allowed\"}'\n\
1740                  fi\n",
1741            )
1742            .unwrap();
1743        }
1744        let mut perms = std::fs::metadata(&claude_path).unwrap().permissions();
1745        perms.set_mode(0o755);
1746        std::fs::set_permissions(&claude_path, perms).unwrap();
1747
1748        let _lock = PATH_MUTEX.lock().unwrap();
1749        let old_path = std::env::var("PATH").unwrap_or_default();
1750        // Make sure the env var is cleared in the parent too.
1751        std::env::remove_var("CLAUDE_CODE_DISABLE_HOOKS");
1752        std::env::set_var("PATH", format!("{}:{}", tmp.path().display(), old_path));
1753
1754        let config = SupervisorRunConfig {
1755            enabled: true,
1756            agent: "builtin".to_string(),
1757            verdict_on_block: "warn".to_string(),
1758            constitution_path: None,
1759            skip_if_no_constitution: true,
1760            heartbeat_stale_secs: 10,
1761            timeout_secs: 10,
1762            api_key_env: None,
1763            staging_path: None,
1764            heartbeat_path: None,
1765            agent_profile: None,
1766            resolved_model: None,
1767            enable_hooks: true, // hooks explicitly enabled — must NOT set DISABLE var
1768        };
1769
1770        let review = invoke_supervisor_agent("test objective", &[], None, &config);
1771        std::env::set_var("PATH", old_path);
1772
1773        assert_eq!(
1774            review.verdict,
1775            SupervisorVerdict::Pass,
1776            "CLAUDE_CODE_DISABLE_HOOKS must not be set when enable_hooks=true; got: {:?}",
1777            review.findings
1778        );
1779    }
1780}
ta_changeset/supervisor_review.rs

ta_changeset/
supervisor_review.rs