// car-external-agents 0.25.0
//
// Detection of installed agentic CLIs (Claude Code, Codex, Gemini) for the Common Agent Runtime.
//! Per-tool health checks — ground truth for "is this CLI configured
//! and usable right now."
//!
//! Replaces the Phase 1 credential-file shape heuristic. Each adapter
//! delegates to the tool's own auth-status command:
//!
//! - **Claude Code:** `claude auth status` — returns structured JSON
//!   (`{loggedIn, authMethod, subscriptionType, ...}`).
//! - **Codex:** `codex login status` — returns plain text ("Logged
//!   in using ChatGPT" / "Logged in using API key" / "Not logged in").
//! - **Gemini:** no safe headless status command (running the binary
//!   without args triggers a browser OAuth flow). Falls back to
//!   credential-file shape inspection.
//!
//! Health-check spawn-and-parse takes ~hundreds of ms per tool, so
//! a 30s in-memory TTL cache prevents callers from hammering the
//! probe in tight invocation loops. Force-refresh with
//! [`check_one`] / [`check_all`] passing `force = true`.

use serde::{Deserialize, Serialize};
use serde_json::Value;
use std::collections::HashMap;
use std::sync::OnceLock;
use std::time::Duration;
use tokio::sync::Mutex;

use crate::adapters::{self, Adapter};
use crate::types::{AdapterId, ExternalAgentSpec};

/// Upper bound on how long a single status-command probe may run.
/// `run_status_probe` passes this to `tokio::time::timeout` and
/// reports `"timed out"` when it elapses.
const HEALTH_PROBE_TIMEOUT: Duration = Duration::from_secs(5);
/// Maximum age, in seconds, of a cached health result that
/// `check_one_inner` will still return without re-running the probe.
/// Compared against `ExternalAgentHealth::checked_at`.
const CACHE_TTL_SECS: u64 = 30;

/// Ground-truth health bucket for one external agent.
///
/// `Ready` is the only state that justifies invoking the tool;
/// every other state should be surfaced to the user with the
/// adapter's `reason` field so they know what's wrong.
///
/// Serialized with serde as a snake_case string (`"ready"`,
/// `"not_configured"`, `"expired"`, `"network_error"`, `"unknown"`).
/// The `Default` value is [`HealthStatus::Unknown`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum HealthStatus {
    /// Tool is authenticated and the status command succeeded.
    /// Invocations should work modulo transient network issues.
    Ready,
    /// Tool is installed but no credentials present. The user
    /// needs to run the tool's login flow.
    NotConfigured,
    /// Credentials present but rejected (revoked token, expired
    /// OAuth, etc.). The user needs to re-authenticate.
    Expired,
    /// Status command failed for transport reasons (network down,
    /// vendor service unreachable). Retry later.
    NetworkError,
    /// Probe didn't return enough information to classify, or the
    /// tool doesn't expose a safe headless status command (Gemini).
    /// Don't make trust decisions on this — fall back to the
    /// Phase 1 credential-file heuristic.
    ///
    /// This is the `Default` variant, and the status a freshly built
    /// `base_health` record starts with.
    #[default]
    Unknown,
}

/// Health-check result for one external agent.
///
/// Values of this type are what the in-memory cache stores, keyed by
/// adapter id, and what `check_one` / `check_all` hand back to callers.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExternalAgentHealth {
    /// Adapter id (`"claude-code"`, `"codex"`, `"gemini"`).
    pub id: String,
    /// Bucket — see [`HealthStatus`].
    pub status: HealthStatus,
    /// Tool-specific structured details parsed from the status
    /// command output. Shape varies per adapter; consumers should
    /// treat unknown fields as opaque. Empty object when the tool
    /// doesn't expose structured status.
    ///
    /// Note: `#[serde(default)]` makes this field optional on
    /// deserialization, and a missing field yields `Value`'s default
    /// (`Value::Null`), not the empty object that `base_health` writes.
    /// Consumers should accept both.
    #[serde(default)]
    pub details: Value,
    /// Free-form human-readable reason. Populated when the bucket
    /// isn't `Ready` so the user has something actionable.
    /// Omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub reason: Option<String>,
    /// UNIX seconds when the check ran. Also the timestamp the cache
    /// compares against `CACHE_TTL_SECS` to decide freshness.
    pub checked_at: u64,
}

/// Current wall-clock time as whole seconds since the UNIX epoch.
///
/// Returns `0` when the system clock reads earlier than the epoch,
/// rather than propagating the error.
fn now_secs() -> u64 {
    use std::time::{SystemTime, UNIX_EPOCH};

    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(since_epoch) => since_epoch.as_secs(),
        // Clock is set before 1970: fall back to zero.
        Err(_) => 0,
    }
}

/// Process-wide store of the most recent health result per adapter id.
///
/// The map is created empty on first access and lives for the rest of
/// the process; callers lock the returned mutex to read or update it.
fn cache() -> &'static Mutex<HashMap<String, ExternalAgentHealth>> {
    type HealthStore = Mutex<HashMap<String, ExternalAgentHealth>>;

    static STORE: OnceLock<HealthStore> = OnceLock::new();
    STORE.get_or_init(|| Mutex::new(HashMap::new()))
}

/// Health-check every detected agent in `detected`.
///
/// Results are returned in the same order as `detected`. Entries
/// whose `id` is not one of the known adapters (`"claude-code"`,
/// `"codex"`, `"gemini"`) are skipped, so the output may be shorter
/// than the input.
///
/// With `force = false` a cached result younger than the TTL is
/// reused; `force = true` re-runs every probe and refreshes the cache.
///
/// # Panics
///
/// Panics if a recognised id has no matching entry in
/// `adapters::all()`.
pub async fn check_all(detected: &[ExternalAgentSpec], force: bool) -> Vec<ExternalAgentHealth> {
    let mut reports = Vec::with_capacity(detected.len());
    for spec in detected {
        // Translate the string id into the typed adapter id.
        let known = match spec.id.as_str() {
            "claude-code" => Some(AdapterId::ClaudeCode),
            "codex" => Some(AdapterId::Codex),
            "gemini" => Some(AdapterId::Gemini),
            _ => None,
        };
        let Some(wanted) = known else {
            continue;
        };

        let registry = adapters::all();
        let adapter = registry
            .iter()
            .find(|entry| entry.id == wanted)
            .expect("adapter id from detection must exist");

        let report = check_one_inner(adapter, &spec.binary_path, force).await;
        reports.push(report);
    }
    reports
}

/// Health-check the single agent identified by `id`.
///
/// Runs detection first, then looks for a detected agent with that
/// id. Returns `None` when no detected agent matches, or when the id
/// is not one of the known adapters (`"claude-code"`, `"codex"`,
/// `"gemini"`).
///
/// With `force = false` a cached result younger than the TTL is
/// reused; `force = true` re-runs the probe and refreshes the cache.
///
/// # Panics
///
/// Panics if a recognised id has no matching entry in
/// `adapters::all()`.
pub async fn check_one(id: &str, force: bool) -> Option<ExternalAgentHealth> {
    let installed = crate::detect().await;
    let spec = installed.iter().find(|s| s.id == id)?;

    // Translate the string id into the typed adapter id.
    let wanted = match spec.id.as_str() {
        "claude-code" => Some(AdapterId::ClaudeCode),
        "codex" => Some(AdapterId::Codex),
        "gemini" => Some(AdapterId::Gemini),
        _ => None,
    }?;

    let registry = adapters::all();
    let adapter = registry
        .iter()
        .find(|entry| entry.id == wanted)
        .expect("adapter id from detection must exist");

    let report = check_one_inner(adapter, &spec.binary_path, force).await;
    Some(report)
}

/// Cached health check for a single adapter.
///
/// Unless `force` is set, a cached result stored under this adapter's
/// id is returned when it is younger than `CACHE_TTL_SECS`. Otherwise
/// the adapter's `health_check` probe runs against `binary_path` and
/// its result replaces the cache entry before being returned.
///
/// The cache lock is released before the probe starts, so the probe
/// never runs while holding the lock. Callers that miss the cache at
/// the same time may each run the probe; the last one to finish
/// overwrites the entry.
async fn check_one_inner(
    adapter: &Adapter,
    binary_path: &std::path::Path,
    force: bool,
) -> ExternalAgentHealth {
    let id = adapter.id.as_str().to_string();
    if !force {
        let guard = cache().lock().await;
        if let Some(cached) = guard.get(&id) {
            // `checked_at` is wall-clock time, which can step backwards
            // (NTP correction, manual change). An entry stamped in the
            // future makes `checked_sub` return `None`; treat that as
            // stale and re-probe, rather than clamping the age to zero
            // and serving the entry until the clock catches up.
            let is_fresh = matches!(
                now_secs().checked_sub(cached.checked_at),
                Some(age) if age < CACHE_TTL_SECS
            );
            if is_fresh {
                return cached.clone();
            }
        }
    }
    let result = (adapter.health_check)(binary_path).await;
    let mut guard = cache().lock().await;
    guard.insert(id, result.clone());
    result
}

// --- per-adapter health-check implementations -----------------------

/// Spawn `<bin> <args>` and collect its output, bounded by
/// `HEALTH_PROBE_TIMEOUT`.
///
/// On success returns `(stdout, stderr, exit_code)`. Both streams are
/// decoded lossily as UTF-8. Stderr is returned alongside stdout
/// because some tools (codex) write their status there. `exit_code`
/// is `-1` when the process status carries no exit code.
///
/// The child gets a null stdin, and is configured with
/// `kill_on_drop(true)` so abandoning the wait on timeout does not
/// leave it running.
///
/// # Errors
///
/// Returns a message string: `"spawn failed: …"` if the process could
/// not be started, `"wait failed: …"` if collecting its output failed,
/// or `"timed out"` if it did not finish within the timeout.
pub(crate) async fn run_status_probe(
    bin: &std::path::Path,
    args: &[&str],
) -> Result<(String, String, i32), String> {
    use std::process::Stdio;

    let child = tokio::process::Command::new(bin)
        .args(args)
        .stdin(Stdio::null())
        .stdout(Stdio::piped())
        .stderr(Stdio::piped())
        .kill_on_drop(true)
        .spawn()
        .map_err(|e| format!("spawn failed: {e}"))?;

    // Outer error: the timeout elapsed. Inner error: waiting on the child failed.
    let finished = tokio::time::timeout(HEALTH_PROBE_TIMEOUT, child.wait_with_output())
        .await
        .map_err(|_| "timed out".to_string())?
        .map_err(|e| format!("wait failed: {e}"))?;

    let exit_code = finished.status.code().unwrap_or(-1);
    let captured_out = String::from_utf8_lossy(&finished.stdout).into_owned();
    let captured_err = String::from_utf8_lossy(&finished.stderr).into_owned();
    Ok((captured_out, captured_err, exit_code))
}

/// Build a base health record. Per-adapter probes mutate it to
/// reflect their findings.
pub(crate) fn base_health(id: AdapterId) -> ExternalAgentHealth {
    ExternalAgentHealth {
        id: id.as_str().to_string(),
        status: HealthStatus::Unknown,
        details: Value::Object(Default::default()),
        reason: None,
        checked_at: now_secs(),
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Every `HealthStatus` variant serializes to its snake_case name
    /// and parses back to the same variant.
    #[test]
    fn health_status_roundtrips_serde() {
        let expectations = [
            (HealthStatus::Ready, r#""ready""#),
            (HealthStatus::NotConfigured, r#""not_configured""#),
            (HealthStatus::Expired, r#""expired""#),
            (HealthStatus::NetworkError, r#""network_error""#),
            (HealthStatus::Unknown, r#""unknown""#),
        ];
        for &(status, json) in &expectations {
            // Forward direction: variant -> wire string.
            let encoded = serde_json::to_string(&status).unwrap();
            assert_eq!(encoded, json, "{status:?}");
            // Reverse direction: wire string -> variant.
            let decoded: HealthStatus = serde_json::from_str(json).unwrap();
            assert_eq!(decoded, status);
        }
    }

    /// A spec whose id matches no known adapter yields no health
    /// record, even with the cache bypassed.
    #[tokio::test]
    async fn check_all_skips_unknown_adapter_ids() {
        let specs = [ExternalAgentSpec {
            id: "made-up".to_string(),
            display_name: "Made Up".to_string(),
            binary_path: "/usr/bin/false".into(),
            version: None,
            auth_kind: crate::AuthKind::Unknown,
            capabilities: crate::Capabilities::default(),
            detected_at: 0,
            health: None,
        }];
        let reports = check_all(&specs, true).await;
        assert!(reports.is_empty(), "unknown adapter id must be skipped");
    }
}