car-server-core 0.25.0

//! Per-turn run-trace recorder (agent run tracing, U2).
//!
//! A "turn" is one submitted proposal (no inference chain-of-thought
//! capture — KTD1). Given the submitted [`car_ir::ActionProposal`] and the
//! resulting [`car_ir::ActionResult`]s, [`record_turns`] produces an
//! ordered stream of [`car_proto::RunRecord::Turn`] records, one per
//! action, tagged with the session's current `run_id` by the caller.
//!
//! ## Join correctness
//!
//! `ActionResult` carries `action_id` / `status` / `output` / `error` but
//! NOT the tool name or `parameters`, so each result is correlated back to
//! the submitted proposal's `actions[i]` by `action_id` — never by
//! position (the DAG executor may reorder concurrent actions). The
//! submitted action supplies `tool` + `parameters` (and the `prompt`); the
//! result supplies `output` + `status` + `error`.
//!
//! ## Tool-agnostic capture + thin Bulldozer classifier
//!
//! Every action is recorded generically (`tool` / `parameters` / `output`)
//! so non-Bulldozer agents still get a usable trail (KTD1). On top of that
//! a thin, optional classifier interprets two Bulldozer tools by their
//! return-shape contract — these field names are defined by the agent
//! (`~/car-agents/bulldozer/agent.mjs`), not the runtime:
//!
//! - `drive_cli` → [`car_proto::CliOutcome`] from the result's
//!   `timed_out` / `exit_code` / `signal`.
//! - `check_outcome` → [`car_proto::VerifierVerdict`] from `passed`.
//! - any `ActionStatus::Rejected` → [`car_proto::PolicyRejection`] parsed
//!   from `ActionResult.error`, with no `cli_outcome` and a `NotRun`
//!   verifier verdict (the tool body never executed — R11).
//!
//! U3 persists this stream to disk; U4 broadcasts it. Both consume the
//! same ordered `RunRecord::Turn` output, so this module stays free of any
//! storage or transport concern.

use car_ir::{ActionProposal, ActionResult, ActionStatus, ActionType};
use car_proto::{CliOutcome, PolicyRejection, RunRecord, RunTurn, VerifierVerdict};
use serde_json::Value;
use std::collections::HashMap;

/// Tool name Bulldozer uses to drive `claude` / `codex` headless.
///
/// A `ToolCall` action whose `tool` equals this name has its output run
/// through `classify_cli_outcome` when its turn is built.
const DRIVE_CLI_TOOL: &str = "drive_cli";
/// Tool name Bulldozer uses to run its verifier command.
///
/// A `ToolCall` action whose `tool` equals this name has its output run
/// through `classify_verifier` when its turn is built.
const CHECK_OUTCOME_TOOL: &str = "check_outcome";

/// Produce the run-trace turns for a single submitted proposal.
///
/// One [`RunRecord::Turn`] is emitted per entry of `proposal.actions`, in
/// the order the actions were submitted. Each action is paired with the
/// [`ActionResult`] whose `action_id` equals the action's `id`; an action
/// for which no such result exists still yields a turn (built with no
/// result), so the trail reflects everything that was proposed.
///
/// `start_index` is the `index` assigned to the first turn; subsequent
/// turns count up from it. Callers pass the number of turns already
/// recorded for the run so indices keep increasing across proposals.
pub fn record_turns(
    proposal: &ActionProposal,
    results: &[ActionResult],
    start_index: usize,
) -> Vec<RunRecord> {
    // Results are looked up by id rather than by position: the executor is
    // free to hand them back in a different order than the submission. If
    // two results share an id, the later one replaces the earlier one.
    let mut result_lookup: HashMap<&str, &ActionResult> = HashMap::with_capacity(results.len());
    for res in results {
        result_lookup.insert(res.action_id.as_str(), res);
    }

    let mut records = Vec::with_capacity(proposal.actions.len());
    for (position, action) in proposal.actions.iter().enumerate() {
        let matched = result_lookup.get(action.id.as_str()).copied();
        let built = turn_for(action, matched, start_index + position);
        records.push(RunRecord::Turn(built));
    }
    records
}

/// Build the [`RunTurn`] for a single action + (optional) result.
fn turn_for(action: &car_ir::Action, result: Option<&ActionResult>, index: usize) -> RunTurn {
    let tool = action.tool.clone();
    // Generic, tool-agnostic capture: prompt (when present), the full
    // parameters, and the tool output.
    let prompt = action
        .parameters
        .get("prompt")
        .and_then(Value::as_str)
        .map(str::to_string);
    let parameters = serde_json::to_value(&action.parameters).unwrap_or(Value::Null);
    let output = result.and_then(|r| r.output.clone());

    // Rejection short-circuits the classifier: the tool body never ran, so
    // there is no CLI outcome and no verifier verdict (R11). The rejection
    // reason is the verbatim `ActionResult.error`.
    if let Some(res) = result {
        if res.status == ActionStatus::Rejected {
            return RunTurn {
                index,
                prompt,
                tool,
                parameters,
                output,
                cli_outcome: None,
                verifier_verdict: VerifierVerdict::NotRun,
                policy_rejected: Some(parse_rejection(res.error.as_deref())),
            };
        }
    }

    // Thin Bulldozer classifier, keyed on the agent's tool return shape.
    // Only ToolCall actions carry a tool name to classify on.
    let is_tool_call = action.action_type == ActionType::ToolCall;
    let (cli_outcome, verifier_verdict) = match (is_tool_call, tool.as_deref(), output.as_ref()) {
        (true, Some(DRIVE_CLI_TOOL), out) => (classify_cli_outcome(out), VerifierVerdict::NotRun),
        (true, Some(CHECK_OUTCOME_TOOL), out) => (None, classify_verifier(out)),
        // Any other tool: generic turn, no classification.
        _ => (None, VerifierVerdict::NotRun),
    };

    RunTurn {
        index,
        prompt,
        tool,
        parameters,
        output,
        cli_outcome,
        verifier_verdict,
        policy_rejected: None,
    }
}

/// Derive the CLI outcome of a `drive_cli` turn from its output value.
///
/// The output is inspected for three fields, in this order of precedence:
///
/// 1. `timed_out` equal to boolean `true` → [`CliOutcome::Timeout`];
/// 2. `exit_code` readable as an `i64` → [`CliOutcome::Exited`];
/// 3. `signal` present and not JSON null → [`CliOutcome::Killed`].
///
/// Returns `None` when there is no output at all or when none of the three
/// conditions holds, leaving the turn as a generic, unclassified one.
fn classify_cli_outcome(output: Option<&Value>) -> Option<CliOutcome> {
    let payload = output?;

    // Checked first because a timed-out run may still report a null
    // exit_code (R11 partial turn).
    if matches!(payload.get("timed_out"), Some(Value::Bool(true))) {
        return Some(CliOutcome::Timeout);
    }

    let exit_code = payload.get("exit_code").and_then(Value::as_i64);
    let signalled = matches!(payload.get("signal"), Some(sig) if !sig.is_null());

    match exit_code {
        // A numeric exit code means the process ran to completion.
        Some(code) => Some(CliOutcome::Exited { code }),
        // No exit code, but a signal was reported: the process was killed.
        None if signalled => Some(CliOutcome::Killed),
        None => None,
    }
}

/// Derive the verifier verdict of a `check_outcome` turn from its output.
///
/// A boolean `passed` field maps `true` to `Pass` and `false` to `Fail`.
/// No output, no `passed` field, or a non-boolean `passed` all yield
/// `NotRun`: the turn carries no usable verdict.
fn classify_verifier(output: Option<&Value>) -> VerifierVerdict {
    let passed_field = output.and_then(|payload| payload.get("passed"));
    match passed_field {
        Some(Value::Bool(true)) => VerifierVerdict::Pass,
        Some(Value::Bool(false)) => VerifierVerdict::Fail,
        _ => VerifierVerdict::NotRun,
    }
}

/// Turn a `Rejected` action's error string into a [`PolicyRejection`].
///
/// `rule` is the error text copied verbatim (empty when the result carried
/// no error). `param` is whatever `extract_param` finds in that text: the
/// name inside a `param '<name>'` token, as carried by a `deny_tool_param`
/// rejection of the form
/// `policy '<name>': param '<param>' matches '<pattern>'`.
fn parse_rejection(error: Option<&str>) -> PolicyRejection {
    let reason = error.unwrap_or_default();
    let param = extract_param(reason);
    PolicyRejection {
        rule: reason.to_owned(),
        param,
    }
}

/// Extract the parameter name from a `... param '<name>' ...` rejection
/// reason.
///
/// The name is the text between the first `param '` marker and the next
/// single quote after it. Returns `None` when the marker is missing (a
/// `deny_tool` or capability rejection has no param), when the closing
/// quote is missing, or when the quoted name is empty.
fn extract_param(reason: &str) -> Option<String> {
    let (_, after_marker) = reason.split_once("param '")?;
    let (name, _) = after_marker.split_once('\'')?;
    (!name.is_empty()).then(|| name.to_string())
}

#[cfg(test)]
mod tests {
    //! Unit tests for the per-turn recorder: generic capture, the Bulldozer
    //! classifier, policy rejections, the by-id join, and index numbering.

    use super::*;
    use car_ir::Action;
    use serde_json::json;
    use std::collections::HashMap;

    /// Build a `ToolCall` action for `tool` with the given parameters and
    /// otherwise fixed test defaults. Shared by the per-tool builders so
    /// the full `Action` literal is written once.
    fn tool_action(
        id: &str,
        tool: &str,
        parameters: impl IntoIterator<Item = (String, Value)>,
    ) -> Action {
        Action {
            id: id.to_string(),
            action_type: ActionType::ToolCall,
            tool: Some(tool.to_string()),
            parameters: parameters.into_iter().collect(),
            preconditions: vec![],
            expected_effects: HashMap::new(),
            state_dependencies: vec![],
            read_set: vec![],
            write_set: vec![],
            assumptions: vec![],
            idempotent: false,
            max_retries: 3,
            failure_behavior: car_ir::FailureBehavior::Abort,
            timeout_ms: None,
            metadata: HashMap::new(),
        }
    }

    /// A `drive_cli` action targeting the `claude` CLI with `prompt`.
    fn drive_action(id: &str, prompt: &str) -> Action {
        tool_action(
            id,
            DRIVE_CLI_TOOL,
            [
                ("cli".to_string(), json!("claude")),
                ("prompt".to_string(), json!(prompt)),
            ],
        )
    }

    /// A `check_outcome` action running the verifier `command`.
    fn check_action(id: &str, command: &str) -> Action {
        tool_action(
            id,
            CHECK_OUTCOME_TOOL,
            [("command".to_string(), json!(command))],
        )
    }

    /// Wrap `actions` in a proposal with fixed id/source.
    fn proposal(actions: Vec<Action>) -> ActionProposal {
        ActionProposal {
            id: "p1".to_string(),
            source: "test".to_string(),
            actions,
            timestamp: chrono::Utc::now(),
            context: HashMap::new(),
        }
    }

    /// Build an `ActionResult` for action `id` with the given status,
    /// output and error.
    fn result(
        id: &str,
        status: ActionStatus,
        output: Option<Value>,
        error: Option<&str>,
    ) -> ActionResult {
        ActionResult {
            action_id: id.to_string(),
            status,
            output,
            error: error.map(str::to_string),
            state_changes: HashMap::new(),
            duration_ms: None,
            timestamp: chrono::Utc::now(),
        }
    }

    /// Unwrap a record that must be a `Turn`; panics on any other variant.
    fn turn(rec: &RunRecord) -> &RunTurn {
        match rec {
            RunRecord::Turn(t) => t,
            other => panic!("expected Turn, got {other:?}"),
        }
    }

    #[test]
    fn drive_cli_turn_records_prompt_output_and_exit_code() {
        let prop = proposal(vec![drive_action("a1", "make the test pass")]);
        let out = json!({ "cli": "claude", "exit_code": 0, "output_tail": "all green" });
        let results = vec![result("a1", ActionStatus::Succeeded, Some(out.clone()), None)];

        let recs = record_turns(&prop, &results, 0);
        assert_eq!(recs.len(), 1);
        let t = turn(&recs[0]);
        assert_eq!(t.index, 0);
        assert_eq!(t.prompt.as_deref(), Some("make the test pass"));
        assert_eq!(t.tool.as_deref(), Some(DRIVE_CLI_TOOL));
        assert_eq!(t.cli_outcome, Some(CliOutcome::Exited { code: 0 }));
        assert_eq!(t.verifier_verdict, VerifierVerdict::NotRun);
        assert!(t.policy_rejected.is_none());
        // output_tail is preserved verbatim in the captured output.
        assert_eq!(
            t.output.as_ref().unwrap().get("output_tail").unwrap(),
            &json!("all green")
        );
    }

    #[test]
    fn check_outcome_pass_and_fail() {
        let prop = proposal(vec![check_action("v1", "test -f built")]);
        let pass = vec![result(
            "v1",
            ActionStatus::Succeeded,
            Some(json!({ "passed": true, "output_tail": "ok" })),
            None,
        )];
        let pass_recs = record_turns(&prop, &pass, 0);
        assert_eq!(turn(&pass_recs[0]).verifier_verdict, VerifierVerdict::Pass);

        let fail = vec![result(
            "v1",
            ActionStatus::Succeeded,
            Some(json!({ "passed": false, "output_tail": "still failing" })),
            None,
        )];
        let fail_recs = record_turns(&prop, &fail, 0);
        let t = turn(&fail_recs[0]);
        assert_eq!(t.verifier_verdict, VerifierVerdict::Fail);
        // Fail is the healthy re-prod case — no cli_outcome on a verifier turn.
        assert!(t.cli_outcome.is_none());
    }

    #[test]
    fn policy_rejected_drive_records_rejection_and_not_run() {
        let prop = proposal(vec![drive_action("a1", "rm -rf /")]);
        // Rejected action: no output (the tool body never ran), error
        // carries the policy reason.
        let results = vec![result(
            "a1",
            ActionStatus::Rejected,
            None,
            Some("policy 'no-destructive': param 'prompt' matches 'rm -rf'"),
        )];
        let recs = record_turns(&prop, &results, 0);
        let t = turn(&recs[0]);
        let pr = t.policy_rejected.as_ref().expect("rejection recorded");
        assert!(pr.rule.contains("no-destructive"));
        assert_eq!(pr.param.as_deref(), Some("prompt"));
        // The tool body never ran: no CLI outcome, no verdict, no output.
        assert_eq!(t.cli_outcome, None);
        assert_eq!(t.verifier_verdict, VerifierVerdict::NotRun);
        assert!(t.output.is_none());
    }

    #[test]
    fn rejection_without_error_has_empty_rule_and_no_param() {
        // A Rejected result that carries no error string still records a
        // rejection; the rule is empty and no param can be extracted.
        let prop = proposal(vec![drive_action("a1", "anything")]);
        let results = vec![result("a1", ActionStatus::Rejected, None, None)];
        let recs = record_turns(&prop, &results, 0);
        let t = turn(&recs[0]);
        let pr = t.policy_rejected.as_ref().expect("rejection recorded");
        assert!(pr.rule.is_empty());
        assert!(pr.param.is_none());
        assert_eq!(t.cli_outcome, None);
        assert_eq!(t.verifier_verdict, VerifierVerdict::NotRun);
    }

    #[test]
    fn timed_out_drive_records_timeout_and_not_run() {
        let prop = proposal(vec![drive_action("a1", "long task")]);
        let out = json!({ "timed_out": true, "exit_code": null, "output_tail": "..." });
        let results = vec![result("a1", ActionStatus::Succeeded, Some(out), None)];
        let recs = record_turns(&prop, &results, 0);
        let t = turn(&recs[0]);
        assert_eq!(t.cli_outcome, Some(CliOutcome::Timeout));
        assert_eq!(t.verifier_verdict, VerifierVerdict::NotRun);
    }

    #[test]
    fn killed_drive_records_killed() {
        let prop = proposal(vec![drive_action("a1", "task")]);
        let out = json!({ "timed_out": false, "exit_code": null, "signal": "SIGKILL" });
        let results = vec![result("a1", ActionStatus::Succeeded, Some(out), None)];
        let recs = record_turns(&prop, &results, 0);
        assert_eq!(turn(&recs[0]).cli_outcome, Some(CliOutcome::Killed));
    }

    #[test]
    fn drive_cli_output_without_known_fields_is_unclassified() {
        // Output that carries none of timed_out / exit_code / signal is
        // kept verbatim but yields no CLI outcome.
        let prop = proposal(vec![drive_action("a1", "task")]);
        let out = json!({ "output_tail": "???" });
        let results = vec![result("a1", ActionStatus::Succeeded, Some(out.clone()), None)];
        let recs = record_turns(&prop, &results, 0);
        let t = turn(&recs[0]);
        assert_eq!(t.cli_outcome, None);
        assert_eq!(t.verifier_verdict, VerifierVerdict::NotRun);
        assert_eq!(t.output, Some(out));
    }

    #[test]
    fn generic_tool_records_tool_params_output() {
        // A non-Bulldozer tool still produces a generic turn.
        let mut action = drive_action("a1", "ignored");
        action.tool = Some("search".to_string());
        action.parameters = [("query".to_string(), json!("rust"))].into_iter().collect();
        let prop = proposal(vec![action]);
        let results = vec![result(
            "a1",
            ActionStatus::Succeeded,
            Some(json!("results")),
            None,
        )];
        let recs = record_turns(&prop, &results, 0);
        let t = turn(&recs[0]);
        assert_eq!(t.tool.as_deref(), Some("search"));
        assert_eq!(t.parameters.get("query").unwrap(), &json!("rust"));
        assert_eq!(t.output, Some(json!("results")));
        // No Bulldozer classification for an unknown tool.
        assert_eq!(t.cli_outcome, None);
        assert_eq!(t.verifier_verdict, VerifierVerdict::NotRun);
        assert!(t.policy_rejected.is_none());
    }

    #[test]
    fn join_is_by_action_id_not_position() {
        // Results returned out of order (DAG concurrency) must still pair
        // to the right action by id.
        let prop = proposal(vec![
            drive_action("a1", "first"),
            check_action("a2", "verify"),
        ]);
        let results = vec![
            result("a2", ActionStatus::Succeeded, Some(json!({ "passed": true })), None),
            result(
                "a1",
                ActionStatus::Succeeded,
                Some(json!({ "exit_code": 0 })),
                None,
            ),
        ];
        let recs = record_turns(&prop, &results, 0);
        // Turn order follows submission order, not result order.
        let t0 = turn(&recs[0]);
        assert_eq!(t0.tool.as_deref(), Some(DRIVE_CLI_TOOL));
        assert_eq!(t0.cli_outcome, Some(CliOutcome::Exited { code: 0 }));
        let t1 = turn(&recs[1]);
        assert_eq!(t1.tool.as_deref(), Some(CHECK_OUTCOME_TOOL));
        assert_eq!(t1.verifier_verdict, VerifierVerdict::Pass);
    }

    #[test]
    fn action_without_result_still_produces_turn() {
        // An action with no matching result is still recorded: the trail
        // captures what was proposed, with no output and no classification.
        let prop = proposal(vec![
            drive_action("a1", "never ran"),
            check_action("a2", "true"),
        ]);
        let results = vec![result(
            "a2",
            ActionStatus::Succeeded,
            Some(json!({ "passed": true })),
            None,
        )];
        let recs = record_turns(&prop, &results, 0);
        assert_eq!(recs.len(), 2);

        let t0 = turn(&recs[0]);
        assert_eq!(t0.index, 0);
        assert_eq!(t0.tool.as_deref(), Some(DRIVE_CLI_TOOL));
        assert_eq!(t0.prompt.as_deref(), Some("never ran"));
        assert!(t0.output.is_none());
        assert_eq!(t0.cli_outcome, None);
        assert_eq!(t0.verifier_verdict, VerifierVerdict::NotRun);
        assert!(t0.policy_rejected.is_none());

        let t1 = turn(&recs[1]);
        assert_eq!(t1.index, 1);
        assert_eq!(t1.verifier_verdict, VerifierVerdict::Pass);
    }

    #[test]
    fn index_is_monotonic_across_proposals() {
        let prop = proposal(vec![drive_action("a1", "x")]);
        let results = vec![result(
            "a1",
            ActionStatus::Succeeded,
            Some(json!({ "exit_code": 0 })),
            None,
        )];
        // Second proposal in the same run starts numbering at 1.
        let recs = record_turns(&prop, &results, 1);
        assert_eq!(turn(&recs[0]).index, 1);
    }

    #[test]
    fn extract_param_handles_missing_token() {
        assert_eq!(extract_param("policy 'x': tool 'deploy' denied"), None);
        assert_eq!(
            extract_param("policy 'x': param 'prompt' matches 'rm'"),
            Some("prompt".to_string())
        );
        assert_eq!(extract_param("param ''"), None);
    }

    #[test]
    fn extract_param_handles_unterminated_quote() {
        // The marker is present but the closing quote never arrives.
        assert_eq!(extract_param("policy 'x': param 'prompt"), None);
    }
}