car-ffi-common 0.32.1

Shared logic for FFI bindings (NAPI, PyO3) — JSON wrappers for verify, multi-agent, scheduler
//! JSON wrappers for Code World Models (Slice 1 — arXiv 2510.04542; Slice 2 —
//! predictive simulation).
//!
//! Three stateless helpers, binding-only like the rest of the verify family
//! (`docs/proposals/code-world-models.md`):
//!
//! - [`score`] computes the paper's **transition accuracy** + structured
//!   failures over `(state_before, action, state_after)` records, given
//!   predictions the caller already produced by running the generated model
//!   (e.g. in a sandbox). Code execution is the caller's job, so this stays a
//!   pure function — mirroring how `car_builder` keeps inference injected.
//! - [`transitions_from_events`] rebuilds those transition records from a
//!   `car_eventlog` JSONL tail, the trajectories CAR already records. Lives
//!   here, not in `car-verify`, because only this crate depends on the event
//!   log — keeping `car-verify` dependency-light.
//! - [`simulate_with_predictions`] (Slice 2) simulates a proposal's final
//!   state from caller-supplied per-action effect predictions, applying a
//!   prediction only when its accuracy meets the caller's threshold and
//!   otherwise falling back to the action's declared `expected_effects`.

use car_ir::ActionProposal;
use car_verify::cwm::{
    score_predictions, simulate_with_model, GatedEffectModel, GatedPrediction, State, Transition,
};
use serde_json::Value;
use std::collections::HashMap;

/// Score precomputed predictions against recorded transitions.
///
/// `transitions_json` is a JSON array of `{ state_before, action, state_after }`
/// objects (e.g. from [`transitions_from_events`]). `predictions_json` is a
/// JSON array aligned by index, each element either the model's predicted
/// post-state object, or `{ "error": "<stack trace>" }` when running the model
/// threw. Returns the `ScoreReport` JSON `{ total, correct, errored, accuracy,
/// failures: [{ index, action, expected, predicted?, error? }] }` — the
/// paper's transition-accuracy metric. A length mismatch is an error, not a
/// silent truncation.
pub fn score(transitions_json: &str, predictions_json: &str) -> Result<String, String> {
    let transitions: Vec<Transition> = crate::from_json("transitions", transitions_json)?;

    // Each prediction is a state object, or {"error": "..."} for a thrown run.
    let raw: Vec<Value> = crate::from_json("predictions", predictions_json)?;
    let predictions: Vec<Result<State, String>> = raw
        .into_iter()
        .map(|v| match v {
            Value::Object(ref m) if m.contains_key("error") => Err(m
                .get("error")
                .and_then(|e| e.as_str())
                .unwrap_or("error")
                .to_string()),
            Value::Object(m) => {
                Ok(m.into_iter().collect::<HashMap<String, Value>>())
            }
            other => Err(format!("prediction is not an object: {other}")),
        })
        .collect();

    let report = score_predictions(&transitions, &predictions)?;
    crate::to_json(&report)
}

/// Reconstruct `(state_before, action, state_after)` transitions from a
/// [`car_eventlog`] JSONL tail (one event per line).
///
/// Starting from `initial_state_json` (an empty state when absent), every
/// `StateChanged` event whose `data.changes` is an object (`{key: new_value}`,
/// as the executor writes them) is applied in log order. Each such delta
/// closes one transition and becomes the running state for the next one.
///
/// `actions_json` (optional) maps an `action_id` → the action JSON to attach,
/// so a transition carries the proposal's `tool`/`parameters`. When the event
/// has an `action_id` with no entry in that map, the transition's `action` is
/// `{"id": <action_id>}`; when the event has no `action_id`, it is `null`.
///
/// Blank lines, lines that don't parse as events, events of any other kind,
/// and `StateChanged` events without an object-valued `changes` are skipped.
/// Returns a JSON array of `Transition` objects, ready to feed [`score`].
pub fn transitions_from_events(
    events_jsonl: &str,
    initial_state_json: Option<&str>,
    actions_json: Option<&str>,
) -> Result<String, String> {
    use car_eventlog::{Event, EventKind};

    let seed: Option<State> = crate::from_json_opt("initial_state", initial_state_json)?;
    let mut running: State = seed.unwrap_or_default();
    let action_map: Option<HashMap<String, Value>> =
        crate::from_json_opt("actions", actions_json)?;

    let mut transitions: Vec<Transition> = Vec::new();
    for raw_line in events_jsonl.lines() {
        if raw_line.trim().is_empty() {
            continue;
        }

        // Only well-formed StateChanged events contribute a transition.
        let event = match serde_json::from_str::<Event>(raw_line) {
            Ok(parsed) if parsed.kind == EventKind::StateChanged => parsed,
            _ => continue,
        };
        let delta = match event.data.get("changes").and_then(Value::as_object) {
            Some(delta) => delta,
            None => continue,
        };

        // Snapshot first, then fold the delta into the running state.
        let snapshot = running.clone();
        running.extend(delta.iter().map(|(key, value)| (key.clone(), value.clone())));

        // Prefer the caller-supplied action JSON; otherwise identify the
        // action by id; with no id at all there is nothing to attach.
        let action = match event.action_id.as_ref() {
            Some(id) => action_map
                .as_ref()
                .and_then(|known| known.get(id))
                .cloned()
                .unwrap_or_else(|| serde_json::json!({ "id": id })),
            None => Value::Null,
        };

        transitions.push(Transition {
            state_before: snapshot,
            action,
            state_after: running.clone(),
        });
    }

    crate::to_json(&transitions)
}

/// Predictive simulation (Slice 2): compute a proposal's final state using
/// per-action effect predictions from a verified Code World Model, gated by
/// accuracy.
///
/// - `proposal_json` — an `ActionProposal`.
/// - `initial_state_json` — optional seed state.
/// - `predictions_json` — maps `action_id` →
///   `{ "effects": {key: value, ...}, "accuracy": <0..1> }` (effects the
///   caller computed by running the generated model).
/// - `min_accuracy` — the gate: a prediction is applied only when
///   `accuracy >= min_accuracy`.
///
/// For a gated-out prediction, and for any action without a prediction, the
/// simulator falls back to the action's declared `expected_effects` (i.e.
/// static `simulate`), so an under-accurate model can never worsen the
/// result. Returns the final state JSON.
pub fn simulate_with_predictions(
    proposal_json: &str,
    initial_state_json: Option<&str>,
    predictions_json: &str,
    min_accuracy: f64,
) -> Result<String, String> {
    // Decode inputs in argument order so the first malformed one is reported.
    let parsed_proposal: ActionProposal = crate::from_json("proposal", proposal_json)?;
    let seed: Option<State> = crate::from_json_opt("initial_state", initial_state_json)?;
    let by_action: HashMap<String, GatedPrediction> =
        crate::from_json("predictions", predictions_json)?;

    let final_state = simulate_with_model(
        &parsed_proposal,
        seed.as_ref(),
        &GatedEffectModel {
            predictions: by_action,
            min_accuracy,
        },
    );
    crate::to_json(&final_state)
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Single-transition fixture whose recorded post-state is `{"x":1}`.
    const SINGLE_TRANSITION: &str =
        r#"[{"state_before":{},"action":{},"state_after":{"x":1}}]"#;

    /// Decode a wrapper's JSON output, panicking if it is not valid JSON.
    fn parse(json: &str) -> Value {
        serde_json::from_str(json).unwrap()
    }

    /// Predictions matching every recorded post-state score 100%.
    #[test]
    fn score_roundtrip_perfect() {
        let recorded = r#"[
            {"state_before":{"count":0},"action":{"inc":1},"state_after":{"count":1}},
            {"state_before":{"count":1},"action":{"inc":2},"state_after":{"count":3}}
        ]"#;
        let predicted = r#"[{"count":1},{"count":3}]"#;

        let report = parse(&score(recorded, predicted).unwrap());

        assert_eq!(report["total"], 2);
        assert_eq!(report["correct"], 2);
        assert_eq!(report["accuracy"], 1.0);
    }

    /// An `{"error": ...}` prediction is counted and surfaced in `failures`.
    #[test]
    fn score_reports_error_predictions() {
        let predicted = r#"[{"error":"NameError: boom"}]"#;

        let report = parse(&score(SINGLE_TRANSITION, predicted).unwrap());

        assert_eq!(report["errored"], 1);
        assert_eq!(report["failures"][0]["error"], "NameError: boom");
    }

    /// Fewer predictions than transitions is rejected outright.
    #[test]
    fn score_length_mismatch_errs() {
        let outcome = score(SINGLE_TRANSITION, "[]");
        assert!(outcome.is_err());
    }

    /// Event-log deltas fold into transitions that `score` accepts as-is.
    #[test]
    fn transitions_from_events_bridge() {
        let log_lines = [
            r#"{"kind":"state_changed","action_id":"a1","proposal_id":"p","data":{"changes":{"count":1}},"timestamp":"2026-06-27T00:00:00Z"}"#,
            r#"{"kind":"action_succeeded","action_id":"a1","proposal_id":"p","data":{},"timestamp":"2026-06-27T00:00:01Z"}"#,
            r#"{"kind":"state_changed","action_id":"a2","proposal_id":"p","data":{"changes":{"count":3}},"timestamp":"2026-06-27T00:00:02Z"}"#,
        ];
        let log = log_lines.join("\n");
        let known_actions = r#"{"a1":{"tool":"inc","by":1}}"#;
        let seed = r#"{"count":0}"#;

        let bridged = transitions_from_events(&log, Some(seed), Some(known_actions)).unwrap();
        let transitions = parse(&bridged);

        // Only the two state_changed events produce transitions.
        assert_eq!(transitions.as_array().unwrap().len(), 2);
        assert_eq!(transitions[0]["state_before"]["count"], 0);
        assert_eq!(transitions[0]["state_after"]["count"], 1);
        assert_eq!(transitions[0]["action"]["tool"], "inc");
        // No map entry for a2, so the action falls back to its id.
        assert_eq!(transitions[1]["action"]["id"], "a2");

        // The bridge output feeds score directly.
        let predicted = r#"[{"count":1},{"count":3}]"#;
        let report = parse(&score(&bridged, predicted).unwrap());
        assert_eq!(report["accuracy"], 1.0);
    }

    /// The accuracy gate decides between predicted and declared effects.
    #[test]
    fn simulate_with_predictions_gate() {
        let proposal = r#"{"actions":[
            {"type":"tool_call","id":"a1","tool":"t","expected_effects":{"x":1}}
        ]}"#;

        // Accurate prediction overrides the declared effect.
        let trusted = r#"{"a1":{"effects":{"x":42},"accuracy":0.99}}"#;
        let gated_in = parse(&simulate_with_predictions(proposal, None, trusted, 0.9).unwrap());
        assert_eq!(gated_in["x"], 42);

        // Below-threshold prediction is ignored — falls back to declared x=1.
        let untrusted = r#"{"a1":{"effects":{"x":42},"accuracy":0.5}}"#;
        let gated_out =
            parse(&simulate_with_predictions(proposal, None, untrusted, 0.9).unwrap());
        assert_eq!(gated_out["x"], 1);
    }
}