car-server-core 0.24.1

//! In-daemon execution of declarative agents, and the coder→agent build loop.
//!
//! Two pieces:
//! - [`DeclarativeAgentRunner`] runs a [`DeclarativeAgentSpec`] on an input —
//!   a model→tool loop executed entirely inside the daemon, with the tool set
//!   restricted to the spec's allowlist and policy-gated by the executor's
//!   [`InspectorChain`]. No external process.
//! - [`build_agent`] is the coder→agent loop: it asks the model for an agent
//!   spec that satisfies the user's intent, runs the spec's scenarios through
//!   the runner, and repairs until every scenario passes (or it gives up) —
//!   the same generate→verify→repair shape as contract derivation, so an
//!   Agent project never touches the file-editing native loop.


use car_engine::ToolExecutor;
use car_inference::tasks::generate::Message;
use car_inference::{GenerateParams, GenerateRequest};
use serde_json::Value;

pub use car_registry::declarative::{DeclarativeAgentSpec, Scenario};

use super::native_loop::TurnGenerator;
use super::shell_tool::WorktreeExecutor;

/// Result of one declarative-agent run.
///
/// Returned by [`DeclarativeAgentRunner::run`]. Failures are reported through
/// `error` rather than through a `Result`.
#[derive(Debug, Clone)]
pub struct AgentRunResult {
    /// The model's final text answer; empty when the run ended in an error.
    pub output: String,
    /// The model turn on which the run returned, or the turn budget when the
    /// budget was exhausted.
    pub turns: u32,
    /// Tool calls the model requested during the run, including calls that
    /// were refused or that failed.
    pub tool_calls: u32,
    /// `None` on success; otherwise `"inference failed: …"` or
    /// `"max_turns_exceeded"`.
    pub error: Option<String>,
}

/// Narrow `all` (tool schemas) to the ones the spec may use.
///
/// A schema is kept only when its `"name"` appears in `allow` and does not
/// appear in `deny`; a schema without a string `"name"` is treated as having
/// the empty name. The filter is **strict**: an empty or non-matching
/// allowlist yields ZERO tools, never the full set, and a denied tool is
/// removed even if it is also allowlisted. Input order is preserved.
pub fn select_tool_defs_strict(all: &[Value], allow: &[String], deny: &[String]) -> Vec<Value> {
    let named_in =
        |names: &[String], candidate: &str| names.iter().any(|n| n.as_str() == candidate);

    let mut selected = Vec::new();
    for def in all {
        let tool_name = def.get("name").and_then(Value::as_str).unwrap_or("");
        if !named_in(allow, tool_name) || named_in(deny, tool_name) {
            continue;
        }
        selected.push(def.clone());
    }
    selected
}

/// Runs a declarative agent in-daemon.
///
/// Borrows everything it needs for the lifetime `'a`; construct one with
/// [`DeclarativeAgentRunner::new`].
pub struct DeclarativeAgentRunner<'a> {
    /// The agent definition: identity, standing goal, and tool allow/deny lists.
    spec: &'a DeclarativeAgentSpec,
    /// Produces each model turn.
    generator: &'a dyn TurnGenerator,
    /// Supplies the tool schemas and executes the tool calls the spec permits.
    executor: &'a WorktreeExecutor,
    /// Upper bound on model turns per run (`new` sets 12).
    max_turns: u32,
    /// `max_tokens` sent with every turn's request (`new` sets 2048).
    max_tokens_per_turn: usize,
}

impl<'a> DeclarativeAgentRunner<'a> {
    pub fn new(
        spec: &'a DeclarativeAgentSpec,
        generator: &'a dyn TurnGenerator,
        executor: &'a WorktreeExecutor,
    ) -> Self {
        Self {
            spec,
            generator,
            executor,
            max_turns: 12,
            max_tokens_per_turn: 2048,
        }
    }

    fn system_prompt(&self) -> String {
        let mut p = self.spec.identity.trim().to_string();
        if !self.spec.standing_goal.trim().is_empty() {
            p.push_str("\n\nStanding goal: ");
            p.push_str(self.spec.standing_goal.trim());
        }
        p
    }

    /// Run the agent on `input`, returning its final text answer.
    pub async fn run(&self, input: &str) -> AgentRunResult {
        let tools = select_tool_defs_strict(
            &self.executor.all_tool_defs(),
            &self.spec.tools,
            &self.spec.denied_tools,
        );
        let tools = if tools.is_empty() { None } else { Some(tools) };

        let mut messages = vec![
            Message::System {
                content: self.system_prompt(),
            },
            Message::User {
                content: input.to_string(),
            },
        ];

        let mut tool_calls_total = 0u32;
        for turn in 1..=self.max_turns {
            let req = GenerateRequest {
                prompt: input.to_string(),
                params: GenerateParams {
                    temperature: 0.0,
                    max_tokens: self.max_tokens_per_turn,
                    // Deterministic tool use, not open reasoning: force thinking
                    // OFF. Hybrid-thinking models (Qwen3) otherwise burn the
                    // whole budget inside an unclosed `<think>` and return empty
                    // text — the same failure the coder's contract derivation
                    // hit. Route on the Code hint so a capable model wins.
                    thinking: car_inference::tasks::generate::ThinkingMode::Off,
                    ..Default::default()
                },
                tools: tools.clone(),
                messages: Some(messages.clone()),
                intent: Some(car_inference::IntentHint {
                    task: Some(car_inference::TaskHint::Code),
                    // A declarative agent's correctness matters more than its
                    // latency (it's verified against scenarios at build time and
                    // invoked deliberately) — run it on the most capable model.
                    prefer_quality: true,
                    ..Default::default()
                }),
                ..Default::default()
            };
            let result = match self.generator.generate(req).await {
                Ok(r) => r,
                Err(e) => {
                    return AgentRunResult {
                        output: String::new(),
                        turns: turn,
                        tool_calls: tool_calls_total,
                        error: Some(format!("inference failed: {e}")),
                    }
                }
            };

            if result.tool_calls.is_empty() {
                return AgentRunResult {
                    output: result.text,
                    turns: turn,
                    tool_calls: tool_calls_total,
                    error: None,
                };
            }

            let mut calls = result.tool_calls.clone();
            for (i, call) in calls.iter_mut().enumerate() {
                if call.id.is_none() {
                    call.id = Some(format!("call_{turn}_{i}"));
                }
            }
            messages.push(Message::Assistant {
                content: result.text.clone(),
                tool_calls: calls.clone(),
            });
            for call in &calls {
                let params = Value::Object(call.arguments.clone().into_iter().collect());
                // The allowlist already removed disallowed tools from the model's
                // view; this is the hard backstop if a name leaks in anyway.
                let (_, content) = if tools_contains(&self.spec.tools, &call.name)
                    && !self.spec.denied_tools.iter().any(|d| d == &call.name)
                {
                    match self.executor.execute(&call.name, &params).await {
                        Ok(v) => (true, v.to_string()),
                        Err(e) => (false, format!("ERROR: {e}")),
                    }
                } else {
                    (
                        false,
                        format!("ERROR: tool '{}' is not allowed for this agent", call.name),
                    )
                };
                tool_calls_total += 1;
                messages.push(Message::ToolResult {
                    tool_use_id: call.id.clone().expect("assigned above"),
                    content,
                });
            }
        }

        AgentRunResult {
            output: String::new(),
            turns: self.max_turns,
            tool_calls: tool_calls_total,
            error: Some("max_turns_exceeded".into()),
        }
    }
}

/// Whether `name` appears in `allow` (exact, case-sensitive match).
fn tools_contains(allow: &[String], name: &str) -> bool {
    allow.iter().map(String::as_str).any(|listed| listed == name)
}

/// Tally of a scenario run: how many scenarios passed, how many were run, and
/// one rendered message per failing scenario (suitable for a repair prompt).
#[derive(Debug, Clone)]
pub struct ScenarioResults {
    /// Number of scenarios that passed.
    pub passed: usize,
    /// Number of scenarios evaluated.
    pub total: usize,
    /// One human-readable description per failing scenario.
    pub failures: Vec<String>,
}

impl ScenarioResults {
    /// `true` when every evaluated scenario passed.
    ///
    /// This is vacuously `true` when `total` is 0, so callers that require at
    /// least one scenario must check for that themselves.
    #[must_use]
    pub fn all_passed(&self) -> bool {
        self.passed == self.total
    }
}

/// Run every scenario of `spec` through a [`DeclarativeAgentRunner`] and tally
/// the outcome.
///
/// A scenario passes when its run finishes without an error and the output
/// contains the scenario's `expect` as a case-insensitive substring. Each
/// failing scenario contributes one message to `failures` naming its 1-based
/// index, input, expectation, the (truncated) actual output, and any run error.
///
/// NOTE(review): an empty `expect` is a substring of every output, so such a
/// scenario passes whenever the run has no error — presumably spec validation
/// is expected to reject it; confirm.
pub async fn run_scenarios(
    spec: &DeclarativeAgentSpec,
    generator: &dyn TurnGenerator,
    executor: &WorktreeExecutor,
) -> ScenarioResults {
    let runner = DeclarativeAgentRunner::new(spec, generator, executor);
    let total = spec.scenarios.len();
    let mut failures = Vec::new();

    for (index, scenario) in spec.scenarios.iter().enumerate() {
        let outcome = runner.run(&scenario.input).await;
        // Case-insensitive substring match: `expect` is a property the output
        // must contain, and small models vary capitalization freely — an exact
        // match would reject "Hello" against an expect of "hello".
        let matched = match &outcome.error {
            Some(_) => false,
            None => outcome
                .output
                .to_lowercase()
                .contains(&scenario.expect.to_lowercase()),
        };
        if matched {
            continue;
        }

        let error_note = match &outcome.error {
            Some(e) => format!(" [error: {e}]"),
            None => String::new(),
        };
        failures.push(format!(
            "scenario #{} (input {:?}) expected output containing {:?} but got {:?}{}",
            index + 1,
            scenario.input,
            scenario.expect,
            truncate(&outcome.output, 200),
            error_note
        ));
    }

    // Every scenario either passed or added exactly one failure message.
    ScenarioResults {
        passed: total - failures.len(),
        total,
        failures,
    }
}

/// Shorten `s` to at most `max` bytes for display.
///
/// Strings that already fit are returned unchanged. Longer ones are cut at the
/// last UTF-8 character boundary at or below `max` and get a trailing `…`.
fn truncate(s: &str, max: usize) -> String {
    if s.len() > max {
        // Byte 0 is always a boundary, so the search cannot come up empty.
        let cut = (0..=max)
            .rev()
            .find(|&idx| s.is_char_boundary(idx))
            .unwrap_or(0);
        format!("{}…", &s[..cut])
    } else {
        s.to_string()
    }
}

// ---------------------------------------------------------------------------
// The coder→agent build loop
// ---------------------------------------------------------------------------

/// Tunables for [`build_agent`].
#[derive(Debug, Clone)]
pub struct BuildAgentConfig {
    /// Id forced onto the generated spec, whatever the model outputs.
    pub agent_id: String,
    /// Tool names the model may choose from; any other tool name in the
    /// model's output is dropped from the spec.
    pub available_tools: Vec<String>,
    /// Maximum generate→verify→repair attempts; treated as at least 1.
    pub max_attempts: u32,
}

/// Outcome of the build loop.
pub struct BuildAgentOutcome {
    /// The best spec produced: on success, the spec that validated and passed
    /// all its scenarios; otherwise the most recent attempt that parsed into a
    /// spec, or `None` if no attempt did.
    pub spec: Option<DeclarativeAgentSpec>,
    /// `true` only when a spec validated and all of its scenarios passed.
    pub passed: bool,
    /// Issues from the final attempt only (earlier attempts' issues are not
    /// kept); empty on success.
    pub issues: Vec<String>,
    /// The attempt on which the build succeeded, or the attempt budget when it
    /// did not.
    pub attempts: u32,
}

/// Render the spec-generation prompt for `intent`.
///
/// The prompt describes the JSON shape to return, lists `available_tools`
/// comma-separated (or `(none)` when empty), and states the scenario rules.
/// When `feedback` is non-empty, a repair section with one bullet per item is
/// appended.
fn build_prompt(intent: &str, available_tools: &[String], feedback: &[String]) -> String {
    let tool_list = match available_tools {
        [] => "(none)".to_string(),
        names => names.join(", "),
    };

    let mut prompt = format!(
        "You are designing an in-daemon CAR agent from a user's request. Output ONLY a JSON \
         object (no prose, no fences) describing the agent:\n\
         {{\n  \"name\": \"short human name\",\n  \"identity\": \"system prompt — who the agent \
         is and how it behaves\",\n  \"tools\": [\"only names from the AVAILABLE TOOLS list\"],\n  \
         \"standing_goal\": \"the agent's persistent objective\",\n  \"scenarios\": [{{\"input\": \
         \"an example request\", \"expect\": \"a stable substring the correct output must \
         contain\"}}]\n}}\n\n\
         User request:\n{intent}\n\n\
         AVAILABLE TOOLS (use only these names; pick the minimal set, or [] for a pure-reasoning \
         agent):\n{}\n\n\
         Rules:\n\
         - 1 to 3 scenarios. CRITICAL: each `expect` must be the SHORTEST string that proves the \
           answer is correct — usually a single word, number, or short phrase taken from the \
           USER'S REQUEST itself. NEVER a full sentence you imagine the agent saying, and never \
           a value you haven't computed.\n\
           Example — request \"a greeter that always says hello\": a good scenario is \
           {{\"input\": \"hi\", \"expect\": \"hello\"}} (matched case-insensitively). A BAD scenario \
           invents a whole reply like \"Hello! How can I help you today?\".\n\
           Example — request \"converts Celsius to Fahrenheit\": for input \"100\" the `expect` is \
           \"212\" (you must actually compute 100*9/5+32), NOT \"273.15\" (that is Kelvin) and NOT a \
           sentence.\n\
         - `expect` is matched as a case-insensitive substring of the agent's output.\n\
         - Prefer no tools unless the task truly needs to read/write files or run commands.\n\
         - Write `identity` so the agent answers DIRECTLY and deterministically (it should perform \
           the task, not chat about it) — terse enough to reliably contain each `expect`.\n",
        tool_list
    );

    if feedback.is_empty() {
        return prompt;
    }
    prompt.push_str("\nYour previous attempt did not pass its own scenarios — revise so they do:\n");
    prompt.extend(feedback.iter().map(|f| format!("- {f}\n")));
    prompt
}

/// Pull a JSON object out of free-form model output.
///
/// The span from the first `{` to the last `}` is parsed first, which
/// tolerates leading/trailing prose and code fences. If that span is not valid
/// JSON (for example because prose after the object contains a stray `}`), the
/// first complete JSON value starting at the first `{` is parsed instead and
/// whatever follows it is ignored.
///
/// # Errors
///
/// Returns a message when the text has no `{` or no `}`, when the last `}`
/// precedes the first `{`, or when neither parse succeeds (the message then
/// carries the error from the first-`{`-to-last-`}` parse).
pub(crate) fn extract_json_object(text: &str) -> Result<Value, String> {
    let start = text.find('{').ok_or("no JSON object in output")?;
    let end = text.rfind('}').ok_or("no closing brace in output")?;
    if end < start {
        return Err("malformed JSON object".into());
    }

    match serde_json::from_str::<Value>(&text[start..=end]) {
        Ok(value) => Ok(value),
        Err(span_error) => {
            // Fallback: stream-parse just the first value. The slice begins
            // with `{`, so a successful parse is necessarily an object.
            let first_value = serde_json::Deserializer::from_str(&text[start..])
                .into_iter::<Value>()
                .next();
            match first_value {
                Some(Ok(value)) => Ok(value),
                _ => Err(format!("invalid JSON: {span_error}")),
            }
        }
    }
}

/// Generate an agent spec from `intent`, verify it against its own scenarios,
/// and repair until they pass or the attempt budget runs out.
///
/// Each attempt prompts the model (with the previous attempt's problems as
/// feedback), parses the reply into a spec — forcing `cfg.agent_id` and
/// dropping tool names that are not in `cfg.available_tools` — validates it,
/// and runs its scenarios. `cfg.max_attempts` is treated as at least 1.
///
/// On success the outcome carries the passing spec and the attempt number. On
/// failure it carries the most recent spec that parsed (if any), the issues of
/// the final attempt, and `attempts` equal to the budget.
pub async fn build_agent(
    intent: &str,
    generator: &dyn TurnGenerator,
    executor: &WorktreeExecutor,
    cfg: &BuildAgentConfig,
) -> BuildAgentOutcome {
    let attempt_limit = cfg.max_attempts.max(1);
    let mut repair_notes: Vec<String> = Vec::new();
    let mut latest_spec: Option<DeclarativeAgentSpec> = None;
    let mut latest_issues: Vec<String> = Vec::new();

    for attempt in 1..=attempt_limit {
        let prompt = build_prompt(intent, &cfg.available_tools, &repair_notes);
        let request = GenerateRequest {
            prompt: prompt.clone(),
            params: GenerateParams {
                temperature: 0.0,
                // Structured JSON extraction: thinking is forced OFF and the
                // budget leaves room for the object (hybrid models otherwise
                // return empty text after an unclosed `<think>`; surfaced live
                // on Qwen3-1.7B during the agent-build shakedown).
                max_tokens: 2048,
                thinking: car_inference::tasks::generate::ThinkingMode::Off,
                ..Default::default()
            },
            messages: Some(vec![Message::User { content: prompt }]),
            intent: Some(car_inference::IntentHint {
                task: Some(car_inference::TaskHint::Code),
                require: vec![car_inference::ModelCapability::Code],
                // Building an agent is infrequent and quality-critical — a weak
                // code model writes broken specs/scenarios (the live shakedown
                // saw Qwen3-1.7B win on cost and fail). Prefer the most capable
                // code model, not the cheapest.
                prefer_quality: true,
                ..Default::default()
            }),
            ..Default::default()
        };

        let text = match generator.generate(request).await {
            Ok(reply) => reply.text,
            Err(e) => {
                // Only the reported issues change; the feedback fed into the
                // next prompt is left as it was.
                latest_issues = vec![format!("generation failed: {e}")];
                continue;
            }
        };

        // Parse the reply into a spec, forcing the id and clamping the tools to
        // the real available set. Either failure becomes the sole feedback item.
        let parsed = extract_json_object(&text)
            .map_err(|e| format!("output did not parse: {e}. Return ONLY the JSON object."))
            .and_then(|value| parse_spec(&value, &cfg.agent_id, &cfg.available_tools));
        let mut spec = match parsed {
            Ok(spec) => spec,
            Err(issue) => {
                repair_notes = vec![issue.clone()];
                latest_issues = vec![issue];
                continue;
            }
        };
        spec.enabled = true;

        // Validation problems, a missing scenario list, or scenario failures
        // all take the same path: record them and retry.
        let problems = spec.validate();
        let issues = if !problems.is_empty() {
            problems
        } else if spec.scenarios.is_empty() {
            vec!["include at least one scenario".into()]
        } else {
            let results = run_scenarios(&spec, generator, executor).await;
            if results.all_passed() {
                return BuildAgentOutcome {
                    spec: Some(spec),
                    passed: true,
                    issues: Vec::new(),
                    attempts: attempt,
                };
            }
            results.failures
        };
        repair_notes = issues.clone();
        latest_issues = issues;
        latest_spec = Some(spec);
    }

    BuildAgentOutcome {
        spec: latest_spec,
        passed: false,
        issues: latest_issues,
        attempts: attempt_limit,
    }
}

/// Build a [`DeclarativeAgentSpec`] from the model's JSON.
///
/// The id is always `agent_id`; a blank `name` falls back to it too. `tools`
/// keeps only string entries that appear in `available_tools` (the model can't
/// invent tools), `denied_tools` starts empty, and scenarios missing a string
/// `input` or `expect` are skipped. Missing or non-string text fields become
/// empty strings. `name` and `identity` are trimmed; `standing_goal` is stored
/// as given.
///
/// Currently always returns `Ok`.
fn parse_spec(
    value: &Value,
    agent_id: &str,
    available_tools: &[String],
) -> Result<DeclarativeAgentSpec, String> {
    /// String value at `key`, or `""` when absent or not a string.
    fn text_at<'v>(value: &'v Value, key: &str) -> &'v str {
        value.get(key).and_then(Value::as_str).unwrap_or("")
    }

    let name = match text_at(value, "name").trim() {
        "" => agent_id.to_string(),
        given => given.to_string(),
    };

    let tools: Vec<String> = match value.get("tools").and_then(Value::as_array) {
        Some(entries) => entries
            .iter()
            .filter_map(Value::as_str)
            .filter(|candidate| {
                available_tools
                    .iter()
                    .any(|known| known.as_str() == *candidate)
            })
            .map(str::to_owned)
            .collect(),
        None => Vec::new(),
    };

    let scenarios: Vec<Scenario> = match value.get("scenarios").and_then(Value::as_array) {
        Some(entries) => entries
            .iter()
            .filter_map(|entry| {
                let input = entry.get("input").and_then(Value::as_str)?;
                let expect = entry.get("expect").and_then(Value::as_str)?;
                Some(Scenario {
                    input: input.to_owned(),
                    expect: expect.to_owned(),
                })
            })
            .collect(),
        None => Vec::new(),
    };

    Ok(DeclarativeAgentSpec {
        id: agent_id.to_string(),
        name,
        identity: text_at(value, "identity").trim().to_string(),
        tools,
        denied_tools: Vec::new(),
        standing_goal: text_at(value, "standing_goal").to_string(),
        scenarios,
        enabled: true,
    })
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::coder::native_loop::TurnGenerator as _;
    use async_trait::async_trait;
    use car_inference::{GenerateRequest, InferenceResult};
    use serde_json::json;
    use std::sync::atomic::{AtomicUsize, Ordering};

    /// Scripted stand-in for the model: each `generate` call returns the next
    /// entry of `turns`, regardless of the request, and fails with
    /// "script exhausted" once the entries run out.
    struct Script {
        turns: Vec<InferenceResult>,
        cursor: AtomicUsize,
    }
    /// Build one scripted reply with the given text and tool calls; the
    /// remaining fields are fixed placeholder values.
    fn turn(text: &str, tool_calls: Value) -> InferenceResult {
        serde_json::from_value(json!({
            "text": text, "tool_calls": tool_calls,
            "trace_id": "t", "model_used": "scripted", "latency_ms": 0,
        }))
        .unwrap()
    }
    #[async_trait]
    impl TurnGenerator for Script {
        async fn generate(&self, _req: GenerateRequest) -> Result<InferenceResult, String> {
            // Atomically claim the next scripted turn.
            let i = self.cursor.fetch_add(1, Ordering::SeqCst);
            self.turns.get(i).cloned().ok_or_else(|| "script exhausted".into())
        }
    }

    /// Minimal enabled spec with the given tool allowlist, no denied tools,
    /// and no scenarios.
    fn spec_with(tools: Vec<&str>) -> DeclarativeAgentSpec {
        DeclarativeAgentSpec {
            id: "t".into(),
            name: "T".into(),
            identity: "You answer.".into(),
            tools: tools.into_iter().map(String::from).collect(),
            denied_tools: vec![],
            standing_goal: "help".into(),
            scenarios: vec![],
            enabled: true,
        }
    }

    // The allowlist filter is strict: no match means no tools, and the deny
    // list wins over the allowlist.
    #[test]
    fn strict_allowlist_empty_intersection_is_zero_tools() {
        let all = WorktreeExecutor::tool_defs();
        assert!(!all.is_empty());
        // Allowlist that matches nothing → ZERO, never all.
        assert!(select_tool_defs_strict(&all, &["nonexistent".into()], &[]).is_empty());
        // Empty allowlist → zero.
        assert!(select_tool_defs_strict(&all, &[], &[]).is_empty());
        // A real name → exactly that one.
        let sel = select_tool_defs_strict(&all, &["read_file".into()], &[]);
        assert_eq!(sel.len(), 1);
        assert_eq!(sel[0]["name"], "read_file");
        // Denied even if allowed.
        assert!(select_tool_defs_strict(&all, &["read_file".into()], &["read_file".into()]).is_empty());
    }

    // A reply with no tool calls is returned as the final answer.
    #[tokio::test]
    async fn runner_returns_text_answer_with_no_tools() {
        let dir = tempfile::tempdir().unwrap();
        let exec = WorktreeExecutor::new(dir.path());
        let script = Script {
            turns: vec![turn("the answer is 42", json!([]))],
            cursor: AtomicUsize::new(0),
        };
        let spec = spec_with(vec![]);
        let runner = DeclarativeAgentRunner::new(&spec, &script, &exec);
        let r = runner.run("what is the answer?").await;
        assert_eq!(r.output, "the answer is 42");
        assert_eq!(r.tool_calls, 0);
        assert!(r.error.is_none());
    }

    // An allowlisted tool call is executed and counted, and the following
    // turn's text becomes the output.
    #[tokio::test]
    async fn runner_executes_an_allowed_tool() {
        let dir = tempfile::tempdir().unwrap();
        std::fs::write(dir.path().join("data.txt"), "secret content").unwrap();
        let exec = WorktreeExecutor::new(dir.path());
        let script = Script {
            turns: vec![
                turn("", json!([{"id":"c1","name":"read_file","arguments":{"path":"data.txt"}}])),
                turn("the file says secret content", json!([])),
            ],
            cursor: AtomicUsize::new(0),
        };
        let spec = spec_with(vec!["read_file"]);
        let runner = DeclarativeAgentRunner::new(&spec, &script, &exec);
        let r = runner.run("read data.txt").await;
        assert!(r.output.contains("secret content"));
        assert_eq!(r.tool_calls, 1);
    }

    // The runtime backstop: a tool outside the allowlist must not run even
    // when the model asks for it.
    #[tokio::test]
    async fn runner_blocks_a_disallowed_tool_even_if_the_model_calls_it() {
        let dir = tempfile::tempdir().unwrap();
        let exec = WorktreeExecutor::new(dir.path());
        // Agent allows only read_file, but the model tries write_file.
        let script = Script {
            turns: vec![
                turn("", json!([{"id":"c1","name":"write_file","arguments":{"path":"x","content":"y"}}])),
                turn("done", json!([])),
            ],
            cursor: AtomicUsize::new(0),
        };
        let spec = spec_with(vec!["read_file"]);
        let runner = DeclarativeAgentRunner::new(&spec, &script, &exec);
        let _ = runner.run("write a file").await;
        // The disallowed write must not have happened.
        assert!(!dir.path().join("x").exists(), "disallowed tool executed");
    }

    // Happy path: the first generated spec passes its only scenario. The same
    // script serves both the spec generation and the scenario run.
    #[tokio::test]
    async fn build_agent_generates_then_passes_scenarios() {
        let dir = tempfile::tempdir().unwrap();
        let exec = WorktreeExecutor::new(dir.path());
        // Turn 1: the agent spec (a greeter with one scenario).
        // Turn 2: the scenario run — agent answers containing "hello".
        let script = Script {
            turns: vec![
                turn(
                    r#"{"name":"Greeter","identity":"You greet people warmly.","tools":[],
                        "standing_goal":"greet","scenarios":[{"input":"hi","expect":"hello"}]}"#,
                    json!([]),
                ),
                turn("hello there, friend!", json!([])),
            ],
            cursor: AtomicUsize::new(0),
        };
        let cfg = BuildAgentConfig {
            agent_id: "greeter".into(),
            available_tools: vec!["read_file".into(), "write_file".into()],
            max_attempts: 3,
        };
        let outcome = build_agent("make a friendly greeter", &script, &exec, &cfg).await;
        assert!(outcome.passed, "issues: {:?}", outcome.issues);
        let spec = outcome.spec.unwrap();
        assert_eq!(spec.id, "greeter");
        assert_eq!(spec.name, "Greeter");
        assert_eq!(spec.scenarios.len(), 1);
    }

    // Tool names outside `available_tools` are stripped from the built spec.
    #[tokio::test]
    async fn build_agent_drops_invented_tool_names() {
        let dir = tempfile::tempdir().unwrap();
        let exec = WorktreeExecutor::new(dir.path());
        let script = Script {
            turns: vec![
                turn(
                    r#"{"name":"X","identity":"You help.","tools":["send_email","read_file"],
                        "standing_goal":"g","scenarios":[{"input":"q","expect":"a"}]}"#,
                    json!([]),
                ),
                turn("answer: a", json!([])),
            ],
            cursor: AtomicUsize::new(0),
        };
        let cfg = BuildAgentConfig {
            agent_id: "x".into(),
            available_tools: vec!["read_file".into()],
            max_attempts: 2,
        };
        let outcome = build_agent("intent", &script, &exec, &cfg).await;
        assert!(outcome.passed);
        // send_email isn't a real tool → dropped; read_file kept.
        assert_eq!(outcome.spec.unwrap().tools, vec!["read_file".to_string()]);
    }

    // A failing scenario triggers a second attempt, and the repaired spec is
    // the one returned.
    #[tokio::test]
    async fn build_agent_repairs_a_failing_scenario() {
        let dir = tempfile::tempdir().unwrap();
        let exec = WorktreeExecutor::new(dir.path());
        let script = Script {
            turns: vec![
                // Attempt 1 spec.
                turn(
                    r#"{"name":"A","identity":"v1","tools":[],"standing_goal":"g","scenarios":[{"input":"q","expect":"RIGHT"}]}"#,
                    json!([]),
                ),
                // Scenario run for attempt 1 → wrong.
                turn("WRONG", json!([])),
                // Attempt 2 spec (repaired).
                turn(
                    r#"{"name":"A","identity":"v2","tools":[],"standing_goal":"g","scenarios":[{"input":"q","expect":"RIGHT"}]}"#,
                    json!([]),
                ),
                // Scenario run for attempt 2 → right.
                turn("the RIGHT answer", json!([])),
            ],
            cursor: AtomicUsize::new(0),
        };
        let cfg = BuildAgentConfig { agent_id: "a".into(), available_tools: vec![], max_attempts: 3 };
        let outcome = build_agent("intent", &script, &exec, &cfg).await;
        assert!(outcome.passed);
        assert_eq!(outcome.attempts, 2);
        assert_eq!(outcome.spec.unwrap().identity, "v2");
    }
}