use car_engine::ToolExecutor;
use car_inference::tasks::generate::Message;
use car_inference::{GenerateParams, GenerateRequest};
use serde_json::Value;
pub use car_registry::declarative::{DeclarativeAgentSpec, Scenario};
use super::native_loop::TurnGenerator;
use super::shell_tool::WorktreeExecutor;
/// Outcome of a single `DeclarativeAgentRunner::run` call.
#[derive(Debug, Clone)]
pub struct AgentRunResult {
/// Final text produced by the model; empty when the run ended with an error.
pub output: String,
/// Turn number on which the run ended (equals the turn limit when it was exhausted).
pub turns: u32,
/// Number of tool calls the model requested, including calls that were refused.
pub tool_calls: u32,
/// `None` on success; otherwise a message such as `inference failed: …` or
/// `max_turns_exceeded`.
pub error: Option<String>,
}
/// Returns the tool definitions an agent may see under a strict allowlist.
///
/// A definition is kept only when its `"name"` string appears in `allow` and
/// does not appear in `deny`. Definitions without a string `"name"` are
/// treated as having the empty name. An empty `allow` therefore selects
/// nothing, and `deny` always wins over `allow`.
pub fn select_tool_defs_strict(all: &[Value], allow: &[String], deny: &[String]) -> Vec<Value> {
    let mut selected = Vec::new();
    for def in all {
        let name = match def.get("name").and_then(Value::as_str) {
            Some(n) => n,
            None => "",
        };
        let allowed = allow.iter().any(|entry| entry == name);
        let denied = deny.iter().any(|entry| entry == name);
        if allowed && !denied {
            selected.push(def.clone());
        }
    }
    selected
}
/// Runs a tool-calling conversation for one `DeclarativeAgentSpec`, borrowing
/// the spec, the generator and the executor for the lifetime `'a`.
pub struct DeclarativeAgentRunner<'a> {
/// Agent definition: supplies the identity, standing goal and tool allow/deny lists.
spec: &'a DeclarativeAgentSpec,
/// Model backend asked for one generation per turn.
generator: &'a dyn TurnGenerator,
/// Executes the tool calls the model requests.
executor: &'a WorktreeExecutor,
/// Upper bound on generation turns per run.
max_turns: u32,
/// `max_tokens` passed to the generator on every turn.
max_tokens_per_turn: usize,
}
impl<'a> DeclarativeAgentRunner<'a> {
    /// Creates a runner with the default budgets: 12 turns and 2048 tokens per turn.
    pub fn new(
        spec: &'a DeclarativeAgentSpec,
        generator: &'a dyn TurnGenerator,
        executor: &'a WorktreeExecutor,
    ) -> Self {
        Self {
            spec,
            generator,
            executor,
            max_turns: 12,
            max_tokens_per_turn: 2048,
        }
    }

    /// Builds the system prompt: the trimmed identity, followed by a
    /// `Standing goal:` paragraph when the spec's standing goal is not blank.
    fn system_prompt(&self) -> String {
        let mut p = self.spec.identity.trim().to_string();
        let goal = self.spec.standing_goal.trim();
        if !goal.is_empty() {
            p.push_str("\n\nStanding goal: ");
            p.push_str(goal);
        }
        p
    }

    /// Returns `true` when `name` is on the spec's allowlist and not on its denylist.
    fn tool_allowed(&self, name: &str) -> bool {
        tools_contains(&self.spec.tools, name)
            && !self.spec.denied_tools.iter().any(|d| d == name)
    }

    /// Runs the agent on `input` until the model answers without tool calls,
    /// generation fails, or the turn budget is exhausted.
    ///
    /// Each turn sends the full message history (and `input` as the raw
    /// prompt) to the generator. Only tool definitions that pass
    /// [`select_tool_defs_strict`] are advertised; when none pass, no tools
    /// are sent at all. Tool calls are re-checked against the allow/deny lists
    /// before execution, so a call to a tool that was never advertised is
    /// answered with an error message instead of being executed.
    ///
    /// The returned [`AgentRunResult`] carries the final text on success, or
    /// an `error` of `inference failed: …` / `max_turns_exceeded` with an
    /// empty `output`.
    pub async fn run(&self, input: &str) -> AgentRunResult {
        let tools = select_tool_defs_strict(
            &self.executor.all_tool_defs(),
            &self.spec.tools,
            &self.spec.denied_tools,
        );
        let tools = if tools.is_empty() { None } else { Some(tools) };
        let mut messages = vec![
            Message::System {
                content: self.system_prompt(),
            },
            Message::User {
                content: input.to_string(),
            },
        ];
        let mut tool_calls_total = 0u32;

        for turn in 1..=self.max_turns {
            let req = GenerateRequest {
                prompt: input.to_string(),
                params: GenerateParams {
                    temperature: 0.0,
                    max_tokens: self.max_tokens_per_turn,
                    thinking: car_inference::tasks::generate::ThinkingMode::Off,
                    ..Default::default()
                },
                tools: tools.clone(),
                messages: Some(messages.clone()),
                intent: Some(car_inference::IntentHint {
                    task: Some(car_inference::TaskHint::Code),
                    prefer_quality: true,
                    ..Default::default()
                }),
                ..Default::default()
            };
            let result = match self.generator.generate(req).await {
                Ok(r) => r,
                Err(e) => {
                    return AgentRunResult {
                        output: String::new(),
                        turns: turn,
                        tool_calls: tool_calls_total,
                        error: Some(format!("inference failed: {e}")),
                    };
                }
            };

            // No tool calls means the model has produced its final answer.
            if result.tool_calls.is_empty() {
                return AgentRunResult {
                    output: result.text,
                    turns: turn,
                    tool_calls: tool_calls_total,
                    error: None,
                };
            }

            // Every call needs an id so its result can be paired with it in
            // the history; synthesize one when the model omitted it.
            let mut calls = result.tool_calls;
            for (i, call) in calls.iter_mut().enumerate() {
                if call.id.is_none() {
                    call.id = Some(format!("call_{turn}_{i}"));
                }
            }
            // The assistant message must precede the tool results it triggered.
            messages.push(Message::Assistant {
                content: result.text,
                tool_calls: calls.clone(),
            });

            for call in &calls {
                let params = Value::Object(call.arguments.clone().into_iter().collect());
                let content = if self.tool_allowed(&call.name) {
                    match self.executor.execute(&call.name, &params).await {
                        Ok(v) => v.to_string(),
                        Err(e) => format!("ERROR: {e}"),
                    }
                } else {
                    format!("ERROR: tool '{}' is not allowed for this agent", call.name)
                };
                // Refused calls are counted too: the counter tracks requests.
                tool_calls_total += 1;
                messages.push(Message::ToolResult {
                    tool_use_id: call.id.clone().expect("assigned above"),
                    content,
                });
            }
        }

        AgentRunResult {
            output: String::new(),
            turns: self.max_turns,
            tool_calls: tool_calls_total,
            error: Some("max_turns_exceeded".into()),
        }
    }
}
/// Reports whether `name` exactly matches one of the entries in `allow`.
fn tools_contains(allow: &[String], name: &str) -> bool {
    for candidate in allow {
        if candidate.as_str() == name {
            return true;
        }
    }
    false
}
/// Tally produced by running every scenario of an agent spec.
#[derive(Debug, Clone)]
pub struct ScenarioResults {
    /// Number of scenarios whose output contained the expected substring.
    pub passed: usize,
    /// Number of scenarios that were run.
    pub total: usize,
    /// One human-readable description per failed scenario.
    pub failures: Vec<String>,
}

impl ScenarioResults {
    /// Returns `true` when every scenario passed (vacuously true for zero scenarios).
    pub fn all_passed(&self) -> bool {
        self.passed == self.total
    }
}
/// Runs each scenario in `spec` through a fresh runner and tallies the results.
///
/// A scenario passes when the run reports no error and the output contains the
/// scenario's `expect` text, compared case-insensitively. Every failure is
/// recorded as a message naming the scenario number, its input, the expected
/// text, the output truncated to 200 bytes, and the run error if there was one.
pub async fn run_scenarios(
    spec: &DeclarativeAgentSpec,
    generator: &dyn TurnGenerator,
    executor: &WorktreeExecutor,
) -> ScenarioResults {
    let total = spec.scenarios.len();
    let mut failures = Vec::new();

    for (index, scenario) in spec.scenarios.iter().enumerate() {
        let runner = DeclarativeAgentRunner::new(spec, generator, executor);
        let outcome = runner.run(&scenario.input).await;

        let wanted = scenario.expect.to_lowercase();
        let matched = outcome.error.is_none() && outcome.output.to_lowercase().contains(&wanted);
        if matched {
            continue;
        }

        let error_note = match &outcome.error {
            Some(e) => format!(" [error: {e}]"),
            None => String::new(),
        };
        failures.push(format!(
            "scenario #{} (input {:?}) expected output containing {:?} but got {:?}{}",
            index + 1,
            scenario.input,
            scenario.expect,
            truncate(&outcome.output, 200),
            error_note
        ));
    }

    // Each scenario either passes or contributes exactly one failure entry.
    ScenarioResults {
        passed: total - failures.len(),
        total,
        failures,
    }
}
/// Shortens `s` to at most `max` bytes, appending `…` when anything was cut.
///
/// The cut point is moved back to the nearest character boundary so a
/// multi-byte character is never split. Strings that already fit are returned
/// unchanged and without the ellipsis.
fn truncate(s: &str, max: usize) -> String {
    if s.len() <= max {
        return s.to_owned();
    }
    // Index 0 is always a boundary, so the search cannot come up empty.
    let cut = (0..=max)
        .rev()
        .find(|&i| s.is_char_boundary(i))
        .unwrap_or(0);
    let mut shortened = String::with_capacity(cut + '…'.len_utf8());
    shortened.push_str(&s[..cut]);
    shortened.push('…');
    shortened
}
/// Settings for `build_agent`.
#[derive(Debug, Clone)]
pub struct BuildAgentConfig {
    /// Id assigned to the generated agent spec.
    pub agent_id: String,
    /// Tool names the generated agent may choose from; any other name the
    /// model proposes is dropped.
    pub available_tools: Vec<String>,
    /// Maximum number of generate-and-verify attempts; values below 1 are treated as 1.
    pub max_attempts: u32,
}
/// Result of a `build_agent` call.
pub struct BuildAgentOutcome {
/// The accepted spec when `passed` is true; otherwise the most recent spec that
/// was successfully parsed, or `None` if no attempt got that far.
pub spec: Option<DeclarativeAgentSpec>,
/// `true` only when a generated spec passed all of its own scenarios.
pub passed: bool,
/// Problems reported by the last attempt; empty when `passed` is true.
pub issues: Vec<String>,
/// Number of attempts used: the successful attempt's index, or the attempt limit on failure.
pub attempts: u32,
}
/// Assembles the instruction prompt that asks the model to design an agent.
///
/// The prompt embeds the user's `intent`, the comma-separated
/// `available_tools` (or `(none)` when the list is empty) and the fixed rules
/// for writing scenarios. When `feedback` is non-empty, a trailing section
/// lists each entry as a bullet so the model can revise its previous attempt.
fn build_prompt(intent: &str, available_tools: &[String], feedback: &[String]) -> String {
    let tool_list = match available_tools {
        [] => "(none)".to_string(),
        names => names.join(", "),
    };

    let mut prompt = format!(
        "You are designing an in-daemon CAR agent from a user's request. Output ONLY a JSON \
         object (no prose, no fences) describing the agent:\n\
         {{\n \"name\": \"short human name\",\n \"identity\": \"system prompt — who the agent \
         is and how it behaves\",\n \"tools\": [\"only names from the AVAILABLE TOOLS list\"],\n \
         \"standing_goal\": \"the agent's persistent objective\",\n \"scenarios\": [{{\"input\": \
         \"an example request\", \"expect\": \"a stable substring the correct output must \
         contain\"}}]\n}}\n\n\
         User request:\n{intent}\n\n\
         AVAILABLE TOOLS (use only these names; pick the minimal set, or [] for a pure-reasoning \
         agent):\n{}\n\n\
         Rules:\n\
         - 1 to 3 scenarios. CRITICAL: each `expect` must be the SHORTEST string that proves the \
         answer is correct — usually a single word, number, or short phrase taken from the \
         USER'S REQUEST itself. NEVER a full sentence you imagine the agent saying, and never \
         a value you haven't computed.\n\
         Example — request \"a greeter that always says hello\": a good scenario is \
         {{\"input\": \"hi\", \"expect\": \"hello\"}} (matched case-insensitively). A BAD scenario \
         invents a whole reply like \"Hello! How can I help you today?\".\n\
         Example — request \"converts Celsius to Fahrenheit\": for input \"100\" the `expect` is \
         \"212\" (you must actually compute 100*9/5+32), NOT \"273.15\" (that is Kelvin) and NOT a \
         sentence.\n\
         - `expect` is matched as a case-insensitive substring of the agent's output.\n\
         - Prefer no tools unless the task truly needs to read/write files or run commands.\n\
         - Write `identity` so the agent answers DIRECTLY and deterministically (it should perform \
         the task, not chat about it) — terse enough to reliably contain each `expect`.\n",
        tool_list
    );

    if feedback.is_empty() {
        return prompt;
    }

    prompt.push_str("\nYour previous attempt did not pass its own scenarios — revise so they do:\n");
    for item in feedback {
        prompt.push_str("- ");
        prompt.push_str(item);
        prompt.push('\n');
    }
    prompt
}
/// Extracts the JSON object embedded in free-form model output.
///
/// The text from the first `{` to the last `}` is parsed first, which copes
/// with leading prose and code fences. If that slice is not valid JSON — for
/// example because the model appended commentary that itself contains a `}` —
/// a second attempt parses just the first complete JSON value starting at the
/// first `{` and ignores whatever follows it.
///
/// # Errors
///
/// Returns a message when the text has no `{`, no `}`, the last `}` precedes
/// the first `{`, or no valid JSON object starts at the first `{`. In the last
/// case the message carries the parse error of the first (widest) attempt.
pub(crate) fn extract_json_object(text: &str) -> Result<Value, String> {
    let start = text.find('{').ok_or("no JSON object in output")?;
    let end = text.rfind('}').ok_or("no closing brace in output")?;
    if end < start {
        return Err("malformed JSON object".into());
    }

    let widest_err = match serde_json::from_str::<Value>(&text[start..=end]) {
        Ok(value) => return Ok(value),
        Err(e) => e,
    };

    // An object is self-delimiting, so the stream yields it without looking at
    // the trailing text that made the widest slice invalid.
    let mut stream = serde_json::Deserializer::from_str(&text[start..]).into_iter::<Value>();
    match stream.next() {
        Some(Ok(value)) => Ok(value),
        _ => Err(format!("invalid JSON: {widest_err}")),
    }
}
/// Generates an agent spec from `intent` and keeps revising it until the spec
/// passes its own scenarios or the attempt budget runs out.
///
/// Each attempt asks the generator for a JSON spec, parses it, validates it,
/// requires at least one scenario, and then runs the scenarios. Whatever went
/// wrong is fed back into the next attempt's prompt; a generation failure is
/// recorded as the latest issue but leaves the feedback untouched. At least
/// one attempt is always made, even when `cfg.max_attempts` is 0.
///
/// On success the outcome holds the passing spec and the attempt number. On
/// failure it holds the last spec that parsed (if any), the last recorded
/// issues, and the attempt limit.
pub async fn build_agent(
    intent: &str,
    generator: &dyn TurnGenerator,
    executor: &WorktreeExecutor,
    cfg: &BuildAgentConfig,
) -> BuildAgentOutcome {
    let attempt_limit = cfg.max_attempts.max(1);
    let mut feedback: Vec<String> = Vec::new();
    let mut latest_spec: Option<DeclarativeAgentSpec> = None;
    let mut latest_issues: Vec<String> = Vec::new();

    for attempt in 1..=attempt_limit {
        let prompt = build_prompt(intent, &cfg.available_tools, &feedback);
        let request = GenerateRequest {
            prompt: prompt.clone(),
            params: GenerateParams {
                temperature: 0.0,
                max_tokens: 2048,
                thinking: car_inference::tasks::generate::ThinkingMode::Off,
                ..Default::default()
            },
            messages: Some(vec![Message::User { content: prompt }]),
            intent: Some(car_inference::IntentHint {
                task: Some(car_inference::TaskHint::Code),
                require: vec![car_inference::ModelCapability::Code],
                prefer_quality: true,
                ..Default::default()
            }),
            ..Default::default()
        };

        let text = match generator.generate(request).await {
            Ok(reply) => reply.text,
            Err(err) => {
                // Feedback is left as-is: the model never saw this attempt.
                latest_issues = vec![format!("generation failed: {err}")];
                continue;
            }
        };

        // Extraction and spec parsing fail the same way: a single message that
        // becomes both the feedback and the reported issue.
        let parsed = extract_json_object(&text)
            .map_err(|err| format!("output did not parse: {err}. Return ONLY the JSON object."))
            .and_then(|value| parse_spec(&value, &cfg.agent_id, &cfg.available_tools));
        let mut spec = match parsed {
            Ok(spec) => spec,
            Err(problem) => {
                latest_issues = vec![problem.clone()];
                feedback = vec![problem];
                continue;
            }
        };
        spec.enabled = true;

        let issues = {
            let problems = spec.validate();
            if !problems.is_empty() {
                problems
            } else if spec.scenarios.is_empty() {
                vec!["include at least one scenario".into()]
            } else {
                let results = run_scenarios(&spec, generator, executor).await;
                if results.all_passed() {
                    return BuildAgentOutcome {
                        spec: Some(spec),
                        passed: true,
                        issues: Vec::new(),
                        attempts: attempt,
                    };
                }
                results.failures
            }
        };

        feedback = issues.clone();
        latest_issues = issues;
        latest_spec = Some(spec);
    }

    BuildAgentOutcome {
        spec: latest_spec,
        passed: false,
        issues: latest_issues,
        attempts: attempt_limit,
    }
}
/// Converts the model's JSON description into a `DeclarativeAgentSpec`.
///
/// * `name`, `identity` and `standing_goal` are read as strings and trimmed;
///   missing or non-string values become empty. An empty name falls back to
///   `agent_id`.
/// * `tools` keeps only string entries that appear in `available_tools`, so
///   tool names invented by the model are dropped.
/// * `scenarios` keeps only entries with string `input` and `expect` fields.
///
/// The resulting spec uses `agent_id` as its id, has no denied tools and is
/// enabled.
///
/// # Errors
///
/// Returns a message when a scenario's `expect` is blank. The scenario check
/// is a substring match, and an empty substring matches every output, so such
/// a scenario would pass without verifying anything.
fn parse_spec(
    value: &Value,
    agent_id: &str,
    available_tools: &[String],
) -> Result<DeclarativeAgentSpec, String> {
    let text_field = |key: &str| -> String {
        value
            .get(key)
            .and_then(Value::as_str)
            .unwrap_or("")
            .trim()
            .to_string()
    };
    let name = text_field("name");
    let identity = text_field("identity");
    let standing_goal = text_field("standing_goal");

    let tools: Vec<String> = value
        .get("tools")
        .and_then(Value::as_array)
        .map(|a| {
            a.iter()
                .filter_map(|t| t.as_str())
                .filter(|t| available_tools.iter().any(|a| a == t))
                .map(String::from)
                .collect()
        })
        .unwrap_or_default();

    let scenarios: Vec<Scenario> = value
        .get("scenarios")
        .and_then(Value::as_array)
        .map(|a| {
            a.iter()
                .filter_map(|s| {
                    Some(Scenario {
                        input: s.get("input")?.as_str()?.to_string(),
                        expect: s.get("expect")?.as_str()?.to_string(),
                    })
                })
                .collect()
        })
        .unwrap_or_default();

    if let Some(blank) = scenarios.iter().find(|s| s.expect.trim().is_empty()) {
        return Err(format!(
            "scenario with input {:?} has an empty `expect`; give the shortest non-empty \
             substring the correct output must contain",
            blank.input
        ));
    }

    Ok(DeclarativeAgentSpec {
        id: agent_id.to_string(),
        name: if name.is_empty() { agent_id.to_string() } else { name },
        identity,
        tools,
        denied_tools: Vec::new(),
        standing_goal,
        scenarios,
        enabled: true,
    })
}
// Unit tests for the tool selector, the runner and the agent builder. The model
// is replaced by a scripted generator, so no real inference happens.
#[cfg(test)]
mod tests {
use super::*;
use crate::coder::native_loop::TurnGenerator as _;
use async_trait::async_trait;
use car_inference::{GenerateRequest, InferenceResult};
use serde_json::json;
use std::sync::atomic::{AtomicUsize, Ordering};
// Scripted generator: hands out `turns` in order, one per `generate` call.
struct Script {
turns: Vec<InferenceResult>,
// Index of the next turn to hand out.
cursor: AtomicUsize,
}
// Builds an `InferenceResult` with the given text and tool calls by
// deserializing a JSON literal.
fn turn(text: &str, tool_calls: Value) -> InferenceResult {
serde_json::from_value(json!({
"text": text, "tool_calls": tool_calls,
"trace_id": "t", "model_used": "scripted", "latency_ms": 0,
}))
.unwrap()
}
#[async_trait]
impl TurnGenerator for Script {
// Ignores the request and returns the next scripted turn, or the error
// "script exhausted" once every turn has been used.
async fn generate(&self, _req: GenerateRequest) -> Result<InferenceResult, String> {
let i = self.cursor.fetch_add(1, Ordering::SeqCst);
self.turns.get(i).cloned().ok_or_else(|| "script exhausted".into())
}
}
// Minimal enabled spec with the given allowlist, no denied tools and no scenarios.
fn spec_with(tools: Vec<&str>) -> DeclarativeAgentSpec {
DeclarativeAgentSpec {
id: "t".into(),
name: "T".into(),
identity: "You answer.".into(),
tools: tools.into_iter().map(String::from).collect(),
denied_tools: vec![],
standing_goal: "help".into(),
scenarios: vec![],
enabled: true,
}
}
// The allowlist is strict: unknown names and an empty allowlist select nothing,
// and the denylist overrides the allowlist.
#[test]
fn strict_allowlist_empty_intersection_is_zero_tools() {
let all = WorktreeExecutor::tool_defs();
assert!(!all.is_empty());
assert!(select_tool_defs_strict(&all, &["nonexistent".into()], &[]).is_empty());
assert!(select_tool_defs_strict(&all, &[], &[]).is_empty());
let sel = select_tool_defs_strict(&all, &["read_file".into()], &[]);
assert_eq!(sel.len(), 1);
assert_eq!(sel[0]["name"], "read_file");
assert!(select_tool_defs_strict(&all, &["read_file".into()], &["read_file".into()]).is_empty());
}
// A turn without tool calls ends the run with that text as the output.
#[tokio::test]
async fn runner_returns_text_answer_with_no_tools() {
let dir = tempfile::tempdir().unwrap();
let exec = WorktreeExecutor::new(dir.path());
let script = Script {
turns: vec![turn("the answer is 42", json!([]))],
cursor: AtomicUsize::new(0),
};
let spec = spec_with(vec![]);
let runner = DeclarativeAgentRunner::new(&spec, &script, &exec);
let r = runner.run("what is the answer?").await;
assert_eq!(r.output, "the answer is 42");
assert_eq!(r.tool_calls, 0);
assert!(r.error.is_none());
}
// An allowlisted tool call is executed and counted, then the next turn's text
// becomes the output.
#[tokio::test]
async fn runner_executes_an_allowed_tool() {
let dir = tempfile::tempdir().unwrap();
std::fs::write(dir.path().join("data.txt"), "secret content").unwrap();
let exec = WorktreeExecutor::new(dir.path());
let script = Script {
turns: vec![
turn("", json!([{"id":"c1","name":"read_file","arguments":{"path":"data.txt"}}])),
turn("the file says secret content", json!([])),
],
cursor: AtomicUsize::new(0),
};
let spec = spec_with(vec!["read_file"]);
let runner = DeclarativeAgentRunner::new(&spec, &script, &exec);
let r = runner.run("read data.txt").await;
assert!(r.output.contains("secret content"));
assert_eq!(r.tool_calls, 1);
}
// A call to a tool outside the allowlist must not reach the executor: the file
// it would have written is absent afterwards.
#[tokio::test]
async fn runner_blocks_a_disallowed_tool_even_if_the_model_calls_it() {
let dir = tempfile::tempdir().unwrap();
let exec = WorktreeExecutor::new(dir.path());
let script = Script {
turns: vec![
turn("", json!([{"id":"c1","name":"write_file","arguments":{"path":"x","content":"y"}}])),
turn("done", json!([])),
],
cursor: AtomicUsize::new(0),
};
let spec = spec_with(vec!["read_file"]);
let runner = DeclarativeAgentRunner::new(&spec, &script, &exec);
let _ = runner.run("write a file").await;
assert!(!dir.path().join("x").exists(), "disallowed tool executed");
}
// Happy path: the first scripted turn is the generated spec, the second is the
// agent's answer to the spec's single scenario.
#[tokio::test]
async fn build_agent_generates_then_passes_scenarios() {
let dir = tempfile::tempdir().unwrap();
let exec = WorktreeExecutor::new(dir.path());
let script = Script {
turns: vec![
turn(
r#"{"name":"Greeter","identity":"You greet people warmly.","tools":[],
"standing_goal":"greet","scenarios":[{"input":"hi","expect":"hello"}]}"#,
json!([]),
),
turn("hello there, friend!", json!([])),
],
cursor: AtomicUsize::new(0),
};
let cfg = BuildAgentConfig {
agent_id: "greeter".into(),
available_tools: vec!["read_file".into(), "write_file".into()],
max_attempts: 3,
};
let outcome = build_agent("make a friendly greeter", &script, &exec, &cfg).await;
assert!(outcome.passed, "issues: {:?}", outcome.issues);
let spec = outcome.spec.unwrap();
assert_eq!(spec.id, "greeter");
assert_eq!(spec.name, "Greeter");
assert_eq!(spec.scenarios.len(), 1);
}
// Tool names that are not in `available_tools` ("send_email" here) are removed
// from the generated spec.
#[tokio::test]
async fn build_agent_drops_invented_tool_names() {
let dir = tempfile::tempdir().unwrap();
let exec = WorktreeExecutor::new(dir.path());
let script = Script {
turns: vec![
turn(
r#"{"name":"X","identity":"You help.","tools":["send_email","read_file"],
"standing_goal":"g","scenarios":[{"input":"q","expect":"a"}]}"#,
json!([]),
),
turn("answer: a", json!([])),
],
cursor: AtomicUsize::new(0),
};
let cfg = BuildAgentConfig {
agent_id: "x".into(),
available_tools: vec!["read_file".into()],
max_attempts: 2,
};
let outcome = build_agent("intent", &script, &exec, &cfg).await;
assert!(outcome.passed);
assert_eq!(outcome.spec.unwrap().tools, vec!["read_file".to_string()]);
}
// The first spec fails its scenario (the agent answers "WRONG"); the second
// attempt's spec passes, so the outcome reports two attempts and the v2 spec.
#[tokio::test]
async fn build_agent_repairs_a_failing_scenario() {
let dir = tempfile::tempdir().unwrap();
let exec = WorktreeExecutor::new(dir.path());
let script = Script {
turns: vec![
turn(
r#"{"name":"A","identity":"v1","tools":[],"standing_goal":"g","scenarios":[{"input":"q","expect":"RIGHT"}]}"#,
json!([]),
),
turn("WRONG", json!([])),
turn(
r#"{"name":"A","identity":"v2","tools":[],"standing_goal":"g","scenarios":[{"input":"q","expect":"RIGHT"}]}"#,
json!([]),
),
turn("the RIGHT answer", json!([])),
],
cursor: AtomicUsize::new(0),
};
let cfg = BuildAgentConfig { agent_id: "a".into(), available_tools: vec![], max_attempts: 3 };
let outcome = build_agent("intent", &script, &exec, &cfg).await;
assert!(outcome.passed);
assert_eq!(outcome.attempts, 2);
assert_eq!(outcome.spec.unwrap().identity, "v2");
}
}