use crate::budget::budget_skipped_output;
use crate::error::MultiError;
use crate::mailbox::Mailbox;
use crate::runner::AgentRunner;
use crate::shared::SharedInfra;
use crate::types::{AgentOutput, AgentSpec};
use serde::{Deserialize, Serialize};
use std::sync::Arc;
use tracing::instrument;
/// Record of one head-to-head comparison played during a tournament.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MatchResult {
/// Round in which the match was played; the first round is numbered 1.
pub round: u32,
/// Name of the competitor shown to the judge as "Candidate A".
pub a: String,
/// Name of the competitor shown to the judge as "Candidate B".
pub b: String,
/// Name of the competitor that advanced (always equal to `a` or `b`).
pub winner: String,
/// The judge's stated reason, or a fallback note when the judge could not
/// be run or its answer had to be interpreted heuristically.
pub rationale: String,
}
/// Full outcome of a tournament run.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TournamentResult {
/// The task text every competitor was asked to solve.
pub task: String,
/// One output per competitor, including failed and budget-skipped ones.
pub candidates: Vec<AgentOutput>,
/// Every judged match, in the order the matches were played.
pub matches: Vec<MatchResult>,
/// Name of the overall winner; empty when no competitor succeeded.
pub winner_name: String,
/// Answer produced by the overall winner; empty when no competitor succeeded.
pub winner_answer: String,
/// Winner first, followed by eliminated competitors from the most recently
/// eliminated to the earliest. Competitors that never produced a successful
/// answer do not appear here.
pub ranking: Vec<String>,
}
/// Single-elimination tournament: competitors each answer the task, then a
/// judge agent compares answers pairwise until one remains.
pub struct Tournament {
/// Agents that each produce a candidate answer for the task.
pub competitors: Vec<AgentSpec>,
/// Template for the judge agent; it is cloned and renamed for every match.
pub judge: AgentSpec,
}
impl Tournament {
pub fn new(competitors: Vec<AgentSpec>, judge: AgentSpec) -> Self {
Self { competitors, judge }
}
#[instrument(name = "multi.tournament", skip_all)]
pub async fn run(
&self,
task: &str,
runner: &Arc<dyn AgentRunner>,
infra: &SharedInfra,
) -> Result<TournamentResult, MultiError> {
let candidates = self.gather_candidates(task, runner, infra).await;
let mut alive: Vec<(String, String)> = candidates
.iter()
.filter(|o| o.succeeded())
.map(|o| (o.name.clone(), o.answer.clone()))
.collect();
let mut matches = Vec::new();
let mut elimination_order: Vec<String> = Vec::new();
if alive.is_empty() {
return Ok(TournamentResult {
task: task.to_string(),
candidates,
matches,
winner_name: String::new(),
winner_answer: String::new(),
ranking: Vec::new(),
});
}
let mut round = 1;
while alive.len() > 1 {
let mut next: Vec<(String, String)> = Vec::new();
let mut i = 0;
while i + 1 < alive.len() {
let (name_a, ans_a) = alive[i].clone();
let (name_b, ans_b) = alive[i + 1].clone();
let (winner_side, rationale) = self
.judge_pair(task, &name_a, &ans_a, &name_b, &ans_b, runner, infra)
.await;
let (winner_name, winner_ans, loser_name) = match winner_side {
Side::A => (name_a.clone(), ans_a.clone(), name_b.clone()),
Side::B => (name_b.clone(), ans_b.clone(), name_a.clone()),
};
matches.push(MatchResult {
round,
a: name_a,
b: name_b,
winner: winner_name.clone(),
rationale,
});
elimination_order.push(loser_name);
next.push((winner_name, winner_ans));
i += 2;
}
if i < alive.len() {
next.push(alive[i].clone());
}
alive = next;
round += 1;
}
let (winner_name, winner_answer) = alive
.into_iter()
.next()
.unwrap_or_else(|| (String::new(), String::new()));
let mut ranking = vec![winner_name.clone()];
ranking.extend(elimination_order.into_iter().rev());
Ok(TournamentResult {
task: task.to_string(),
candidates,
matches,
winner_name,
winner_answer,
ranking,
})
}
async fn gather_candidates(
&self,
task: &str,
runner: &Arc<dyn AgentRunner>,
infra: &SharedInfra,
) -> Vec<AgentOutput> {
let mailbox = Arc::new(Mailbox::default());
enum Slot {
Spawned(usize),
Skipped(AgentOutput),
}
let mut handles: Vec<tokio::task::JoinHandle<Result<AgentOutput, MultiError>>> = Vec::new();
let mut slots: Vec<Slot> = Vec::new();
for spec in &self.competitors {
if let Err(e) = infra.begin_agent() {
slots.push(Slot::Skipped(budget_skipped_output(&spec.name, &e)));
continue;
}
let runner = Arc::clone(runner);
let spec = spec.clone();
let task = task.to_string();
let mailbox = Arc::clone(&mailbox);
let rt = infra.make_runtime();
for tool in &spec.tools {
rt.register_tool(tool).await;
}
handles.push(tokio::spawn(async move {
runner.run(&spec, &task, &rt, &mailbox).await
}));
slots.push(Slot::Spawned(handles.len() - 1));
}
let mut results: Vec<Option<_>> = futures::future::join_all(handles)
.await
.into_iter()
.map(Some)
.collect();
let mut outputs = Vec::new();
for (i, slot) in slots.into_iter().enumerate() {
let idx = match slot {
Slot::Skipped(out) => {
outputs.push(out);
continue;
}
Slot::Spawned(idx) => idx,
};
match results.get_mut(idx).and_then(Option::take) {
Some(Ok(Ok(out))) => {
infra.record_output(&out);
outputs.push(out);
}
Some(Ok(Err(e))) => outputs.push(failed_output(&self.competitors[i].name, e.to_string())),
Some(Err(e)) => {
outputs.push(failed_output(&self.competitors[i].name, format!("join error: {e}")))
}
None => outputs.push(failed_output(
&self.competitors[i].name,
"internal: missing join result".into(),
)),
}
}
outputs
}
/// Has the judge compare two answers and reports which side won, together
/// with a rationale string.
///
/// The judge only sees the answers, labelled "Candidate A" and
/// "Candidate B"; the competitor names are used solely to give the judge
/// agent a per-match name of the form `<judge>_<a>_vs_<b>`.
///
/// This never fails: if the agent budget refuses the judge, or the runner
/// returns an error, the verdict defaults to `Side::A` and the rationale says
/// why. A successful judge output is recorded with `infra.record_output` and
/// interpreted by `parse_verdict`.
async fn judge_pair(
    &self,
    task: &str,
    name_a: &str,
    ans_a: &str,
    name_b: &str,
    ans_b: &str,
    runner: &Arc<dyn AgentRunner>,
    infra: &SharedInfra,
) -> (Side, String) {
    // The judge counts against the agent budget like any other agent.
    if infra.begin_agent().is_err() {
        return (Side::A, "budget exhausted: defaulted to first candidate".into());
    }

    let mut referee = self.judge.clone();
    referee.name = format!("{}_{}_vs_{}", self.judge.name, name_a, name_b);

    let prompt = format!(
        r#"You are judging a head-to-head comparison for this task:
## Task
{task}
## Candidate A
{ans_a}
## Candidate B
{ans_b}
Decide which candidate better accomplishes the task. Be decisive.
Respond with a JSON object:
```json
{{"winner": "A", "rationale": "one sentence why"}}
```
`winner` must be exactly "A" or "B"."#,
    );

    // Each match gets a private mailbox and a fresh runtime.
    // NOTE(review): unlike competitors in `gather_candidates`, the judge's
    // `tools` are not registered on this runtime — confirm that is intended.
    let inbox = Mailbox::default();
    let runtime = infra.make_runtime();

    let out = match runner.run(&referee, &prompt, &runtime, &inbox).await {
        Ok(out) => out,
        Err(e) => return (Side::A, format!("judge failed, defaulted to A: {e}")),
    };
    infra.record_output(&out);
    parse_verdict(&out.answer)
}
}
/// Which of the two candidates in a head-to-head match was picked.
#[derive(Clone, Copy)]
enum Side {
/// The first candidate of the pair, shown to the judge as "Candidate A".
/// Also the default whenever no verdict can be obtained.
A,
/// The second candidate of the pair, shown to the judge as "Candidate B".
B,
}
/// Builds the placeholder output for a competitor that produced no answer.
///
/// The result carries the competitor's `name` and the `error` text; the
/// answer is empty, all counters are zero, and every optional or list field
/// is left empty.
fn failed_output(name: &str, error: String) -> AgentOutput {
    let reason = Some(error);
    AgentOutput {
        error: reason,
        name: name.to_owned(),
        answer: String::default(),
        tools_used: vec![],
        outcome: None,
        tokens: None,
        turns: 0,
        tool_calls: 0,
        duration_ms: 0.0,
    }
}
/// Interprets the judge's raw answer as a verdict plus rationale.
///
/// Resolution order:
///
/// 1. A JSON object with a string `winner` field. The field is read
///    leniently: `"A"`/`"b"` as well as values such as `"Candidate B"` or
///    `"B."` are understood; anything unrecognisable counts as A. The JSON
///    `rationale` string (or an empty string) is returned with it.
/// 2. The earliest verdict phrase in the answer ("Candidate B",
///    "Winner: A", …), compared case-insensitively. A tie in position goes
///    to A.
/// 3. The first standalone letter `A` or `B`.
/// 4. `Side::A` when nothing matches.
///
/// Steps 2–4 return a fixed short rationale instead of echoing the answer.
fn parse_verdict(answer: &str) -> (Side, String) {
    const FALLBACK_RATIONALE: &str = "verdict parsed heuristically (no JSON winner field)";
    const PHRASES_A: &[&str] = &["CANDIDATE A", "WINNER: A", "WINNER A", "\"A\"", "ANSWER A"];
    const PHRASES_B: &[&str] = &["CANDIDATE B", "WINNER: B", "WINNER B", "\"B\"", "ANSWER B"];

    /// Byte offset of the earliest occurrence of any phrase in `haystack`.
    fn earliest_phrase(haystack: &str, phrases: &[&str]) -> Option<usize> {
        phrases.iter().filter_map(|p| haystack.find(p)).min()
    }

    /// First standalone `A`/`B` in `text`.
    ///
    /// The original casing is scanned first: after uppercasing, the English
    /// article "a" is indistinguishable from a vote for A, so an uppercase
    /// letter in the judge's own text is the stronger signal. Only if none is
    /// found does the scan fall back to the case-insensitive form.
    fn standalone_vote(text: &str) -> Option<Side> {
        first_standalone_ab(text).or_else(|| first_standalone_ab(&text.to_uppercase()))
    }

    if let Some(json) = car_ir::json_extract::extract_json_object(answer) {
        if let Ok(v) = serde_json::from_str::<serde_json::Value>(&json) {
            if let Some(w) = v.get("winner").and_then(|w| w.as_str()) {
                let rationale = v
                    .get("rationale")
                    .and_then(|r| r.as_str())
                    .unwrap_or("")
                    .to_string();
                // Previously anything other than exactly "b" was scored as A,
                // which misread values like "Candidate B".
                let side = standalone_vote(w).unwrap_or(Side::A);
                return (side, rationale);
            }
        }
    }

    let upper = answer.to_uppercase();
    let side = match (
        earliest_phrase(&upper, PHRASES_A),
        earliest_phrase(&upper, PHRASES_B),
    ) {
        (Some(a), Some(b)) => {
            if b < a {
                Side::B
            } else {
                Side::A
            }
        }
        (Some(_), None) => Side::A,
        (None, Some(_)) => Side::B,
        (None, None) => first_standalone_ab(answer)
            .or_else(|| first_standalone_ab(&upper))
            .unwrap_or(Side::A),
    };
    (side, FALLBACK_RATIONALE.into())
}
/// Finds the first uppercase `A` or `B` in `upper` that stands alone, i.e.
/// has no ASCII letter or digit directly before or after it.
///
/// Only the bytes `A` and `B` are considered, so callers that want a
/// case-insensitive search must uppercase the text first. Returns `None`
/// when no such letter exists.
fn first_standalone_ab(upper: &str) -> Option<Side> {
    let raw = upper.as_bytes();
    // True when `pos` is a valid index holding an ASCII letter or digit.
    let is_word_byte = |pos: Option<usize>| {
        pos.and_then(|p| raw.get(p))
            .map_or(false, |b| b.is_ascii_alphanumeric())
    };

    raw.iter().enumerate().find_map(|(pos, &byte)| {
        let side = match byte {
            b'A' => Side::A,
            b'B' => Side::B,
            _ => return None,
        };
        let glued_left = is_word_byte(pos.checked_sub(1));
        let glued_right = is_word_byte(pos.checked_add(1));
        if glued_left || glued_right {
            None
        } else {
            Some(side)
        }
    })
}
// Unit tests for the tournament bracket and the verdict parser.
#[cfg(test)]
mod tests {
use super::*;
use car_engine::Runtime;
/// Deterministic stand-in for a real agent runner.
///
/// An agent whose name contains `_vs_` is treated as a judge: it reads the
/// two candidate blocks out of the prompt and picks B only when B's text
/// compares greater than A's. Every other agent answers with its own name.
struct ScriptedRunner;
#[async_trait::async_trait]
impl AgentRunner for ScriptedRunner {
async fn run(
&self,
spec: &AgentSpec,
task: &str,
_runtime: &Runtime,
_mailbox: &Mailbox,
) -> Result<AgentOutput, MultiError> {
let answer = if spec.name.contains("_vs_") {
let a = extract_block(task, "## Candidate A");
let b = extract_block(task, "## Candidate B");
// String comparison; A wins ties.
let winner = if b > a { "B" } else { "A" };
format!("{{\"winner\": \"{winner}\", \"rationale\": \"later sorts higher\"}}")
} else {
spec.name.clone()
};
Ok(AgentOutput {
name: spec.name.clone(),
answer,
turns: 1,
tool_calls: 0,
duration_ms: 1.0,
error: None,
outcome: None,
tokens: None,
tools_used: Vec::new(),
})
}
}
/// Returns the trimmed text between `header` and the next `##`, or an empty
/// string when `header` does not occur in `task`.
fn extract_block<'a>(task: &'a str, header: &str) -> &'a str {
task.split(header)
.nth(1)
.map(|s| s.split("##").next().unwrap_or("").trim())
.unwrap_or("")
}
// Four competitors: two first-round matches plus a final, and the name that
// sorts last must win.
#[tokio::test]
async fn single_elimination_picks_alphabetical_max() {
let competitors = vec![
AgentSpec::new("alpha", ""),
AgentSpec::new("bravo", ""),
AgentSpec::new("charlie", ""),
AgentSpec::new("delta", ""),
];
let judge = AgentSpec::new("judge", "pick the better answer");
let runner: Arc<dyn AgentRunner> = Arc::new(ScriptedRunner);
let infra = SharedInfra::new();
let r = Tournament::new(competitors, judge)
.run("rank these", &runner, &infra)
.await
.unwrap();
assert_eq!(r.winner_name, "delta");
assert_eq!(r.winner_answer, "delta");
assert_eq!(r.matches.len(), 3);
assert_eq!(r.ranking.first().unwrap(), "delta");
assert_eq!(r.ranking.len(), 4);
}
// Three competitors: the unpaired third one skips round one, so only two
// matches are judged in total.
#[tokio::test]
async fn odd_competitor_gets_a_bye() {
let competitors = vec![
AgentSpec::new("alpha", ""),
AgentSpec::new("bravo", ""),
AgentSpec::new("charlie", ""),
];
let judge = AgentSpec::new("judge", "");
let runner: Arc<dyn AgentRunner> = Arc::new(ScriptedRunner);
let infra = SharedInfra::new();
let r = Tournament::new(competitors, judge)
.run("rank", &runner, &infra)
.await
.unwrap();
assert_eq!(r.matches.len(), 2);
assert_eq!(r.winner_name, "charlie"); }
// The parser must cope with free-form prose as well as JSON embedded in text.
#[test]
fn parse_verdict_handles_prose_not_just_json() {
let (s, _) = parse_verdict("Candidate B is clearly better, it covers more cases.");
assert!(matches!(s, Side::B), "phrase 'Candidate B' should win");
let (s, _) = parse_verdict("After careful analysis, B.");
assert!(matches!(s, Side::B), "standalone trailing 'B' should win");
let (s, _) = parse_verdict("Answer A is the stronger submission.");
assert!(matches!(s, Side::A));
let (s, r) = parse_verdict("I think... {\"winner\": \"B\", \"rationale\": \"x\"}");
assert!(matches!(s, Side::B));
assert_eq!(r, "x");
}
// The heuristic path must return a short fixed rationale, not the raw answer.
#[test]
fn parse_verdict_does_not_leak_full_answer_on_fallback() {
let huge = format!("Candidate B wins. {}", "blah ".repeat(500));
let (_, rationale) = parse_verdict(&huge);
assert!(
rationale.len() < 100,
"fallback rationale must not echo the whole answer"
);
}
// With an agent budget of two, exactly two of the four competitors are
// expected to produce a successful candidate.
#[tokio::test]
async fn budget_cap_limits_competitors() {
let competitors: Vec<AgentSpec> = (0..4)
.map(|i| AgentSpec::new(&format!("c{i}"), ""))
.collect();
let judge = AgentSpec::new("judge", "");
let runner: Arc<dyn AgentRunner> = Arc::new(ScriptedRunner);
let infra = SharedInfra::new().with_budget(crate::BudgetLimits {
max_agents: Some(2),
..Default::default()
});
let r = Tournament::new(competitors, judge)
.run("rank", &runner, &infra)
.await
.unwrap();
let produced = r.candidates.iter().filter(|o| o.succeeded()).count();
assert_eq!(produced, 2, "only two competitors fit the agent budget");
}
}