use std::collections::BTreeMap;
use std::path::PathBuf;
use eframe::egui::{self, RichText, ScrollArea};
use super::facett_theme::{Theme, RED, AMBER};
use crate::warehouse::agent_model_runs::{
leaderboard, matrix_grid, query_agent_model_runs, AgentModelRunRow, BakeoffSelector,
LeaderboardEntry, MatrixGrid,
};
use crate::warehouse::iceberg::IcebergWarehouse;
/// Where the bake-off rows are read from.
enum Src {
/// A local warehouse root; `load` opens it read-only via `IcebergWarehouse::open_read_only`.
Local(PathBuf),
/// A remote service; `load` passes these three values to
/// `super::remote::fetch_bakeoff_results`.
Remote { endpoint: String, token: String, workspace: String },
}
/// Per-run digest built by `summarize_runs`; one entry per distinct `run_id`.
#[derive(Debug, Clone)]
struct RunSummary {
/// Identifier shared by every row of the run.
run_id: String,
/// `prompt_id` of the first row in the run's group.
prompt_id: String,
/// Largest `ts_micros` among the run's rows (0 if the group is empty).
ts_micros: i64,
/// Number of rows in the run as filled in by `summarize_runs`
/// (the UI labels this value "cells").
model_count: usize,
/// Number of distinct `agent` values among the run's rows.
agent_count: usize,
/// `cell_label()` of the first leaderboard entry, or empty when there is none.
winner: String,
}
/// State backing the bake-off leaderboard tab: data source, cached rows,
/// per-run summaries, and the current selection.
pub struct LeaderboardTabState {
/// Where rows are fetched from.
src: Src,
/// Set once a load has been attempted (successfully or not); cleared by `reload`.
loaded: bool,
/// Formatted error from the last failed load, if any.
error: Option<String>,
/// All rows returned by the last successful load (every run).
rows: Vec<AgentModelRunRow>,
/// One summary per run, ordered by descending `ts_micros`.
summaries: Vec<RunSummary>,
/// `run_id` of the run currently shown; `None` falls back to the first summary.
selected_run: Option<String>,
/// Palette used when drawing.
theme: Theme,
}
impl LeaderboardTabState {
/// Build a tab that reads from the local warehouse rooted at `root`.
///
/// Nothing is read here; the first `draw` triggers the load.
pub fn local(root: PathBuf) -> Self {
    let source = Src::Local(root);
    Self::with(source)
}
/// Build a tab that fetches rows from a remote endpoint for `workspace`.
///
/// Nothing is fetched here; the first `draw` triggers the load.
pub fn remote(endpoint: String, token: String, workspace: String) -> Self {
    let source = Src::Remote { endpoint, token, workspace };
    Self::with(source)
}
/// Point a remote source at a different workspace and drop all cached data.
///
/// A local source has no workspace, so only the reload happens for it.
pub fn set_workspace(&mut self, workspace: String) {
    match &mut self.src {
        Src::Remote { workspace: slot, .. } => *slot = workspace,
        Src::Local(_) => {}
    }
    self.reload();
}
/// Shared constructor: an unloaded, empty state over `src` with the default theme.
fn with(src: Src) -> Self {
    Self {
        theme: Theme::default(),
        selected_run: None,
        summaries: Vec::new(),
        rows: Vec::new(),
        error: None,
        loaded: false,
        src,
    }
}
pub fn set_palette(&mut self, t: Theme) {
self.theme = t;
}
/// Test hook: install `rows` as if they had been loaded, bypassing any I/O.
///
/// Rows are ordered by `(ts_micros, run_id, model, prompt_id)` before being
/// summarised; the first summary becomes the selection and any error is cleared.
#[doc(hidden)]
pub fn inject_for_test(&mut self, mut rows: Vec<AgentModelRunRow>) {
    rows.sort_by(|lhs, rhs| {
        lhs.ts_micros
            .cmp(&rhs.ts_micros)
            .then_with(|| lhs.run_id.cmp(&rhs.run_id))
            .then_with(|| lhs.model.cmp(&rhs.model))
            .then_with(|| lhs.prompt_id.cmp(&rhs.prompt_id))
    });
    let summaries = summarize_runs(&rows);
    self.selected_run = summaries.first().map(|first| first.run_id.clone());
    self.summaries = summaries;
    self.rows = rows;
    self.error = None;
    self.loaded = true;
}
/// Forget everything that was loaded so the next `draw` fetches again.
///
/// Clears the rows, summaries, selection and error, and resets the `loaded` flag.
pub fn reload(&mut self) {
    self.rows.clear();
    self.summaries.clear();
    self.selected_run = None;
    self.error = None;
    self.loaded = false;
}
fn load(&mut self) {
if self.loaded {
return;
}
self.loaded = true;
let rows = match &self.src {
Src::Local(root) => match IcebergWarehouse::open_read_only(root)
.and_then(|wh| wh.block_on(query_agent_model_runs(&wh, &BakeoffSelector::All)))
{
Ok(rows) => rows,
Err(e) => {
self.error = Some(format!("{e:#}"));
return;
}
},
Src::Remote { endpoint, token, workspace } => {
match super::remote::fetch_bakeoff_results(endpoint, token, workspace) {
Ok(rows) => rows,
Err(e) => {
self.error = Some(format!("{e:#}"));
return;
}
}
}
};
self.summaries = summarize_runs(&rows);
self.selected_run = self.summaries.first().map(|s| s.run_id.clone());
self.rows = rows;
}
/// Summary of the run being shown: the selected run when it exists among
/// the summaries, otherwise the first summary (or `None` when there are none).
fn current(&self) -> Option<&RunSummary> {
    if let Some(wanted) = self.selected_run.as_deref() {
        if let Some(hit) = self.summaries.iter().find(|s| s.run_id == wanted) {
            return Some(hit);
        }
    }
    self.summaries.first()
}
/// Borrowed rows belonging to the current run; empty when there is no current run.
fn current_rows(&self) -> Vec<&AgentModelRunRow> {
    match self.current() {
        None => Vec::new(),
        Some(run) => self
            .rows
            .iter()
            .filter(|row| row.run_id == run.run_id)
            .collect(),
    }
}
/// Leaderboard computed by `leaderboard` over an owned copy of the current run's rows.
fn current_board(&self) -> Vec<LeaderboardEntry> {
    let picked = self.current_rows();
    let owned: Vec<AgentModelRunRow> = picked.iter().map(|row| (*row).clone()).collect();
    leaderboard(&owned)
}
/// Matrix grid computed by `matrix_grid` over an owned copy of the current run's rows.
fn current_grid(&self) -> MatrixGrid {
    let picked = self.current_rows();
    let owned: Vec<AgentModelRunRow> = picked.iter().map(|row| (*row).clone()).collect();
    matrix_grid(&owned)
}
/// Render the bake-off tab into `ui`.
///
/// Calls `load()` first (a no-op once a load has been attempted), then shows
/// exactly one of three views:
/// * the stored error, when the load failed;
/// * a centered hint with CLI commands, when there are no run summaries;
/// * otherwise a top controls bar (run picker + reload), a left list of runs,
///   and a central area with the matrix grid and the ranked cells.
pub fn draw(&mut self, ui: &mut egui::Ui) {
// Take the theme by value so the panel closures below do not need to borrow `self` for it.
let theme = self.theme;
self.load();
// Error view: nothing else is drawn in this state.
if let Some(err) = self.error.clone() {
ui.colored_label(RED, format!("agent_model_runs read failed:\n{err}"));
return;
}
// Empty view: no runs, so show how to produce some.
if self.summaries.is_empty() {
ui.vertical_centered(|ui| {
ui.add_space(40.0);
ui.heading("🏆 Bake-off matrix — no runs recorded yet");
ui.label("Cross a task over agents × models to light this up:");
ui.monospace("nornir bakeoff demo # seed a mock matrix");
ui.monospace("nornir bakeoff run --task \"…\" --agents a,b --models x,y");
ui.monospace("nornir bakeoff run --task \"…\" --mock # offline");
ui.monospace("nornir bakeoff leaderboard --matrix # CLI twin");
});
return;
}
// Snapshot (run_id, prompt_id, model_count) into an owned list so the combo-box
// closure can mutate `self.selected_run` / call `self.reload()` without also
// borrowing `self.summaries`.
let runs: Vec<(String, String, usize)> = self
.summaries
.iter()
.map(|s| (s.run_id.clone(), s.prompt_id.clone(), s.model_count))
.collect();
// Panels are added in top → left → central order.
egui::TopBottomPanel::top("bakeoff_controls").show_inside(ui, |ui| {
ui.horizontal_wrapped(|ui| {
ui.label("run:");
// Text shown on the closed combo box: the selected run, or the first run when
// nothing is selected.
let sel = self
.selected_run
.clone()
.unwrap_or_else(|| runs.first().map(|r| r.0.clone()).unwrap_or_default());
egui::ComboBox::from_id_salt("bakeoff_run")
.selected_text(short(&sel))
.show_ui(ui, |ui| {
for (id, prompt_id, n) in &runs {
// NOTE(review): `n` is `RunSummary::model_count` but is labelled "cells" here.
ui.selectable_value(
&mut self.selected_run,
Some(id.clone()),
format!("{} · {} · {n} cells", prompt_id, short(id)),
);
}
});
// `reload()` clears rows and summaries immediately, so the panels drawn below in
// this same call see an empty state; the next `draw` triggers a fresh load.
if ui.button("↻ reload").on_hover_text("re-read agent_model_runs").clicked() {
self.reload();
}
});
});
egui::SidePanel::left("bakeoff_runs")
.resizable(true)
.default_width(280.0)
.show_inside(ui, |ui| {
ui.strong("runs (newest first)");
ui.separator();
ScrollArea::vertical().auto_shrink([false, false]).show(ui, |ui| {
// One selectable entry per run, followed by its winner line.
for s in &self.summaries {
let label = format!(
"🏆 {} ({}×{} cells)",
s.prompt_id, s.agent_count, s.model_count
);
let selected = self.selected_run.as_deref() == Some(s.run_id.as_str());
if ui.selectable_label(selected, label).clicked() {
self.selected_run = Some(s.run_id.clone());
}
ui.weak(format!(" winner: {}", s.winner));
}
});
});
egui::CentralPanel::default().show_inside(ui, |ui| {
// Clone the summary so `self` stays free for `current_grid` / `current_board` below.
let Some(run) = self.current().cloned() else { return };
ui.horizontal(|ui| {
ui.strong(format!("task `{}` · run {}", run.prompt_id, short(&run.run_id)));
ui.separator();
ui.weak(format!("{} agents × {} cells", run.agent_count, run.model_count));
});
ui.separator();
let grid = self.current_grid();
ScrollArea::vertical().auto_shrink([false, false]).show(ui, |ui| {
ui.strong("agent × model matrix");
ui.add_space(2.0);
draw_matrix_grid(ui, &theme, &grid);
ui.add_space(10.0);
ui.separator();
ui.strong("ranked cells (best first)");
ui.add_space(2.0);
// Column header for the ranked list.
// NOTE(review): `board_row` also prints a latency (ms) value between tok/s and
// cost, which this header does not mention — confirm whether it should.
ui.horizontal(|ui| {
ui.label(RichText::new("#").monospace().strong());
ui.label(RichText::new(" agent/model").monospace().strong());
ui.label(RichText::new(" score · tok/s · cost · mcp").monospace().strong());
});
for e in self.current_board() {
board_row(ui, &e, &theme);
}
});
});
}
pub fn state_json(&self) -> serde_json::Value {
let current = self.current();
let board: Vec<serde_json::Value> = self
.current_board()
.iter()
.map(|e| {
serde_json::json!({
"rank": e.rank,
"agent": e.agent,
"model": e.model,
"cell": e.cell_label(),
"tokens_per_s": e.tokens_per_s,
"score": e.score,
"latency_ms": e.latency_ms,
"tokens_out": e.tokens_out,
"cost_usd": e.cost_usd,
"mcp_tool_calls": e.mcp_tool_calls,
"ok": e.ok,
})
})
.collect();
let runs: Vec<serde_json::Value> = self
.summaries
.iter()
.map(|s| {
serde_json::json!({
"run_id": s.run_id,
"prompt_id": s.prompt_id,
"model_count": s.model_count,
"agent_count": s.agent_count,
"winner": s.winner,
})
})
.collect();
let grid = self.current_grid();
let grid_cells: Vec<serde_json::Value> = grid
.cells
.iter()
.map(|c| {
serde_json::json!({
"agent": c.agent,
"model": c.model,
"rank": c.rank,
"score": c.score,
"tokens_per_s": c.tokens_per_s,
"latency_ms": c.latency_ms,
"cost_usd": c.cost_usd,
"mcp_tool_calls": c.mcp_tool_calls,
"task_count": c.task_count,
"ok": c.ok,
})
})
.collect();
let grid_winner = grid.winner().map(|w| {
serde_json::json!({ "agent": w.agent, "model": w.model, "score": w.score })
});
serde_json::json!({
"source": match &self.src {
Src::Local(p) => format!("local {}", p.display()),
Src::Remote { endpoint, workspace, .. } => {
format!("remote {endpoint} ws={workspace} (Viz.BakeoffResults)")
}
},
"error": self.error,
"runs": runs,
"run_count": self.summaries.len(),
"selected_run": current.map(|s| s.run_id.clone()),
"selected_prompt": current.map(|s| s.prompt_id.clone()),
"winner": current.map(|s| s.winner.clone()),
"leaderboard": board,
"matrix": {
"agents": grid.agents,
"models": grid.models,
"cells": grid_cells,
"winner": grid_winner,
},
"palette": self.theme.name,
})
}
}
/// Collapse `rows` into one [`RunSummary`] per distinct `run_id`.
///
/// Within a run: `ts_micros` is the largest row timestamp, `prompt_id` comes
/// from the first row, `winner` is the label of the first leaderboard entry,
/// `agent_count` counts distinct agents and `model_count` is the row count.
/// The result is ordered by descending `ts_micros`; runs with equal
/// timestamps stay in ascending `run_id` order (the sort is stable).
fn summarize_runs(rows: &[AgentModelRunRow]) -> Vec<RunSummary> {
    // Group owned copies by run id; the BTreeMap yields runs in ascending id order.
    let mut grouped: BTreeMap<String, Vec<AgentModelRunRow>> = BTreeMap::new();
    for row in rows {
        grouped.entry(row.run_id.clone()).or_default().push(row.clone());
    }

    let mut summaries: Vec<RunSummary> = Vec::with_capacity(grouped.len());
    for (run_id, members) in grouped {
        let ts_micros = members.iter().map(|r| r.ts_micros).max().unwrap_or(0);
        let prompt_id = members
            .first()
            .map(|r| r.prompt_id.clone())
            .unwrap_or_default();
        let winner = leaderboard(&members)
            .first()
            .map(|e| e.cell_label())
            .unwrap_or_default();

        // Distinct agents: sort the names, then drop adjacent duplicates.
        let mut agents: Vec<&str> = members.iter().map(|r| r.agent.as_str()).collect();
        agents.sort_unstable();
        agents.dedup();
        let agent_count = agents.len();

        summaries.push(RunSummary {
            run_id,
            prompt_id,
            ts_micros,
            model_count: members.len(),
            agent_count,
            winner,
        });
    }

    // Newest first.
    summaries.sort_by(|a, b| a.ts_micros.cmp(&b.ts_micros).reverse());
    summaries
}
/// Draw one ranked-cells line: rank, status chip, cell label, then the metrics.
///
/// Chip precedence: a failed entry gets a red ✗, otherwise rank 1 gets an
/// amber medal, otherwise a dimmed dot.
fn board_row(ui: &mut egui::Ui, e: &LeaderboardEntry, theme: &Theme) {
    let (chip, col) = match (e.ok, e.rank) {
        (false, _) => ("✗", RED),
        (true, 1) => ("🥇", AMBER),
        (true, _) => ("·", theme.text_dim),
    };
    ui.horizontal(|ui| {
        let rank_text = RichText::new(format!("{:<2}", e.rank)).monospace().strong();
        ui.label(rank_text);

        let chip_text = RichText::new(chip).color(col);
        ui.label(chip_text);

        let cell_text = RichText::new(e.cell_label()).strong();
        ui.label(cell_text);

        let metrics = format!(
            "{:.3} score · {:.1} tok/s · {:.0}ms · ${:.4} · {} mcp",
            e.score, e.tokens_per_s, e.latency_ms, e.cost_usd, e.mcp_tool_calls
        );
        ui.weak(metrics);
    });
}
/// Draw the agent × model matrix for one run, followed by a winner line.
///
/// Layout: a header row of model names, then one row per agent. Each present
/// cell shows `score #rank` with a hover tooltip carrying the full metrics;
/// a missing agent/model combination is shown as a dimmed dot. An empty grid
/// renders a single "(no cells)" note instead.
///
/// Cell status precedence matches `board_row`: a failed cell is always red
/// with a ✗ marker, even when it is ranked first; otherwise rank 1 is amber
/// with a medal; otherwise the colour comes from `theme.heat` of the score
/// clamped to `[0, 1]`. (Previously the marker checked the winner first while
/// the colour checked failure first, so a failed rank-1 cell was drawn red
/// but prefixed with a medal.)
fn draw_matrix_grid(ui: &mut egui::Ui, theme: &Theme, grid: &MatrixGrid) {
    if grid.cells.is_empty() {
        ui.weak("(no cells)");
        return;
    }
    egui::Grid::new("bakeoff_matrix_grid")
        .striped(true)
        .spacing([14.0, 4.0])
        .show(ui, |ui| {
            // Header row: corner label, then one column per model.
            ui.label(RichText::new("agent ╲ model").monospace().strong().color(theme.text));
            for m in &grid.models {
                ui.label(RichText::new(m).monospace().strong().color(theme.text));
            }
            ui.end_row();

            for a in &grid.agents {
                ui.label(RichText::new(a).strong().color(theme.text));
                for m in &grid.models {
                    match grid.cell(a, m) {
                        Some(c) => {
                            // Decide marker and colour together so they cannot disagree.
                            let (crown, col) = if !c.ok {
                                ("✗ ", RED)
                            } else if c.rank == 1 {
                                ("🥇 ", AMBER)
                            } else {
                                ("", theme.heat(c.score.clamp(0.0, 1.0) as f32))
                            };
                            ui.label(
                                RichText::new(format!("{crown}{:.3} #{}", c.score, c.rank))
                                    .color(col),
                            )
                            .on_hover_text(format!(
                                "{a}/{m}\nscore {:.3} · {:.1} tok/s · {:.0}ms\n${:.4} · {} mcp · {} task(s){}",
                                c.score,
                                c.tokens_per_s,
                                c.latency_ms,
                                c.cost_usd,
                                c.mcp_tool_calls,
                                c.task_count,
                                if c.ok { "" } else { "\n(failed cell)" },
                            ));
                        }
                        None => {
                            // Keep the column occupied so later cells stay aligned.
                            ui.weak("·");
                        }
                    }
                }
                ui.end_row();
            }
        });
    if let Some(w) = grid.winner() {
        ui.add_space(2.0);
        ui.label(
            RichText::new(format!("🥇 winner: {}/{} (score {:.3})", w.agent, w.model, w.score))
                .color(AMBER),
        );
    }
}
/// Display form of `run_id`; delegates to `agent_model_runs::short_run`.
fn short(run_id: &str) -> String {
crate::warehouse::agent_model_runs::short_run(run_id)
}