use crate::cache::Cache;
use crate::config::Config;
use crate::models::{MetricValue, ModelScore, SourceResult, SourceStatus};
use crate::sources::Source;
use anyhow::{Context, Result};
use chrono::{DateTime, Utc};
use std::collections::HashMap;
use std::process::Command;
/// Score source for the SWE-rebench leaderboard.
///
/// Stateless marker type: all behavior lives in its [`Source`] implementation, which reports
/// itself as `"swe-rebench"` and obtains scores either from the cache or by driving the
/// configured `agent-browser` binary against `https://swe-rebench.com/`.
pub struct SweRebench;
impl Source for SweRebench {
fn name(&self) -> &str {
"swe-rebench"
}
fn fetch(&self, config: &Config, cache: &Cache) -> Result<SourceResult> {
if let Some((fetched_at, cached_data)) = cache.get("swe-rebench") {
return Ok(self.parse_cached(&cached_data, Some(fetched_at), SourceStatus::Cached));
}
let agent_browser = config.agent_browser_path();
if let Err(err) = run_agent_browser(agent_browser, &["open", "https://swe-rebench.com/"]) {
return Ok(map_command_error(self.name(), "open", err));
}
let _ = run_agent_browser(agent_browser, &["wait", "2000"]);
let page_text = match run_agent_browser(agent_browser, &["snapshot"]) {
Ok(text) => text,
Err(err) => return Ok(map_command_error(self.name(), "snapshot", err)),
};
let mut parsed = parse_scores_from_text(&page_text);
if parsed.is_empty() {
return Ok(SourceResult {
source: self.name().into(),
fetched_at: Some(Utc::now()),
status: SourceStatus::Error(
"Failed to parse any model scores from SWE-rebench page output".into(),
),
scores: vec![],
});
}
parsed.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
let cached_rows: Vec<serde_json::Value> = parsed
.iter()
.map(|(source_model_name, score)| {
serde_json::json!({
"source_model_name": source_model_name,
"score": score,
})
})
.collect();
let cache_value = serde_json::json!({ "scores": cached_rows });
cache.set("swe-rebench", &cache_value)?;
Ok(self.parse_cached(&cache_value, Some(Utc::now()), SourceStatus::Ok))
}
}
impl SweRebench {
    /// Converts a JSON payload of scores into a ranked [`SourceResult`].
    ///
    /// `data` is read as `{"scores": [{"source_model_name": <string>, "score": <number>}, ...]}`.
    /// A missing or non-array `"scores"` value yields no scores, and entries without a string
    /// name or a numeric score are skipped. Rows are ordered by descending score and receive
    /// 1-based ranks; each row exposes `resolve_rate` and `rank` metrics.
    ///
    /// `fetched_at` and `status` are passed through to the result unchanged.
    fn parse_cached(
        &self,
        data: &serde_json::Value,
        fetched_at: Option<DateTime<Utc>>,
        status: SourceStatus,
    ) -> SourceResult {
        let mut ranked: Vec<(String, f64)> = Vec::new();
        if let Some(entries) = data.get("scores").and_then(|value| value.as_array()) {
            for entry in entries {
                let name = entry
                    .get("source_model_name")
                    .and_then(|value| value.as_str());
                let score = entry.get("score").and_then(|value| value.as_f64());
                if let (Some(name), Some(score)) = (name, score) {
                    ranked.push((name.to_owned(), score));
                }
            }
        }

        // Best score first; incomparable pairs are treated as equal.
        ranked.sort_by(|lhs, rhs| rhs.1.partial_cmp(&lhs.1).unwrap_or(std::cmp::Ordering::Equal));

        let mut scores = Vec::with_capacity(ranked.len());
        for (position, (source_model_name, score)) in ranked.into_iter().enumerate() {
            let rank = (position + 1) as u32;

            let mut metrics = HashMap::new();
            metrics.insert("resolve_rate".into(), MetricValue::Float(score));
            metrics.insert("rank".into(), MetricValue::Int(i64::from(rank)));

            // Derive the normalized name before the raw name is moved into the struct.
            let model = normalize_model_name(&source_model_name);
            scores.push(ModelScore {
                model,
                source_model_name,
                metrics,
                rank: Some(rank),
            });
        }

        SourceResult {
            source: self.name().into(),
            fetched_at,
            status,
            scores,
        }
    }
}
/// Runs the `agent-browser` binary with `args` and returns its standard output.
///
/// Output is decoded lossily as UTF-8.
///
/// # Errors
///
/// * The process could not be started; the underlying I/O error is kept as the cause and
///   wrapped with the attempted command line.
/// * The process exited unsuccessfully; the message carries trimmed stderr, or trimmed stdout
///   when stderr is empty, or the exit status when both are empty.
fn run_agent_browser(agent_browser_path: &str, args: &[&str]) -> Result<String> {
    let output = Command::new(agent_browser_path)
        .args(args)
        .output()
        .with_context(|| {
            format!(
                "Failed to execute {} {}",
                agent_browser_path,
                args.join(" ")
            )
        })?;

    if output.status.success() {
        return Ok(String::from_utf8_lossy(&output.stdout).into_owned());
    }

    // Pick the most informative non-empty description of the failure.
    let err_text = String::from_utf8_lossy(&output.stderr).trim().to_owned();
    let out_text = String::from_utf8_lossy(&output.stdout).trim().to_owned();
    let details = match (err_text.is_empty(), out_text.is_empty()) {
        (false, _) => err_text,
        (true, false) => out_text,
        (true, true) => format!("Exit status: {}", output.status),
    };

    Err(anyhow::anyhow!(
        "agent-browser {} failed: {}",
        args.join(" "),
        details
    ))
}
fn map_command_error(source: &str, step: &str, err: anyhow::Error) -> SourceResult {
let unavailable = err
.root_cause()
.downcast_ref::<std::io::Error>()
.map(|io_err| io_err.kind() == std::io::ErrorKind::NotFound)
.unwrap_or(false);
if unavailable {
SourceResult {
source: source.into(),
fetched_at: None,
status: SourceStatus::Unavailable,
scores: vec![],
}
} else {
SourceResult {
source: source.into(),
fetched_at: None,
status: SourceStatus::Error(format!("SWE-rebench scrape failed at {}: {}", step, err)),
scores: vec![],
}
}
}
/// Extracts `(model name, score)` pairs from an `agent-browser` snapshot of the leaderboard.
///
/// A scoring row is a line that (after trimming) starts with `- row "` and contains `%`.
/// The `- cell "..."` lines that follow it, up to the next `- row ` line, are its cells:
/// the second cell is the model name and the third is the percentage.
///
/// Guarantees:
/// * rows are returned in the order they appear in `text` (deterministic, so a stable sort
///   by score keeps tied models in page order);
/// * when a model name appears more than once, only its first valid row is kept;
/// * rows with fewer than three cells, a header-like name (`Model` or no ASCII letters),
///   or a score that is not a finite number are skipped.
fn parse_scores_from_text(text: &str) -> Vec<(String, f64)> {
    // First pass: group cell values under the scoring row they belong to.
    let mut rows: Vec<Vec<String>> = Vec::new();
    let mut current: Option<Vec<String>> = None;
    for line in text.lines().map(str::trim) {
        if line.starts_with("- row ") {
            // Any row line closes the row being collected, whether or not it is a scoring row.
            rows.extend(current.take());
            if line.starts_with("- row \"") && line.contains('%') {
                current = Some(Vec::new());
            }
        } else if line.starts_with("- cell \"") {
            // Cells outside a scoring row (e.g. under the header row) are ignored.
            if let Some(cells) = current.as_mut() {
                cells.extend(extract_cell_value(line));
            }
        }
    }
    rows.extend(current);

    // Second pass: validate each row and keep the first occurrence of every model name.
    let mut seen = std::collections::HashSet::new();
    rows.iter()
        .filter_map(|cells| parse_score_row(cells))
        .filter(|(name, _)| seen.insert(*name))
        .map(|(name, score)| (name.to_owned(), score))
        .collect()
}

/// Interprets the cells of one leaderboard row as `(model name, score)`.
///
/// Returns `None` when there are fewer than three cells, when the name is the `Model` header
/// or contains no ASCII letter, or when the third cell is not a finite number once
/// surrounding whitespace and trailing `%` signs are removed.
fn parse_score_row(cells: &[String]) -> Option<(&str, f64)> {
    let [_, name, raw_score, ..] = cells else {
        return None;
    };
    let name = name.as_str();
    if name == "Model" || !name.chars().any(|c| c.is_ascii_alphabetic()) {
        return None;
    }
    let score = raw_score
        .trim()
        .trim_end_matches('%')
        .trim_end()
        .parse::<f64>()
        .ok()?;
    // `f64::from_str` accepts "NaN" and "inf"; neither is a meaningful resolve rate.
    score.is_finite().then_some((name, score))
}

/// Returns the text between the first pair of double quotes in `line`.
///
/// Yields `None` when `line` has no opening quote or no closing quote after it.
fn extract_cell_value(line: &str) -> Option<String> {
    let (_, after_open) = line.split_once('"')?;
    let (value, _) = after_open.split_once('"')?;
    Some(value.to_string())
}
/// Produces the canonical form of a model name: lowercased, with every space and
/// underscore replaced by a hyphen.
fn normalize_model_name(name: &str) -> String {
    name.to_lowercase()
        .chars()
        .map(|ch| if matches!(ch, ' ' | '_') { '-' } else { ch })
        .collect()
}