use crate::cache::Cache;
use crate::config::Config;
use crate::models::{MetricValue, ModelScore, SourceResult, SourceStatus};
use crate::sources::Source;
use anyhow::{Context, Result};
use chrono::Utc;
use std::collections::HashMap;
use std::time::Duration;
pub struct SweBench;
impl Source for SweBench {
fn name(&self) -> &str {
"swebench"
}
fn fetch(&self, _config: &Config, cache: &Cache) -> Result<SourceResult> {
if let Some((fetched_at, cached_data)) = cache.get("swebench") {
return Ok(SourceResult {
source: self.name().into(),
fetched_at: Some(fetched_at),
status: SourceStatus::Cached,
scores: parse_scores(&cached_data),
});
}
let url = "https://raw.githubusercontent.com/SWE-bench/swe-bench.github.io/master/data/leaderboards.json";
let client = reqwest::blocking::Client::builder()
.timeout(Duration::from_secs(30))
.build()
.context("Failed to build HTTP client")?;
let response = client
.get(url)
.send()
.context("Failed to fetch SWE-bench leaderboard data")?;
if !response.status().is_success() {
return Ok(SourceResult {
source: self.name().into(),
fetched_at: None,
status: SourceStatus::Error(format!("HTTP {}", response.status())),
scores: vec![],
});
}
let data: serde_json::Value = response.json().context("Failed to parse SWE-bench JSON")?;
cache.set("swebench", &data)?;
Ok(SourceResult {
source: self.name().into(),
fetched_at: Some(Utc::now()),
status: SourceStatus::Ok,
scores: parse_scores(&data),
})
}
}
fn parse_scores(data: &serde_json::Value) -> Vec<ModelScore> {
let mut scores = Vec::new();
let entries = data
.get("leaderboards")
.or_else(|| data.get("results"))
.and_then(|v| v.as_array())
.or_else(|| data.as_array());
let Some(entries) = entries else {
return scores;
};
for entry in entries {
if let Some(results) = entry.get("results").and_then(|v| v.as_array()) {
for result in results {
if let Some(score) = extract_model_score(result) {
scores.push(score);
}
}
} else {
if let Some(score) = extract_model_score(entry) {
scores.push(score);
}
}
}
scores.sort_by(|a, b| {
let a_rate = get_float(&a.metrics, "resolved_rate");
let b_rate = get_float(&b.metrics, "resolved_rate");
b_rate
.partial_cmp(&a_rate)
.unwrap_or(std::cmp::Ordering::Equal)
});
for (i, score) in scores.iter_mut().enumerate() {
score.rank = Some((i + 1) as u32);
}
scores
}
fn extract_model_score(result: &serde_json::Value) -> Option<ModelScore> {
let name = result.get("name").and_then(|v| v.as_str())?;
let mut metrics = HashMap::new();
if let Some(rate) = result.get("resolved").and_then(|v| v.as_f64()) {
metrics.insert("resolved_rate".into(), MetricValue::Float(rate));
}
if let Some(count) = result.get("resolved_count").and_then(|v| v.as_i64()) {
metrics.insert("resolved_count".into(), MetricValue::Int(count));
}
if let Some(date) = result.get("date").and_then(|v| v.as_str()) {
metrics.insert("date".into(), MetricValue::Text(date.to_string()));
}
Some(ModelScore {
model: name.to_lowercase().replace([' ', '_'], "-"),
source_model_name: name.to_string(),
metrics,
rank: None,
})
}
fn get_float(metrics: &HashMap<String, MetricValue>, key: &str) -> f64 {
match metrics.get(key) {
Some(MetricValue::Float(f)) => *f,
_ => 0.0,
}
}