use serde::{Deserialize, Serialize};
/// A single fixture record.
///
/// [`Fixture::parse_jsonl`] deserializes one `Issue` from each non-blank JSON line.
/// Every field is an owned `String`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct Issue {
/// Identifier of this record.
/// NOTE(review): presumably pairs with `Outcome::instance_id` — confirm against callers.
pub instance_id: String,
/// Repository the issue belongs to (the unit tests use `example/repo`).
/// NOTE(review): the exact format is not established in this file.
pub repo: String,
/// Commit reference. NOTE(review): presumably the revision the patches apply to — confirm.
pub base_commit: String,
/// Free-form text describing the problem.
pub problem_statement: String,
/// Patch text (the unit tests use a `diff --git` snippet).
/// NOTE(review): presumably the tests that validate a fix — confirm.
pub test_patch: String,
/// Patch text (the unit tests use a `diff --git` snippet).
/// NOTE(review): presumably the reference fix — confirm.
pub gold_patch: String,
}
/// Result record for one instance, aggregated by [`Fixture::summarize`].
///
/// Round-trips through JSON via serde (see the unit tests).
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct Outcome {
/// Identifier of the instance this outcome belongs to.
/// NOTE(review): presumably matches `Issue::instance_id` — confirm against callers.
pub instance_id: String,
/// Whether the instance passed; `Fixture::summarize` counts these for the pass rate.
pub passed: bool,
/// Elapsed time, in milliseconds per the field name; `Fixture::summarize` reports a
/// median over these values.
pub wall_clock_ms: u64,
/// Number of tool calls. NOTE(review): not read anywhere in this file.
pub tool_calls: u32,
/// Optional error text. NOTE(review): presumably set when the run failed — confirm;
/// it is not read anywhere in this file.
pub error: Option<String>,
}
/// An ordered collection of [`Issue`]s.
///
/// Unlike `Issue` and `Outcome`, this type does not derive `Serialize`/`Deserialize`;
/// in this file it is built by `Fixture::parse_jsonl`, by `Fixture::take`, or directly
/// from a `Vec<Issue>`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Fixture {
/// The issues, in the order they were parsed or supplied.
pub issues: Vec<Issue>,
}
impl Fixture {
    /// Parses newline-delimited JSON into a fixture, one [`Issue`] per non-blank line.
    ///
    /// Every line is trimmed before it is parsed. Lines that are empty after trimming
    /// are skipped, but they still count towards the line numbers used in errors.
    ///
    /// # Errors
    ///
    /// Returns `"line N: <serde_json error>"` for the first line that fails to
    /// deserialize, where `N` is the 1-based line number within `raw`.
    pub fn parse_jsonl(raw: &str) -> Result<Self, String> {
        let issues = raw
            .lines()
            .enumerate()
            // Keep the original index so skipped blank lines do not shift line numbers.
            .map(|(i, line)| (i, line.trim()))
            .filter(|(_, trimmed)| !trimmed.is_empty())
            .map(|(i, trimmed)| {
                serde_json::from_str::<Issue>(trimmed)
                    .map_err(|e| format!("line {}: {e}", i + 1))
            })
            // Collecting into `Result` stops at the first failing line.
            .collect::<Result<Vec<_>, _>>()?;
        Ok(Self { issues })
    }

    /// Returns a new fixture holding clones of the first `n` issues.
    ///
    /// If `n` exceeds the number of issues, all of them are returned.
    #[must_use]
    pub fn take(&self, n: usize) -> Self {
        // Clamp so the slice below can never go out of bounds.
        let end = n.min(self.issues.len());
        Self {
            issues: self.issues[..end].to_vec(),
        }
    }

    /// Renders a one-line summary of `outcomes`.
    ///
    /// The result is `"no outcomes"` for an empty slice; otherwise it has the form
    /// `passed=P/T (PCT%) median_ms=M`, where:
    ///
    /// * `PCT` is the pass percentage using integer division (rounded down);
    /// * `M` is the element at index `T / 2` of the sorted `wall_clock_ms` values,
    ///   i.e. the upper of the two middle values when `T` is even.
    #[must_use]
    pub fn summarize(outcomes: &[Outcome]) -> String {
        if outcomes.is_empty() {
            return String::from("no outcomes");
        }
        let total = outcomes.len();
        let passed = outcomes.iter().filter(|outcome| outcome.passed).count();
        let pct = (passed * 100) / total;

        let mut durations: Vec<u64> = outcomes
            .iter()
            .map(|outcome| outcome.wall_clock_ms)
            .collect();
        durations.sort_unstable();
        // `total` is non-zero here, so `total / 2` is always a valid index.
        let median_ms = durations[total / 2];

        format!("passed={passed}/{total} ({pct}%) median_ms={median_ms}")
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an `Issue` with the given id and fixed placeholder values for the rest.
    fn sample_issue(id: &str) -> Issue {
        let patch = "diff --git a/x b/x\n";
        Issue {
            instance_id: id.to_owned(),
            repo: String::from("example/repo"),
            base_commit: String::from("deadbeef"),
            problem_statement: String::from("fix the thing"),
            test_patch: patch.to_owned(),
            gold_patch: patch.to_owned(),
        }
    }

    /// Builds an `Outcome` with five tool calls and no error.
    fn outcome(id: &str, passed: bool, ms: u64) -> Outcome {
        Outcome {
            instance_id: id.to_owned(),
            passed,
            wall_clock_ms: ms,
            tool_calls: 5,
            error: None,
        }
    }

    /// Two JSON lines yield two issues, in input order.
    #[test]
    fn parse_jsonl_reads_multiple_issues() {
        let issue1 = serde_json::to_string(&sample_issue("a-1")).unwrap();
        let issue2 = serde_json::to_string(&sample_issue("b-2")).unwrap();
        let raw = format!("{issue1}\n{issue2}\n");

        let fx = Fixture::parse_jsonl(&raw).unwrap();

        let ids: Vec<&str> = fx
            .issues
            .iter()
            .map(|issue| issue.instance_id.as_str())
            .collect();
        assert_eq!(ids, ["a-1", "b-2"]);
    }

    /// Blank lines around a record are ignored.
    #[test]
    fn parse_jsonl_skips_blank_lines() {
        let issue1 = serde_json::to_string(&sample_issue("x")).unwrap();
        let raw = format!("\n\n{issue1}\n\n");

        let parsed = Fixture::parse_jsonl(&raw).unwrap();

        assert_eq!(parsed.issues.len(), 1);
    }

    /// A malformed line is reported with its 1-based line number.
    #[test]
    fn parse_jsonl_errors_on_bad_line_with_index() {
        let issue1 = serde_json::to_string(&sample_issue("x")).unwrap();
        let raw = format!("{issue1}\nnot json\n");

        let err = Fixture::parse_jsonl(&raw).unwrap_err();

        assert!(err.contains("line 2"), "got: {err}");
    }

    /// `take(3)` keeps exactly the first three issues of ten.
    #[test]
    fn take_returns_first_n_issues() {
        let fx = Fixture {
            issues: (0..10).map(|i| sample_issue(&format!("id-{i}"))).collect(),
        };

        let s = fx.take(3);

        assert_eq!(s.issues.len(), 3);
        let first = s.issues.first().map(|issue| issue.instance_id.as_str());
        let last = s.issues.last().map(|issue| issue.instance_id.as_str());
        assert_eq!(first, Some("id-0"));
        assert_eq!(last, Some("id-2"));
    }

    /// Four outcomes with three passes: 3/4, 75%, and the upper-middle duration.
    #[test]
    fn summarize_reports_pass_rate_and_median() {
        let outcomes = [
            outcome("a", true, 1000),
            outcome("b", true, 2000),
            outcome("c", false, 3000),
            outcome("d", true, 4000),
        ];

        let s = Fixture::summarize(&outcomes);

        for needle in ["3/4", "75%", "3000"] {
            assert!(s.contains(needle));
        }
    }

    /// An empty slice produces the fixed placeholder text.
    #[test]
    fn summarize_handles_empty_vec() {
        let none: [Outcome; 0] = [];
        assert_eq!(Fixture::summarize(&none), "no outcomes");
    }

    /// Serializing and deserializing an `Outcome` gives back an equal value.
    #[test]
    fn outcome_round_trips_through_json() {
        let original = outcome("astropy-7166", true, 12_500);
        let encoded = serde_json::to_string(&original).unwrap();
        let decoded: Outcome = serde_json::from_str(&encoded).unwrap();
        assert_eq!(original, decoded);
    }
}