use akribes_sdk::AkribesError;
use akribes_sdk::models::{
CreateBenchCaseRequest, CreateOrUpdateBenchRequest, PatchBenchCaseRequest, PromoteCaseEdits,
PromoteExecutionRequest, TriggerBenchRunRequest,
};
use akribes_sdk::{AkribesClient, BenchClient, BenchRunsClient};
use mockito::{Matcher, Server};
/// Builds an [`AkribesClient`] pointed at the mock server's base URL.
///
/// The builder is configured with project id `7`, name `"bench-test"` and id `"bench-id"`.
fn make_client(server: &Server) -> AkribesClient {
    let base_url = server.url();
    AkribesClient::builder(base_url)
        .project_id(7)
        .name("bench-test")
        .id("bench-id")
        .build()
}
/// Returns the bench client reached through project `7` of a fresh client
/// built by `make_client` for the given mock server.
fn bench(server: &Server) -> BenchClient {
// Kept as a single chain: whether the intermediate project handle borrows the
// client is not visible from this file.
make_client(server).project(7).bench()
}
/// Returns the bench-runs client of a fresh client built by `make_client`
/// for the given mock server.
fn runs(server: &Server) -> BenchRunsClient {
    let client = make_client(server);
    client.bench_runs()
}
/// `list_project_summaries` is served from `GET /projects/7/benches` and a fully
/// populated summary row deserialises into the expected fields.
#[tokio::test]
async fn list_project_summaries_maps_to_projects_benches_route() {
    let payload = r#"[{"bench_id":3,"script_id":11,"script_name":"summarise",
"judge_script_id":12,"judge_script_name":"judge_quality",
"judge_channel":"production","case_count":18,
"latest_run_id":42,"latest_run_status":"completed",
"latest_run_channel":"production","latest_run_workflow_version_id":99,
"latest_run_at":"2026-01-02T00:00:00Z","latest_run_mean_score":0.91,
"latest_run_cost_usd":0.42,"updated_at":"2026-01-02T00:00:00Z"}]"#;

    let mut server = Server::new_async().await;
    let _mock = server
        .mock("GET", "/projects/7/benches")
        .with_status(200)
        .with_header("content-type", "application/json")
        .with_body(payload)
        .create_async()
        .await;

    let client = bench(&server);
    let summaries = client.list_project_summaries().await.unwrap();
    assert_eq!(summaries.len(), 1);

    let row = &summaries[0];
    assert_eq!(row.bench_id, 3);
    assert_eq!(row.script_name, "summarise");
    assert_eq!(row.judge_script_id, Some(12));
    assert_eq!(row.judge_script_name.as_deref(), Some("judge_quality"));
    assert_eq!(row.case_count, 18);
    assert_eq!(row.latest_run_id, Some(42));
    assert_eq!(row.latest_run_status.as_deref(), Some("completed"));
    assert_eq!(row.latest_run_mean_score, Some(0.91));
    assert_eq!(row.latest_run_cost_usd, Some(0.42));
}
/// A summary row whose judge and latest-run fields are all JSON `null` must still
/// deserialise, leaving the optional fields empty.
#[tokio::test]
async fn list_project_summaries_tolerates_null_latest_run_block() {
    let payload = r#"[{"bench_id":4,"script_id":13,"script_name":"never_run",
"judge_script_id":null,"judge_script_name":null,
"judge_channel":"draft","case_count":0,
"latest_run_id":null,"latest_run_status":null,
"latest_run_channel":null,"latest_run_workflow_version_id":null,
"latest_run_at":null,"latest_run_mean_score":null,
"latest_run_cost_usd":null,"updated_at":"2026-01-02T00:00:00Z"}]"#;

    let mut server = Server::new_async().await;
    let _mock = server
        .mock("GET", "/projects/7/benches")
        .with_status(200)
        .with_header("content-type", "application/json")
        .with_body(payload)
        .create_async()
        .await;

    let client = bench(&server);
    let summaries = client.list_project_summaries().await.unwrap();
    assert_eq!(summaries.len(), 1);

    let row = &summaries[0];
    assert_eq!(row.script_name, "never_run");
    assert_eq!(row.case_count, 0);
    assert!(row.latest_run_id.is_none());
    assert!(row.judge_script_id.is_none());
    assert_eq!(row.judge_channel, "draft");
}
/// A 404 from `GET /projects/7/benches` is reported as an empty list rather than
/// an error.
#[tokio::test]
async fn list_project_summaries_404_is_empty() {
    let mut server = Server::new_async().await;
    let mock = server
        .mock("GET", "/projects/7/benches")
        .with_status(404)
        .create_async()
        .await;

    let client = bench(&server);
    let summaries = client.list_project_summaries().await.unwrap();
    assert!(summaries.is_empty());

    // An empty result alone does not prove the request was sent; require that
    // the mocked route was hit exactly once.
    mock.assert_async().await;
}
/// `get` reads `GET /projects/7/scripts/summarise/bench` and returns the
/// deserialised bench when the server answers 200.
#[tokio::test]
async fn get_bench_hit() {
    let payload = r#"{"id":3,"script_id":11,"judge_script_id":12,"judge_channel":"production",
"config":{"k":"v"},"created_at":"2026-01-01T00:00:00Z",
"updated_at":"2026-01-02T00:00:00Z"}"#;

    let mut server = Server::new_async().await;
    let _mock = server
        .mock("GET", "/projects/7/scripts/summarise/bench")
        .with_status(200)
        .with_header("content-type", "application/json")
        .with_body(payload)
        .create_async()
        .await;

    let client = bench(&server);
    let found = client
        .get("summarise")
        .await
        .unwrap()
        .expect("bench present");
    assert_eq!(found.id, 3);
    assert_eq!(found.script_id, 11);
    assert_eq!(found.judge_script_id, Some(12));
    assert_eq!(found.judge_channel, "production");
    assert_eq!(found.config["k"], "v");
}
/// A 404 from `GET /projects/7/scripts/nope/bench` is reported as `None`.
#[tokio::test]
async fn get_bench_404_is_none() {
    let mut server = Server::new_async().await;
    let mock = server
        .mock("GET", "/projects/7/scripts/nope/bench")
        .with_status(404)
        .create_async()
        .await;

    let client = bench(&server);
    let found = client.get("nope").await.unwrap();
    assert!(found.is_none());

    // `None` alone does not prove the request was sent; require that the
    // mocked route was hit exactly once.
    mock.assert_async().await;
}
/// With only `judge_script_id` set, the POST body must be exactly
/// `{"judge_script_id": 12}`; the `None` fields are expected to be left out.
#[tokio::test]
async fn create_or_update_bench_sends_only_set_fields() {
    let request = CreateOrUpdateBenchRequest {
        judge_script_id: Some(12),
        judge_channel: None,
        config: None,
    };
    let expected_body = serde_json::json!({"judge_script_id": 12});
    let payload = r#"{"id":3,"script_id":11,"judge_script_id":12,"judge_channel":"draft",
"config":{},"created_at":"2026-01-01T00:00:00Z",
"updated_at":"2026-01-01T00:00:00Z"}"#;

    let mut server = Server::new_async().await;
    let _mock = server
        .mock("POST", "/projects/7/scripts/summarise/bench")
        .match_body(Matcher::Json(expected_body))
        .with_status(200)
        .with_header("content-type", "application/json")
        .with_body(payload)
        .create_async()
        .await;

    let client = bench(&server);
    let saved = client
        .create_or_update("summarise", &request)
        .await
        .unwrap();
    assert_eq!(saved.judge_channel, "draft");
}
/// `delete` returns `true` when `DELETE /projects/7/scripts/summarise/bench`
/// answers 200.
#[tokio::test]
async fn delete_bench_true_on_200() {
    let mut server = Server::new_async().await;
    let _mock = server
        .mock("DELETE", "/projects/7/scripts/summarise/bench")
        .with_status(200)
        .with_header("content-type", "application/json")
        .with_body(r#"{"deleted":true}"#)
        .create_async()
        .await;

    let client = bench(&server);
    let deleted = client.delete("summarise").await.unwrap();
    assert!(deleted);
}
/// `delete` returns `false` when `DELETE /projects/7/scripts/gone/bench`
/// answers 404.
#[tokio::test]
async fn delete_bench_false_on_404() {
    let mut server = Server::new_async().await;
    let mock = server
        .mock("DELETE", "/projects/7/scripts/gone/bench")
        .with_status(404)
        .create_async()
        .await;

    let client = bench(&server);
    let deleted = client.delete("gone").await.unwrap();
    assert!(!deleted);

    // `false` alone does not prove the DELETE was sent; require that the
    // mocked route was hit exactly once.
    mock.assert_async().await;
}
/// `get_signature` is served from the `/signature` route of the script (not the
/// `/bench` route) and returns the JSON body as-is.
#[tokio::test]
async fn get_signature_maps_to_signature_route_not_bench() {
    let payload = r#"{"inputs":[{"name":"q","type":"String"}]}"#;

    let mut server = Server::new_async().await;
    let _mock = server
        .mock("GET", "/projects/7/scripts/summarise/signature")
        .with_status(200)
        .with_header("content-type", "application/json")
        .with_body(payload)
        .create_async()
        .await;

    let client = bench(&server);
    let signature = client.get_signature("summarise").await.unwrap();
    assert_eq!(signature["inputs"][0]["name"], "q");
}
/// A 404 from `GET /projects/7/scripts/x/signature` is reported as an empty
/// JSON object.
#[tokio::test]
async fn get_signature_404_yields_empty_object() {
    let mut server = Server::new_async().await;
    let mock = server
        .mock("GET", "/projects/7/scripts/x/signature")
        .with_status(404)
        .create_async()
        .await;

    let client = bench(&server);
    let signature = client.get_signature("x").await.unwrap();
    assert_eq!(signature, serde_json::json!({}));

    // The `{}` fallback alone does not prove the request was sent; require
    // that the mocked route was hit exactly once.
    mock.assert_async().await;
}
/// `contract_preview` with a channel sends both `judge=12` and
/// `channel=production` as query parameters.
#[tokio::test]
async fn contract_preview_sends_judge_and_channel_query() {
    let expected_query = Matcher::AllOf(vec![
        Matcher::UrlEncoded("judge".into(), "12".into()),
        Matcher::UrlEncoded("channel".into(), "production".into()),
    ]);

    let mut server = Server::new_async().await;
    let _mock = server
        .mock("GET", "/projects/7/scripts/summarise/bench/contract-preview")
        .match_query(expected_query)
        .with_status(200)
        .with_header("content-type", "application/json")
        .with_body(r#"{"breaks":[]}"#)
        .create_async()
        .await;

    let client = bench(&server);
    let preview = client
        .contract_preview("summarise", 12, Some("production"))
        .await
        .unwrap();
    assert_eq!(preview["breaks"], serde_json::json!([]));
}
/// `contract_preview` without a channel sends a query string that is exactly
/// `judge=12`.
#[tokio::test]
async fn contract_preview_omits_channel_when_none() {
    let mut server = Server::new_async().await;
    let mock = server
        .mock("GET", "/projects/7/scripts/summarise/bench/contract-preview")
        .match_query(Matcher::Exact("judge=12".into()))
        .with_status(200)
        .with_header("content-type", "application/json")
        .with_body(r#"{"breaks":[]}"#)
        .create_async()
        .await;

    let client = bench(&server);
    client
        .contract_preview("summarise", 12, None)
        .await
        .unwrap();

    // The response is not inspected, so the only evidence that the exact
    // query was sent is the mock itself: require exactly one hit.
    mock.assert_async().await;
}
/// A 404 from `GET /projects/7/scripts/summarise/bench/cases` is reported as an
/// empty list.
#[tokio::test]
async fn list_cases_404_is_empty() {
    let mut server = Server::new_async().await;
    let mock = server
        .mock("GET", "/projects/7/scripts/summarise/bench/cases")
        .with_status(404)
        .create_async()
        .await;

    let client = bench(&server);
    let cases = client.list_cases("summarise").await.unwrap();
    assert!(cases.is_empty());

    // An empty result alone does not prove the request was sent; require that
    // the mocked route was hit exactly once.
    mock.assert_async().await;
}
/// `list_cases` deserialises a case row returned by
/// `GET /projects/7/scripts/summarise/bench/cases`.
#[tokio::test]
async fn list_cases_deserialises_rows() {
    let payload = r#"[{"id":"case_a","project_id":7,"script_name":"summarise","bench_id":3,
"kind":"case","frozen":true,"case_name":"happy",
"inputs":{"q":"hi"},"expected_output":{"a":1},
"created_at":"2026-01-01T00:00:00Z"}]"#;

    let mut server = Server::new_async().await;
    let _mock = server
        .mock("GET", "/projects/7/scripts/summarise/bench/cases")
        .with_status(200)
        .with_header("content-type", "application/json")
        .with_body(payload)
        .create_async()
        .await;

    let client = bench(&server);
    let listed = client.list_cases("summarise").await.unwrap();
    assert_eq!(listed.len(), 1);

    let first = &listed[0];
    assert_eq!(first.id, "case_a");
    assert_eq!(first.case_name.as_deref(), Some("happy"));
    assert!(first.frozen);
    assert_eq!(first.inputs.as_ref().unwrap()["q"], "hi");
}
/// `create_case` posts `inputs` plus the optional fields that are set; the body
/// must equal the expected JSON exactly.
#[tokio::test]
async fn create_case_sends_inputs_and_optional_fields() {
    let request = CreateBenchCaseRequest {
        inputs: serde_json::json!({"q": "hi"}),
        expected_output: Some(serde_json::json!({"a": 1})),
        ground_truth: None,
        name: None,
    };
    let expected_body = serde_json::json!({
        "inputs": {"q": "hi"},
        "expected_output": {"a": 1},
    });
    let payload = r#"{"id":"case_new","project_id":7,"script_name":"summarise","kind":"case",
"frozen":true,"created_at":"2026-01-01T00:00:00Z"}"#;

    let mut server = Server::new_async().await;
    let _mock = server
        .mock("POST", "/projects/7/scripts/summarise/bench/cases")
        .match_body(Matcher::Json(expected_body))
        .with_status(200)
        .with_header("content-type", "application/json")
        .with_body(payload)
        .create_async()
        .await;

    let client = bench(&server);
    let created = client.create_case("summarise", &request).await.unwrap();
    assert_eq!(created.id, "case_new");
}
/// A 404 from the contract-drift route is reported as a report with no drifted
/// cases and an empty summary.
#[tokio::test]
async fn case_contract_drift_404_yields_empty_report() {
    let mut server = Server::new_async().await;
    let mock = server
        .mock("GET", "/projects/7/scripts/summarise/bench/cases/contract-drift")
        .with_status(404)
        .create_async()
        .await;

    let client = bench(&server);
    let report = client.case_contract_drift("summarise").await.unwrap();
    assert!(report.drifted.is_empty());
    assert_eq!(report.summary, "");

    // An empty report alone does not prove the request was sent; require that
    // the mocked route was hit exactly once.
    mock.assert_async().await;
}
/// `case_contract_drift` deserialises the drifted rows and the report metadata
/// returned by the contract-drift route.
#[tokio::test]
async fn case_contract_drift_parses_drifted_rows() {
    let payload = r#"{"drifted":[{"case_id":"case_a","label":"happy",
"what_broke":"outputs.score removed"}],
"script_version_id":99,"published_at":"2026-01-01T00:00:00Z",
"published_by":"alice","summary":"1 case drifted"}"#;

    let mut server = Server::new_async().await;
    let _mock = server
        .mock("GET", "/projects/7/scripts/summarise/bench/cases/contract-drift")
        .with_status(200)
        .with_header("content-type", "application/json")
        .with_body(payload)
        .create_async()
        .await;

    let client = bench(&server);
    let drift = client.case_contract_drift("summarise").await.unwrap();
    assert_eq!(drift.drifted.len(), 1);

    let entry = &drift.drifted[0];
    assert_eq!(entry.case_id, "case_a");
    assert_eq!(entry.what_broke, "outputs.score removed");
    assert_eq!(drift.script_version_id, Some(99));
    assert_eq!(drift.summary, "1 case drifted");
}
/// `list_runs` with pagination sends `limit=5` and `offset=10` as query
/// parameters and deserialises the returned run row.
#[tokio::test]
async fn list_runs_sends_limit_offset_query() {
    let expected_query = Matcher::AllOf(vec![
        Matcher::UrlEncoded("limit".into(), "5".into()),
        Matcher::UrlEncoded("offset".into(), "10".into()),
    ]);
    let payload = r#"[{"id":1,"bench_id":3,"channel":"production","workflow_version_id":99,
"judge_version_id":100,"status":"completed","triggered_at":"2026-01-01T00:00:00Z",
"mean_headline_score":0.91,"ok_cases":7}]"#;

    let mut server = Server::new_async().await;
    let _mock = server
        .mock("GET", "/projects/7/scripts/summarise/bench/runs")
        .match_query(expected_query)
        .with_status(200)
        .with_header("content-type", "application/json")
        .with_body(payload)
        .create_async()
        .await;

    let client = bench(&server);
    let listed = client
        .list_runs("summarise", Some(5), Some(10))
        .await
        .unwrap();
    assert_eq!(listed.len(), 1);

    let first = &listed[0];
    assert_eq!(first.id, 1);
    assert_eq!(first.mean_headline_score, Some(0.91));
    assert_eq!(first.ok_cases, Some(7));
}
/// `list_runs` without pagination must be matched by a mock that requires the
/// query to be missing, and an empty JSON array yields an empty list.
#[tokio::test]
async fn list_runs_omits_query_when_no_pagination() {
    let mut server = Server::new_async().await;
    let _mock = server
        .mock("GET", "/projects/7/scripts/summarise/bench/runs")
        .match_query(Matcher::Missing)
        .with_status(200)
        .with_header("content-type", "application/json")
        .with_body("[]")
        .create_async()
        .await;

    let client = bench(&server);
    let listed = client.list_runs("summarise", None, None).await.unwrap();
    assert!(listed.is_empty());
}
/// `trigger_run` posts the channel and the selected case ids; with `notes` unset
/// the body must equal the expected JSON exactly.
#[tokio::test]
async fn trigger_run_sends_channel_required_and_case_ids() {
    let request = TriggerBenchRunRequest {
        channel: "production".into(),
        notes: None,
        case_ids: Some(vec!["case_a".into(), "case_b".into()]),
    };
    let expected_body = serde_json::json!({
        "channel": "production",
        "case_ids": ["case_a", "case_b"],
    });
    let payload = r#"{"id":42,"bench_id":3,"channel":"production","workflow_version_id":99,
"judge_version_id":100,"status":"pending","triggered_at":"2026-01-01T00:00:00Z"}"#;

    let mut server = Server::new_async().await;
    let _mock = server
        .mock("POST", "/projects/7/scripts/summarise/bench/runs")
        .match_body(Matcher::Json(expected_body))
        .with_status(200)
        .with_header("content-type", "application/json")
        .with_body(payload)
        .create_async()
        .await;

    let client = bench(&server);
    let started = client.trigger_run("summarise", &request).await.unwrap();
    assert_eq!(started.id, 42);
    assert_eq!(started.status, "pending");
}
/// A 404 from `GET /bench-runs/42` is reported as `None`.
#[tokio::test]
async fn run_get_404_is_none() {
    let mut server = Server::new_async().await;
    let mock = server
        .mock("GET", "/bench-runs/42")
        .with_status(404)
        .create_async()
        .await;

    let client = runs(&server);
    let found = client.get(42).await.unwrap();
    assert!(found.is_none());

    // `None` alone does not prove the request was sent; require that the
    // mocked route was hit exactly once.
    mock.assert_async().await;
}
/// `delete` succeeds when `DELETE /bench-runs/42` answers 204 with no body.
#[tokio::test]
async fn run_delete_succeeds_on_204() {
    let mut server = Server::new_async().await;
    let mock = server
        .mock("DELETE", "/bench-runs/42")
        .with_status(204)
        .create_async()
        .await;

    let client = runs(&server);
    client.delete(42).await.unwrap();

    // The call returns nothing to inspect, so confirm through the mock that
    // the DELETE was sent exactly once.
    mock.assert_async().await;
}
/// `list_results` deserialises a result row returned by
/// `GET /bench-runs/42/results`.
#[tokio::test]
async fn list_results_deserialises() {
    let payload = r#"[{"id":1,"bench_run_id":42,"case_id":"case_a","headline_score":0.8,
"status":"ok","created_at":"2026-01-01T00:00:00Z"}]"#;

    let mut server = Server::new_async().await;
    let _mock = server
        .mock("GET", "/bench-runs/42/results")
        .with_status(200)
        .with_header("content-type", "application/json")
        .with_body(payload)
        .create_async()
        .await;

    let client = runs(&server);
    let listed = client.list_results(42).await.unwrap();
    assert_eq!(listed.len(), 1);

    let first = &listed[0];
    assert_eq!(first.case_id, "case_a");
    assert_eq!(first.headline_score, Some(0.8));
    assert_eq!(first.status, "ok");
}
/// `cancel` posts an empty JSON object to `/bench-runs/42/cancel` and returns the
/// run from the response.
#[tokio::test]
async fn cancel_posts_empty_body_and_returns_run() {
    let payload = r#"{"id":42,"bench_id":3,"channel":"production","workflow_version_id":99,
"judge_version_id":100,"status":"canceled","triggered_at":"2026-01-01T00:00:00Z"}"#;

    let mut server = Server::new_async().await;
    let _mock = server
        .mock("POST", "/bench-runs/42/cancel")
        .match_body(Matcher::Json(serde_json::json!({})))
        .with_status(200)
        .with_header("content-type", "application/json")
        .with_body(payload)
        .create_async()
        .await;

    let client = runs(&server);
    let canceled = client.cancel(42).await.unwrap();
    assert_eq!(canceled.status, "canceled");
}
/// `tag_session` sends a PATCH to `/bench-runs/42/tag-session` whose body carries
/// the MCP session id, and deserialises the acknowledgement.
#[tokio::test]
async fn tag_session_patches_with_mcp_session_id() {
    let expected_body = serde_json::json!({
        "mcp_session_id": "sess_abc"
    });
    let payload = r#"{"tagged":true,"run_id":42,"mcp_session_id":"sess_abc"}"#;

    let mut server = Server::new_async().await;
    let _mock = server
        .mock("PATCH", "/bench-runs/42/tag-session")
        .match_body(Matcher::Json(expected_body))
        .with_status(200)
        .with_header("content-type", "application/json")
        .with_body(payload)
        .create_async()
        .await;

    let client = runs(&server);
    let ack = client.tag_session(42, "sess_abc").await.unwrap();
    assert!(ack.tagged);
    assert_eq!(ack.run_id, 42);
    assert_eq!(ack.mcp_session_id, "sess_abc");
}
/// `promote_execution` posts the edits overlay and the name to
/// `/executions/exec_123/promote-to-case`; unset edit fields must not appear in
/// the body.
#[tokio::test]
async fn promote_execution_posts_edits_overlay() {
    let overlay = PromoteCaseEdits {
        inputs: None,
        expected_output: Some(serde_json::json!({"a": 2})),
        ground_truth: None,
    };
    let request = PromoteExecutionRequest {
        edits: Some(overlay),
        name: Some("T_1".into()),
    };
    let expected_body = serde_json::json!({
        "edits": {"expected_output": {"a": 2}},
        "name": "T_1",
    });
    let payload = r#"{"id":"case_p","project_id":7,"script_name":"summarise","kind":"case",
"frozen":true,"case_name":"T_1","created_at":"2026-01-01T00:00:00Z"}"#;

    let mut server = Server::new_async().await;
    let _mock = server
        .mock("POST", "/executions/exec_123/promote-to-case")
        .match_body(Matcher::Json(expected_body))
        .with_status(200)
        .with_header("content-type", "application/json")
        .with_body(payload)
        .create_async()
        .await;

    let client = runs(&server);
    let promoted = client
        .promote_execution("exec_123", &request)
        .await
        .unwrap();
    assert_eq!(promoted.id, "case_p");
    assert_eq!(promoted.case_name.as_deref(), Some("T_1"));
}
/// An execution id containing `/` must be percent-encoded in the request path
/// (`exec/123` becomes `exec%2F123`).
#[tokio::test]
async fn promote_execution_url_encodes_exec_id() {
    let payload = r#"{"id":"c","project_id":7,"script_name":"s","kind":"case","frozen":true,
"created_at":"2026-01-01T00:00:00Z"}"#;

    let mut server = Server::new_async().await;
    let mock = server
        .mock("POST", "/executions/exec%2F123/promote-to-case")
        .with_status(200)
        .with_header("content-type", "application/json")
        .with_body(payload)
        .create_async()
        .await;

    let client = runs(&server);
    client
        .promote_execution("exec/123", &PromoteExecutionRequest::default())
        .await
        .unwrap();

    // The returned case is not inspected, so confirm through the mock that the
    // encoded path was requested exactly once.
    mock.assert_async().await;
}
/// `compare` reads `GET /bench-runs/10/compare/20` and deserialises both the
/// aggregate block and the per-case rows.
#[tokio::test]
async fn compare_parses_aggregate_and_per_case() {
    let payload = r#"{"run_a_id":10,"run_b_id":20,
"aggregate":{"mean_score_delta":0.05,"cost_delta_usd":-0.1,
"n_regressed":1,"n_improved":3,"n_unchanged":2},
"per_case":[{"case_id":"case_a","case_label":"happy","score_a":0.8,
"score_b":0.9,"delta":0.1,"flag":"improved"}]}"#;

    let mut server = Server::new_async().await;
    let _mock = server
        .mock("GET", "/bench-runs/10/compare/20")
        .with_status(200)
        .with_header("content-type", "application/json")
        .with_body(payload)
        .create_async()
        .await;

    let client = runs(&server);
    let comparison = client.compare(10, 20).await.unwrap();
    assert_eq!(comparison.run_a_id, 10);
    assert_eq!(comparison.aggregate.n_improved, 3);
    assert_eq!(comparison.per_case.len(), 1);

    let first = &comparison.per_case[0];
    assert_eq!(first.flag, "improved");
    assert_eq!(first.delta, Some(0.1));
}
/// Unlike the lookup helpers, `compare` surfaces a 404 as
/// `AkribesError::HttpStatus` carrying the status code.
#[tokio::test]
async fn compare_404_surfaces_http_status() {
    let mut server = Server::new_async().await;
    let _mock = server
        .mock("GET", "/bench-runs/10/compare/20")
        .with_status(404)
        .create_async()
        .await;

    let client = runs(&server);
    let failure = client.compare(10, 20).await.unwrap_err();
    let status = match failure {
        AkribesError::HttpStatus { status, .. } => status,
        other => panic!("expected HttpStatus 404, got {other:?}"),
    };
    assert_eq!(status, 404);
}
/// `patch_case` sends only the fields that are set: with just `name` populated,
/// the PATCH body must be exactly `{"name": "renamed"}`.
#[tokio::test]
async fn patch_case_sends_sparse_update() {
    let request = PatchBenchCaseRequest {
        name: Some("renamed".into()),
        ..Default::default()
    };
    let expected_body = serde_json::json!({"name": "renamed"});
    let payload = r#"{"id":"case_a","project_id":7,"script_name":"s","kind":"case","frozen":true,
"case_name":"renamed","created_at":"2026-01-01T00:00:00Z"}"#;

    let mut server = Server::new_async().await;
    let _mock = server
        .mock("PATCH", "/cases/case_a")
        .match_body(Matcher::Json(expected_body))
        .with_status(200)
        .with_header("content-type", "application/json")
        .with_body(payload)
        .create_async()
        .await;

    let client = runs(&server);
    let patched = client.patch_case("case_a", &request).await.unwrap();
    assert_eq!(patched.case_name.as_deref(), Some("renamed"));
}
/// `delete_case` succeeds when `DELETE /cases/case_a` answers 200.
#[tokio::test]
async fn delete_case_succeeds() {
    let mut server = Server::new_async().await;
    let mock = server
        .mock("DELETE", "/cases/case_a")
        .with_status(200)
        .with_header("content-type", "application/json")
        .with_body(r#"{"deleted":true}"#)
        .create_async()
        .await;

    let client = runs(&server);
    client.delete_case("case_a").await.unwrap();

    // The result is not inspected, so confirm through the mock that the
    // DELETE was sent exactly once.
    mock.assert_async().await;
}
/// A 404 for `get_case` is reported as JSON `null`. The mock is registered on
/// `GET /executions/case_a`, the route this test expects the lookup to use.
#[tokio::test]
async fn get_case_404_yields_null_value() {
    let mut server = Server::new_async().await;
    let mock = server
        .mock("GET", "/executions/case_a")
        .with_status(404)
        .create_async()
        .await;

    let client = runs(&server);
    let value = client.get_case("case_a").await.unwrap();
    assert_eq!(value, serde_json::Value::Null);

    // `null` alone does not prove the request was sent; require that the
    // mocked route was hit exactly once.
    mock.assert_async().await;
}
/// A 404 from `GET /benches/5` is reported as `None`.
#[tokio::test]
async fn bench_by_id_404_is_none() {
    let mut server = Server::new_async().await;
    let mock = server
        .mock("GET", "/benches/5")
        .with_status(404)
        .create_async()
        .await;

    let client = runs(&server);
    let found = client.bench_by_id(5).await.unwrap();
    assert!(found.is_none());

    // `None` alone does not prove the request was sent; require that the
    // mocked route was hit exactly once.
    mock.assert_async().await;
}
/// `bench_by_id` returns the JSON body of `GET /benches/5` when the server
/// answers 200.
#[tokio::test]
async fn bench_by_id_returns_value() {
    let payload = r#"{"id":5,"project_id":7,"script_name":"summarise"}"#;

    let mut server = Server::new_async().await;
    let _mock = server
        .mock("GET", "/benches/5")
        .with_status(200)
        .with_header("content-type", "application/json")
        .with_body(payload)
        .create_async()
        .await;

    let client = runs(&server);
    let found = client.bench_by_id(5).await.unwrap();
    let value = found.unwrap();
    assert_eq!(value["script_name"], "summarise");
}
/// `mcp_session_cost` returns the JSON body of
/// `GET /mcp-sessions/sess_abc/cost`.
#[tokio::test]
async fn mcp_session_cost_returns_value() {
    let payload = r#"{"session_id":"sess_abc","total_cost_usd":1.25,"breakdown":[]}"#;

    let mut server = Server::new_async().await;
    let _mock = server
        .mock("GET", "/mcp-sessions/sess_abc/cost")
        .with_status(200)
        .with_header("content-type", "application/json")
        .with_body(payload)
        .create_async()
        .await;

    let client = runs(&server);
    let cost = client.mcp_session_cost("sess_abc").await.unwrap();
    assert_eq!(cost["total_cost_usd"], 1.25);
}