use std::path::PathBuf;
use std::process::{Command, Output};
use serde_json::Value;
/// Path to the `skilltest` executable under test.
///
/// The location is baked in at compile time from the `CARGO_BIN_EXE_skilltest`
/// environment variable, so it always points at the binary built alongside
/// this test target.
fn skilltest() -> PathBuf {
    let exe: &str = env!("CARGO_BIN_EXE_skilltest");
    PathBuf::from(exe)
}
/// Directory holding the live-test fixtures.
///
/// Built by appending `../../tests/fixtures/live` to this crate's manifest
/// directory (`CARGO_MANIFEST_DIR`, resolved at compile time). The path is
/// returned as-is; it is not canonicalized or checked for existence.
fn live_fixtures() -> PathBuf {
    let mut dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
    dir.push("../../tests/fixtures/live");
    dir
}
/// Name or path of the `oneharness` binary handed to `skilltest`.
///
/// Reads `SKILLTEST_ONEHARNESS_BIN`; falls back to `"oneharness"` when the
/// variable is unset, not valid Unicode, or blank. Treating a blank value as
/// unset keeps an exported-but-empty variable from turning into an empty
/// command-line argument.
fn oneharness_bin() -> String {
    std::env::var("SKILLTEST_ONEHARNESS_BIN")
        .ok()
        .filter(|bin| !bin.trim().is_empty())
        .unwrap_or_else(|| "oneharness".into())
}
/// Platform identifier used for the live run.
///
/// Reads `SKILLTEST_LIVE_PLATFORM`; falls back to `"claude-code"` when the
/// variable is unset, not valid Unicode, or blank. Treating a blank value as
/// unset keeps an exported-but-empty variable from turning into an empty
/// command-line argument.
fn platform() -> String {
    std::env::var("SKILLTEST_LIVE_PLATFORM")
        .ok()
        .filter(|name| !name.trim().is_empty())
        .unwrap_or_else(|| "claude-code".into())
}
/// Model name used for the live run.
///
/// Reads `SKILLTEST_LIVE_MODEL`; falls back to `"haiku"` when the variable is
/// unset, not valid Unicode, or blank. Treating a blank value as unset keeps
/// an exported-but-empty variable from turning into an empty command-line
/// argument.
fn model() -> String {
    std::env::var("SKILLTEST_LIVE_MODEL")
        .ok()
        .filter(|name| !name.trim().is_empty())
        .unwrap_or_else(|| "haiku".into())
}
/// Runs `skilltest run` on one live fixture case and returns the captured
/// process output.
///
/// `case` is a file name resolved under the `cases` directory of the live
/// fixtures. The same model is passed as both `--model` and `--judge-model`,
/// and the same platform as both `--platform` and `--judge-harness`. Output is
/// requested in JSON format.
///
/// # Panics
///
/// Panics if the `skilltest` process cannot be spawned or waited on.
fn run_live(case: &str) -> Output {
    let case_path = live_fixtures().join("cases").join(case);
    let harness_bin = oneharness_bin();
    let target = platform();
    let model_name = model();

    let mut cmd = Command::new(skilltest());
    cmd.arg("run").arg(case_path);
    cmd.args(["--oneharness-bin", harness_bin.as_str()]);
    cmd.args(["--platform", target.as_str()]);
    cmd.args(["--model", model_name.as_str()]);
    cmd.args(["--judge-model", model_name.as_str()]);
    cmd.args(["--judge-harness", target.as_str()]);
    cmd.args(["--timeout", "150"]);
    cmd.args(["--format", "json"]);

    cmd.output().expect("skilltest run executes")
}
/// Parses the JSON report that a `skilltest` run wrote to stdout.
///
/// Exit codes 0 and 1 are both accepted as "the run completed"; any other
/// status (including termination without an exit code) fails the assertion
/// and prints the captured stderr.
///
/// # Panics
///
/// Panics if the exit code is not 0 or 1, or if stdout is not valid JSON.
fn report(output: &Output) -> Value {
    let exit_code = output.status.code();
    assert!(
        matches!(exit_code, Some(0 | 1)),
        "expected a completed run (exit 0/1), got {:?}; stderr: {}",
        exit_code,
        String::from_utf8_lossy(&output.stderr)
    );

    let parsed = serde_json::from_slice(&output.stdout);
    parsed.expect("stdout is a JSON report")
}
/// Live run of `pong.yaml`: the report must pass, the assistant's reply must
/// contain "pong" (case-insensitive), the first two evals must be of kind
/// `boolean` and `numeric`, and token usage must be reported both per run and
/// in the summary.
#[test]
#[ignore = "live: needs oneharness + a real harness; run with --ignored"]
fn live_respond_and_judge_boolean_and_numeric() {
    let out = run_live("pong.yaml");
    let report = report(&out);
    let run = &report["runs"][0];
    assert_eq!(
        report["passed"],
        Value::Bool(true),
        "expected pass; report: {report:#}"
    );

    // Assert the transcript shape explicitly so a schema change fails with the
    // full report attached instead of a context-free `unwrap` panic.
    let messages = &run["transcript"]["messages"];
    assert!(
        messages.is_array(),
        "expected transcript.messages to be an array; report: {report:#}"
    );
    // Join with newlines so "pong" cannot be matched across the boundary of
    // two separate assistant messages.
    let assistant = messages
        .as_array()
        .into_iter()
        .flatten()
        .filter(|m| m["role"] == "assistant")
        .map(|m| m["content"].as_str().unwrap_or("").to_lowercase())
        .collect::<Vec<_>>()
        .join("\n");
    assert!(assistant.contains("pong"), "assistant said: {assistant}");

    assert_eq!(
        run["evals"][0]["detail"]["kind"],
        "boolean",
        "unexpected kind for evals[0]; evals: {:#}",
        run["evals"]
    );
    assert_eq!(
        run["evals"][1]["detail"]["kind"],
        "numeric",
        "unexpected kind for evals[1]; evals: {:#}",
        run["evals"]
    );

    let usage = &run["usage"];
    assert!(usage.is_object(), "expected per-run usage; got {usage}");
    // A missing or non-integer token count is treated as zero and fails below.
    assert!(
        usage["input_tokens"].as_u64().unwrap_or(0) > 0,
        "expected non-zero input_tokens; got {usage}"
    );
    assert!(
        usage["output_tokens"].as_u64().unwrap_or(0) > 0,
        "expected non-zero output_tokens; got {usage}"
    );

    let summary_usage = &report["summary"]["usage"];
    assert!(
        summary_usage.is_object(),
        "expected summary.usage; got {summary_usage}"
    );
}
/// Live run of `multiturn.yaml`: the report must pass, record two turns, and
/// contain a transcript whose roles alternate user/assistant/user/assistant,
/// with per-run usage present.
#[test]
#[ignore = "live: needs oneharness + a real harness; run with --ignored"]
fn live_multi_turn_drives_simulated_user() {
    let out = run_live("multiturn.yaml");
    let report = report(&out);
    let run = &report["runs"][0];
    assert_eq!(
        report["passed"],
        Value::Bool(true),
        "expected pass; report: {report:#}"
    );
    assert_eq!(run["turns"], 2, "report: {report:#}");

    // Assert the transcript shape explicitly so a schema change fails with the
    // full report attached instead of a context-free `unwrap` panic.
    let messages = &run["transcript"]["messages"];
    assert!(
        messages.is_array(),
        "expected transcript.messages to be an array; report: {report:#}"
    );
    // A non-string role becomes a visible placeholder, so the comparison below
    // fails with a readable diff rather than panicking mid-iteration.
    let roles: Vec<&str> = messages
        .as_array()
        .into_iter()
        .flatten()
        .map(|m| m["role"].as_str().unwrap_or("<non-string role>"))
        .collect();
    assert_eq!(
        roles,
        ["user", "assistant", "user", "assistant"],
        "unexpected role sequence; report: {report:#}"
    );

    assert!(
        run["usage"].is_object(),
        "expected usage; got {}",
        run["usage"]
    );
}