use std::ffi::OsString;
use std::path::{Path, PathBuf};
use clap::{Args, Parser, Subcommand, ValueEnum};
use skilltest_core::{
discover_cases, validate_path, CommandProvider, Config, Error, ExitCode, OneharnessProvider,
Overrides, Provider, ProviderConfig, Report, Result, Runner, TestCase, ValidationReport,
};
// Top-level command line for the `skilltest` binary: one global option plus a
// required subcommand.
//
// NOTE: plain `//` comments are used on purpose. clap's derive macros turn
// `///` doc comments into help text, so adding them here would change the
// `--help` output.
#[derive(Parser)]
#[command(name = "skilltest", version, about, long_about = None)]
struct Cli {
// `--config FILE`. Declared `global`, so clap also accepts it after the
// subcommand name. In this file only the `run` subcommand reads it; when it is
// absent `cmd_run` falls back to `skilltest.yaml`.
#[arg(long, global = true, value_name = "FILE")]
config: Option<PathBuf>,
// The selected subcommand, dispatched in `run`.
#[command(subcommand)]
command: Command,
}
// Subcommands of `skilltest`. `run` dispatches each variant to the matching
// `cmd_*` function.
//
// NOTE: kept as `//` comments; `///` on a variant would become that
// subcommand's help text.
#[derive(Subcommand)]
enum Command {
// Handled by `cmd_run`: loads test cases and executes them.
Run(RunArgs),
// Handled by `cmd_validate`: collects validation findings for the given paths.
Validate(ValidateArgs),
// Handled by `cmd_init`: scaffolds files into a directory.
Init(InitArgs),
// Handled by `cmd_schema`: prints a JSON schema.
Schema(SchemaArgs),
}
// Arguments of the `run` subcommand. Apart from `paths` and `format`, every
// field is copied into an `Overrides` value and applied on top of the loaded
// config in `cmd_run`.
//
// NOTE: kept as `//` comments; `///` on a field would become its help text.
#[derive(Args)]
struct RunArgs {
// One or more paths (at least one is required). Each is handed to
// `discover_cases`, and every file it yields is loaded as a `TestCase`.
#[arg(value_name = "PATH", required = true)]
paths: Vec<PathBuf>,
// `-p` / `--platform`; the collected values are forwarded as
// `Overrides::platforms`.
#[arg(short = 'p', long = "platform", value_name = "PLATFORM")]
platforms: Vec<String>,
// `-m` / `--model`; the collected values are forwarded as `Overrides::models`.
#[arg(short = 'm', long = "model", value_name = "MODEL")]
models: Vec<String>,
// `--provider CMD`. `cmd_run` splits the string on whitespace (no shell-style
// quoting) and forwards the pieces as `Overrides::command_provider`.
#[arg(long, value_name = "CMD")]
provider: Option<String>,
// `--oneharness-bin PATH`, forwarded as `Overrides::oneharness_bin`.
// Presumably the location of the oneharness executable — confirm in
// skilltest_core.
#[arg(long, value_name = "PATH")]
oneharness_bin: Option<String>,
// `--judge-harness ID`, forwarded as `Overrides::judge_harness`.
#[arg(long, value_name = "ID")]
judge_harness: Option<String>,
// `--timeout SECS`, forwarded as `Overrides::timeout_secs`.
#[arg(long, value_name = "SECS")]
timeout: Option<u64>,
// `--judge-model MODEL`, forwarded as `Overrides::judge_model`.
#[arg(long, value_name = "MODEL")]
judge_model: Option<String>,
// `--max-turns N`, forwarded as `Overrides::max_turns`.
#[arg(long, value_name = "N")]
max_turns: Option<u32>,
// `--format`; selects how `print_report` renders the result. Defaults to
// `Format::Human`.
#[arg(long, value_enum, default_value_t = Format::Human)]
format: Format,
}
// Arguments of the `validate` subcommand.
//
// NOTE: kept as `//` comments; `///` on a field would become its help text.
#[derive(Args)]
struct ValidateArgs {
// One or more paths (at least one is required); each is passed to
// `validate_path` and the resulting findings are accumulated.
#[arg(value_name = "PATH", required = true)]
paths: Vec<PathBuf>,
// `--format`; selects human-readable lines or a JSON `ValidationReport`.
// Defaults to `Format::Human`.
#[arg(long, value_enum, default_value_t = Format::Human)]
format: Format,
}
// Arguments of the `init` subcommand.
//
// NOTE: kept as `//` comments; `///` on a field would become its help text.
#[derive(Args)]
struct InitArgs {
// Directory passed to `crate::scaffold::scaffold`; defaults to the current
// directory (`.`).
#[arg(value_name = "DIR", default_value = ".")]
dir: PathBuf,
}
// Arguments of the `schema` subcommand.
//
// NOTE: kept as `//` comments; `///` on a field would become its help text.
#[derive(Args)]
struct SchemaArgs {
// Positional TARGET choosing which type's JSON schema `cmd_schema` prints.
#[arg(value_enum, value_name = "TARGET")]
target: SchemaTarget,
}
// Which schema the `schema` subcommand emits. The variants are exposed as
// command-line values through `ValueEnum`.
#[derive(Clone, Copy, ValueEnum)]
enum SchemaTarget {
// Schema of the `Report` type.
Report,
// Schema of the `ValidationReport` type.
Validation,
}
// Output format shared by the `run` and `validate` subcommands. The variants
// are exposed as command-line values through `ValueEnum`.
#[derive(Clone, Copy, ValueEnum)]
enum Format {
// Plain-text output intended for a terminal (the default for both commands).
Human,
// JSON output printed to stdout.
Json,
}
pub fn run<I, T>(args: I) -> ExitCode
where
I: IntoIterator<Item = T>,
T: Into<OsString> + Clone,
{
let cli = match Cli::try_parse_from(args) {
Ok(cli) => cli,
Err(err) => {
let _ = err.print();
return if matches!(
err.kind(),
clap::error::ErrorKind::DisplayHelp
| clap::error::ErrorKind::DisplayVersion
| clap::error::ErrorKind::DisplayHelpOnMissingArgumentOrSubcommand
) {
ExitCode::Success
} else {
ExitCode::UsageError
};
}
};
let result = match &cli.command {
Command::Run(args) => cmd_run(cli.config.as_deref(), args),
Command::Validate(args) => cmd_validate(args),
Command::Init(args) => cmd_init(args),
Command::Schema(args) => cmd_schema(args),
};
match result {
Ok(code) => code,
Err(err) => report_error(&err),
}
}
/// Implements `skilltest run`.
///
/// Steps, in order:
/// 1. load the config from `config_path`, or from `skilltest.yaml` via
///    `Config::load_or_default` when no path was given;
/// 2. apply the command-line overrides from `args`;
/// 3. build the provider described by the resulting config;
/// 4. load every test case discovered under `args.paths`;
/// 5. run all cases and print the report in `args.format`.
///
/// Returns `ExitCode::Success` when the report's `passed` flag is set and
/// `ExitCode::TestFailure` otherwise. Any failing step is propagated as an
/// error.
fn cmd_run(config_path: Option<&Path>, args: &RunArgs) -> Result<ExitCode> {
    let mut config = if let Some(explicit) = config_path {
        Config::load(explicit)?
    } else {
        Config::load_or_default(Path::new("skilltest.yaml"))?
    };

    // `--provider` is a single string; it is split on whitespace into argv
    // pieces (no shell-style quoting is interpreted).
    let overrides = Overrides {
        command_provider: args.provider.as_deref().map(|raw| {
            raw.split_whitespace()
                .map(str::to_owned)
                .collect::<Vec<String>>()
        }),
        oneharness_bin: args.oneharness_bin.clone(),
        judge_harness: args.judge_harness.clone(),
        timeout_secs: args.timeout,
        platforms: args.platforms.clone(),
        models: args.models.clone(),
        judge_model: args.judge_model.clone(),
        max_turns: args.max_turns,
    };
    config.apply_overrides(overrides)?;

    let provider = build_provider(&config.provider)?;

    let mut cases = Vec::new();
    for root in &args.paths {
        let files = discover_cases(root)?;
        for file in files {
            let case = TestCase::load(&file)?;
            cases.push(case);
        }
    }

    let report = Runner::new(provider.as_ref(), &config).run_all(&cases)?;
    print_report(&report, args.format)?;

    let exit = match report.passed {
        true => ExitCode::Success,
        false => ExitCode::TestFailure,
    };
    Ok(exit)
}
/// Instantiates the provider selected by `config` as a boxed trait object.
///
/// The `Oneharness` variant is built infallibly from its settings. The
/// `Command` variant clones the configured command and propagates any error
/// from `CommandProvider::new`.
fn build_provider(config: &ProviderConfig) -> Result<Box<dyn Provider>> {
    let provider: Box<dyn Provider> = match config {
        ProviderConfig::Command(settings) => {
            Box::new(CommandProvider::new(settings.command.clone())?)
        }
        ProviderConfig::Oneharness(settings) => Box::new(OneharnessProvider::new(settings)),
    };
    Ok(provider)
}
fn print_report(report: &Report, format: Format) -> Result<()> {
match format {
Format::Json => {
let json = report
.to_json()
.map_err(|e| Error::Invalid(format!("could not serialize report: {e}")))?;
println!("{json}");
}
Format::Human => print!("{}", report.to_human()),
}
Ok(())
}
fn cmd_validate(args: &ValidateArgs) -> Result<ExitCode> {
let mut findings = Vec::new();
for path in &args.paths {
findings.extend(validate_path(path)?);
}
let valid = findings.is_empty();
match args.format {
Format::Json => {
let report = ValidationReport::new(&findings);
let json = report
.to_json()
.map_err(|e| Error::Invalid(format!("could not serialize findings: {e}")))?;
println!("{json}");
}
Format::Human => {
if valid {
println!("OK all skill definitions valid");
} else {
for finding in &findings {
eprintln!("INVALID {}: {}", finding.skill.display(), finding.message);
}
eprintln!("FAIL {} validation finding(s)", findings.len());
}
}
}
Ok(if valid {
ExitCode::Success
} else {
ExitCode::TestFailure
})
}
/// Implements `skilltest init`.
///
/// Calls `crate::scaffold::scaffold` on `args.dir`, prints one `created …`
/// line for each path it returns, and finishes with a short "what to do next"
/// hint. Returns `ExitCode::Success`; a scaffolding error is propagated.
fn cmd_init(args: &InitArgs) -> Result<ExitCode> {
    // Printed after the list of created files; the leading `\n` leaves a
    // blank line between the two.
    const NEXT_STEPS: &str = "\nNext: skilltest run cases/example.yaml\n\
        Try it offline: skilltest run cases/example.yaml --provider skilltest-fake-provider";

    let created = crate::scaffold::scaffold(&args.dir)?;
    for new_path in &created {
        println!("created {}", new_path.display());
    }
    println!("{NEXT_STEPS}");

    Ok(ExitCode::Success)
}
fn cmd_schema(args: &SchemaArgs) -> Result<ExitCode> {
let generator = schemars::generate::SchemaSettings::draft07().into_generator();
let schema = match args.target {
SchemaTarget::Report => generator.into_root_schema_for::<Report>(),
SchemaTarget::Validation => generator.into_root_schema_for::<ValidationReport>(),
};
let json = serde_json::to_string_pretty(&schema)
.map_err(|e| Error::Invalid(format!("could not serialize schema: {e}")))?;
println!("{json}");
Ok(ExitCode::Success)
}
fn report_error(err: &Error) -> ExitCode {
eprintln!("error: {err}");
match err {
Error::Provider { kind, .. } => {
let hint = match kind.as_deref() {
Some("auth") => "hint: authentication failed — check your provider credentials (e.g. `claude` login)",
Some("rate_limit") => "hint: the harness rate-limited the call — retry after a backoff",
Some("model_not_found") => {
"hint: the harness does not recognize this model — check `--model` and `oneharness list`"
}
Some("quota") => "hint: provider quota exhausted — check your account limits",
Some(other) => {
eprintln!("classified as: {other}");
"hint: see provider docs for this failure class"
}
None => {
"hint: ensure the provider command is installed and on PATH, or pass --provider"
}
};
eprintln!("{hint}");
ExitCode::ProviderError
}
_ => ExitCode::UsageError,
}
}