use crate::zip_project::ZipProjectNetworkAccess;
use agentics_domain::models::challenge::{
ChallengeBundleSpec, ChallengeExecutionSpec, ChallengeSetupSpec,
ChallengeSolutionPublicationPolicy, DockerPlatform, HardwareProfileSpec, MetricDirection,
MetricVisibility, TargetAccelerator,
};
use super::{
validate_challenge_bundle, validate_challenge_bundle_spec, validate_digest_pinned_images,
};
mod fixtures;
use fixtures::*;
/// A serialized spec with an added top-level `rounds` array must fail to
/// deserialize, and the error text must mention `rounds`.
#[test]
fn legacy_rounds_field_is_rejected() {
    let legacy_rounds = serde_json::json!([
        {
            "id": "main",
            "title": "Main",
            "eligibility": { "type": "open" },
            "visibility": {
                "leaderboard": "public_live",
                "score_distribution": "public_live",
                "result_detail": "submitter_live_public_after_close"
            },
            "solution_publication": "public"
        }
    ]);
    let mut document = serde_json::to_value(base_spec()).expect("spec should serialize");
    document["rounds"] = legacy_rounds;

    let rejection = serde_json::from_value::<ChallengeBundleSpec>(document)
        .expect_err("legacy rounds should be an unknown field");
    let message = rejection.to_string();
    assert!(message.contains("rounds"));
}
/// A serialized spec with an added top-level `community` object must fail to
/// deserialize, and the error text must mention `community`.
#[test]
fn legacy_community_field_is_rejected() {
    let legacy_community = serde_json::json!({
        "moltbook_submolt_name": "agentics-sample-sum",
        "moltbook_submolt_url": "https://www.moltbook.com/submolts/agentics-sample-sum"
    });
    let mut document = serde_json::to_value(base_spec()).expect("spec should serialize");
    document["community"] = legacy_community;

    let rejection = serde_json::from_value::<ChallengeBundleSpec>(document)
        .expect_err("legacy community metadata should be an unknown field");
    let message = rejection.to_string();
    assert!(message.contains("community"));
}
/// A serialized spec with an added top-level `moltbook` object must fail to
/// deserialize, and the error text must mention `moltbook`.
#[test]
fn challenge_authored_moltbook_field_is_rejected() {
    let authored_metadata = serde_json::json!({
        "discussion_url": "https://www.moltbook.com/post/sample-sum"
    });
    let mut document = serde_json::to_value(base_spec()).expect("spec should serialize");
    document["moltbook"] = authored_metadata;

    let rejection = serde_json::from_value::<ChallengeBundleSpec>(document)
        .expect_err("Moltbook metadata should be an unknown field");
    let message = rejection.to_string();
    assert!(message.contains("moltbook"));
}
/// A serialized spec with an added top-level `scorer` object must fail to
/// deserialize, and the error text must mention `scorer`.
#[test]
fn legacy_top_level_scorer_field_is_rejected() {
    let legacy_scorer = serde_json::json!({
        "command": ["python", "scorer/run.py"],
        "result_file": "result.json"
    });
    let mut document = serde_json::to_value(base_spec()).expect("spec should serialize");
    document["scorer"] = legacy_scorer;

    let rejection = serde_json::from_value::<ChallengeBundleSpec>(document)
        .expect_err("legacy scorer field should be unknown");
    let message = rejection.to_string();
    assert!(message.contains("scorer"));
}
/// Adding an `extra` key under `execution.separated_evaluator` must make
/// deserialization fail with an error that mentions `extra`.
#[test]
fn evaluator_unknown_fields_are_rejected() {
    let mut document = serde_json::to_value(base_spec()).expect("spec should serialize");
    let evaluator = &mut document["execution"]["separated_evaluator"];
    evaluator["extra"] = serde_json::json!("ignored");

    let rejection = serde_json::from_value::<ChallengeBundleSpec>(document)
        .expect_err("unknown evaluator field should fail");
    let message = rejection.to_string();
    assert!(message.contains("extra"));
}
/// Removing `execution.mode` from the serialized spec must make
/// deserialization fail with an error that mentions `mode`.
#[test]
fn execution_mode_is_required() {
    let mut document = serde_json::to_value(base_spec()).expect("spec should serialize");
    let execution = document["execution"]
        .as_object_mut()
        .expect("execution should be an object");
    execution.remove("mode");

    let rejection = serde_json::from_value::<ChallengeBundleSpec>(document)
        .expect_err("missing execution mode should fail");
    let message = rejection.to_string();
    assert!(message.contains("mode"));
}
/// Replacing `execution.mode` with `firecracker_benchmark` must make
/// deserialization fail with an error that echoes that value.
#[test]
fn unknown_execution_modes_are_rejected() {
    let mut document = serde_json::to_value(base_spec()).expect("spec should serialize");
    let unknown_mode = serde_json::json!("firecracker_benchmark");
    document["execution"]["mode"] = unknown_mode;

    let rejection = serde_json::from_value::<ChallengeBundleSpec>(document)
        .expect_err("unknown execution mode should fail");
    let message = rejection.to_string();
    assert!(message.contains("firecracker_benchmark"));
}
/// An evaluator command pointing at `evaluator/run.py` must fail validation
/// with an error that mentions `separated-evaluator`.
#[test]
fn evaluator_script_must_match_execution_runtime_name() {
    let mut spec = base_spec();
    let foreign_command = vec![String::from("python"), String::from("evaluator/run.py")];
    separated_evaluator_mut(&mut spec)
        .separated_evaluator
        .command = foreign_command;

    let rejection =
        validate_challenge_bundle_spec(&spec).expect_err("foreign evaluator dir should fail");
    let message = rejection.to_string();
    assert!(message.contains("separated-evaluator"));
}
/// A validation setup command pointing at `setup/run.py` must fail validation
/// with an error that mentions `separated-evaluator`.
#[test]
fn evaluator_setup_script_must_match_execution_runtime_name() {
    let mut spec = base_spec();
    {
        // Swap the static validation runs for a setup step whose script is in a foreign dir.
        let evaluator = separated_evaluator_mut(&mut spec);
        evaluator.validation_runs = None;
        let foreign_setup = ChallengeSetupSpec {
            command: vec![String::from("python"), String::from("setup/run.py")],
            result_runs_file: bundle_path("public/runs.json"),
            reproducibility_notes: None,
        };
        evaluator.validation_setup = Some(foreign_setup);
    }

    let rejection =
        validate_challenge_bundle_spec(&spec).expect_err("foreign setup dir should fail");
    let message = rejection.to_string();
    assert!(message.contains("separated-evaluator"));
}
/// A spec whose `targets` collection has been emptied must fail validation
/// with an error that mentions `targets`.
#[test]
fn targets_are_required() {
    let mut without_targets = base_spec();
    without_targets.targets.clear();

    let rejection = validate_challenge_bundle_spec(&without_targets)
        .expect_err("empty targets should fail");
    let message = rejection.to_string();
    assert!(message.contains("targets"));
}
/// A spec whose `keywords` collection has been emptied must fail validation
/// with the "keywords must contain between" message.
#[test]
fn keywords_are_required() {
    let mut without_keywords = base_spec();
    without_keywords.keywords.clear();

    let rejection = validate_challenge_bundle_spec(&without_keywords)
        .expect_err("empty keywords should fail");
    let message = rejection.to_string();
    assert!(message.contains("keywords must contain between"));
}
/// Replacing a target's `solution_image` with a bare string must fail
/// deserialization; the error must mention either `invalid type` or `source`.
#[test]
fn legacy_string_image_field_is_rejected() {
    let mut document = serde_json::to_value(base_spec()).expect("spec should serialize");
    let profile = &mut document["targets"][0]["resource_profile"];
    profile["solution_image"] = serde_json::json!("agentics-linux-arm64-cpu:ubuntu26.04-local");

    let error = serde_json::from_value::<ChallengeBundleSpec>(document)
        .expect_err("legacy image string should fail");
    let message = error.to_string();
    let mentions_expected_cause = message.contains("invalid type") || message.contains("source");
    assert!(mentions_expected_cause, "unexpected error: {error}");
}
/// Adding a `solution_image_digest` key to a target's resource profile must
/// fail deserialization with an error that mentions the field.
#[test]
fn legacy_image_digest_field_is_rejected() {
    let mut document = serde_json::to_value(base_spec()).expect("spec should serialize");
    let profile = &mut document["targets"][0]["resource_profile"];
    profile["solution_image_digest"] = serde_json::json!(test_digest());

    let rejection = serde_json::from_value::<ChallengeBundleSpec>(document)
        .expect_err("legacy digest field should fail");
    let message = rejection.to_string();
    assert!(message.contains("solution_image_digest"));
}
/// Each of the `scorer_image` and `scorer_network_access` resource-profile
/// keys must, on its own, make deserialization fail with an error naming it.
#[test]
fn legacy_scorer_resource_profile_fields_are_rejected() {
    let cases = [
        (
            "scorer_image",
            serde_json::json!({
                "source": "local",
                "reference": "agentics-linux-arm64-cpu:ubuntu26.04-local"
            }),
        ),
        ("scorer_network_access", serde_json::json!("disabled")),
    ];

    for (field, legacy_value) in cases {
        // Start from a fresh document so only one legacy field is present at a time.
        let mut document = serde_json::to_value(base_spec()).expect("spec should serialize");
        document["targets"][0]["resource_profile"][field] = legacy_value;

        let rejection = serde_json::from_value::<ChallengeBundleSpec>(document)
            .expect_err("legacy scorer resource field should fail");
        let message = rejection.to_string();
        assert!(message.contains(field));
    }
}
/// Each flat limit or network-access key placed directly on a target's
/// resource profile must make deserialization fail with an error naming it.
#[test]
fn legacy_flat_resource_profile_fields_are_rejected() {
    let legacy_fields = [
        "timeout_sec",
        "memory_limit_mb",
        "cpu_limit_millis",
        "disk_limit_mb",
        "setup_network_access",
        "build_network_access",
        "run_network_access",
        "evaluator_network_access",
    ];

    for field in legacy_fields {
        // Network-access keys get a string value; the numeric limit keys get an integer.
        let legacy_value = if field.ends_with("_network_access") {
            serde_json::json!("disabled")
        } else {
            serde_json::json!(30)
        };
        let mut document = serde_json::to_value(base_spec()).expect("spec should serialize");
        document["targets"][0]["resource_profile"][field] = legacy_value;

        let rejection = serde_json::from_value::<ChallengeBundleSpec>(document)
            .expect_err("legacy flat resource profile field should fail");
        let message = rejection.to_string();
        assert!(message.contains(field));
    }
}
/// Removing `build` from a target's `resource_profile.solution` object must
/// make deserialization fail with an error that mentions `build`.
#[test]
fn missing_stage_profile_is_rejected() {
    let mut document = serde_json::to_value(base_spec()).expect("spec should serialize");
    let solution_profile = document["targets"][0]["resource_profile"]["solution"]
        .as_object_mut()
        .expect("solution profile should be an object");
    solution_profile.remove("build");

    let rejection = serde_json::from_value::<ChallengeBundleSpec>(document)
        .expect_err("missing stage profile should fail");
    let message = rejection.to_string();
    assert!(message.contains("build"));
}
/// Setting the solution run stage's `disk_limit_mb` to zero must fail
/// validation with an error that carries the full field path.
#[test]
fn zero_stage_resource_limit_is_rejected() {
    let mut spec = base_spec();
    let run_stage = spec.targets[0]
        .resource_profile
        .solution
        .run
        .as_mut()
        .expect("base spec declares solution run");
    run_stage.disk_limit_mb = 0;

    let rejection =
        validate_challenge_bundle_spec(&spec).expect_err("zero stage resource limit should fail");
    let message = rejection.to_string();
    assert!(message.contains("targets[0].resource_profile.solution.run.disk_limit_mb"));
}
/// Removing the top-level `starts_at` key must make deserialization fail
/// with an error that mentions `starts_at`.
#[test]
fn starts_at_is_required() {
    let mut document = serde_json::to_value(base_spec()).expect("spec should serialize");
    let top_level = document
        .as_object_mut()
        .expect("spec should be an object");
    top_level.remove("starts_at");

    let rejection = serde_json::from_value::<ChallengeBundleSpec>(document)
        .expect_err("missing starts_at should fail");
    let message = rejection.to_string();
    assert!(message.contains("starts_at"));
}
/// A `starts_at` value of `not-a-time` must fail validation with an error
/// that mentions `starts_at`.
#[test]
fn starts_at_must_be_rfc3339() {
    let mut spec = base_spec();
    spec.starts_at = String::from("not-a-time");

    let rejection =
        validate_challenge_bundle_spec(&spec).expect_err("invalid starts_at should fail");
    let message = rejection.to_string();
    assert!(message.contains("starts_at"));
}
/// Removing the `accelerator` key from the first target must make
/// deserialization fail with an error that mentions `accelerator`.
#[test]
fn accelerator_requires_explicit_null_for_no_accelerator() {
    let mut document = serde_json::to_value(base_spec()).expect("spec should serialize");
    let first_target = document["targets"][0]
        .as_object_mut()
        .expect("target should be an object");
    first_target.remove("accelerator");

    let rejection = serde_json::from_value::<ChallengeBundleSpec>(document)
        .expect_err("missing accelerator should fail");
    let message = rejection.to_string();
    assert!(message.contains("accelerator"));
}
/// Setting the first target's `accelerator` to the string `cpu` must make
/// deserialization fail with an error that mentions `cpu`.
#[test]
fn legacy_cpu_accelerator_string_is_rejected() {
    let mut document = serde_json::to_value(base_spec()).expect("spec should serialize");
    let first_target = &mut document["targets"][0];
    first_target["accelerator"] = serde_json::json!("cpu");

    let rejection = serde_json::from_value::<ChallengeBundleSpec>(document)
        .expect_err("legacy cpu accelerator should fail");
    let message = rejection.to_string();
    assert!(message.contains("cpu"));
}
/// Adding a `hardware` object to a target's resource profile must make
/// deserialization fail with an error that mentions `hardware`.
#[test]
fn legacy_hardware_field_is_rejected() {
    let legacy_hardware = serde_json::json!({
        "kind": "cpu"
    });
    let mut document = serde_json::to_value(base_spec()).expect("spec should serialize");
    document["targets"][0]["resource_profile"]["hardware"] = legacy_hardware;

    let rejection = serde_json::from_value::<ChallengeBundleSpec>(document)
        .expect_err("legacy hardware field should fail");
    let message = rejection.to_string();
    assert!(message.contains("hardware"));
}
/// Each of `external_data` and `cache_key_hint` on the official evaluation
/// setup must make deserialization fail with an error naming the field.
#[test]
fn removed_setup_metadata_fields_are_rejected() {
    let cases = [
        ("external_data", serde_json::json!([])),
        ("cache_key_hint", serde_json::json!("dataset-v1")),
    ];

    for (field, removed_value) in cases {
        // Build the setup object first, then attach the single removed field under test.
        let mut setup = serde_json::json!({
            "command": ["python", "separated-evaluator/setup.py"],
            "result_runs_file": "generated/runs.json"
        });
        setup[field] = removed_value;

        let mut document = serde_json::to_value(base_spec()).expect("spec should serialize");
        document["execution"]["official_evaluation_setup"] = setup;

        let rejection = serde_json::from_value::<ChallengeBundleSpec>(document)
            .expect_err("removed setup metadata field should fail");
        let message = rejection.to_string();
        assert!(message.contains(field));
    }
}
/// A `network_access` key on the official evaluation setup must make
/// deserialization fail with an error that mentions `network_access`.
#[test]
fn removed_setup_network_access_field_is_rejected() {
    let setup_with_network_access = serde_json::json!({
        "command": ["python", "separated-evaluator/setup.py"],
        "result_runs_file": "generated/runs.json",
        "network_access": "enabled"
    });
    let mut document = serde_json::to_value(base_spec()).expect("spec should serialize");
    document["execution"]["official_evaluation_setup"] = setup_with_network_access;

    let rejection = serde_json::from_value::<ChallengeBundleSpec>(document)
        .expect_err("setup network access should be stage-owned");
    let message = rejection.to_string();
    assert!(message.contains("network_access"));
}
/// Renaming the first target to `main` must fail validation with the
/// "not supported for MVP" message.
#[test]
fn target_name_must_use_mvp_allowlist() {
    let mut spec = base_spec();
    let first_target = &mut spec.targets[0];
    first_target.name = target_name("main");

    let rejection =
        validate_challenge_bundle_spec(&spec).expect_err("unsupported target names should fail");
    let message = rejection.to_string();
    assert!(message.contains("not supported for MVP"));
}
/// A `linux-amd64-cpu` target on the `LinuxAmd64` docker platform must fail
/// validation with an error that mentions `post-MVP`.
#[test]
fn amd64_targets_are_reserved_for_post_mvp() {
    let mut spec = base_spec();
    {
        let first_target = &mut spec.targets[0];
        first_target.name = target_name("linux-amd64-cpu");
        first_target.docker_platform = DockerPlatform::LinuxAmd64;
    }

    let rejection = validate_challenge_bundle_spec(&spec)
        .expect_err("amd64 targets should be reserved for post-MVP");
    let message = rejection.to_string();
    assert!(message.contains("post-MVP"));
}
/// The `PublicAfterClose` publication policy must fail validation until
/// `closes_at` is set, after which the same spec must validate.
#[test]
fn public_after_close_solution_publication_requires_close_time() {
    let mut spec = base_spec();
    spec.solution_publication = ChallengeSolutionPublicationPolicy::PublicAfterClose;

    // First pass: policy set, `closes_at` left as the base fixture has it.
    let rejection = validate_challenge_bundle_spec(&spec)
        .expect_err("public-after-close artifacts need a close time");
    let message = rejection.to_string();
    assert!(message.contains("solution_publication"));

    // Second pass: supplying a close time must satisfy the policy.
    spec.closes_at = Some(String::from("2999-01-02T00:00:00Z"));
    validate_challenge_bundle_spec(&spec).expect("close time should satisfy policy");
}
/// A GPU `linux-arm64-cuda` target must fail validation without hardware
/// metadata, and validate once that metadata and CUDA images are supplied.
#[test]
fn cuda_target_requires_cuda_hardware_metadata() {
    let mut spec = base_spec();
    spec.targets[0].name = target_name("linux-arm64-cuda");
    spec.targets[0].accelerator = TargetAccelerator::Gpu;

    let rejection =
        validate_challenge_bundle_spec(&spec).expect_err("missing cuda hardware should fail");
    let message = rejection.to_string();
    assert!(message.contains("hardware_metadata.kind"));

    // Supply the hardware metadata and point both images at the CUDA image.
    let cuda_image = "agentics-linux-arm64-cuda:cu130-ubuntu24.04-local";
    let profile = &mut spec.targets[0].resource_profile;
    profile.hardware_metadata = Some(cuda_hardware());
    profile.solution_image = local_image(cuda_image);
    profile.evaluator_image = local_image(cuda_image);
    validate_challenge_bundle_spec(&spec).expect("cuda target should validate");
}
/// A solution image from the `not-agentics-linux-arm64-cpu` registry
/// repository must fail validation with the supported-repository message.
#[test]
fn cpu_target_rejects_unsupported_image_repository() {
    let mut spec = base_spec();
    let foreign_image =
        registry_image("ghcr.io/example/not-agentics-linux-arm64-cpu:ubuntu26.04-v0.1.0");
    spec.targets[0].resource_profile.solution_image = foreign_image;

    let rejection = validate_challenge_bundle_spec(&spec)
        .expect_err("unsupported image repository should fail");
    let message = rejection.to_string();
    assert!(message.contains("supported Agentics image repository"));
}
/// Using the `bookworm` tag for both images must fail validation with the
/// "tag must start with" message.
#[test]
fn cpu_target_rejects_unsupported_image_tag() {
    let mut spec = base_spec();
    let bad_tag_image = "agentics-linux-arm64-cpu:bookworm";
    {
        let profile = &mut spec.targets[0].resource_profile;
        profile.solution_image = local_image(bad_tag_image);
        profile.evaluator_image = local_image(bad_tag_image);
    }

    let rejection =
        validate_challenge_bundle_spec(&spec).expect_err("unsupported image tag should fail");
    let message = rejection.to_string();
    assert!(message.contains("tag must start with"));
}
/// A target converted with `use_cuda_target(.., "cu130")` must validate.
#[test]
fn cuda_target_accepts_matching_supported_image() {
    let mut spec = base_spec();
    let first_target = &mut spec.targets[0];
    use_cuda_target(first_target, "cu130");

    validate_challenge_bundle_spec(&spec).expect("matching cuda image should validate");
}
/// A target converted with `use_cuda_target(.., "cu132")` must fail
/// validation with the "tag must start with `cu130-`" message.
#[test]
fn cuda_target_rejects_mismatched_image_variant() {
    let mut spec = base_spec();
    let first_target = &mut spec.targets[0];
    use_cuda_target(first_target, "cu132");

    let rejection = validate_challenge_bundle_spec(&spec)
        .expect_err("mismatched cuda image variant should fail");
    let message = rejection.to_string();
    assert!(message.contains("tag must start with `cu130-`"));
}
/// Hardware metadata declaring variant `cu129` / version `12.9` must fail
/// validation with an error that mentions `supported variants`.
#[test]
fn cuda_target_rejects_unsupported_cuda_variant() {
    let mut spec = base_spec();
    let unsupported_hardware = HardwareProfileSpec {
        cuda_variant: Some(String::from("cu129")),
        cuda_version: Some(String::from("12.9")),
        ..cuda_hardware()
    };
    {
        let first_target = &mut spec.targets[0];
        first_target.name = target_name("linux-arm64-cuda");
        first_target.accelerator = TargetAccelerator::Gpu;
        first_target.resource_profile.hardware_metadata = Some(unsupported_hardware);
    }

    let rejection =
        validate_challenge_bundle_spec(&spec).expect_err("unsupported cuda variant should fail");
    let message = rejection.to_string();
    assert!(message.contains("supported variants"));
}
/// Hardware metadata pairing variant `cu132` with version `13.0` must fail
/// validation with an error that mentions `cuda_version`.
#[test]
fn cuda_target_rejects_mismatched_cuda_version() {
    let mut spec = base_spec();
    let mismatched_hardware = HardwareProfileSpec {
        cuda_variant: Some(String::from("cu132")),
        cuda_version: Some(String::from("13.0")),
        ..cuda_hardware()
    };
    {
        let first_target = &mut spec.targets[0];
        first_target.name = target_name("linux-arm64-cuda");
        first_target.accelerator = TargetAccelerator::Gpu;
        first_target.resource_profile.hardware_metadata = Some(mismatched_hardware);
    }

    let rejection =
        validate_challenge_bundle_spec(&spec).expect_err("mismatched cuda version should fail");
    let message = rejection.to_string();
    assert!(message.contains("cuda_version"));
}
/// The unmodified base spec must fail the digest-pinned image policy with an
/// error that mentions `@sha256:<digest>`.
#[test]
fn digest_pinned_image_policy_rejects_tag_only_images() {
    let unpinned = base_spec();

    let rejection =
        validate_digest_pinned_images(&unpinned).expect_err("tag-only images should fail policy");
    let message = rejection.to_string();
    assert!(message.contains("@sha256:<digest>"));
}
/// After `pin_images`, the spec must pass both general validation and the
/// digest-pinned image policy.
#[test]
fn digest_pinned_image_policy_accepts_immutable_references() {
    let mut pinned = base_spec();
    pin_images(&mut pinned);

    validate_challenge_bundle_spec(&pinned).expect("pinned spec should validate");
    validate_digest_pinned_images(&pinned).expect("pinned images should satisfy policy");
}
/// A local solution image must fail the digest-pinned image policy with an
/// error that mentions `registry image`.
#[test]
fn digest_pinned_image_policy_rejects_local_images() {
    let mut spec = base_spec();
    let local_solution_image = local_image("agentics-linux-arm64-cpu:ubuntu26.04-local");
    spec.targets[0].resource_profile.solution_image = local_solution_image;

    let rejection =
        validate_digest_pinned_images(&spec).expect_err("local image should fail hosted policy");
    let message = rejection.to_string();
    assert!(message.contains("registry image"));
}
/// Clearing `summary.en` must fail validation with an error that mentions
/// `summary.en`.
#[test]
fn localized_summary_is_required() {
    let mut without_summary = base_spec();
    without_summary.summary.en.clear();

    let rejection = validate_challenge_bundle_spec(&without_summary)
        .expect_err("empty summary should fail");
    let message = rejection.to_string();
    assert!(message.contains("summary.en"));
}
/// A spec with the private benchmark disabled may still name a
/// `private_benchmark_dir`; validation must succeed.
#[test]
fn disabled_private_benchmark_may_still_declare_directory() {
    let mut spec = base_spec();
    spec.datasets.private_benchmark_enabled = false;
    spec.datasets.private_benchmark_dir = Some(bundle_path("private-benchmark"));

    // `expect` (rather than `assert!(..is_ok())`) prints the validation error on failure.
    validate_challenge_bundle_spec(&spec)
        .expect("disabled private benchmark may still declare a directory");
}
/// Enabling the private benchmark while leaving `private_benchmark_dir`
/// unset must fail validation.
#[test]
fn enabled_private_benchmark_requires_directory() {
    let mut spec = base_spec();
    spec.datasets.private_benchmark_enabled = true;
    spec.datasets.private_benchmark_dir = None;

    // `expect_err` (rather than `assert!(..is_err())`) reports the unexpected success value
    // together with a descriptive message if validation wrongly passes.
    validate_challenge_bundle_spec(&spec)
        .expect_err("enabled private benchmark without a directory should fail");
}
/// The unmodified base spec must report that official evaluation may expose
/// private material.
// NOTE(review): presumably the base fixture enables the private benchmark — confirm in fixtures.
#[test]
fn official_log_contract_marks_private_benchmark_enabled_as_sensitive() {
    let spec = base_spec();
    let may_expose_private = spec.official_evaluation_may_expose_private_material();
    assert!(may_expose_private);
}
/// With the private benchmark disabled and official runs replaced by an
/// official evaluation setup, the spec must still report possible exposure.
#[test]
fn official_log_contract_marks_official_setup_as_sensitive() {
    let mut spec = base_spec();
    spec.datasets.private_benchmark_enabled = false;
    {
        let evaluator = separated_evaluator_mut(&mut spec);
        evaluator.official_runs = None;
        evaluator.official_evaluation_setup = Some(setup_spec());
    }

    let may_expose_private = spec.official_evaluation_may_expose_private_material();
    assert!(may_expose_private);
}
/// With the private benchmark disabled and official runs pointing at
/// `public/runs.json`, the spec must report no possible private exposure.
#[test]
fn official_log_contract_marks_public_static_official_runs_as_diagnostic_safe() {
    let mut spec = base_spec();
    spec.datasets.private_benchmark_enabled = false;
    let public_runs = bundle_path("public/runs.json");
    separated_evaluator_mut(&mut spec).official_runs = Some(public_runs);

    let may_expose_private = spec.official_evaluation_may_expose_private_material();
    assert!(!may_expose_private);
}
/// With only the private benchmark flag disabled, the base spec must still
/// report that official evaluation may expose private material.
// NOTE(review): presumably the base fixture's official runs are outside `public/` — confirm.
#[test]
fn official_log_contract_marks_non_public_static_official_runs_as_sensitive() {
    let mut spec = base_spec();
    spec.datasets.private_benchmark_enabled = false;

    let may_expose_private = spec.official_evaluation_may_expose_private_material();
    assert!(may_expose_private);
}
/// Without `validation_runs`, the spec must validate while the target has
/// validation disabled and fail once the target enables it.
#[test]
fn validation_run_manifest_required_only_when_target_enables_validation() {
    let mut spec = base_spec();
    separated_evaluator_mut(&mut spec).validation_runs = None;

    // Validation disabled: the missing manifest must be tolerated. `expect` prints the
    // validation error if this unexpectedly fails.
    spec.targets[0].validation_enabled = false;
    validate_challenge_bundle_spec(&spec)
        .expect("validation-disabled target should not require a run manifest");

    // Validation enabled: the missing manifest must now be reported.
    spec.targets[0].validation_enabled = true;
    let error = validate_challenge_bundle_spec(&spec)
        .expect_err("target validation should require run manifest");
    assert!(error.to_string().contains("execution.validation_runs"));
}
/// Without `validation_submission_limit`, the spec must validate while the
/// target has validation disabled and fail once the target enables it.
#[test]
fn validation_enabled_target_requires_validation_submission_limit() {
    let mut spec = base_spec();
    spec.validation_submission_limit = None;

    // Validation disabled: the missing limit must be tolerated. `expect` prints the
    // validation error if this unexpectedly fails.
    spec.targets[0].validation_enabled = false;
    validate_challenge_bundle_spec(&spec)
        .expect("validation-disabled target should not require a submission limit");

    // Validation enabled: the missing limit must now be reported.
    spec.targets[0].validation_enabled = true;
    let error = validate_challenge_bundle_spec(&spec)
        .expect_err("target validation should require validation submission limit");
    assert!(error.to_string().contains("validation_submission_limit"));
}
/// Replacing the static `validation_runs` with a `validation_setup` must
/// still produce a spec that validates.
#[test]
fn validation_setup_satisfies_validation_enabled_target() {
    let mut spec = base_spec();
    let execution = separated_evaluator_mut(&mut spec);
    execution.validation_runs = None;
    execution.validation_setup = Some(setup_spec());

    // `expect` (rather than `assert!(..is_ok())`) prints the validation error on failure.
    validate_challenge_bundle_spec(&spec)
        .expect("validation setup should satisfy validation-enabled target");
}
/// Replacing the static `official_runs` with an `official_evaluation_setup`
/// must still produce a spec that validates.
#[test]
fn official_evaluation_setup_satisfies_private_benchmark_execution() {
    let mut spec = base_spec();
    let execution = separated_evaluator_mut(&mut spec);
    execution.official_runs = None;
    execution.official_evaluation_setup = Some(setup_spec());

    // `expect` (rather than `assert!(..is_ok())`) prints the validation error on failure.
    validate_challenge_bundle_spec(&spec)
        .expect("official evaluation setup should satisfy official execution");
}
/// With an `official_evaluation_setup` in place of `official_runs`, the
/// spec must validate even when `private_benchmark_dir` is unset.
#[test]
fn official_evaluation_setup_may_omit_private_benchmark_directory() {
    let mut spec = base_spec();
    let execution = separated_evaluator_mut(&mut spec);
    execution.official_runs = None;
    execution.official_evaluation_setup = Some(setup_spec());
    spec.datasets.private_benchmark_dir = None;

    // `expect` (rather than `assert!(..is_ok())`) prints the validation error on failure.
    validate_challenge_bundle_spec(&spec)
        .expect("official evaluation setup should allow omitting the private benchmark dir");
}
/// The piped-stdio fixture must validate, and its public projection must
/// serialize the mode, framing acknowledgement and validation session while
/// omitting `official_session` and `official_evaluation_setup`.
#[test]
fn piped_stdio_static_sessions_are_valid_and_projected_publicly() {
    let spec = base_piped_stdio_spec();
    validate_challenge_bundle_spec(&spec).expect("piped stdio spec should validate");

    let public = agentics_domain::models::challenge::PublicChallengeBundleSpec::from(spec);
    let execution_json =
        serde_json::to_value(public.execution).expect("public execution serializes");

    // Keys that must be present with exact values.
    let expected_entries = [
        ("mode", serde_json::json!("piped_stdio")),
        (
            "acknowledge_stdio_protocol_framing",
            serde_json::json!(true),
        ),
        ("validation_session", serde_json::json!("public/session.json")),
    ];
    for (key, expected) in expected_entries {
        assert_eq!(execution_json[key], expected);
    }

    // Keys that must be absent from the public projection.
    for hidden_key in ["official_session", "official_evaluation_setup"] {
        assert!(execution_json.get(hidden_key).is_none());
    }
}
/// Clearing `acknowledge_stdio_protocol_framing` on the piped-stdio fixture
/// must fail validation with an error naming the field and mentioning the
/// "stdin/stdout message protocol".
#[test]
fn piped_stdio_requires_stdio_protocol_framing_acknowledgement() {
    let mut spec = base_piped_stdio_spec();
    // Fail loudly if the fixture is not piped stdio: silently skipping the mutation would
    // make the `expect_err` below report a misleading failure.
    let ChallengeExecutionSpec::PipedStdio(execution) = &mut spec.execution else {
        panic!("base_piped_stdio_spec should declare piped stdio execution");
    };
    execution.acknowledge_stdio_protocol_framing = false;

    let error =
        validate_challenge_bundle_spec(&spec).expect_err("missing stdio framing ack should fail");
    let message = error.to_string();
    assert!(message.contains("acknowledge_stdio_protocol_framing"));
    assert!(message.contains("stdin/stdout message protocol"));
}
/// Removing `acknowledge_stdio_protocol_framing` from the serialized
/// piped-stdio spec must make deserialization fail with an error naming it.
#[test]
fn piped_stdio_rejects_missing_stdio_protocol_framing_acknowledgement() {
    let mut document = serde_json::to_value(base_piped_stdio_spec()).expect("spec serializes");
    let execution = document["execution"]
        .as_object_mut()
        .expect("execution must be an object");
    execution.remove("acknowledge_stdio_protocol_framing");

    let rejection = serde_json::from_value::<ChallengeBundleSpec>(document)
        .expect_err("piped stdio should reject missing stdio framing acknowledgement");
    let message = rejection.to_string();
    assert!(message.contains("acknowledge_stdio_protocol_framing"));
}
/// Adding `execution.validation_runs` to the serialized piped-stdio spec
/// must make deserialization fail with an error naming the field.
#[test]
fn piped_stdio_rejects_run_manifest_fields() {
    let mut document = serde_json::to_value(base_piped_stdio_spec()).expect("spec serializes");
    let execution = &mut document["execution"];
    execution["validation_runs"] = serde_json::json!("public/runs.json");

    let rejection = serde_json::from_value::<ChallengeBundleSpec>(document)
        .expect_err("piped stdio should reject run manifest fields");
    let message = rejection.to_string();
    assert!(message.contains("validation_runs"));
}
/// Adding a `validation_setup` to the piped-stdio fixture must fail
/// validation with an error that mentions `validation_session`.
#[test]
fn piped_stdio_static_and_setup_sessions_are_mutually_exclusive() {
    let mut spec = base_piped_stdio_spec();
    // Fail loudly if the fixture is not piped stdio: silently skipping the mutation would
    // make the `expect_err` below report a misleading failure.
    let ChallengeExecutionSpec::PipedStdio(execution) = &mut spec.execution else {
        panic!("base_piped_stdio_spec should declare piped stdio execution");
    };
    execution.validation_setup = Some(piped_setup_spec());

    let error = validate_challenge_bundle_spec(&spec)
        .expect_err("validation session and setup should conflict");
    assert!(error.to_string().contains("validation_session"));
}
/// Clearing `validation_session` on the piped-stdio fixture must fail
/// validation with an error that mentions `validation_session`.
#[test]
fn piped_stdio_validation_requires_session_source() {
    let mut spec = base_piped_stdio_spec();
    // Fail loudly if the fixture is not piped stdio: silently skipping the mutation would
    // make the `expect_err` below report a misleading failure.
    let ChallengeExecutionSpec::PipedStdio(execution) = &mut spec.execution else {
        panic!("base_piped_stdio_spec should declare piped stdio execution");
    };
    execution.validation_session = None;

    let error =
        validate_challenge_bundle_spec(&spec).expect_err("validation should require a session");
    assert!(error.to_string().contains("validation_session"));
}
/// The coexecuted-benchmark fixture must validate, and its public projection
/// must serialize the mode, danger acknowledgement, evaluator and validation
/// setup while omitting `official_evaluation_setup`.
#[test]
fn coexecuted_benchmark_is_valid_and_projected_publicly() {
    let spec = base_coexecuted_benchmark_spec();
    validate_challenge_bundle_spec(&spec).expect("coexecuted-evaluator spec should validate");

    let public = agentics_domain::models::challenge::PublicChallengeBundleSpec::from(spec);
    let execution_json =
        serde_json::to_value(public.execution).expect("public execution serializes");

    // Keys that must be present with exact values.
    let expected_entries = [
        ("mode", serde_json::json!("coexecuted_benchmark")),
        ("acknowledge_danger", serde_json::json!(true)),
    ];
    for (key, expected) in expected_entries {
        assert_eq!(execution_json[key], expected);
    }

    // Keys that must merely be present.
    for visible_key in ["coexecuted_evaluator", "validation_setup"] {
        assert!(execution_json.get(visible_key).is_some());
    }

    // The official setup must not appear in the public projection.
    assert!(execution_json.get("official_evaluation_setup").is_none());
}
/// Clearing `acknowledge_danger` on the coexecuted-benchmark fixture must
/// fail validation with an error that mentions the field.
#[test]
fn coexecuted_benchmark_requires_danger_acknowledgement() {
    let mut spec = base_coexecuted_benchmark_spec();
    let execution = coexecuted_benchmark_mut(&mut spec);
    execution.acknowledge_danger = false;

    let rejection =
        validate_challenge_bundle_spec(&spec).expect_err("missing danger ack should fail");
    let message = rejection.to_string();
    assert!(message.contains("acknowledge_danger"));
}
/// Declaring a solution run stage on the coexecuted-benchmark fixture must
/// fail validation with an error mentioning `solution.run` and `forbidden`.
#[test]
fn coexecuted_benchmark_rejects_solution_run_profile() {
    let mut spec = base_coexecuted_benchmark_spec();
    let run_stage = stage_profile(30, 512, 1000, 1024, ZipProjectNetworkAccess::Disabled);
    spec.targets[0].resource_profile.solution.run = Some(run_stage);

    let rejection = validate_challenge_bundle_spec(&spec)
        .expect_err("coexecuted-evaluator should reject solution run profile");
    let message = rejection.to_string();
    assert!(message.contains("solution.run"));
    assert!(message.contains("forbidden"));
}
/// Removing the solution run stage must fail validation, with an error that
/// mentions `solution.run`, for both the base and the piped-stdio fixtures.
#[test]
fn solution_run_profile_is_required_for_modes_with_solution_run_container() {
    // Base (separated-evaluator) fixture.
    let mut separated_spec = base_spec();
    separated_spec.targets[0].resource_profile.solution.run = None;
    let separated_message = validate_challenge_bundle_spec(&separated_spec)
        .expect_err("separated-evaluator should require solution run profile")
        .to_string();
    assert!(separated_message.contains("solution.run"));

    // Piped-stdio fixture.
    let mut piped_spec = base_piped_stdio_spec();
    piped_spec.targets[0].resource_profile.solution.run = None;
    let piped_message = validate_challenge_bundle_spec(&piped_spec)
        .expect_err("piped stdio should require solution run profile")
        .to_string();
    assert!(piped_message.contains("solution.run"));
}
/// Adding `validation_runs` and `validation_session` to the serialized
/// coexecuted-benchmark spec must make deserialization fail, naming at least
/// one of the two fields.
#[test]
fn coexecuted_benchmark_rejects_run_and_session_locators() {
    let mut document =
        serde_json::to_value(base_coexecuted_benchmark_spec()).expect("spec serializes");
    document["execution"]["validation_runs"] = serde_json::json!("public/runs.json");
    document["execution"]["validation_session"] = serde_json::json!("public/session.json");

    let rejection = serde_json::from_value::<ChallengeBundleSpec>(document)
        .expect_err("coexecuted-evaluator should reject foreign locator fields");
    let message = rejection.to_string();
    let names_foreign_field = ["validation_runs", "validation_session"]
        .iter()
        .any(|field| message.contains(*field));
    assert!(names_foreign_field);
}
/// Adding `result_runs_file` to the coexecuted-benchmark validation setup
/// must make deserialization fail with an error naming the field.
#[test]
fn coexecuted_benchmark_setup_rejects_result_file_locators() {
    let mut document =
        serde_json::to_value(base_coexecuted_benchmark_spec()).expect("spec serializes");
    let validation_setup = &mut document["execution"]["validation_setup"];
    validation_setup["result_runs_file"] = serde_json::json!("generated/runs.json");

    let rejection = serde_json::from_value::<ChallengeBundleSpec>(document)
        .expect_err("coexecuted-evaluator setup should reject result-file locators");
    let message = rejection.to_string();
    assert!(message.contains("result_runs_file"));
}
/// Adding an `official_evaluation_setup` to the base spec must fail
/// validation with an error that mentions `official_runs`.
#[test]
fn setup_and_static_runs_are_mutually_exclusive_per_mode() {
    let mut spec = base_spec();
    let evaluator = separated_evaluator_mut(&mut spec);
    evaluator.official_evaluation_setup = Some(setup_spec());

    let rejection = validate_challenge_bundle_spec(&spec)
        .expect_err("official setup and official runs should conflict");
    let message = rejection.to_string();
    assert!(message.contains("official_runs"));
}
/// Pointing the ranking's primary metric at the name `missing` must fail
/// validation.
#[test]
fn metric_schema_rejects_unknown_primary_metric() {
    let mut spec = base_spec();
    spec.metric_schema.ranking.primary_metric_name = metric_name("missing");

    // `expect_err` (rather than `assert!(..is_err())`) gives a descriptive failure if
    // validation wrongly accepts the spec.
    validate_challenge_bundle_spec(&spec).expect_err("unknown primary metric should fail");
}
/// Pushing a relabelled clone of the first metric, which keeps the same
/// name, must fail validation.
#[test]
fn metric_schema_rejects_duplicate_metric_names() {
    let mut spec = base_spec();
    let mut duplicate = spec.metric_schema.metrics[0].clone();
    // Only the label changes, so the clone still shares its name with the first metric.
    duplicate.label = "Duplicate Score".to_string();
    spec.metric_schema.metrics.push(duplicate);

    // `expect_err` (rather than `assert!(..is_err())`) gives a descriptive failure if
    // validation wrongly accepts the spec.
    validate_challenge_bundle_spec(&spec).expect_err("duplicate metric names should fail");
}
/// Adding a `runtime_ms` metric and listing it as a ranking tie-breaker
/// must still produce a spec that validates.
#[test]
fn metric_schema_accepts_tie_breaker_metadata() {
    let mut spec = base_spec();
    let runtime_metric = agentics_domain::models::challenge::MetricDefinitionSpec {
        name: metric_name("runtime_ms"),
        label: "Runtime".to_string(),
        unit: Some("ms".to_string()),
        direction: MetricDirection::Minimize,
        visibility: MetricVisibility::Public,
        metric_description: Some("Wall-clock runtime in milliseconds.".to_string()),
    };
    spec.metric_schema.metrics.push(runtime_metric);
    spec.metric_schema
        .ranking
        .tie_breaker_metric_names
        .push(metric_name("runtime_ms"));

    // `expect` (rather than `assert!(..is_ok())`) prints the validation error on failure.
    validate_challenge_bundle_spec(&spec).expect("tie-breaker metadata should validate");
}
mod bundle_files;