agentics_contracts/
challenge_bundle.rs

1//! Helpers for loading and validating filesystem challenge bundles.
2//!
3//! Challenge bundles are the public contract between seeded/admin-authored
4//! challenges and the runner. Validation keeps contract names explicit and
5//! rejects unknown or stale fields before a bundle can be published.
6
7use std::collections::HashSet;
8use std::path::Path;
9
10use chrono::{DateTime, Utc};
11
12use crate::validation::{archive::ChallengeValidationError, targets, text};
13use crate::zip_project::{ZIP_PROJECT_MANIFEST_FILE, ZIP_PROJECT_PROTOCOL};
14use agentics_domain::models::challenge::{
15    ChallengeBundleSpec, ChallengeExecutionMode, ChallengeRunInputFile, ChallengeRunManifest,
16    ChallengeRunSpec, ChallengeSetupSpec, ChallengeSolutionPublicationPolicy,
17    CoexecutedBenchmarkSetupSpec, EvaluatorSpec, MAX_CHALLENGE_KEYWORDS, MIN_CHALLENGE_KEYWORDS,
18    PipedStdioSessionManifest, PipedStdioSetupSpec, PrivateBenchmarkPolicy,
19};
20use agentics_domain::models::paths::BundleRelativePath;
21use agentics_error::{Result, ServiceError};
22use garde::Validate;
23
24mod filesystem;
25mod images;
26
27pub use filesystem::{
28    challenge_bundle_tree_sha256, copy_challenge_bundle_dir, copy_challenge_bundle_dir_excluding,
29};
30
/// Upper bound on the number of solution invocations a single evaluation may declare.
pub const MAX_CHALLENGE_RUNS_PER_EVALUATION: u64 = 100;
33
34/// Read `spec.json` from a bundle directory and validate its contract fields.
35pub async fn read_challenge_bundle_spec(bundle_dir: &Path) -> Result<ChallengeBundleSpec> {
36    let spec_path = bundle_dir.join("spec.json");
37    let raw = tokio::fs::read_to_string(&spec_path).await?;
38    let spec: ChallengeBundleSpec = serde_json::from_str(&raw)
39        .map_err(|e| ChallengeValidationError::InvalidManifest(format!("spec.json: {e}")))?;
40    validate_challenge_bundle_spec(&spec)?;
41    Ok(spec)
42}
43
44/// Read and validate one challenge-owned run manifest from a bundle directory.
45pub async fn read_challenge_run_manifest(
46    bundle_dir: &Path,
47    manifest_path: &BundleRelativePath,
48) -> Result<ChallengeRunManifest> {
49    read_challenge_run_manifest_file(
50        &bundle_dir.join(manifest_path.as_path()),
51        &format!("run manifest {manifest_path}"),
52    )
53    .await
54}
55
56/// Read and validate a challenge-owned run manifest from an already resolved path.
57pub async fn read_challenge_run_manifest_file(
58    manifest_file: &Path,
59    label: &str,
60) -> Result<ChallengeRunManifest> {
61    let raw = tokio::fs::read_to_string(manifest_file).await?;
62    let manifest: ChallengeRunManifest = serde_json::from_str(&raw)
63        .map_err(|e| ChallengeValidationError::InvalidManifest(format!("{label}: {e}")))?;
64    validate_challenge_run_manifest(&manifest)?;
65    Ok(manifest)
66}
67
68/// Read and validate one challenge-owned interactive session manifest.
69pub async fn read_piped_stdio_session_manifest(
70    bundle_dir: &Path,
71    manifest_path: &BundleRelativePath,
72) -> Result<PipedStdioSessionManifest> {
73    read_piped_stdio_session_manifest_file(
74        &bundle_dir.join(manifest_path.as_path()),
75        &format!("session manifest {manifest_path}"),
76    )
77    .await
78}
79
80/// Read and validate a challenge-owned interactive session manifest from a resolved path.
81pub async fn read_piped_stdio_session_manifest_file(
82    manifest_file: &Path,
83    label: &str,
84) -> Result<PipedStdioSessionManifest> {
85    let raw = tokio::fs::read_to_string(manifest_file).await?;
86    let manifest: PipedStdioSessionManifest = serde_json::from_str(&raw)
87        .map_err(|e| ChallengeValidationError::InvalidManifest(format!("invalid {label}: {e}")))?;
88    validate_piped_stdio_session_manifest(&manifest)?;
89    Ok(manifest)
90}
91
92/// Validate that a challenge bundle has the required files and declared data directories.
93pub async fn validate_challenge_bundle(bundle_dir: &Path) -> Result<()> {
94    let spec = read_challenge_bundle_spec(bundle_dir).await?;
95    let spec_path = bundle_dir.join("spec.json");
96    let statement_path = bundle_dir.join("statement.md");
97    let public_dir = bundle_dir.join(spec.datasets.public_dir.as_path());
98
99    assert_path_type(&spec_path, "file", "spec.json").await?;
100    assert_path_type(&statement_path, "file", "statement.md").await?;
101    assert_declared_execution_scripts(bundle_dir, &spec).await?;
102    assert_path_type(&public_dir, "directory", "public data dir").await?;
103
104    validate_declared_execution_inputs(bundle_dir, &spec).await?;
105
106    if spec.datasets.private_benchmark_enabled
107        && let Some(ref private_benchmark_dir) = spec.datasets.private_benchmark_dir
108    {
109        assert_path_type(
110            &bundle_dir.join(private_benchmark_dir.as_path()),
111            "directory",
112            "private benchmark data dir",
113        )
114        .await?;
115    }
116
117    Ok(())
118}
119
120/// Validate declared execution scripts for the selected topology.
121async fn assert_declared_execution_scripts(
122    bundle_dir: &Path,
123    spec: &ChallengeBundleSpec,
124) -> Result<()> {
125    if let Some(script_path) =
126        declared_evaluator_script(&spec.execution.trusted_evaluator().command)
127    {
128        let label = format!("{} script", spec.execution.mode().runtime_name());
129        assert_path_type(&bundle_dir.join(script_path), "file", &label).await?;
130    }
131
132    match &spec.execution {
133        agentics_domain::models::challenge::ChallengeExecutionSpec::SeparatedEvaluator(
134            execution,
135        ) => {
136            for (label, setup) in [
137                (
138                    "validation setup script",
139                    execution.validation_setup.as_ref(),
140                ),
141                (
142                    "official evaluation setup script",
143                    execution.official_evaluation_setup.as_ref(),
144                ),
145            ] {
146                if let Some(setup) = setup
147                    && let Some(script_path) = declared_evaluator_script(&setup.command)
148                {
149                    assert_path_type(&bundle_dir.join(script_path), "file", label).await?;
150                }
151            }
152        }
153        agentics_domain::models::challenge::ChallengeExecutionSpec::PipedStdio(execution) => {
154            for (label, setup) in [
155                (
156                    "validation setup script",
157                    execution.validation_setup.as_ref(),
158                ),
159                (
160                    "official evaluation setup script",
161                    execution.official_evaluation_setup.as_ref(),
162                ),
163            ] {
164                if let Some(setup) = setup
165                    && let Some(script_path) = declared_evaluator_script(&setup.command)
166                {
167                    assert_path_type(&bundle_dir.join(script_path), "file", label).await?;
168                }
169            }
170        }
171        agentics_domain::models::challenge::ChallengeExecutionSpec::CoexecutedBenchmark(
172            execution,
173        ) => {
174            for (label, setup) in [
175                (
176                    "validation setup script",
177                    execution.validation_setup.as_ref(),
178                ),
179                (
180                    "official evaluation setup script",
181                    execution.official_evaluation_setup.as_ref(),
182                ),
183            ] {
184                if let Some(setup) = setup
185                    && let Some(script_path) = declared_evaluator_script(&setup.command)
186                {
187                    assert_path_type(&bundle_dir.join(script_path), "file", label).await?;
188                }
189            }
190        }
191    }
192
193    Ok(())
194}
195
196/// Validate static run/session locators declared by the execution topology.
197async fn validate_declared_execution_inputs(
198    bundle_dir: &Path,
199    spec: &ChallengeBundleSpec,
200) -> Result<()> {
201    match &spec.execution {
202        agentics_domain::models::challenge::ChallengeExecutionSpec::SeparatedEvaluator(
203            execution,
204        ) => {
205            if spec.targets.iter().any(|target| target.validation_enabled)
206                && let Some(validation_runs) = &execution.validation_runs
207            {
208                validate_static_run_manifest(bundle_dir, validation_runs, "validation").await?;
209            }
210            if spec.datasets.private_benchmark_enabled
211                && let Some(official_runs) = &execution.official_runs
212            {
213                validate_static_run_manifest(bundle_dir, official_runs, "official").await?;
214            }
215        }
216        agentics_domain::models::challenge::ChallengeExecutionSpec::PipedStdio(execution) => {
217            if spec.targets.iter().any(|target| target.validation_enabled)
218                && let Some(validation_session) = &execution.validation_session
219            {
220                validate_static_session_manifest(bundle_dir, validation_session, "validation")
221                    .await?;
222            }
223            if spec.datasets.private_benchmark_enabled
224                && let Some(official_session) = &execution.official_session
225            {
226                validate_static_session_manifest(bundle_dir, official_session, "official").await?;
227            }
228        }
229        agentics_domain::models::challenge::ChallengeExecutionSpec::CoexecutedBenchmark(_) => {}
230    }
231
232    Ok(())
233}
234
235/// Validate one static run manifest and its source-backed inputs.
236async fn validate_static_run_manifest(
237    bundle_dir: &Path,
238    manifest_path: &BundleRelativePath,
239    label: &str,
240) -> Result<()> {
241    assert_path_type(
242        &bundle_dir.join(manifest_path.as_path()),
243        "file",
244        &format!("{label} run manifest"),
245    )
246    .await?;
247    let manifest = read_challenge_run_manifest(bundle_dir, manifest_path).await?;
248    validate_challenge_run_manifest_sources(bundle_dir, &manifest).await
249}
250
251/// Validate one static interactive session manifest and its source-backed inputs.
252async fn validate_static_session_manifest(
253    bundle_dir: &Path,
254    manifest_path: &BundleRelativePath,
255    label: &str,
256) -> Result<()> {
257    assert_path_type(
258        &bundle_dir.join(manifest_path.as_path()),
259        "file",
260        &format!("{label} session manifest"),
261    )
262    .await?;
263    let manifest = read_piped_stdio_session_manifest(bundle_dir, manifest_path).await?;
264    validate_piped_stdio_session_manifest_sources(bundle_dir, &manifest).await
265}
266
267/// Handles assert path type for this module.
268async fn assert_path_type(path: &Path, kind: &str, label: &str) -> Result<()> {
269    let meta = tokio::fs::metadata(path).await.map_err(|_| {
270        ServiceError::Validation(format!("{} does not exist: {}", label, path.display()))
271    })?;
272
273    if kind == "file" && !meta.is_file() {
274        return Err(ServiceError::Validation(format!(
275            "{} is not a file: {}",
276            label,
277            path.display()
278        )));
279    }
280    if kind == "directory" && !meta.is_dir() {
281        return Err(ServiceError::Validation(format!(
282            "{} is not a directory: {}",
283            label,
284            path.display()
285        )));
286    }
287
288    Ok(())
289}
290
/// Report whether `value` is a relative path that stays under the directory it is joined to.
///
/// A value is rejected when it starts with `/`, or when splitting on `/` and `\` yields
/// an empty segment (empty input, doubled or leading/trailing separators) or a `..`
/// segment.
pub fn is_safe_relative_path(value: &str) -> bool {
    let has_unsafe_segment = value
        .split(['/', '\\'])
        .any(|segment| segment.is_empty() || segment == "..");

    !value.starts_with('/') && !has_unsafe_segment
}
298
299/// Validates challenge bundle spec invariants for this contract.
300fn validate_challenge_bundle_spec(spec: &ChallengeBundleSpec) -> Result<()> {
301    require_non_empty(&spec.challenge_title, "challenge_title")?;
302    require_non_empty(&spec.summary.en, "summary.en")?;
303    require_non_empty(&spec.summary.zh, "summary.zh")?;
304    validate_challenge_keywords(spec)?;
305    validate_garde(spec, "spec")?;
306
307    if spec.schema_version != 1 {
308        return Err(ServiceError::Validation(
309            "schema_version must be 1".to_string(),
310        ));
311    }
312    if spec.solution.protocol != ZIP_PROJECT_PROTOCOL {
313        return Err(ServiceError::Validation(format!(
314            "solution.protocol must be {ZIP_PROJECT_PROTOCOL}"
315        )));
316    }
317    if spec.solution.manifest_file.as_str() != ZIP_PROJECT_MANIFEST_FILE {
318        return Err(ServiceError::Validation(format!(
319            "solution.manifest_file must be {ZIP_PROJECT_MANIFEST_FILE}"
320        )));
321    }
322    match &spec.execution {
323        agentics_domain::models::challenge::ChallengeExecutionSpec::SeparatedEvaluator(
324            execution,
325        ) => {
326            validate_evaluator_spec(
327                &execution.separated_evaluator,
328                "execution.separated_evaluator.command",
329                ChallengeExecutionMode::SeparatedEvaluator,
330            )?;
331        }
332        agentics_domain::models::challenge::ChallengeExecutionSpec::PipedStdio(execution) => {
333            validate_evaluator_spec(
334                &execution.interactive_evaluator,
335                "execution.interactive_evaluator.command",
336                ChallengeExecutionMode::PipedStdio,
337            )?;
338        }
339        agentics_domain::models::challenge::ChallengeExecutionSpec::CoexecutedBenchmark(
340            execution,
341        ) => {
342            validate_evaluator_spec(
343                &execution.coexecuted_evaluator,
344                "execution.coexecuted_evaluator.command",
345                ChallengeExecutionMode::CoexecutedBenchmark,
346            )?;
347        }
348    }
349    validate_targets(spec)?;
350    validate_challenge_policy(spec)?;
351    validate_execution(spec)?;
352
353    if spec.datasets.private_benchmark_policy != PrivateBenchmarkPolicy::ScoreOnly {
354        return Err(ServiceError::Validation(
355            "datasets.private_benchmark_policy must be score_only".to_string(),
356        ));
357    }
358
359    // Challenge authors may stage private benchmark data before enabling
360    // official runs. Static official run manifests need a private directory,
361    // while setup-generated official runs may only need private seeds.
362    match (
363        spec.datasets.private_benchmark_enabled,
364        spec.datasets.private_benchmark_dir.as_ref(),
365        execution_uses_static_official_locator(&spec.execution),
366    ) {
367        (true, Some(_), _) => {}
368        (true, None, true) => {
369            return Err(ServiceError::Validation(
370                "datasets.private_benchmark_dir is required when private_benchmark_enabled uses a static official run or session manifest"
371                    .to_string(),
372            ));
373        }
374        (true, None, false) => {}
375        (false, Some(_), _) => {}
376        (false, None, _) => {}
377    }
378
379    validate_metric_schema(spec)?;
380
381    Ok(())
382}
383
384/// Validates challenge keyword cardinality and duplicate semantics.
385fn validate_challenge_keywords(spec: &ChallengeBundleSpec) -> Result<()> {
386    if !(MIN_CHALLENGE_KEYWORDS..=MAX_CHALLENGE_KEYWORDS).contains(&spec.keywords.len()) {
387        return Err(ServiceError::Validation(format!(
388            "keywords must contain between {MIN_CHALLENGE_KEYWORDS} and {MAX_CHALLENGE_KEYWORDS} entries"
389        )));
390    }
391    let mut seen = HashSet::new();
392    for keyword in &spec.keywords {
393        let normalized = keyword.as_str().to_lowercase();
394        if !seen.insert(normalized) {
395            return Err(ServiceError::Validation(format!(
396                "duplicate challenge keyword `{keyword}`"
397            )));
398        }
399    }
400    Ok(())
401}
402
403/// Require immutable Docker image references for hosted or audited execution.
404pub fn validate_digest_pinned_images(spec: &ChallengeBundleSpec) -> Result<()> {
405    for (index, target) in spec.targets.iter().enumerate() {
406        let field = format!("targets[{index}].resource_profile");
407        images::require_image_digest_reference(
408            &target.resource_profile.solution_image,
409            &format!("{field}.solution_image"),
410        )?;
411        images::require_image_digest_reference(
412            &target.resource_profile.evaluator_image,
413            &format!("{field}.evaluator_image"),
414        )?;
415    }
416
417    Ok(())
418}
419
420/// Validates evaluator command invariants for this contract.
421fn validate_evaluator_spec(
422    evaluator: &EvaluatorSpec,
423    field: &str,
424    mode: ChallengeExecutionMode,
425) -> Result<()> {
426    let evaluator_field = field.strip_suffix(".command").unwrap_or(field);
427    validate_garde(evaluator, evaluator_field)?;
428    validate_declared_script_runtime(&evaluator.command, field, mode)?;
429
430    Ok(())
431}
432
433/// Validate that a bundle-owned script path matches the selected execution runtime.
434fn validate_declared_script_runtime(
435    command: &[String],
436    field: &str,
437    mode: ChallengeExecutionMode,
438) -> Result<()> {
439    let Some(script_path) = declared_evaluator_script(command) else {
440        return Ok(());
441    };
442    let expected_prefix = format!("{}/", mode.runtime_name());
443    if !script_path.starts_with(&expected_prefix) {
444        return Err(ServiceError::Validation(format!(
445            "{field} script path must live under `{}` for {} execution, got `{script_path}`",
446            mode.runtime_name(),
447            execution_mode_name(mode)
448        )));
449    }
450
451    Ok(())
452}
453
454/// Handles declared evaluator script for this module.
455fn declared_evaluator_script(command: &[String]) -> Option<&str> {
456    command
457        .iter()
458        .find(|part| is_safe_relative_path(part) && part.ends_with(".py"))
459        .map(String::as_str)
460}
461
462/// Validates targets invariants for this contract.
463fn validate_targets(spec: &ChallengeBundleSpec) -> Result<()> {
464    if spec.targets.is_empty() {
465        return Err(ServiceError::Validation(
466            "targets must not be empty".to_string(),
467        ));
468    }
469
470    let mut target_names = HashSet::with_capacity(spec.targets.len());
471    for (index, target) in spec.targets.iter().enumerate() {
472        let field = format!("targets[{index}]");
473        targets::validate_submission_target_policy(target, &field)?;
474        images::validate_target(target, &field)?;
475        if !target_names.insert(target.name.as_str()) {
476            return Err(ServiceError::Validation(format!(
477                "targets contains duplicate name `{}`",
478                target.name
479            )));
480        }
481    }
482
483    Ok(())
484}
485
486/// Validates challenge policy invariants for this contract.
487fn validate_challenge_policy(spec: &ChallengeBundleSpec) -> Result<()> {
488    let starts_at = parse_required_rfc3339(&spec.starts_at, "starts_at")?;
489    let closes_at = parse_optional_rfc3339(spec.closes_at.as_deref(), "closes_at")?;
490    if let Some(closes_at) = closes_at
491        && closes_at <= starts_at
492    {
493        return Err(ServiceError::Validation(
494            "closes_at must be later than starts_at".to_string(),
495        ));
496    }
497    if spec.solution_publication == ChallengeSolutionPublicationPolicy::PublicAfterClose
498        && closes_at.is_none()
499    {
500        return Err(ServiceError::Validation(
501            "closes_at is required when solution_publication is public_after_close".to_string(),
502        ));
503    }
504    validate_optional_positive_limit(
505        spec.validation_submission_limit,
506        "validation_submission_limit",
507    )?;
508    validate_optional_positive_limit(spec.official_submission_limit, "official_submission_limit")?;
509    if spec.targets.iter().any(|target| target.validation_enabled)
510        && spec.validation_submission_limit.is_none()
511    {
512        return Err(ServiceError::Validation(
513            "validation_submission_limit is required when any target has validation_enabled true"
514                .to_string(),
515        ));
516    }
517
518    Ok(())
519}
520
521/// Parses required rfc3339 from an external boundary string.
522fn parse_required_rfc3339(value: &str, field: &str) -> Result<DateTime<Utc>> {
523    DateTime::parse_from_rfc3339(value)
524        .map(|date| date.with_timezone(&Utc))
525        .map_err(|e| ServiceError::Validation(format!("{field} must be RFC3339: {e}")))
526}
527
528/// Parses optional rfc3339 from an external boundary string.
529fn parse_optional_rfc3339(value: Option<&str>, field: &str) -> Result<Option<DateTime<Utc>>> {
530    value
531        .map(|value| {
532            DateTime::parse_from_rfc3339(value)
533                .map(|date| date.with_timezone(&Utc))
534                .map_err(|e| ServiceError::Validation(format!("{field} must be RFC3339: {e}")))
535        })
536        .transpose()
537}
538
539/// Validates optional positive limit invariants for this contract.
540fn validate_optional_positive_limit(value: Option<i64>, field: &str) -> Result<()> {
541    if let Some(value) = value
542        && value <= 0
543    {
544        return Err(ServiceError::Validation(format!(
545            "{field} must be positive"
546        )));
547    }
548    Ok(())
549}
550
551/// Validates execution invariants for this contract.
552fn validate_execution(spec: &ChallengeBundleSpec) -> Result<()> {
553    match &spec.execution {
554        agentics_domain::models::challenge::ChallengeExecutionSpec::SeparatedEvaluator(
555            execution,
556        ) => {
557            validate_separated_evaluator_execution(spec, execution)?;
558        }
559        agentics_domain::models::challenge::ChallengeExecutionSpec::PipedStdio(execution) => {
560            validate_piped_stdio_execution(spec, execution)?;
561        }
562        agentics_domain::models::challenge::ChallengeExecutionSpec::CoexecutedBenchmark(
563            execution,
564        ) => {
565            validate_coexecuted_benchmark_execution(spec, execution)?;
566        }
567    }
568    validate_solution_run_stage_policy(spec)?;
569
570    Ok(())
571}
572
573/// Validate the mode-specific presence of participant run-stage limits.
574fn validate_solution_run_stage_policy(spec: &ChallengeBundleSpec) -> Result<()> {
575    for (index, target) in spec.targets.iter().enumerate() {
576        let field = format!("targets[{index}].resource_profile.solution.run");
577        match spec.execution.mode() {
578            ChallengeExecutionMode::SeparatedEvaluator | ChallengeExecutionMode::PipedStdio => {
579                if target.resource_profile.solution.run.is_none() {
580                    return Err(ServiceError::Validation(format!(
581                        "{field} is required for {} execution",
582                        execution_mode_name(spec.execution.mode())
583                    )));
584                }
585            }
586            ChallengeExecutionMode::CoexecutedBenchmark => {
587                if target.resource_profile.solution.run.is_some() {
588                    return Err(ServiceError::Validation(format!(
589                        "{field} is forbidden for coexecuted_benchmark execution"
590                    )));
591                }
592            }
593        }
594    }
595    Ok(())
596}
597
598/// Stable wire name for one execution mode.
599fn execution_mode_name(mode: ChallengeExecutionMode) -> &'static str {
600    match mode {
601        ChallengeExecutionMode::SeparatedEvaluator => "separated_evaluator",
602        ChallengeExecutionMode::PipedStdio => "piped_stdio",
603        ChallengeExecutionMode::CoexecutedBenchmark => "coexecuted_benchmark",
604    }
605}
606
607/// Validate separated-evaluator topology fields.
608fn validate_separated_evaluator_execution(
609    spec: &ChallengeBundleSpec,
610    execution: &agentics_domain::models::challenge::SeparatedEvaluatorExecutionSpec,
611) -> Result<()> {
612    if let Some(setup) = &execution.validation_setup {
613        validate_setup_spec(
614            setup,
615            "execution.validation_setup",
616            ChallengeExecutionMode::SeparatedEvaluator,
617        )?;
618    }
619    if let Some(setup) = &execution.official_evaluation_setup {
620        validate_setup_spec(
621            setup,
622            "execution.official_evaluation_setup",
623            ChallengeExecutionMode::SeparatedEvaluator,
624        )?;
625    }
626    if execution.validation_runs.is_some() && execution.validation_setup.is_some() {
627        return Err(ServiceError::Validation(
628            "execution must not declare both validation_runs and validation_setup".to_string(),
629        ));
630    }
631    if execution.official_runs.is_some() && execution.official_evaluation_setup.is_some() {
632        return Err(ServiceError::Validation(
633            "execution must not declare both official_runs and official_evaluation_setup"
634                .to_string(),
635        ));
636    }
637    if spec.targets.iter().any(|target| target.validation_enabled)
638        && execution.validation_runs.is_none()
639        && execution.validation_setup.is_none()
640    {
641        return Err(ServiceError::Validation(
642            "execution.validation_runs or execution.validation_setup is required when any target has validation_enabled true"
643                .to_string(),
644        ));
645    }
646    if spec.datasets.private_benchmark_enabled
647        && execution.official_runs.is_none()
648        && execution.official_evaluation_setup.is_none()
649    {
650        return Err(ServiceError::Validation(
651            "execution.official_runs or execution.official_evaluation_setup is required when private_benchmark_enabled is true"
652                .to_string(),
653        ));
654    }
655    Ok(())
656}
657
658/// Validate piped-stdio topology fields.
659fn validate_piped_stdio_execution(
660    spec: &ChallengeBundleSpec,
661    execution: &agentics_domain::models::challenge::PipedStdioExecutionSpec,
662) -> Result<()> {
663    if !execution.acknowledge_stdio_protocol_framing {
664        return Err(ServiceError::Validation(
665            "execution.acknowledge_stdio_protocol_framing must be true for piped_stdio: the challenge author must document the stdin/stdout message protocol, including session start and termination, multi-case framing if used, EOF behavior, malformed participant output handling, and trusted evaluator result.json ownership."
666                .to_string(),
667        ));
668    }
669    if let Some(setup) = &execution.validation_setup {
670        validate_piped_stdio_setup_spec(
671            setup,
672            "execution.validation_setup",
673            ChallengeExecutionMode::PipedStdio,
674        )?;
675    }
676    if let Some(setup) = &execution.official_evaluation_setup {
677        validate_piped_stdio_setup_spec(
678            setup,
679            "execution.official_evaluation_setup",
680            ChallengeExecutionMode::PipedStdio,
681        )?;
682    }
683    if execution.validation_session.is_some() && execution.validation_setup.is_some() {
684        return Err(ServiceError::Validation(
685            "execution must not declare both validation_session and validation_setup".to_string(),
686        ));
687    }
688    if execution.official_session.is_some() && execution.official_evaluation_setup.is_some() {
689        return Err(ServiceError::Validation(
690            "execution must not declare both official_session and official_evaluation_setup"
691                .to_string(),
692        ));
693    }
694    if spec.targets.iter().any(|target| target.validation_enabled)
695        && execution.validation_session.is_none()
696        && execution.validation_setup.is_none()
697    {
698        return Err(ServiceError::Validation(
699            "execution.validation_session or execution.validation_setup is required when any target has validation_enabled true"
700                .to_string(),
701        ));
702    }
703    if spec.datasets.private_benchmark_enabled
704        && execution.official_session.is_none()
705        && execution.official_evaluation_setup.is_none()
706    {
707        return Err(ServiceError::Validation(
708            "execution.official_session or execution.official_evaluation_setup is required when private_benchmark_enabled is true"
709                .to_string(),
710        ));
711    }
712    Ok(())
713}
714
715/// Validate coexecuted-evaluator topology fields.
716fn validate_coexecuted_benchmark_execution(
717    _spec: &ChallengeBundleSpec,
718    execution: &agentics_domain::models::challenge::CoexecutedBenchmarkExecutionSpec,
719) -> Result<()> {
720    if !execution.acknowledge_danger {
721        return Err(ServiceError::Validation(
722            "execution.acknowledge_danger must be true for coexecuted_benchmark".to_string(),
723        ));
724    }
725    if let Some(setup) = &execution.validation_setup {
726        validate_coexecuted_benchmark_setup_spec(
727            setup,
728            "execution.validation_setup",
729            ChallengeExecutionMode::CoexecutedBenchmark,
730        )?;
731    }
732    if let Some(setup) = &execution.official_evaluation_setup {
733        validate_coexecuted_benchmark_setup_spec(
734            setup,
735            "execution.official_evaluation_setup",
736            ChallengeExecutionMode::CoexecutedBenchmark,
737        )?;
738    }
739    Ok(())
740}
741
742/// Validates setup spec invariants for this contract.
743fn validate_setup_spec(
744    setup: &ChallengeSetupSpec,
745    field: &str,
746    mode: ChallengeExecutionMode,
747) -> Result<()> {
748    validate_garde(setup, field)?;
749    let command_field = format!("{field}.command");
750    validate_declared_script_runtime(&setup.command, &command_field, mode)?;
751
752    Ok(())
753}
754
755/// Validates piped-stdio setup spec invariants for this contract.
756fn validate_piped_stdio_setup_spec(
757    setup: &PipedStdioSetupSpec,
758    field: &str,
759    mode: ChallengeExecutionMode,
760) -> Result<()> {
761    validate_garde(setup, field)?;
762    let command_field = format!("{field}.command");
763    validate_declared_script_runtime(&setup.command, &command_field, mode)?;
764
765    Ok(())
766}
767
768/// Validates coexecuted-evaluator setup spec invariants for this contract.
769fn validate_coexecuted_benchmark_setup_spec(
770    setup: &CoexecutedBenchmarkSetupSpec,
771    field: &str,
772    mode: ChallengeExecutionMode,
773) -> Result<()> {
774    validate_garde(setup, field)?;
775    let command_field = format!("{field}.command");
776    validate_declared_script_runtime(&setup.command, &command_field, mode)?;
777
778    Ok(())
779}
780
781/// Validates challenge run manifest invariants for this contract.
782fn validate_challenge_run_manifest(manifest: &ChallengeRunManifest) -> Result<()> {
783    if manifest.runs.is_empty() {
784        return Err(ServiceError::Validation(
785            "run manifest must declare at least one run".to_string(),
786        ));
787    }
788    if u64::try_from(manifest.runs.len())
789        .map(|count| count > MAX_CHALLENGE_RUNS_PER_EVALUATION)
790        .unwrap_or(true)
791    {
792        return Err(ServiceError::Validation(format!(
793            "run manifest must declare at most {MAX_CHALLENGE_RUNS_PER_EVALUATION} runs"
794        )));
795    }
796
797    let mut run_names = HashSet::with_capacity(manifest.runs.len());
798    for run in &manifest.runs {
799        validate_challenge_run(run)?;
800        if !run_names.insert(run.run_name.as_str()) {
801            return Err(ServiceError::Validation(format!(
802                "run manifest contains duplicate run_name `{}`",
803                run.run_name
804            )));
805        }
806    }
807
808    Ok(())
809}
810
811/// Validates challenge run invariants for this contract.
812fn validate_challenge_run(run: &ChallengeRunSpec) -> Result<()> {
813    if run.stdin_json.is_some() && run.stdin_text.is_some() {
814        return Err(ServiceError::Validation(
815            "runs[].stdin_json and runs[].stdin_text cannot both be present".to_string(),
816        ));
817    }
818    let mut input_paths = HashSet::with_capacity(run.input_files.len());
819    for input in &run.input_files {
820        validate_run_input_file(input)?;
821        if !input_paths.insert(input.path.as_str()) {
822            return Err(ServiceError::Validation(format!(
823                "runs[].input_files contains duplicate path `{}`",
824                input.path
825            )));
826        }
827    }
828    let mut output_paths = HashSet::with_capacity(run.output_files.len());
829    for path in &run.output_files {
830        if !output_paths.insert(path.as_str()) {
831            return Err(ServiceError::Validation(format!(
832                "runs[].output_files contains duplicate path `{path}`"
833            )));
834        }
835    }
836
837    Ok(())
838}
839
840/// Validates interactive session manifest invariants.
841fn validate_piped_stdio_session_manifest(manifest: &PipedStdioSessionManifest) -> Result<()> {
842    let mut input_paths = HashSet::with_capacity(manifest.input_files.len());
843    for input in &manifest.input_files {
844        validate_run_input_file(input)?;
845        if !input_paths.insert(input.path.as_str()) {
846            return Err(ServiceError::Validation(format!(
847                "session.input_files contains duplicate path `{}`",
848                input.path
849            )));
850        }
851    }
852
853    Ok(())
854}
855
856/// Validates run input file invariants for this contract.
857fn validate_run_input_file(input: &ChallengeRunInputFile) -> Result<()> {
858    let source_count = [
859        input.source_path.is_some(),
860        input.content.is_some(),
861        input.content_json.is_some(),
862    ]
863    .into_iter()
864    .filter(|present| *present)
865    .count();
866    if source_count > 1 {
867        return Err(ServiceError::Validation(
868            "runs[].input_files[] must declare only one of source_path, content, or content_json"
869                .to_string(),
870        ));
871    }
872    if source_count == 0 {
873        return Err(ServiceError::Validation(
874            "runs[].input_files[] must declare source_path, content, or content_json".to_string(),
875        ));
876    }
877
878    Ok(())
879}
880
881/// Validate that source-backed run inputs exist under the bundle root.
882pub async fn validate_challenge_run_manifest_sources(
883    bundle_dir: &Path,
884    manifest: &ChallengeRunManifest,
885) -> Result<()> {
886    for run in &manifest.runs {
887        for input in &run.input_files {
888            if let Some(source_path) = &input.source_path {
889                let full_path = bundle_dir.join(source_path.as_path());
890                let meta = tokio::fs::symlink_metadata(&full_path).await.map_err(|_| {
891                    ServiceError::Validation(format!(
892                        "runs[].input_files[].source_path does not exist: {}",
893                        full_path.display()
894                    ))
895                })?;
896                if meta.file_type().is_symlink() {
897                    return Err(ServiceError::Validation(format!(
898                        "runs[].input_files[].source_path must not be a symlink: {}",
899                        full_path.display()
900                    )));
901                }
902                if !meta.is_file() {
903                    return Err(ServiceError::Validation(format!(
904                        "runs[].input_files[].source_path is not a file: {}",
905                        full_path.display()
906                    )));
907                }
908            }
909        }
910    }
911
912    Ok(())
913}
914
915/// Validate source-backed session inputs under one source root.
916pub async fn validate_piped_stdio_session_manifest_sources(
917    bundle_dir: &Path,
918    manifest: &PipedStdioSessionManifest,
919) -> Result<()> {
920    for input in &manifest.input_files {
921        if let Some(source_path) = &input.source_path {
922            let full_path = bundle_dir.join(source_path.as_path());
923            let meta = tokio::fs::symlink_metadata(&full_path).await.map_err(|_| {
924                ServiceError::Validation(format!(
925                    "session.input_files[].source_path does not exist: {}",
926                    full_path.display()
927                ))
928            })?;
929            if meta.file_type().is_symlink() {
930                return Err(ServiceError::Validation(format!(
931                    "session.input_files[].source_path must not be a symlink: {}",
932                    full_path.display()
933                )));
934            }
935            if !meta.is_file() {
936                return Err(ServiceError::Validation(format!(
937                    "session.input_files[].source_path is not a file: {}",
938                    full_path.display()
939                )));
940            }
941        }
942    }
943
944    Ok(())
945}
946
947/// Return whether the execution topology has a static private official locator.
948fn execution_uses_static_official_locator(
949    execution: &agentics_domain::models::challenge::ChallengeExecutionSpec,
950) -> bool {
951    match execution {
952        agentics_domain::models::challenge::ChallengeExecutionSpec::SeparatedEvaluator(
953            execution,
954        ) => execution.official_runs.is_some(),
955        agentics_domain::models::challenge::ChallengeExecutionSpec::PipedStdio(execution) => {
956            execution.official_session.is_some()
957        }
958        agentics_domain::models::challenge::ChallengeExecutionSpec::CoexecutedBenchmark(_) => false,
959    }
960}
961
962/// Validates metric schema invariants for this contract.
963fn validate_metric_schema(spec: &ChallengeBundleSpec) -> Result<()> {
964    let schema = &spec.metric_schema;
965    if schema.metrics.is_empty() {
966        return Err(ServiceError::Validation(
967            "metric_schema.metrics must not be empty".to_string(),
968        ));
969    }
970
971    let mut names = HashSet::with_capacity(schema.metrics.len());
972    for metric in &schema.metrics {
973        require_non_empty(&metric.label, "metric_schema.metrics[].label")?;
974        if let Some(unit) = &metric.unit {
975            require_non_empty(unit, "metric_schema.metrics[].unit")?;
976        }
977        if let Some(metric_description) = &metric.metric_description {
978            require_non_empty(
979                metric_description,
980                "metric_schema.metrics[].metric_description",
981            )?;
982        }
983        if !names.insert(metric.name.as_str()) {
984            return Err(ServiceError::Validation(format!(
985                "metric_schema.metrics contains duplicate name `{}`",
986                metric.name
987            )));
988        }
989    }
990
991    if !names.contains(schema.ranking.primary_metric_name.as_str()) {
992        return Err(ServiceError::Validation(format!(
993            "metric_schema.ranking.primary_metric_name references unknown metric `{}`",
994            schema.ranking.primary_metric_name
995        )));
996    }
997
998    let mut tie_breakers = HashSet::with_capacity(schema.ranking.tie_breaker_metric_names.len());
999    for metric_name in &schema.ranking.tie_breaker_metric_names {
1000        if metric_name == &schema.ranking.primary_metric_name {
1001            return Err(ServiceError::Validation(
1002                "metric_schema.ranking.tie_breaker_metric_names must not repeat the primary metric"
1003                    .to_string(),
1004            ));
1005        }
1006        if !names.contains(metric_name.as_str()) {
1007            return Err(ServiceError::Validation(format!(
1008                "metric_schema.ranking.tie_breaker_metric_names references unknown metric `{metric_name}`"
1009            )));
1010        }
1011        if !tie_breakers.insert(metric_name.as_str()) {
1012            return Err(ServiceError::Validation(format!(
1013                "metric_schema.ranking.tie_breaker_metric_names contains duplicate metric `{metric_name}`"
1014            )));
1015        }
1016    }
1017
1018    Ok(())
1019}
1020
/// Thin local alias for `text::require_non_empty`.
///
/// Forwards `value` and `field` unchanged and returns the helper's result.
/// NOTE(review): presumably `field` names the offending spec field in the
/// resulting error — confirm against `text::require_non_empty`.
fn require_non_empty(value: &str, field: &str) -> Result<()> {
    text::require_non_empty(value, field)
}
1025
1026fn validate_garde<T>(value: &T, field: &str) -> Result<()>
1027where
1028    T: Validate<Context = ()>,
1029{
1030    value
1031        .validate()
1032        .map_err(|report| ServiceError::Validation(format_garde_report(field, &report)))
1033}
1034
1035fn format_garde_report(field: &str, report: &garde::Report) -> String {
1036    report
1037        .iter()
1038        .map(|(path, error)| {
1039            if path.is_empty() {
1040                format!("{field}: {error}")
1041            } else {
1042                format!("{field}.{path}: {error}")
1043            }
1044        })
1045        .collect::<Vec<_>>()
1046        .join("; ")
1047}
1048
1049#[cfg(test)]
1050mod tests;
agentics_contracts/challenge_bundle.rs

agentics_contracts/
challenge_bundle.rs