Skip to main content

agentics_domain/models/challenge/
execution.rs

1use serde::{Deserialize, Serialize};
2
3use super::super::names::RunName;
4use super::super::paths::{BundleRelativePath, RunInputPath, RunOutputPath};
5use super::serde_helpers::{
6    required_nullable, required_nullable_non_empty_vec, required_nullable_non_empty_vec_schema,
7    required_nullable_schema, serialize_empty_vec_as_null,
8};
9
10/// Evaluator entrypoint and output-file contract for a bundle.
11#[derive(Debug, Clone, Serialize, Deserialize, garde::Validate, schemars::JsonSchema)]
12#[garde(allow_unvalidated)]
13#[serde(deny_unknown_fields)]
14pub struct EvaluatorSpec {
15    #[garde(
16        length(min = 1),
17        inner(
18            custom(crate::validation::trimmed_non_empty),
19            custom(crate::validation::no_nul)
20        )
21    )]
22    pub command: Vec<String>,
23    pub result_file: BundleRelativePath,
24}
25
26/// Supported challenge execution topology.
27#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, schemars::JsonSchema)]
28#[serde(rename_all = "snake_case")]
29pub enum ChallengeExecutionMode {
30    SeparatedEvaluator,
31    PipedStdio,
32    CoexecutedBenchmark,
33}
34
35impl ChallengeExecutionMode {
36    /// Return the stable runtime name used for container labels and bundle script directories.
37    pub fn runtime_name(self) -> &'static str {
38        match self {
39            Self::SeparatedEvaluator => "separated-evaluator",
40            Self::PipedStdio => "interactive-evaluator",
41            Self::CoexecutedBenchmark => "coexecuted-evaluator",
42        }
43    }
44}
45
46/// Challenge-owned execution topology and run manifest locations for `zip_project`.
47#[derive(Debug, Clone, Serialize, Deserialize, schemars::JsonSchema)]
48#[serde(tag = "mode", rename_all = "snake_case")]
49pub enum ChallengeExecutionSpec {
50    SeparatedEvaluator(SeparatedEvaluatorExecutionSpec),
51    PipedStdio(PipedStdioExecutionSpec),
52    CoexecutedBenchmark(CoexecutedBenchmarkExecutionSpec),
53}
54
55impl ChallengeExecutionSpec {
56    /// Return the current execution topology mode.
57    pub fn mode(&self) -> ChallengeExecutionMode {
58        match self {
59            Self::SeparatedEvaluator(_) => ChallengeExecutionMode::SeparatedEvaluator,
60            Self::PipedStdio(_) => ChallengeExecutionMode::PipedStdio,
61            Self::CoexecutedBenchmark(_) => ChallengeExecutionMode::CoexecutedBenchmark,
62        }
63    }
64
65    /// Borrow the current piped-stdio execution contract.
66    pub fn piped_stdio(&self) -> Option<&PipedStdioExecutionSpec> {
67        match self {
68            Self::SeparatedEvaluator(_) => None,
69            Self::PipedStdio(spec) => Some(spec),
70            Self::CoexecutedBenchmark(_) => None,
71        }
72    }
73
74    /// Borrow the current coexecuted-evaluator contract.
75    pub fn coexecuted_benchmark(&self) -> Option<&CoexecutedBenchmarkExecutionSpec> {
76        match self {
77            Self::SeparatedEvaluator(_) | Self::PipedStdio(_) => None,
78            Self::CoexecutedBenchmark(spec) => Some(spec),
79        }
80    }
81
82    /// Borrow the trusted evaluator command contract for the current topology.
83    pub fn trusted_evaluator(&self) -> &EvaluatorSpec {
84        match self {
85            Self::SeparatedEvaluator(spec) => &spec.separated_evaluator,
86            Self::PipedStdio(spec) => &spec.interactive_evaluator,
87            Self::CoexecutedBenchmark(spec) => &spec.coexecuted_evaluator,
88        }
89    }
90
91    /// Borrow public validation run locator if declared.
92    pub fn validation_runs(&self) -> Option<&BundleRelativePath> {
93        match self {
94            Self::SeparatedEvaluator(spec) => spec.validation_runs.as_ref(),
95            Self::PipedStdio(_) | Self::CoexecutedBenchmark(_) => None,
96        }
97    }
98
99    /// Borrow public validation setup contract if declared.
100    pub fn validation_setup(&self) -> Option<&ChallengeSetupSpec> {
101        match self {
102            Self::SeparatedEvaluator(spec) => spec.validation_setup.as_ref(),
103            Self::PipedStdio(_) | Self::CoexecutedBenchmark(_) => None,
104        }
105    }
106
107    /// Borrow official benchmark run locator if declared.
108    pub fn official_runs(&self) -> Option<&BundleRelativePath> {
109        match self {
110            Self::SeparatedEvaluator(spec) => spec.official_runs.as_ref(),
111            Self::PipedStdio(_) | Self::CoexecutedBenchmark(_) => None,
112        }
113    }
114
115    /// Borrow official benchmark setup contract if declared.
116    pub fn official_evaluation_setup(&self) -> Option<&ChallengeSetupSpec> {
117        match self {
118            Self::SeparatedEvaluator(spec) => spec.official_evaluation_setup.as_ref(),
119            Self::PipedStdio(_) | Self::CoexecutedBenchmark(_) => None,
120        }
121    }
122
123    /// Return whether the official evaluator declares setup-generated official inputs.
124    pub fn has_official_evaluation_setup(&self) -> bool {
125        match self {
126            Self::SeparatedEvaluator(spec) => spec.official_evaluation_setup.is_some(),
127            Self::PipedStdio(spec) => spec.official_evaluation_setup.is_some(),
128            Self::CoexecutedBenchmark(spec) => spec.official_evaluation_setup.is_some(),
129        }
130    }
131}
132
133/// Current separated-container evaluator topology.
134#[derive(Debug, Clone, Serialize, Deserialize, schemars::JsonSchema)]
135#[serde(deny_unknown_fields)]
136pub struct SeparatedEvaluatorExecutionSpec {
137    pub separated_evaluator: EvaluatorSpec,
138    #[serde(deserialize_with = "required_nullable")]
139    #[schemars(
140        required,
141        schema_with = "required_nullable_schema::<BundleRelativePath>"
142    )]
143    pub validation_runs: Option<BundleRelativePath>,
144    #[serde(deserialize_with = "required_nullable")]
145    #[schemars(
146        required,
147        schema_with = "required_nullable_schema::<ChallengeSetupSpec>"
148    )]
149    pub validation_setup: Option<ChallengeSetupSpec>,
150    #[serde(deserialize_with = "required_nullable")]
151    #[schemars(
152        required,
153        schema_with = "required_nullable_schema::<BundleRelativePath>"
154    )]
155    pub official_runs: Option<BundleRelativePath>,
156    #[serde(deserialize_with = "required_nullable")]
157    #[schemars(
158        required,
159        schema_with = "required_nullable_schema::<ChallengeSetupSpec>"
160    )]
161    pub official_evaluation_setup: Option<ChallengeSetupSpec>,
162}
163
164/// Interactive topology where a trusted interactive-evaluator exchanges stdio with one solution run.
165#[derive(Debug, Clone, Serialize, Deserialize, schemars::JsonSchema)]
166#[serde(deny_unknown_fields)]
167pub struct PipedStdioExecutionSpec {
168    pub interactive_evaluator: EvaluatorSpec,
169    pub acknowledge_stdio_protocol_framing: bool,
170    #[serde(deserialize_with = "required_nullable")]
171    #[schemars(
172        required,
173        schema_with = "required_nullable_schema::<BundleRelativePath>"
174    )]
175    pub validation_session: Option<BundleRelativePath>,
176    #[serde(deserialize_with = "required_nullable")]
177    #[schemars(
178        required,
179        schema_with = "required_nullable_schema::<PipedStdioSetupSpec>"
180    )]
181    pub validation_setup: Option<PipedStdioSetupSpec>,
182    #[serde(deserialize_with = "required_nullable")]
183    #[schemars(
184        required,
185        schema_with = "required_nullable_schema::<BundleRelativePath>"
186    )]
187    pub official_session: Option<BundleRelativePath>,
188    #[serde(deserialize_with = "required_nullable")]
189    #[schemars(
190        required,
191        schema_with = "required_nullable_schema::<PipedStdioSetupSpec>"
192    )]
193    pub official_evaluation_setup: Option<PipedStdioSetupSpec>,
194}
195
196/// Coexecuted topology where a trusted coexecuted-evaluator imports participant code in one container.
197#[derive(Debug, Clone, Serialize, Deserialize, schemars::JsonSchema)]
198#[serde(deny_unknown_fields)]
199pub struct CoexecutedBenchmarkExecutionSpec {
200    pub coexecuted_evaluator: EvaluatorSpec,
201    pub acknowledge_danger: bool,
202    #[serde(deserialize_with = "required_nullable")]
203    #[schemars(
204        required,
205        schema_with = "required_nullable_schema::<CoexecutedBenchmarkSetupSpec>"
206    )]
207    pub validation_setup: Option<CoexecutedBenchmarkSetupSpec>,
208    #[serde(deserialize_with = "required_nullable")]
209    #[schemars(
210        required,
211        schema_with = "required_nullable_schema::<CoexecutedBenchmarkSetupSpec>"
212    )]
213    pub official_evaluation_setup: Option<CoexecutedBenchmarkSetupSpec>,
214}
215
216/// Public execution metadata that excludes official private benchmark locators.
217#[derive(Debug, Clone, Serialize, Deserialize, schemars::JsonSchema)]
218#[serde(tag = "mode", rename_all = "snake_case")]
219pub enum PublicChallengeExecutionSpec {
220    SeparatedEvaluator(PublicSeparatedEvaluatorExecutionSpec),
221    PipedStdio(PublicPipedStdioExecutionSpec),
222    CoexecutedBenchmark(PublicCoexecutedBenchmarkExecutionSpec),
223}
224
225impl PublicChallengeExecutionSpec {
226    /// Borrow the trusted evaluator command contract for the public execution topology.
227    pub fn trusted_evaluator(&self) -> &EvaluatorSpec {
228        match self {
229            Self::SeparatedEvaluator(spec) => &spec.separated_evaluator,
230            Self::PipedStdio(spec) => &spec.interactive_evaluator,
231            Self::CoexecutedBenchmark(spec) => &spec.coexecuted_evaluator,
232        }
233    }
234}
235
236impl From<ChallengeExecutionSpec> for PublicChallengeExecutionSpec {
237    fn from(execution: ChallengeExecutionSpec) -> Self {
238        match execution {
239            ChallengeExecutionSpec::SeparatedEvaluator(spec) => {
240                Self::SeparatedEvaluator(PublicSeparatedEvaluatorExecutionSpec {
241                    separated_evaluator: spec.separated_evaluator,
242                    validation_runs: spec.validation_runs,
243                    validation_setup: spec.validation_setup,
244                })
245            }
246            ChallengeExecutionSpec::PipedStdio(spec) => {
247                Self::PipedStdio(PublicPipedStdioExecutionSpec {
248                    interactive_evaluator: spec.interactive_evaluator,
249                    acknowledge_stdio_protocol_framing: spec.acknowledge_stdio_protocol_framing,
250                    validation_session: spec.validation_session,
251                    validation_setup: spec.validation_setup,
252                })
253            }
254            ChallengeExecutionSpec::CoexecutedBenchmark(spec) => {
255                Self::CoexecutedBenchmark(PublicCoexecutedBenchmarkExecutionSpec {
256                    coexecuted_evaluator: spec.coexecuted_evaluator,
257                    acknowledge_danger: spec.acknowledge_danger,
258                    validation_setup: spec.validation_setup,
259                })
260            }
261        }
262    }
263}
264
265/// Public separated-evaluator topology metadata.
266#[derive(Debug, Clone, Serialize, Deserialize, schemars::JsonSchema)]
267#[serde(deny_unknown_fields)]
268pub struct PublicSeparatedEvaluatorExecutionSpec {
269    pub separated_evaluator: EvaluatorSpec,
270    #[serde(default, skip_serializing_if = "Option::is_none")]
271    pub validation_runs: Option<BundleRelativePath>,
272    #[serde(default, skip_serializing_if = "Option::is_none")]
273    pub validation_setup: Option<ChallengeSetupSpec>,
274}
275
276/// Public piped-stdio topology metadata.
277#[derive(Debug, Clone, Serialize, Deserialize, schemars::JsonSchema)]
278#[serde(deny_unknown_fields)]
279pub struct PublicPipedStdioExecutionSpec {
280    pub interactive_evaluator: EvaluatorSpec,
281    pub acknowledge_stdio_protocol_framing: bool,
282    #[serde(default, skip_serializing_if = "Option::is_none")]
283    pub validation_session: Option<BundleRelativePath>,
284    #[serde(default, skip_serializing_if = "Option::is_none")]
285    pub validation_setup: Option<PipedStdioSetupSpec>,
286}
287
288/// Public coexecuted-evaluator topology metadata.
289#[derive(Debug, Clone, Serialize, Deserialize, schemars::JsonSchema)]
290#[serde(deny_unknown_fields)]
291pub struct PublicCoexecutedBenchmarkExecutionSpec {
292    pub coexecuted_evaluator: EvaluatorSpec,
293    pub acknowledge_danger: bool,
294    #[serde(default, skip_serializing_if = "Option::is_none")]
295    pub validation_setup: Option<CoexecutedBenchmarkSetupSpec>,
296}
297
298/// Optional separated-evaluator command that sets up generated benchmark inputs.
299#[derive(Debug, Clone, Serialize, Deserialize, garde::Validate, schemars::JsonSchema)]
300#[garde(allow_unvalidated)]
301#[serde(deny_unknown_fields)]
302pub struct ChallengeSetupSpec {
303    #[garde(
304        length(min = 1),
305        inner(
306            custom(crate::validation::trimmed_non_empty),
307            custom(crate::validation::no_nul)
308        )
309    )]
310    pub command: Vec<String>,
311    /// Relative path, under the setup workspace, to the generated run manifest.
312    pub result_runs_file: BundleRelativePath,
313    /// Challenge-owner notes about seeds, versions, or external data provenance.
314    #[serde(deserialize_with = "required_nullable")]
315    #[schemars(required, schema_with = "required_nullable_schema::<String>")]
316    #[garde(custom(crate::validation::optional_trimmed_non_empty))]
317    pub reproducibility_notes: Option<String>,
318}
319
320/// Optional interactive-evaluator command that sets up one generated interactive session.
321#[derive(Debug, Clone, Serialize, Deserialize, garde::Validate, schemars::JsonSchema)]
322#[garde(allow_unvalidated)]
323#[serde(deny_unknown_fields)]
324pub struct PipedStdioSetupSpec {
325    #[garde(
326        length(min = 1),
327        inner(
328            custom(crate::validation::trimmed_non_empty),
329            custom(crate::validation::no_nul)
330        )
331    )]
332    pub command: Vec<String>,
333    /// Relative path, under the setup workspace, to the generated session manifest.
334    pub result_session_file: BundleRelativePath,
335    /// Challenge-owner notes about seeds, versions, or external data provenance.
336    #[serde(deserialize_with = "required_nullable")]
337    #[schemars(required, schema_with = "required_nullable_schema::<String>")]
338    #[garde(custom(crate::validation::optional_trimmed_non_empty))]
339    pub reproducibility_notes: Option<String>,
340}
341
342/// Optional coexecuted-evaluator command that sets up files for a coexecuted run.
343#[derive(Debug, Clone, Serialize, Deserialize, garde::Validate, schemars::JsonSchema)]
344#[garde(allow_unvalidated)]
345#[serde(deny_unknown_fields)]
346pub struct CoexecutedBenchmarkSetupSpec {
347    #[garde(
348        length(min = 1),
349        inner(
350            custom(crate::validation::trimmed_non_empty),
351            custom(crate::validation::no_nul)
352        )
353    )]
354    pub command: Vec<String>,
355    /// Challenge-owner notes about seeds, versions, or external data provenance.
356    #[serde(deserialize_with = "required_nullable")]
357    #[schemars(required, schema_with = "required_nullable_schema::<String>")]
358    #[garde(custom(crate::validation::optional_trimmed_non_empty))]
359    pub reproducibility_notes: Option<String>,
360}
361
362/// Challenge-owned list of evaluator-controlled solution invocations.
363#[derive(Debug, Clone, Serialize, Deserialize, schemars::JsonSchema)]
364#[serde(deny_unknown_fields)]
365pub struct ChallengeRunManifest {
366    pub runs: Vec<ChallengeRunSpec>,
367}
368
369/// One solution invocation generated by the worker and later evaluated by the evaluator.
370#[derive(Debug, Clone, Serialize, Deserialize, schemars::JsonSchema)]
371#[serde(deny_unknown_fields)]
372pub struct ChallengeRunSpec {
373    pub run_name: RunName,
374    pub interface: ChallengeRunInterface,
375    #[serde(deserialize_with = "required_nullable")]
376    #[schemars(
377        required,
378        schema_with = "required_nullable_schema::<serde_json::Value>"
379    )]
380    pub stdin_json: Option<serde_json::Value>,
381    #[serde(deserialize_with = "required_nullable")]
382    #[schemars(required, schema_with = "required_nullable_schema::<String>")]
383    pub stdin_text: Option<String>,
384    #[serde(
385        deserialize_with = "required_nullable_non_empty_vec",
386        serialize_with = "serialize_empty_vec_as_null"
387    )]
388    #[schemars(
389        required,
390        schema_with = "required_nullable_non_empty_vec_schema::<ChallengeRunInputFile>"
391    )]
392    pub input_files: Vec<ChallengeRunInputFile>,
393    #[serde(
394        deserialize_with = "required_nullable_non_empty_vec",
395        serialize_with = "serialize_empty_vec_as_null"
396    )]
397    #[schemars(
398        required,
399        schema_with = "required_nullable_non_empty_vec_schema::<RunOutputPath>"
400    )]
401    pub output_files: Vec<RunOutputPath>,
402    #[serde(deserialize_with = "required_nullable")]
403    #[schemars(
404        required,
405        schema_with = "required_nullable_schema::<serde_json::Map<String, serde_json::Value>>"
406    )]
407    pub metadata: Option<serde_json::Map<String, serde_json::Value>>,
408}
409
410/// Supported worker-managed solution input/output interfaces.
411#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, schemars::JsonSchema)]
412#[serde(rename_all = "snake_case")]
413pub enum ChallengeRunInterface {
414    Stdio,
415    FileSystem,
416}
417
418/// One input file materialized into `AGENTICS_INPUT_DIR` for a file-mode run.
419#[derive(Debug, Clone, Serialize, Deserialize, schemars::JsonSchema)]
420#[serde(deny_unknown_fields)]
421pub struct ChallengeRunInputFile {
422    pub path: RunInputPath,
423    #[serde(default, skip_serializing_if = "Option::is_none")]
424    pub source_path: Option<BundleRelativePath>,
425    #[serde(default, skip_serializing_if = "Option::is_none")]
426    pub content: Option<String>,
427    #[serde(default, skip_serializing_if = "Option::is_none")]
428    pub content_json: Option<serde_json::Value>,
429}
430
431/// Challenge-owned single interactive session manifest for `piped_stdio`.
432#[derive(Debug, Clone, Serialize, Deserialize, schemars::JsonSchema)]
433#[serde(deny_unknown_fields)]
434pub struct PipedStdioSessionManifest {
435    pub session_name: RunName,
436    #[serde(
437        deserialize_with = "required_nullable_non_empty_vec",
438        serialize_with = "serialize_empty_vec_as_null"
439    )]
440    #[schemars(
441        required,
442        schema_with = "required_nullable_non_empty_vec_schema::<ChallengeRunInputFile>"
443    )]
444    pub input_files: Vec<ChallengeRunInputFile>,
445    #[serde(deserialize_with = "required_nullable")]
446    #[schemars(
447        required,
448        schema_with = "required_nullable_schema::<serde_json::Map<String, serde_json::Value>>"
449    )]
450    pub metadata: Option<serde_json::Map<String, serde_json::Value>>,
451}