// agentics_domain/models/challenge/execution.rs

1use serde::{Deserialize, Serialize};
2
3use super::super::names::RunName;
4use super::super::paths::{BundleRelativePath, RunInputPath, RunOutputPath};
5
/// Evaluator entrypoint and output-file contract for a bundle.
// `deny_unknown_fields` makes deserialization reject any key other than `command` and
// `result_file`. `allow_unvalidated` lets `result_file` carry no garde rule of its own.
#[derive(Debug, Clone, Serialize, Deserialize, garde::Validate, schemars::JsonSchema)]
#[garde(allow_unvalidated)]
#[serde(deny_unknown_fields)]
pub struct EvaluatorSpec {
    /// Evaluator command as an argument vector.
    // garde validation requires at least one element, and every element must pass the
    // crate's `trimmed_non_empty` and `no_nul` checks.
    #[garde(
        length(min = 1),
        inner(
            custom(crate::validation::trimmed_non_empty),
            custom(crate::validation::no_nul)
        )
    )]
    pub command: Vec<String>,
    /// Bundle-relative path of the evaluator's result file.
    pub result_file: BundleRelativePath,
}
21
/// Supported challenge execution topology.
// Serialized in snake_case: `separated_evaluator`, `piped_stdio`, `coexecuted_benchmark`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, schemars::JsonSchema)]
#[serde(rename_all = "snake_case")]
pub enum ChallengeExecutionMode {
    /// Separated-container evaluator topology.
    SeparatedEvaluator,
    /// Interactive topology where a trusted evaluator exchanges stdio with one solution run.
    PipedStdio,
    /// Coexecuted topology where a trusted evaluator imports participant code in one container.
    CoexecutedBenchmark,
}
30
31impl ChallengeExecutionMode {
32    /// Return the stable runtime name used for container labels and bundle script directories.
33    pub fn runtime_name(self) -> &'static str {
34        match self {
35            Self::SeparatedEvaluator => "separated-evaluator",
36            Self::PipedStdio => "interactive-evaluator",
37            Self::CoexecutedBenchmark => "coexecuted-evaluator",
38        }
39    }
40}
41
/// Challenge-owned execution topology and run manifest locations for `zip_project`.
// Internally tagged: the `mode` key selects the variant (`separated_evaluator`,
// `piped_stdio`, or `coexecuted_benchmark`) and the variant's fields sit beside it.
#[derive(Debug, Clone, Serialize, Deserialize, schemars::JsonSchema)]
#[serde(tag = "mode", rename_all = "snake_case")]
pub enum ChallengeExecutionSpec {
    /// Separated-container evaluator topology.
    SeparatedEvaluator(SeparatedEvaluatorExecutionSpec),
    /// Interactive topology where the evaluator exchanges stdio with one solution run.
    PipedStdio(PipedStdioExecutionSpec),
    /// Coexecuted topology where the evaluator imports participant code in one container.
    CoexecutedBenchmark(CoexecutedBenchmarkExecutionSpec),
}
50
51impl ChallengeExecutionSpec {
52    /// Return the current execution topology mode.
53    pub fn mode(&self) -> ChallengeExecutionMode {
54        match self {
55            Self::SeparatedEvaluator(_) => ChallengeExecutionMode::SeparatedEvaluator,
56            Self::PipedStdio(_) => ChallengeExecutionMode::PipedStdio,
57            Self::CoexecutedBenchmark(_) => ChallengeExecutionMode::CoexecutedBenchmark,
58        }
59    }
60
61    /// Borrow the current piped-stdio execution contract.
62    pub fn piped_stdio(&self) -> Option<&PipedStdioExecutionSpec> {
63        match self {
64            Self::SeparatedEvaluator(_) => None,
65            Self::PipedStdio(spec) => Some(spec),
66            Self::CoexecutedBenchmark(_) => None,
67        }
68    }
69
70    /// Borrow the current coexecuted-evaluator contract.
71    pub fn coexecuted_benchmark(&self) -> Option<&CoexecutedBenchmarkExecutionSpec> {
72        match self {
73            Self::SeparatedEvaluator(_) | Self::PipedStdio(_) => None,
74            Self::CoexecutedBenchmark(spec) => Some(spec),
75        }
76    }
77
78    /// Borrow the trusted evaluator command contract for the current topology.
79    pub fn trusted_evaluator(&self) -> &EvaluatorSpec {
80        match self {
81            Self::SeparatedEvaluator(spec) => &spec.separated_evaluator,
82            Self::PipedStdio(spec) => &spec.interactive_evaluator,
83            Self::CoexecutedBenchmark(spec) => &spec.coexecuted_evaluator,
84        }
85    }
86
87    /// Borrow public validation run locator if declared.
88    pub fn validation_runs(&self) -> Option<&BundleRelativePath> {
89        match self {
90            Self::SeparatedEvaluator(spec) => spec.validation_runs.as_ref(),
91            Self::PipedStdio(_) | Self::CoexecutedBenchmark(_) => None,
92        }
93    }
94
95    /// Borrow public validation setup contract if declared.
96    pub fn validation_setup(&self) -> Option<&ChallengeSetupSpec> {
97        match self {
98            Self::SeparatedEvaluator(spec) => spec.validation_setup.as_ref(),
99            Self::PipedStdio(_) | Self::CoexecutedBenchmark(_) => None,
100        }
101    }
102
103    /// Borrow official benchmark run locator if declared.
104    pub fn official_runs(&self) -> Option<&BundleRelativePath> {
105        match self {
106            Self::SeparatedEvaluator(spec) => spec.official_runs.as_ref(),
107            Self::PipedStdio(_) | Self::CoexecutedBenchmark(_) => None,
108        }
109    }
110
111    /// Borrow official benchmark setup contract if declared.
112    pub fn official_evaluation_setup(&self) -> Option<&ChallengeSetupSpec> {
113        match self {
114            Self::SeparatedEvaluator(spec) => spec.official_evaluation_setup.as_ref(),
115            Self::PipedStdio(_) | Self::CoexecutedBenchmark(_) => None,
116        }
117    }
118
119    /// Return whether the official evaluator declares setup-generated official inputs.
120    pub fn has_official_evaluation_setup(&self) -> bool {
121        match self {
122            Self::SeparatedEvaluator(spec) => spec.official_evaluation_setup.is_some(),
123            Self::PipedStdio(spec) => spec.official_evaluation_setup.is_some(),
124            Self::CoexecutedBenchmark(spec) => spec.official_evaluation_setup.is_some(),
125        }
126    }
127}
128
/// Current separated-container evaluator topology.
// Every optional field defaults to `None` when absent and is omitted from serialized
// output when `None`; unknown keys are rejected.
#[derive(Debug, Clone, Serialize, Deserialize, schemars::JsonSchema)]
#[serde(deny_unknown_fields)]
pub struct SeparatedEvaluatorExecutionSpec {
    /// Trusted evaluator command contract for this topology.
    pub separated_evaluator: EvaluatorSpec,
    /// Public validation run locator, if declared.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub validation_runs: Option<BundleRelativePath>,
    /// Public validation setup contract, if declared.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub validation_setup: Option<ChallengeSetupSpec>,
    /// Official benchmark run locator, if declared; not carried into the public spec.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub official_runs: Option<BundleRelativePath>,
    /// Official benchmark setup contract, if declared; not carried into the public spec.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub official_evaluation_setup: Option<ChallengeSetupSpec>,
}
143
/// Interactive topology where a trusted interactive-evaluator exchanges stdio with one solution run.
// Every optional field defaults to `None` when absent and is omitted from serialized
// output when `None`; unknown keys are rejected.
#[derive(Debug, Clone, Serialize, Deserialize, schemars::JsonSchema)]
#[serde(deny_unknown_fields)]
pub struct PipedStdioExecutionSpec {
    /// Trusted evaluator command contract for this topology.
    pub interactive_evaluator: EvaluatorSpec,
    /// Required acknowledgement flag for the stdio protocol framing.
    // NOTE(review): whether `false` is rejected is not visible in this section — confirm.
    pub acknowledge_stdio_protocol_framing: bool,
    /// Bundle-relative locator of the public validation session, if declared.
    // NOTE(review): presumably points at a `PipedStdioSessionManifest` — confirm.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub validation_session: Option<BundleRelativePath>,
    /// Public validation setup contract, if declared.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub validation_setup: Option<PipedStdioSetupSpec>,
    /// Bundle-relative locator of the official session, if declared; not carried into the
    /// public spec.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub official_session: Option<BundleRelativePath>,
    /// Official evaluation setup contract, if declared; not carried into the public spec.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub official_evaluation_setup: Option<PipedStdioSetupSpec>,
}
159
/// Coexecuted topology where a trusted coexecuted-evaluator imports participant code in one container.
// Every optional field defaults to `None` when absent and is omitted from serialized
// output when `None`; unknown keys are rejected.
#[derive(Debug, Clone, Serialize, Deserialize, schemars::JsonSchema)]
#[serde(deny_unknown_fields)]
pub struct CoexecutedBenchmarkExecutionSpec {
    /// Trusted evaluator command contract for this topology.
    pub coexecuted_evaluator: EvaluatorSpec,
    /// Required acknowledgement flag for the risks of this topology.
    // NOTE(review): whether `false` is rejected is not visible in this section — confirm.
    pub acknowledge_danger: bool,
    /// Public validation setup contract, if declared.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub validation_setup: Option<CoexecutedBenchmarkSetupSpec>,
    /// Official evaluation setup contract, if declared; not carried into the public spec.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub official_evaluation_setup: Option<CoexecutedBenchmarkSetupSpec>,
}
171
/// Public execution metadata that excludes official private benchmark locators.
// Internally tagged with the same `mode` key and snake_case variant names as
// `ChallengeExecutionSpec`.
#[derive(Debug, Clone, Serialize, Deserialize, schemars::JsonSchema)]
#[serde(tag = "mode", rename_all = "snake_case")]
pub enum PublicChallengeExecutionSpec {
    /// Public view of the separated-evaluator topology.
    SeparatedEvaluator(PublicSeparatedEvaluatorExecutionSpec),
    /// Public view of the piped-stdio topology.
    PipedStdio(PublicPipedStdioExecutionSpec),
    /// Public view of the coexecuted-evaluator topology.
    CoexecutedBenchmark(PublicCoexecutedBenchmarkExecutionSpec),
}
180
181impl PublicChallengeExecutionSpec {
182    /// Borrow the trusted evaluator command contract for the public execution topology.
183    pub fn trusted_evaluator(&self) -> &EvaluatorSpec {
184        match self {
185            Self::SeparatedEvaluator(spec) => &spec.separated_evaluator,
186            Self::PipedStdio(spec) => &spec.interactive_evaluator,
187            Self::CoexecutedBenchmark(spec) => &spec.coexecuted_evaluator,
188        }
189    }
190}
191
192impl From<ChallengeExecutionSpec> for PublicChallengeExecutionSpec {
193    fn from(execution: ChallengeExecutionSpec) -> Self {
194        match execution {
195            ChallengeExecutionSpec::SeparatedEvaluator(spec) => {
196                Self::SeparatedEvaluator(PublicSeparatedEvaluatorExecutionSpec {
197                    separated_evaluator: spec.separated_evaluator,
198                    validation_runs: spec.validation_runs,
199                    validation_setup: spec.validation_setup,
200                })
201            }
202            ChallengeExecutionSpec::PipedStdio(spec) => {
203                Self::PipedStdio(PublicPipedStdioExecutionSpec {
204                    interactive_evaluator: spec.interactive_evaluator,
205                    acknowledge_stdio_protocol_framing: spec.acknowledge_stdio_protocol_framing,
206                    validation_session: spec.validation_session,
207                    validation_setup: spec.validation_setup,
208                })
209            }
210            ChallengeExecutionSpec::CoexecutedBenchmark(spec) => {
211                Self::CoexecutedBenchmark(PublicCoexecutedBenchmarkExecutionSpec {
212                    coexecuted_evaluator: spec.coexecuted_evaluator,
213                    acknowledge_danger: spec.acknowledge_danger,
214                    validation_setup: spec.validation_setup,
215                })
216            }
217        }
218    }
219}
220
/// Public separated-evaluator topology metadata.
// Mirrors `SeparatedEvaluatorExecutionSpec` without `official_runs` and
// `official_evaluation_setup`; unknown keys are rejected.
#[derive(Debug, Clone, Serialize, Deserialize, schemars::JsonSchema)]
#[serde(deny_unknown_fields)]
pub struct PublicSeparatedEvaluatorExecutionSpec {
    /// Trusted evaluator command contract for this topology.
    pub separated_evaluator: EvaluatorSpec,
    /// Public validation run locator, if declared.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub validation_runs: Option<BundleRelativePath>,
    /// Public validation setup contract, if declared.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub validation_setup: Option<ChallengeSetupSpec>,
}
231
/// Public piped-stdio topology metadata.
// Mirrors `PipedStdioExecutionSpec` without `official_session` and
// `official_evaluation_setup`; unknown keys are rejected.
#[derive(Debug, Clone, Serialize, Deserialize, schemars::JsonSchema)]
#[serde(deny_unknown_fields)]
pub struct PublicPipedStdioExecutionSpec {
    /// Trusted evaluator command contract for this topology.
    pub interactive_evaluator: EvaluatorSpec,
    /// Acknowledgement flag for the stdio protocol framing, copied from the full spec.
    pub acknowledge_stdio_protocol_framing: bool,
    /// Bundle-relative locator of the public validation session, if declared.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub validation_session: Option<BundleRelativePath>,
    /// Public validation setup contract, if declared.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub validation_setup: Option<PipedStdioSetupSpec>,
}
243
/// Public coexecuted-evaluator topology metadata.
// Mirrors `CoexecutedBenchmarkExecutionSpec` without `official_evaluation_setup`;
// unknown keys are rejected.
#[derive(Debug, Clone, Serialize, Deserialize, schemars::JsonSchema)]
#[serde(deny_unknown_fields)]
pub struct PublicCoexecutedBenchmarkExecutionSpec {
    /// Trusted evaluator command contract for this topology.
    pub coexecuted_evaluator: EvaluatorSpec,
    /// Acknowledgement flag for the risks of this topology, copied from the full spec.
    pub acknowledge_danger: bool,
    /// Public validation setup contract, if declared.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub validation_setup: Option<CoexecutedBenchmarkSetupSpec>,
}
253
/// Optional separated-evaluator command that sets up generated benchmark inputs.
// `allow_unvalidated` lets `result_runs_file` carry no garde rule; unknown keys are rejected.
#[derive(Debug, Clone, Serialize, Deserialize, garde::Validate, schemars::JsonSchema)]
#[garde(allow_unvalidated)]
#[serde(deny_unknown_fields)]
pub struct ChallengeSetupSpec {
    /// Setup command as an argument vector.
    // garde validation requires at least one element, and every element must pass the
    // crate's `trimmed_non_empty` and `no_nul` checks.
    #[garde(
        length(min = 1),
        inner(
            custom(crate::validation::trimmed_non_empty),
            custom(crate::validation::no_nul)
        )
    )]
    pub command: Vec<String>,
    /// Relative path, under the setup workspace, to the generated run manifest.
    pub result_runs_file: BundleRelativePath,
    /// Challenge-owner notes about seeds, versions, or external data provenance.
    // Optional; when present it is checked by the crate's `optional_trimmed_non_empty` rule.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    #[garde(custom(crate::validation::optional_trimmed_non_empty))]
    pub reproducibility_notes: Option<String>,
}
274
/// Optional interactive-evaluator command that sets up one generated interactive session.
// `allow_unvalidated` lets `result_session_file` carry no garde rule; unknown keys are rejected.
#[derive(Debug, Clone, Serialize, Deserialize, garde::Validate, schemars::JsonSchema)]
#[garde(allow_unvalidated)]
#[serde(deny_unknown_fields)]
pub struct PipedStdioSetupSpec {
    /// Setup command as an argument vector.
    // garde validation requires at least one element, and every element must pass the
    // crate's `trimmed_non_empty` and `no_nul` checks.
    #[garde(
        length(min = 1),
        inner(
            custom(crate::validation::trimmed_non_empty),
            custom(crate::validation::no_nul)
        )
    )]
    pub command: Vec<String>,
    /// Relative path, under the setup workspace, to the generated session manifest.
    pub result_session_file: BundleRelativePath,
    /// Challenge-owner notes about seeds, versions, or external data provenance.
    // Optional; when present it is checked by the crate's `optional_trimmed_non_empty` rule.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    #[garde(custom(crate::validation::optional_trimmed_non_empty))]
    pub reproducibility_notes: Option<String>,
}
295
/// Optional coexecuted-evaluator command that sets up files for a coexecuted run.
// Unlike the other setup specs, this one declares no result-file locator; unknown keys
// are rejected.
#[derive(Debug, Clone, Serialize, Deserialize, garde::Validate, schemars::JsonSchema)]
#[garde(allow_unvalidated)]
#[serde(deny_unknown_fields)]
pub struct CoexecutedBenchmarkSetupSpec {
    /// Setup command as an argument vector.
    // garde validation requires at least one element, and every element must pass the
    // crate's `trimmed_non_empty` and `no_nul` checks.
    #[garde(
        length(min = 1),
        inner(
            custom(crate::validation::trimmed_non_empty),
            custom(crate::validation::no_nul)
        )
    )]
    pub command: Vec<String>,
    /// Challenge-owner notes about seeds, versions, or external data provenance.
    // Optional; when present it is checked by the crate's `optional_trimmed_non_empty` rule.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    #[garde(custom(crate::validation::optional_trimmed_non_empty))]
    pub reproducibility_notes: Option<String>,
}
314
/// Challenge-owned list of evaluator-controlled solution invocations.
// No `deny_unknown_fields` here, so unrecognized keys are ignored on deserialization.
#[derive(Debug, Clone, Serialize, Deserialize, schemars::JsonSchema)]
pub struct ChallengeRunManifest {
    /// Declared runs; defaults to an empty list when the key is absent.
    #[serde(default)]
    pub runs: Vec<ChallengeRunSpec>,
}
321
/// One solution invocation generated by the worker and later evaluated by the evaluator.
// No `deny_unknown_fields` here, so unrecognized keys are ignored on deserialization.
// NOTE(review): nothing in this type prevents `stdin_json` and `stdin_text` from both being
// set, or ties the populated fields to `interface` — presumably checked elsewhere; confirm.
#[derive(Debug, Clone, Serialize, Deserialize, schemars::JsonSchema)]
pub struct ChallengeRunSpec {
    /// Name of this run.
    pub run_name: RunName,
    /// Input/output interface used for this run.
    pub interface: ChallengeRunInterface,
    /// Standard input given as a JSON value; omitted from output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub stdin_json: Option<serde_json::Value>,
    /// Standard input given as text; omitted from output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub stdin_text: Option<String>,
    /// Input files for the run; defaults to empty and is omitted from output when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub input_files: Vec<ChallengeRunInputFile>,
    /// Output file paths for the run; defaults to empty and is omitted from output when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub output_files: Vec<RunOutputPath>,
}
336
/// Supported worker-managed solution input/output interfaces.
// Serialized in snake_case: `stdio` and `file_system`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, schemars::JsonSchema)]
#[serde(rename_all = "snake_case")]
pub enum ChallengeRunInterface {
    /// Standard-stream interface.
    Stdio,
    /// File-based interface.
    FileSystem,
}
344
/// One input file materialized into `AGENTICS_INPUT_DIR` for a file-mode run.
// No `deny_unknown_fields` here, so unrecognized keys are ignored on deserialization.
// NOTE(review): `source_path`, `content`, and `content_json` look like alternative content
// sources, but nothing in this type enforces that exactly one is set — confirm where
// that is validated.
#[derive(Debug, Clone, Serialize, Deserialize, schemars::JsonSchema)]
pub struct ChallengeRunInputFile {
    /// Path of the materialized input file.
    pub path: RunInputPath,
    /// Bundle-relative path to take the content from; omitted from output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub source_path: Option<BundleRelativePath>,
    /// Content given as text; omitted from output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub content: Option<String>,
    /// Content given as a JSON value; omitted from output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub content_json: Option<serde_json::Value>,
}
356
/// Challenge-owned single interactive session manifest for `piped_stdio`.
// Unknown keys are rejected on deserialization.
#[derive(Debug, Clone, Serialize, Deserialize, schemars::JsonSchema)]
#[serde(deny_unknown_fields)]
pub struct PipedStdioSessionManifest {
    /// Name of the session.
    pub session_name: RunName,
    /// Input files for the session; defaults to empty and is omitted from output when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub input_files: Vec<ChallengeRunInputFile>,
    /// Free-form JSON object of session metadata; omitted from output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub metadata: Option<serde_json::Map<String, serde_json::Value>>,
}