Skip to main content

agentics_domain/models/challenge/
bundle.rs

1//! Challenge bundle contracts and public bundle projections.
2
3use serde::{Deserialize, Serialize};
4
5use crate::models::localization::LocalizedText;
6use crate::models::names::{ChallengeKeyword, ChallengeName, TargetName};
7use crate::models::paths::BundleRelativePath;
8
9use super::datasets::{DatasetsSpec, PublicDatasetsSpec};
10use super::execution::{ChallengeExecutionSpec, PublicChallengeExecutionSpec};
11use super::metrics::MetricSchemaSpec;
12use super::targets::ChallengeTargetSpec;
13
// NOTE: these two bounds are also written as the literals `1` and `6` in the
// `#[schemars(length(...))]` attributes on the `keywords` fields of
// `ChallengeBundleSpec` and `PublicChallengeBundleSpec`; change them together.

/// Minimum public keywords that a challenge must declare.
pub const MIN_CHALLENGE_KEYWORDS: usize = 1;

/// Maximum public keywords that a challenge may declare.
pub const MAX_CHALLENGE_KEYWORDS: usize = 6;
19
// Maintainer notes (plain `//` comments on purpose, so they never become rustdoc
// text that could feed generated schema descriptions):
// - `deny_unknown_fields` makes deserialization fail on any key that is not one
//   of the fields declared below.
// - `garde(allow_unvalidated)` permits fields without a `#[garde(...)]` rule;
//   `keywords` is the only field of this struct that carries one.
/// Parsed `spec.json` contract for a challenge bundle.
#[derive(Debug, Clone, Serialize, Deserialize, garde::Validate, schemars::JsonSchema)]
#[garde(allow_unvalidated)]
#[serde(deny_unknown_fields)]
pub struct ChallengeBundleSpec {
    // Presumably the version of the `spec.json` format — confirm against the loader.
    pub schema_version: i32,
    pub challenge_name: ChallengeName,
    // Plain `String`, unlike the localized `summary` below.
    pub challenge_title: String,
    /// Localized summary used in compact challenge catalog surfaces.
    pub summary: LocalizedText,
    /// Required public keywords used by catalog search and filtering.
    // garde checks the element count against the two constants; the schemars
    // attribute repeats the same bounds as literals and must be kept in sync.
    #[garde(length(min = MIN_CHALLENGE_KEYWORDS, max = MAX_CHALLENGE_KEYWORDS))]
    #[schemars(length(min = 1, max = 6))]
    pub keywords: Vec<ChallengeKeyword>,
    pub solution: SolutionSpec,
    // Searched by `target()` and inspected by `sole_target()`.
    pub targets: Vec<ChallengeTargetSpec>,
    // Stored as a raw string; this type does not enforce any timestamp format.
    pub starts_at: String,
    // Optional: a missing key deserializes to `None`, and `None` is omitted on output.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub closes_at: Option<String>,
    pub eligibility: ChallengeEligibilitySpec,
    // Optional with the same serde handling as `closes_at`.
    // Presumably `None` means "no limit" — confirm with the submission code.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub validation_submission_limit: Option<i64>,
    // Optional with the same serde handling as `closes_at`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub official_submission_limit: Option<i64>,
    pub visibility: ChallengeVisibilitySpec,
    pub solution_publication: ChallengeSolutionPublicationPolicy,
    pub execution: ChallengeExecutionSpec,
    // Replaced by `PublicDatasetsSpec` in the public projection.
    pub datasets: DatasetsSpec,
    /// Metric definitions and ranking metadata used to interpret evaluator output.
    // serde falls back to `MetricSchemaSpec::default()` when the key is absent.
    // NOTE(review): `schemars(required)` appears intended to keep the property
    // listed as required in the generated schema despite that default — confirm
    // against the schemars version in use.
    #[serde(default)]
    #[schemars(required)]
    pub metric_schema: MetricSchemaSpec,
}
53
54impl ChallengeBundleSpec {
55    /// Look up one target declared by this challenge.
56    pub fn target(&self, target: &TargetName) -> Option<&ChallengeTargetSpec> {
57        self.targets
58            .iter()
59            .find(|candidate| &candidate.name == target)
60    }
61
62    /// Return the only target name when a challenge is unambiguous.
63    pub fn sole_target(&self) -> Option<&TargetName> {
64        match self.targets.as_slice() {
65            [target] => Some(&target.name),
66            _ => None,
67        }
68    }
69
70    /// Return whether official runner diagnostics may contain private benchmark material.
71    pub fn official_evaluation_may_expose_private_material(&self) -> bool {
72        if self.datasets.private_benchmark_enabled || self.execution.has_official_evaluation_setup()
73        {
74            return true;
75        }
76
77        match &self.execution {
78            ChallengeExecutionSpec::SeparatedEvaluator(spec) => {
79                spec.official_runs.as_ref().is_none_or(|path| {
80                    !path
81                        .as_path()
82                        .starts_with(self.datasets.public_dir.as_path())
83                })
84            }
85            ChallengeExecutionSpec::PipedStdio(spec) => {
86                spec.official_session.as_ref().is_none_or(|path| {
87                    !path
88                        .as_path()
89                        .starts_with(self.datasets.public_dir.as_path())
90                })
91            }
92            ChallengeExecutionSpec::CoexecutedBenchmark(_) => false,
93        }
94    }
95}
96
// Maintainer notes (plain `//` comments on purpose, so they never become rustdoc
// text that could feed generated schema descriptions):
// - Field-for-field this mirrors `ChallengeBundleSpec`, except that `execution`
//   and `datasets` use the `Public*` projection types.
// - No `garde::Validate` derive here: the keyword bounds appear only in the
//   schemars attribute.
/// Public projection of a challenge contract safe for unauthenticated clients.
#[derive(Debug, Clone, Serialize, Deserialize, schemars::JsonSchema)]
#[serde(deny_unknown_fields)]
pub struct PublicChallengeBundleSpec {
    pub schema_version: i32,
    pub challenge_name: ChallengeName,
    pub challenge_title: String,
    /// Localized summary used in compact challenge catalog surfaces.
    pub summary: LocalizedText,
    /// Required public keywords used by catalog search and filtering.
    // Literal bounds; they match MIN_CHALLENGE_KEYWORDS / MAX_CHALLENGE_KEYWORDS.
    #[schemars(length(min = 1, max = 6))]
    pub keywords: Vec<ChallengeKeyword>,
    pub solution: SolutionSpec,
    pub targets: Vec<ChallengeTargetSpec>,
    // Stored as a raw string; this type does not enforce any timestamp format.
    pub starts_at: String,
    // Optional: a missing key deserializes to `None`, and `None` is omitted on output.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub closes_at: Option<String>,
    pub eligibility: ChallengeEligibilitySpec,
    // Optional with the same serde handling as `closes_at`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub validation_submission_limit: Option<i64>,
    // Optional with the same serde handling as `closes_at`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub official_submission_limit: Option<i64>,
    pub visibility: ChallengeVisibilitySpec,
    pub solution_publication: ChallengeSolutionPublicationPolicy,
    // Public counterpart of `ChallengeExecutionSpec`.
    pub execution: PublicChallengeExecutionSpec,
    // Public counterpart of `DatasetsSpec`.
    pub datasets: PublicDatasetsSpec,
    /// Metric definitions and ranking metadata used to interpret evaluator output.
    // serde falls back to `MetricSchemaSpec::default()` when the key is absent.
    // NOTE(review): `schemars(required)` appears intended to keep the property
    // listed as required in the generated schema despite that default — confirm.
    #[serde(default)]
    #[schemars(required)]
    pub metric_schema: MetricSchemaSpec,
}
128
129impl PublicChallengeBundleSpec {
130    /// Look up one public target declared by this challenge.
131    pub fn target(&self, target: &TargetName) -> Option<&ChallengeTargetSpec> {
132        self.targets
133            .iter()
134            .find(|candidate| &candidate.name == target)
135    }
136
137    /// Return the only target name when a public challenge is unambiguous.
138    pub fn sole_target(&self) -> Option<&TargetName> {
139        match self.targets.as_slice() {
140            [target] => Some(&target.name),
141            _ => None,
142        }
143    }
144}
145
146impl From<ChallengeBundleSpec> for PublicChallengeBundleSpec {
147    /// Remove private benchmark locator metadata from a full challenge contract.
148    fn from(spec: ChallengeBundleSpec) -> Self {
149        Self {
150            schema_version: spec.schema_version,
151            challenge_name: spec.challenge_name,
152            challenge_title: spec.challenge_title,
153            summary: spec.summary,
154            keywords: spec.keywords,
155            solution: spec.solution,
156            targets: spec.targets,
157            starts_at: spec.starts_at,
158            closes_at: spec.closes_at,
159            eligibility: spec.eligibility,
160            validation_submission_limit: spec.validation_submission_limit,
161            official_submission_limit: spec.official_submission_limit,
162            visibility: spec.visibility,
163            solution_publication: spec.solution_publication,
164            execution: spec.execution.into(),
165            datasets: PublicDatasetsSpec {
166                public_dir: spec.datasets.public_dir,
167                public_policy: spec.datasets.public_policy,
168                private_benchmark_policy: spec.datasets.private_benchmark_policy,
169                private_benchmark_enabled: spec.datasets.private_benchmark_enabled,
170            },
171            metric_schema: spec.metric_schema,
172        }
173    }
174}
175
/// Eligibility policy for a challenge.
// `deny_unknown_fields`: any key other than `type` is rejected on deserialization.
#[derive(Debug, Clone, Serialize, Deserialize, schemars::JsonSchema)]
#[serde(deny_unknown_fields)]
pub struct ChallengeEligibilitySpec {
    // Serialized under the key `type`; renamed in Rust because `type` is a keyword.
    #[serde(rename = "type")]
    pub eligibility_type: ChallengeEligibilityType,
}
183
/// Stable eligibility policy names.
// Wire names follow `rename_all = "snake_case"`: `open`, `private_shortlist`.
// Variant notes are plain `//` comments on purpose, so they cannot affect the
// schema that schemars derives for this enum.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, schemars::JsonSchema)]
#[serde(rename_all = "snake_case")]
pub enum ChallengeEligibilityType {
    // Presumably anyone may participate — confirm with the enrollment logic.
    Open,
    // Presumably restricted to an invited set of participants — confirm.
    PrivateShortlist,
}
191
/// Visibility policy for challenge result surfaces.
// Groups three independent settings; unknown keys are rejected on deserialization.
#[derive(Debug, Clone, Serialize, Deserialize, schemars::JsonSchema)]
#[serde(deny_unknown_fields)]
pub struct ChallengeVisibilitySpec {
    // The two aggregate surfaces share the `ChallengeVisibility` vocabulary.
    pub leaderboard: ChallengeVisibility,
    pub score_distribution: ChallengeVisibility,
    // Per-result detail uses its own enum with submitter-specific options.
    pub result_detail: ChallengeResultDetailVisibility,
}
200
/// Visibility for public aggregate surfaces.
// Wire names follow `rename_all = "snake_case"`:
// `public_live`, `public_after_close`, `hidden`.
// Variant notes are plain `//` comments on purpose, so they cannot affect the
// schema that schemars derives for this enum.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, schemars::JsonSchema)]
#[serde(rename_all = "snake_case")]
pub enum ChallengeVisibility {
    // Presumably visible to the public while the challenge is running — confirm.
    PublicLive,
    // Presumably visible to the public only once the challenge has closed — confirm.
    PublicAfterClose,
    // Presumably never shown publicly — confirm.
    Hidden,
}
209
/// Visibility for solution submission details.
// Wire names follow `rename_all = "snake_case"`:
// `submitter_live_public_live`, `submitter_live_public_after_close`, `submitter_only`.
// Variant notes are plain `//` comments on purpose, so they cannot affect the
// schema that schemars derives for this enum.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, schemars::JsonSchema)]
#[serde(rename_all = "snake_case")]
pub enum ChallengeResultDetailVisibility {
    // Presumably: submitter and public both see details while the challenge runs — confirm.
    SubmitterLivePublicLive,
    // Presumably: submitter sees details live, public only after close — confirm.
    SubmitterLivePublicAfterClose,
    // Presumably: only the submitter ever sees details — confirm.
    SubmitterOnly,
}
218
/// Policy controlling when solution artifacts may become public.
// Wire names follow `rename_all = "snake_case"`:
// `private`, `public`, `public_after_close`.
// Variant notes are plain `//` comments on purpose, so they cannot affect the
// schema that schemars derives for this enum.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, schemars::JsonSchema)]
#[serde(rename_all = "snake_case")]
pub enum ChallengeSolutionPublicationPolicy {
    // Presumably solutions are never published — confirm.
    Private,
    // Presumably solutions may be published at any time — confirm.
    Public,
    // Presumably solutions may be published only after the challenge closes — confirm.
    PublicAfterClose,
}
227
/// Local solution format constraints declared by a bundle.
// NOTE(review): unlike the other structs in this file, this one has no
// `#[serde(deny_unknown_fields)]`, so unknown keys under `solution` are ignored
// rather than rejected. Confirm whether that leniency is intentional.
#[derive(Debug, Clone, Serialize, Deserialize, schemars::JsonSchema)]
pub struct SolutionSpec {
    // Free-form string; this type does not enumerate the accepted protocol names.
    pub protocol: String,
    // Typed as `BundleRelativePath`; presumably the solution manifest's location
    // relative to the bundle root — confirm.
    pub manifest_file: BundleRelativePath,
}