1use std::collections::HashSet;
8use std::path::Path;
9
10use chrono::{DateTime, Utc};
11
12use crate::validation::{archive::ChallengeValidationError, targets, text};
13use crate::zip_project::{ZIP_PROJECT_MANIFEST_FILE, ZIP_PROJECT_PROTOCOL};
14use agentics_domain::models::challenge::{
15 ChallengeBundleSpec, ChallengeExecutionMode, ChallengeRunInputFile, ChallengeRunManifest,
16 ChallengeRunSpec, ChallengeSetupSpec, ChallengeSolutionPublicationPolicy,
17 CoexecutedBenchmarkSetupSpec, EvaluatorSpec, MAX_CHALLENGE_KEYWORDS, MIN_CHALLENGE_KEYWORDS,
18 PipedStdioSessionManifest, PipedStdioSetupSpec, PrivateBenchmarkPolicy,
19};
20use agentics_domain::models::paths::BundleRelativePath;
21use agentics_error::{Result, ServiceError};
22use garde::Validate;
23
24mod filesystem;
25mod images;
26
27pub use filesystem::{
28 challenge_bundle_tree_sha256, copy_challenge_bundle_dir, copy_challenge_bundle_dir_excluding,
29};
30
/// Upper bound on the number of runs a single run manifest may declare.
///
/// `validate_challenge_run_manifest` rejects manifests whose `runs` list is
/// longer than this value.
pub const MAX_CHALLENGE_RUNS_PER_EVALUATION: u64 = 100;
33
34pub async fn read_challenge_bundle_spec(bundle_dir: &Path) -> Result<ChallengeBundleSpec> {
36 let spec_path = bundle_dir.join("spec.json");
37 let raw = tokio::fs::read_to_string(&spec_path).await?;
38 let spec: ChallengeBundleSpec = serde_json::from_str(&raw)
39 .map_err(|e| ChallengeValidationError::InvalidManifest(format!("spec.json: {e}")))?;
40 validate_challenge_bundle_spec(&spec)?;
41 Ok(spec)
42}
43
44pub async fn read_challenge_run_manifest(
46 bundle_dir: &Path,
47 manifest_path: &BundleRelativePath,
48) -> Result<ChallengeRunManifest> {
49 read_challenge_run_manifest_file(
50 &bundle_dir.join(manifest_path.as_path()),
51 &format!("run manifest {manifest_path}"),
52 )
53 .await
54}
55
56pub async fn read_challenge_run_manifest_file(
58 manifest_file: &Path,
59 label: &str,
60) -> Result<ChallengeRunManifest> {
61 let raw = tokio::fs::read_to_string(manifest_file).await?;
62 let manifest: ChallengeRunManifest = serde_json::from_str(&raw)
63 .map_err(|e| ChallengeValidationError::InvalidManifest(format!("{label}: {e}")))?;
64 validate_challenge_run_manifest(&manifest)?;
65 Ok(manifest)
66}
67
68pub async fn read_piped_stdio_session_manifest(
70 bundle_dir: &Path,
71 manifest_path: &BundleRelativePath,
72) -> Result<PipedStdioSessionManifest> {
73 read_piped_stdio_session_manifest_file(
74 &bundle_dir.join(manifest_path.as_path()),
75 &format!("session manifest {manifest_path}"),
76 )
77 .await
78}
79
80pub async fn read_piped_stdio_session_manifest_file(
82 manifest_file: &Path,
83 label: &str,
84) -> Result<PipedStdioSessionManifest> {
85 let raw = tokio::fs::read_to_string(manifest_file).await?;
86 let manifest: PipedStdioSessionManifest = serde_json::from_str(&raw)
87 .map_err(|e| ChallengeValidationError::InvalidManifest(format!("invalid {label}: {e}")))?;
88 validate_piped_stdio_session_manifest(&manifest)?;
89 Ok(manifest)
90}
91
92pub async fn validate_challenge_bundle(bundle_dir: &Path) -> Result<()> {
94 let spec = read_challenge_bundle_spec(bundle_dir).await?;
95 let spec_path = bundle_dir.join("spec.json");
96 let statement_path = bundle_dir.join("statement.md");
97 let public_dir = bundle_dir.join(spec.datasets.public_dir.as_path());
98
99 assert_path_type(&spec_path, "file", "spec.json").await?;
100 assert_path_type(&statement_path, "file", "statement.md").await?;
101 assert_declared_execution_scripts(bundle_dir, &spec).await?;
102 assert_path_type(&public_dir, "directory", "public data dir").await?;
103
104 validate_declared_execution_inputs(bundle_dir, &spec).await?;
105
106 if spec.datasets.private_benchmark_enabled
107 && let Some(ref private_benchmark_dir) = spec.datasets.private_benchmark_dir
108 {
109 assert_path_type(
110 &bundle_dir.join(private_benchmark_dir.as_path()),
111 "directory",
112 "private benchmark data dir",
113 )
114 .await?;
115 }
116
117 Ok(())
118}
119
120async fn assert_declared_execution_scripts(
122 bundle_dir: &Path,
123 spec: &ChallengeBundleSpec,
124) -> Result<()> {
125 if let Some(script_path) =
126 declared_evaluator_script(&spec.execution.trusted_evaluator().command)
127 {
128 let label = format!("{} script", spec.execution.mode().runtime_name());
129 assert_path_type(&bundle_dir.join(script_path), "file", &label).await?;
130 }
131
132 match &spec.execution {
133 agentics_domain::models::challenge::ChallengeExecutionSpec::SeparatedEvaluator(
134 execution,
135 ) => {
136 for (label, setup) in [
137 (
138 "validation setup script",
139 execution.validation_setup.as_ref(),
140 ),
141 (
142 "official evaluation setup script",
143 execution.official_evaluation_setup.as_ref(),
144 ),
145 ] {
146 if let Some(setup) = setup
147 && let Some(script_path) = declared_evaluator_script(&setup.command)
148 {
149 assert_path_type(&bundle_dir.join(script_path), "file", label).await?;
150 }
151 }
152 }
153 agentics_domain::models::challenge::ChallengeExecutionSpec::PipedStdio(execution) => {
154 for (label, setup) in [
155 (
156 "validation setup script",
157 execution.validation_setup.as_ref(),
158 ),
159 (
160 "official evaluation setup script",
161 execution.official_evaluation_setup.as_ref(),
162 ),
163 ] {
164 if let Some(setup) = setup
165 && let Some(script_path) = declared_evaluator_script(&setup.command)
166 {
167 assert_path_type(&bundle_dir.join(script_path), "file", label).await?;
168 }
169 }
170 }
171 agentics_domain::models::challenge::ChallengeExecutionSpec::CoexecutedBenchmark(
172 execution,
173 ) => {
174 for (label, setup) in [
175 (
176 "validation setup script",
177 execution.validation_setup.as_ref(),
178 ),
179 (
180 "official evaluation setup script",
181 execution.official_evaluation_setup.as_ref(),
182 ),
183 ] {
184 if let Some(setup) = setup
185 && let Some(script_path) = declared_evaluator_script(&setup.command)
186 {
187 assert_path_type(&bundle_dir.join(script_path), "file", label).await?;
188 }
189 }
190 }
191 }
192
193 Ok(())
194}
195
196async fn validate_declared_execution_inputs(
198 bundle_dir: &Path,
199 spec: &ChallengeBundleSpec,
200) -> Result<()> {
201 match &spec.execution {
202 agentics_domain::models::challenge::ChallengeExecutionSpec::SeparatedEvaluator(
203 execution,
204 ) => {
205 if spec.targets.iter().any(|target| target.validation_enabled)
206 && let Some(validation_runs) = &execution.validation_runs
207 {
208 validate_static_run_manifest(bundle_dir, validation_runs, "validation").await?;
209 }
210 if spec.datasets.private_benchmark_enabled
211 && let Some(official_runs) = &execution.official_runs
212 {
213 validate_static_run_manifest(bundle_dir, official_runs, "official").await?;
214 }
215 }
216 agentics_domain::models::challenge::ChallengeExecutionSpec::PipedStdio(execution) => {
217 if spec.targets.iter().any(|target| target.validation_enabled)
218 && let Some(validation_session) = &execution.validation_session
219 {
220 validate_static_session_manifest(bundle_dir, validation_session, "validation")
221 .await?;
222 }
223 if spec.datasets.private_benchmark_enabled
224 && let Some(official_session) = &execution.official_session
225 {
226 validate_static_session_manifest(bundle_dir, official_session, "official").await?;
227 }
228 }
229 agentics_domain::models::challenge::ChallengeExecutionSpec::CoexecutedBenchmark(_) => {}
230 }
231
232 Ok(())
233}
234
235async fn validate_static_run_manifest(
237 bundle_dir: &Path,
238 manifest_path: &BundleRelativePath,
239 label: &str,
240) -> Result<()> {
241 assert_path_type(
242 &bundle_dir.join(manifest_path.as_path()),
243 "file",
244 &format!("{label} run manifest"),
245 )
246 .await?;
247 let manifest = read_challenge_run_manifest(bundle_dir, manifest_path).await?;
248 validate_challenge_run_manifest_sources(bundle_dir, &manifest).await
249}
250
251async fn validate_static_session_manifest(
253 bundle_dir: &Path,
254 manifest_path: &BundleRelativePath,
255 label: &str,
256) -> Result<()> {
257 assert_path_type(
258 &bundle_dir.join(manifest_path.as_path()),
259 "file",
260 &format!("{label} session manifest"),
261 )
262 .await?;
263 let manifest = read_piped_stdio_session_manifest(bundle_dir, manifest_path).await?;
264 validate_piped_stdio_session_manifest_sources(bundle_dir, &manifest).await
265}
266
267async fn assert_path_type(path: &Path, kind: &str, label: &str) -> Result<()> {
269 let meta = tokio::fs::metadata(path).await.map_err(|_| {
270 ServiceError::Validation(format!("{} does not exist: {}", label, path.display()))
271 })?;
272
273 if kind == "file" && !meta.is_file() {
274 return Err(ServiceError::Validation(format!(
275 "{} is not a file: {}",
276 label,
277 path.display()
278 )));
279 }
280 if kind == "directory" && !meta.is_dir() {
281 return Err(ServiceError::Validation(format!(
282 "{} is not a directory: {}",
283 label,
284 path.display()
285 )));
286 }
287
288 Ok(())
289}
290
/// Returns `true` when `value` is a relative path with no empty and no `..`
/// segments.
///
/// Both `/` and `\` are treated as separators. A leading separator, a
/// trailing separator, a doubled separator, and the empty string all produce
/// an empty segment and are therefore rejected; this is also what rejects
/// paths starting with `/`.
pub fn is_safe_relative_path(value: &str) -> bool {
    let is_separator = |c: char| c == '/' || c == '\\';
    !value
        .split(is_separator)
        .any(|segment| segment.is_empty() || segment == "..")
}
298
299fn validate_challenge_bundle_spec(spec: &ChallengeBundleSpec) -> Result<()> {
301 require_non_empty(&spec.challenge_title, "challenge_title")?;
302 require_non_empty(&spec.summary.en, "summary.en")?;
303 require_non_empty(&spec.summary.zh, "summary.zh")?;
304 validate_challenge_keywords(spec)?;
305 validate_garde(spec, "spec")?;
306
307 if spec.schema_version != 1 {
308 return Err(ServiceError::Validation(
309 "schema_version must be 1".to_string(),
310 ));
311 }
312 if spec.solution.protocol != ZIP_PROJECT_PROTOCOL {
313 return Err(ServiceError::Validation(format!(
314 "solution.protocol must be {ZIP_PROJECT_PROTOCOL}"
315 )));
316 }
317 if spec.solution.manifest_file.as_str() != ZIP_PROJECT_MANIFEST_FILE {
318 return Err(ServiceError::Validation(format!(
319 "solution.manifest_file must be {ZIP_PROJECT_MANIFEST_FILE}"
320 )));
321 }
322 match &spec.execution {
323 agentics_domain::models::challenge::ChallengeExecutionSpec::SeparatedEvaluator(
324 execution,
325 ) => {
326 validate_evaluator_spec(
327 &execution.separated_evaluator,
328 "execution.separated_evaluator.command",
329 ChallengeExecutionMode::SeparatedEvaluator,
330 )?;
331 }
332 agentics_domain::models::challenge::ChallengeExecutionSpec::PipedStdio(execution) => {
333 validate_evaluator_spec(
334 &execution.interactive_evaluator,
335 "execution.interactive_evaluator.command",
336 ChallengeExecutionMode::PipedStdio,
337 )?;
338 }
339 agentics_domain::models::challenge::ChallengeExecutionSpec::CoexecutedBenchmark(
340 execution,
341 ) => {
342 validate_evaluator_spec(
343 &execution.coexecuted_evaluator,
344 "execution.coexecuted_evaluator.command",
345 ChallengeExecutionMode::CoexecutedBenchmark,
346 )?;
347 }
348 }
349 validate_targets(spec)?;
350 validate_challenge_policy(spec)?;
351 validate_execution(spec)?;
352
353 if spec.datasets.private_benchmark_policy != PrivateBenchmarkPolicy::ScoreOnly {
354 return Err(ServiceError::Validation(
355 "datasets.private_benchmark_policy must be score_only".to_string(),
356 ));
357 }
358
359 match (
363 spec.datasets.private_benchmark_enabled,
364 spec.datasets.private_benchmark_dir.as_ref(),
365 execution_uses_static_official_locator(&spec.execution),
366 ) {
367 (true, Some(_), _) => {}
368 (true, None, true) => {
369 return Err(ServiceError::Validation(
370 "datasets.private_benchmark_dir is required when private_benchmark_enabled uses a static official run or session manifest"
371 .to_string(),
372 ));
373 }
374 (true, None, false) => {}
375 (false, Some(_), _) => {}
376 (false, None, _) => {}
377 }
378
379 validate_metric_schema(spec)?;
380
381 Ok(())
382}
383
384fn validate_challenge_keywords(spec: &ChallengeBundleSpec) -> Result<()> {
386 if !(MIN_CHALLENGE_KEYWORDS..=MAX_CHALLENGE_KEYWORDS).contains(&spec.keywords.len()) {
387 return Err(ServiceError::Validation(format!(
388 "keywords must contain between {MIN_CHALLENGE_KEYWORDS} and {MAX_CHALLENGE_KEYWORDS} entries"
389 )));
390 }
391 let mut seen = HashSet::new();
392 for keyword in &spec.keywords {
393 let normalized = keyword.as_str().to_lowercase();
394 if !seen.insert(normalized) {
395 return Err(ServiceError::Validation(format!(
396 "duplicate challenge keyword `{keyword}`"
397 )));
398 }
399 }
400 Ok(())
401}
402
403pub fn validate_digest_pinned_images(spec: &ChallengeBundleSpec) -> Result<()> {
405 for (index, target) in spec.targets.iter().enumerate() {
406 let field = format!("targets[{index}].resource_profile");
407 images::require_image_digest_reference(
408 &target.resource_profile.solution_image,
409 &format!("{field}.solution_image"),
410 )?;
411 images::require_image_digest_reference(
412 &target.resource_profile.evaluator_image,
413 &format!("{field}.evaluator_image"),
414 )?;
415 }
416
417 Ok(())
418}
419
420fn validate_evaluator_spec(
422 evaluator: &EvaluatorSpec,
423 field: &str,
424 mode: ChallengeExecutionMode,
425) -> Result<()> {
426 let evaluator_field = field.strip_suffix(".command").unwrap_or(field);
427 validate_garde(evaluator, evaluator_field)?;
428 validate_declared_script_runtime(&evaluator.command, field, mode)?;
429
430 Ok(())
431}
432
433fn validate_declared_script_runtime(
435 command: &[String],
436 field: &str,
437 mode: ChallengeExecutionMode,
438) -> Result<()> {
439 let Some(script_path) = declared_evaluator_script(command) else {
440 return Ok(());
441 };
442 let expected_prefix = format!("{}/", mode.runtime_name());
443 if !script_path.starts_with(&expected_prefix) {
444 return Err(ServiceError::Validation(format!(
445 "{field} script path must live under `{}` for {} execution, got `{script_path}`",
446 mode.runtime_name(),
447 execution_mode_name(mode)
448 )));
449 }
450
451 Ok(())
452}
453
454fn declared_evaluator_script(command: &[String]) -> Option<&str> {
456 command
457 .iter()
458 .find(|part| is_safe_relative_path(part) && part.ends_with(".py"))
459 .map(String::as_str)
460}
461
462fn validate_targets(spec: &ChallengeBundleSpec) -> Result<()> {
464 if spec.targets.is_empty() {
465 return Err(ServiceError::Validation(
466 "targets must not be empty".to_string(),
467 ));
468 }
469
470 let mut target_names = HashSet::with_capacity(spec.targets.len());
471 for (index, target) in spec.targets.iter().enumerate() {
472 let field = format!("targets[{index}]");
473 targets::validate_submission_target_policy(target, &field)?;
474 images::validate_target(target, &field)?;
475 if !target_names.insert(target.name.as_str()) {
476 return Err(ServiceError::Validation(format!(
477 "targets contains duplicate name `{}`",
478 target.name
479 )));
480 }
481 }
482
483 Ok(())
484}
485
486fn validate_challenge_policy(spec: &ChallengeBundleSpec) -> Result<()> {
488 let starts_at = parse_required_rfc3339(&spec.starts_at, "starts_at")?;
489 let closes_at = parse_optional_rfc3339(spec.closes_at.as_deref(), "closes_at")?;
490 if let Some(closes_at) = closes_at
491 && closes_at <= starts_at
492 {
493 return Err(ServiceError::Validation(
494 "closes_at must be later than starts_at".to_string(),
495 ));
496 }
497 if spec.solution_publication == ChallengeSolutionPublicationPolicy::PublicAfterClose
498 && closes_at.is_none()
499 {
500 return Err(ServiceError::Validation(
501 "closes_at is required when solution_publication is public_after_close".to_string(),
502 ));
503 }
504 validate_optional_positive_limit(
505 spec.validation_submission_limit,
506 "validation_submission_limit",
507 )?;
508 validate_optional_positive_limit(spec.official_submission_limit, "official_submission_limit")?;
509 if spec.targets.iter().any(|target| target.validation_enabled)
510 && spec.validation_submission_limit.is_none()
511 {
512 return Err(ServiceError::Validation(
513 "validation_submission_limit is required when any target has validation_enabled true"
514 .to_string(),
515 ));
516 }
517
518 Ok(())
519}
520
521fn parse_required_rfc3339(value: &str, field: &str) -> Result<DateTime<Utc>> {
523 DateTime::parse_from_rfc3339(value)
524 .map(|date| date.with_timezone(&Utc))
525 .map_err(|e| ServiceError::Validation(format!("{field} must be RFC3339: {e}")))
526}
527
528fn parse_optional_rfc3339(value: Option<&str>, field: &str) -> Result<Option<DateTime<Utc>>> {
530 value
531 .map(|value| {
532 DateTime::parse_from_rfc3339(value)
533 .map(|date| date.with_timezone(&Utc))
534 .map_err(|e| ServiceError::Validation(format!("{field} must be RFC3339: {e}")))
535 })
536 .transpose()
537}
538
539fn validate_optional_positive_limit(value: Option<i64>, field: &str) -> Result<()> {
541 if let Some(value) = value
542 && value <= 0
543 {
544 return Err(ServiceError::Validation(format!(
545 "{field} must be positive"
546 )));
547 }
548 Ok(())
549}
550
551fn validate_execution(spec: &ChallengeBundleSpec) -> Result<()> {
553 match &spec.execution {
554 agentics_domain::models::challenge::ChallengeExecutionSpec::SeparatedEvaluator(
555 execution,
556 ) => {
557 validate_separated_evaluator_execution(spec, execution)?;
558 }
559 agentics_domain::models::challenge::ChallengeExecutionSpec::PipedStdio(execution) => {
560 validate_piped_stdio_execution(spec, execution)?;
561 }
562 agentics_domain::models::challenge::ChallengeExecutionSpec::CoexecutedBenchmark(
563 execution,
564 ) => {
565 validate_coexecuted_benchmark_execution(spec, execution)?;
566 }
567 }
568 validate_solution_run_stage_policy(spec)?;
569
570 Ok(())
571}
572
573fn validate_solution_run_stage_policy(spec: &ChallengeBundleSpec) -> Result<()> {
575 for (index, target) in spec.targets.iter().enumerate() {
576 let field = format!("targets[{index}].resource_profile.solution.run");
577 match spec.execution.mode() {
578 ChallengeExecutionMode::SeparatedEvaluator | ChallengeExecutionMode::PipedStdio => {
579 if target.resource_profile.solution.run.is_none() {
580 return Err(ServiceError::Validation(format!(
581 "{field} is required for {} execution",
582 execution_mode_name(spec.execution.mode())
583 )));
584 }
585 }
586 ChallengeExecutionMode::CoexecutedBenchmark => {
587 if target.resource_profile.solution.run.is_some() {
588 return Err(ServiceError::Validation(format!(
589 "{field} is forbidden for coexecuted_benchmark execution"
590 )));
591 }
592 }
593 }
594 }
595 Ok(())
596}
597
598fn execution_mode_name(mode: ChallengeExecutionMode) -> &'static str {
600 match mode {
601 ChallengeExecutionMode::SeparatedEvaluator => "separated_evaluator",
602 ChallengeExecutionMode::PipedStdio => "piped_stdio",
603 ChallengeExecutionMode::CoexecutedBenchmark => "coexecuted_benchmark",
604 }
605}
606
607fn validate_separated_evaluator_execution(
609 spec: &ChallengeBundleSpec,
610 execution: &agentics_domain::models::challenge::SeparatedEvaluatorExecutionSpec,
611) -> Result<()> {
612 if let Some(setup) = &execution.validation_setup {
613 validate_setup_spec(
614 setup,
615 "execution.validation_setup",
616 ChallengeExecutionMode::SeparatedEvaluator,
617 )?;
618 }
619 if let Some(setup) = &execution.official_evaluation_setup {
620 validate_setup_spec(
621 setup,
622 "execution.official_evaluation_setup",
623 ChallengeExecutionMode::SeparatedEvaluator,
624 )?;
625 }
626 if execution.validation_runs.is_some() && execution.validation_setup.is_some() {
627 return Err(ServiceError::Validation(
628 "execution must not declare both validation_runs and validation_setup".to_string(),
629 ));
630 }
631 if execution.official_runs.is_some() && execution.official_evaluation_setup.is_some() {
632 return Err(ServiceError::Validation(
633 "execution must not declare both official_runs and official_evaluation_setup"
634 .to_string(),
635 ));
636 }
637 if spec.targets.iter().any(|target| target.validation_enabled)
638 && execution.validation_runs.is_none()
639 && execution.validation_setup.is_none()
640 {
641 return Err(ServiceError::Validation(
642 "execution.validation_runs or execution.validation_setup is required when any target has validation_enabled true"
643 .to_string(),
644 ));
645 }
646 if spec.datasets.private_benchmark_enabled
647 && execution.official_runs.is_none()
648 && execution.official_evaluation_setup.is_none()
649 {
650 return Err(ServiceError::Validation(
651 "execution.official_runs or execution.official_evaluation_setup is required when private_benchmark_enabled is true"
652 .to_string(),
653 ));
654 }
655 Ok(())
656}
657
658fn validate_piped_stdio_execution(
660 spec: &ChallengeBundleSpec,
661 execution: &agentics_domain::models::challenge::PipedStdioExecutionSpec,
662) -> Result<()> {
663 if !execution.acknowledge_stdio_protocol_framing {
664 return Err(ServiceError::Validation(
665 "execution.acknowledge_stdio_protocol_framing must be true for piped_stdio: the challenge author must document the stdin/stdout message protocol, including session start and termination, multi-case framing if used, EOF behavior, malformed participant output handling, and trusted evaluator result.json ownership."
666 .to_string(),
667 ));
668 }
669 if let Some(setup) = &execution.validation_setup {
670 validate_piped_stdio_setup_spec(
671 setup,
672 "execution.validation_setup",
673 ChallengeExecutionMode::PipedStdio,
674 )?;
675 }
676 if let Some(setup) = &execution.official_evaluation_setup {
677 validate_piped_stdio_setup_spec(
678 setup,
679 "execution.official_evaluation_setup",
680 ChallengeExecutionMode::PipedStdio,
681 )?;
682 }
683 if execution.validation_session.is_some() && execution.validation_setup.is_some() {
684 return Err(ServiceError::Validation(
685 "execution must not declare both validation_session and validation_setup".to_string(),
686 ));
687 }
688 if execution.official_session.is_some() && execution.official_evaluation_setup.is_some() {
689 return Err(ServiceError::Validation(
690 "execution must not declare both official_session and official_evaluation_setup"
691 .to_string(),
692 ));
693 }
694 if spec.targets.iter().any(|target| target.validation_enabled)
695 && execution.validation_session.is_none()
696 && execution.validation_setup.is_none()
697 {
698 return Err(ServiceError::Validation(
699 "execution.validation_session or execution.validation_setup is required when any target has validation_enabled true"
700 .to_string(),
701 ));
702 }
703 if spec.datasets.private_benchmark_enabled
704 && execution.official_session.is_none()
705 && execution.official_evaluation_setup.is_none()
706 {
707 return Err(ServiceError::Validation(
708 "execution.official_session or execution.official_evaluation_setup is required when private_benchmark_enabled is true"
709 .to_string(),
710 ));
711 }
712 Ok(())
713}
714
715fn validate_coexecuted_benchmark_execution(
717 _spec: &ChallengeBundleSpec,
718 execution: &agentics_domain::models::challenge::CoexecutedBenchmarkExecutionSpec,
719) -> Result<()> {
720 if !execution.acknowledge_danger {
721 return Err(ServiceError::Validation(
722 "execution.acknowledge_danger must be true for coexecuted_benchmark".to_string(),
723 ));
724 }
725 if let Some(setup) = &execution.validation_setup {
726 validate_coexecuted_benchmark_setup_spec(
727 setup,
728 "execution.validation_setup",
729 ChallengeExecutionMode::CoexecutedBenchmark,
730 )?;
731 }
732 if let Some(setup) = &execution.official_evaluation_setup {
733 validate_coexecuted_benchmark_setup_spec(
734 setup,
735 "execution.official_evaluation_setup",
736 ChallengeExecutionMode::CoexecutedBenchmark,
737 )?;
738 }
739 Ok(())
740}
741
742fn validate_setup_spec(
744 setup: &ChallengeSetupSpec,
745 field: &str,
746 mode: ChallengeExecutionMode,
747) -> Result<()> {
748 validate_garde(setup, field)?;
749 let command_field = format!("{field}.command");
750 validate_declared_script_runtime(&setup.command, &command_field, mode)?;
751
752 Ok(())
753}
754
755fn validate_piped_stdio_setup_spec(
757 setup: &PipedStdioSetupSpec,
758 field: &str,
759 mode: ChallengeExecutionMode,
760) -> Result<()> {
761 validate_garde(setup, field)?;
762 let command_field = format!("{field}.command");
763 validate_declared_script_runtime(&setup.command, &command_field, mode)?;
764
765 Ok(())
766}
767
768fn validate_coexecuted_benchmark_setup_spec(
770 setup: &CoexecutedBenchmarkSetupSpec,
771 field: &str,
772 mode: ChallengeExecutionMode,
773) -> Result<()> {
774 validate_garde(setup, field)?;
775 let command_field = format!("{field}.command");
776 validate_declared_script_runtime(&setup.command, &command_field, mode)?;
777
778 Ok(())
779}
780
781fn validate_challenge_run_manifest(manifest: &ChallengeRunManifest) -> Result<()> {
783 if manifest.runs.is_empty() {
784 return Err(ServiceError::Validation(
785 "run manifest must declare at least one run".to_string(),
786 ));
787 }
788 if u64::try_from(manifest.runs.len())
789 .map(|count| count > MAX_CHALLENGE_RUNS_PER_EVALUATION)
790 .unwrap_or(true)
791 {
792 return Err(ServiceError::Validation(format!(
793 "run manifest must declare at most {MAX_CHALLENGE_RUNS_PER_EVALUATION} runs"
794 )));
795 }
796
797 let mut run_names = HashSet::with_capacity(manifest.runs.len());
798 for run in &manifest.runs {
799 validate_challenge_run(run)?;
800 if !run_names.insert(run.run_name.as_str()) {
801 return Err(ServiceError::Validation(format!(
802 "run manifest contains duplicate run_name `{}`",
803 run.run_name
804 )));
805 }
806 }
807
808 Ok(())
809}
810
811fn validate_challenge_run(run: &ChallengeRunSpec) -> Result<()> {
813 if run.stdin_json.is_some() && run.stdin_text.is_some() {
814 return Err(ServiceError::Validation(
815 "runs[].stdin_json and runs[].stdin_text cannot both be present".to_string(),
816 ));
817 }
818 let mut input_paths = HashSet::with_capacity(run.input_files.len());
819 for input in &run.input_files {
820 validate_run_input_file(input)?;
821 if !input_paths.insert(input.path.as_str()) {
822 return Err(ServiceError::Validation(format!(
823 "runs[].input_files contains duplicate path `{}`",
824 input.path
825 )));
826 }
827 }
828 let mut output_paths = HashSet::with_capacity(run.output_files.len());
829 for path in &run.output_files {
830 if !output_paths.insert(path.as_str()) {
831 return Err(ServiceError::Validation(format!(
832 "runs[].output_files contains duplicate path `{path}`"
833 )));
834 }
835 }
836
837 Ok(())
838}
839
840fn validate_piped_stdio_session_manifest(manifest: &PipedStdioSessionManifest) -> Result<()> {
842 let mut input_paths = HashSet::with_capacity(manifest.input_files.len());
843 for input in &manifest.input_files {
844 validate_run_input_file(input)?;
845 if !input_paths.insert(input.path.as_str()) {
846 return Err(ServiceError::Validation(format!(
847 "session.input_files contains duplicate path `{}`",
848 input.path
849 )));
850 }
851 }
852
853 Ok(())
854}
855
856fn validate_run_input_file(input: &ChallengeRunInputFile) -> Result<()> {
858 let source_count = [
859 input.source_path.is_some(),
860 input.content.is_some(),
861 input.content_json.is_some(),
862 ]
863 .into_iter()
864 .filter(|present| *present)
865 .count();
866 if source_count > 1 {
867 return Err(ServiceError::Validation(
868 "runs[].input_files[] must declare only one of source_path, content, or content_json"
869 .to_string(),
870 ));
871 }
872 if source_count == 0 {
873 return Err(ServiceError::Validation(
874 "runs[].input_files[] must declare source_path, content, or content_json".to_string(),
875 ));
876 }
877
878 Ok(())
879}
880
881pub async fn validate_challenge_run_manifest_sources(
883 bundle_dir: &Path,
884 manifest: &ChallengeRunManifest,
885) -> Result<()> {
886 for run in &manifest.runs {
887 for input in &run.input_files {
888 if let Some(source_path) = &input.source_path {
889 let full_path = bundle_dir.join(source_path.as_path());
890 let meta = tokio::fs::symlink_metadata(&full_path).await.map_err(|_| {
891 ServiceError::Validation(format!(
892 "runs[].input_files[].source_path does not exist: {}",
893 full_path.display()
894 ))
895 })?;
896 if meta.file_type().is_symlink() {
897 return Err(ServiceError::Validation(format!(
898 "runs[].input_files[].source_path must not be a symlink: {}",
899 full_path.display()
900 )));
901 }
902 if !meta.is_file() {
903 return Err(ServiceError::Validation(format!(
904 "runs[].input_files[].source_path is not a file: {}",
905 full_path.display()
906 )));
907 }
908 }
909 }
910 }
911
912 Ok(())
913}
914
915pub async fn validate_piped_stdio_session_manifest_sources(
917 bundle_dir: &Path,
918 manifest: &PipedStdioSessionManifest,
919) -> Result<()> {
920 for input in &manifest.input_files {
921 if let Some(source_path) = &input.source_path {
922 let full_path = bundle_dir.join(source_path.as_path());
923 let meta = tokio::fs::symlink_metadata(&full_path).await.map_err(|_| {
924 ServiceError::Validation(format!(
925 "session.input_files[].source_path does not exist: {}",
926 full_path.display()
927 ))
928 })?;
929 if meta.file_type().is_symlink() {
930 return Err(ServiceError::Validation(format!(
931 "session.input_files[].source_path must not be a symlink: {}",
932 full_path.display()
933 )));
934 }
935 if !meta.is_file() {
936 return Err(ServiceError::Validation(format!(
937 "session.input_files[].source_path is not a file: {}",
938 full_path.display()
939 )));
940 }
941 }
942 }
943
944 Ok(())
945}
946
947fn execution_uses_static_official_locator(
949 execution: &agentics_domain::models::challenge::ChallengeExecutionSpec,
950) -> bool {
951 match execution {
952 agentics_domain::models::challenge::ChallengeExecutionSpec::SeparatedEvaluator(
953 execution,
954 ) => execution.official_runs.is_some(),
955 agentics_domain::models::challenge::ChallengeExecutionSpec::PipedStdio(execution) => {
956 execution.official_session.is_some()
957 }
958 agentics_domain::models::challenge::ChallengeExecutionSpec::CoexecutedBenchmark(_) => false,
959 }
960}
961
962fn validate_metric_schema(spec: &ChallengeBundleSpec) -> Result<()> {
964 let schema = &spec.metric_schema;
965 if schema.metrics.is_empty() {
966 return Err(ServiceError::Validation(
967 "metric_schema.metrics must not be empty".to_string(),
968 ));
969 }
970
971 let mut names = HashSet::with_capacity(schema.metrics.len());
972 for metric in &schema.metrics {
973 require_non_empty(&metric.label, "metric_schema.metrics[].label")?;
974 if let Some(unit) = &metric.unit {
975 require_non_empty(unit, "metric_schema.metrics[].unit")?;
976 }
977 if let Some(metric_description) = &metric.metric_description {
978 require_non_empty(
979 metric_description,
980 "metric_schema.metrics[].metric_description",
981 )?;
982 }
983 if !names.insert(metric.name.as_str()) {
984 return Err(ServiceError::Validation(format!(
985 "metric_schema.metrics contains duplicate name `{}`",
986 metric.name
987 )));
988 }
989 }
990
991 if !names.contains(schema.ranking.primary_metric_name.as_str()) {
992 return Err(ServiceError::Validation(format!(
993 "metric_schema.ranking.primary_metric_name references unknown metric `{}`",
994 schema.ranking.primary_metric_name
995 )));
996 }
997
998 let mut tie_breakers = HashSet::with_capacity(schema.ranking.tie_breaker_metric_names.len());
999 for metric_name in &schema.ranking.tie_breaker_metric_names {
1000 if metric_name == &schema.ranking.primary_metric_name {
1001 return Err(ServiceError::Validation(
1002 "metric_schema.ranking.tie_breaker_metric_names must not repeat the primary metric"
1003 .to_string(),
1004 ));
1005 }
1006 if !names.contains(metric_name.as_str()) {
1007 return Err(ServiceError::Validation(format!(
1008 "metric_schema.ranking.tie_breaker_metric_names references unknown metric `{metric_name}`"
1009 )));
1010 }
1011 if !tie_breakers.insert(metric_name.as_str()) {
1012 return Err(ServiceError::Validation(format!(
1013 "metric_schema.ranking.tie_breaker_metric_names contains duplicate metric `{metric_name}`"
1014 )));
1015 }
1016 }
1017
1018 Ok(())
1019}
1020
/// Thin local alias that forwards `value` and `field` unchanged to
/// `text::require_non_empty` and returns its result.
fn require_non_empty(value: &str, field: &str) -> Result<()> {
    text::require_non_empty(value, field)
}
1025
1026fn validate_garde<T>(value: &T, field: &str) -> Result<()>
1027where
1028 T: Validate<Context = ()>,
1029{
1030 value
1031 .validate()
1032 .map_err(|report| ServiceError::Validation(format_garde_report(field, &report)))
1033}
1034
1035fn format_garde_report(field: &str, report: &garde::Report) -> String {
1036 report
1037 .iter()
1038 .map(|(path, error)| {
1039 if path.is_empty() {
1040 format!("{field}: {error}")
1041 } else {
1042 format!("{field}.{path}: {error}")
1043 }
1044 })
1045 .collect::<Vec<_>>()
1046 .join("; ")
1047}
1048
1049#[cfg(test)]
1050mod tests;