Skip to main content

openjd_model/template/
parse.rs

1// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2// Copyright by contributors to this project.
3// SPDX-License-Identifier: (Apache-2.0 OR MIT)
4
5//! Template parsing: YAML/JSON decoding and dispatch by specificationVersion.
6//!
7//! Mirrors Python `_parse.py`.
8
9use std::str::FromStr;
10
11use crate::error::{path_field, ModelError, ValidationErrors};
12use crate::template::constrained_strings::ExtensionName;
13use crate::template::validation as validate;
14use crate::template::{EnvironmentTemplate, JobTemplate};
15use crate::types::{
16    CallerLimits, Extensions, ModelExtension, SpecificationRevision, TemplateSpecificationVersion,
17    ValidationContext,
18};
19
/// Serialization format of the text handed to [`document_string_to_object`].
///
/// The variant selects which decoder is applied to the document text.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DocumentType {
    /// The document text is JSON.
    Json,
    /// The document text is YAML.
    Yaml,
}
26
/// Upper bound on how deeply a template document may nest containers.
///
/// Real OpenJD templates stay shallow — roughly 8 levels at most, for
/// example `steps[0].script.embeddedFiles[0].data` — so 128 leaves ample
/// headroom while still guarding against stack exhaustion on
/// pathological input.
///
/// The value is chosen to equal `serde_json`'s built-in recursion limit,
/// so that JSON and YAML documents are treated the same way when they
/// are nested too deeply.
pub const MAX_DOCUMENT_DEPTH: usize = 128;
36
37/// Parse a string into a generic YAML/JSON object.
38///
39/// When `caller_limits.max_template_size` is set, the document is rejected
40/// if its byte length exceeds the limit (checked before parsing).
41pub fn document_string_to_object(
42    document: &str,
43    doc_type: DocumentType,
44    caller_limits: &CallerLimits,
45) -> Result<serde_json::Value, ModelError> {
46    if let Some(max) = caller_limits.max_template_size {
47        if document.len() > max {
48            return Err(ModelError::ModelValidation(ValidationErrors::single(
49                format!(
50                    "Template document size ({} bytes) exceeds caller limit of {max} bytes.",
51                    document.len()
52                ),
53            )));
54        }
55    }
56
57    let parsed: serde_json::Value = match doc_type {
58        DocumentType::Json => serde_json::from_str(document).map_err(|e| {
59            ModelError::DecodeValidation(format!(
60                "The document is not a valid JSON document consisting of key-value pairs. {e}"
61            ))
62        })?,
63        DocumentType::Yaml => {
64            let options = serde_saphyr::options! {
65                strict_booleans: true,
66                budget: serde_saphyr::budget! {
67                    max_depth: MAX_DOCUMENT_DEPTH,
68                },
69            };
70            serde_saphyr::from_str_with_options(document, options).map_err(|e| {
71                ModelError::DecodeValidation(format!(
72                    "The document is not a valid YAML document consisting of key-value pairs. {e}"
73                ))
74            })?
75        }
76    };
77
78    if !parsed.is_object() {
79        return Err(ModelError::DecodeValidation(format!(
80            "The document is not a valid {doc_type:?} document consisting of key-value pairs."
81        )));
82    }
83
84    Ok(parsed)
85}
86
87/// Validate a template's `extensions` list against the library's known
88/// set and the caller's allowlist, accumulating problems into
89/// `errors`.
90///
91/// The returned [`Extensions`] contains every entry that was both
92/// recognized by [`ModelExtension`] and permitted by
93/// `supported_extensions`. Invalid entries don't stop the function;
94/// they're reported via `errors` and skipped, so the caller sees every
95/// problem in one validation pass.
96///
97/// Problems reported, each at path `extensions`:
98///
99/// * Empty list: `"if provided, must be a non-empty list."`
100/// * One or more duplicate names: a single aggregated message
101///   `"Duplicate values for extension name are not allowed. Duplicate values: A,B,C"`
102///   (values are sorted for stable output).
103/// * One or more unrecognized or not-permitted names: a single
104///   aggregated message
105///   `"Unsupported extension names: A, B, C"` (sorted).
106///
107/// The duplicate pass and the unsupported pass run independently —
108/// callers see errors from both when both apply, matching the Python
109/// Pydantic reference implementation.
110fn validate_extensions_list(
111    template_exts: Option<&[ExtensionName]>,
112    supported_extensions: Option<&[&str]>,
113    errors: &mut ValidationErrors,
114) -> Extensions {
115    let path = path_field(&[], "extensions");
116    let mut result = Extensions::new();
117
118    let Some(exts) = template_exts else {
119        return result;
120    };
121
122    if exts.is_empty() {
123        errors.add(&path, "if provided, must be a non-empty list.");
124        return result;
125    }
126
127    // Duplicate detection: collect all names that appear more than once,
128    // report them in a single message with a stable (sorted) order.
129    let mut seen: std::collections::HashSet<&str> = std::collections::HashSet::new();
130    let mut duplicates: std::collections::BTreeSet<&str> = std::collections::BTreeSet::new();
131    for ext in exts {
132        let name = ext.as_str();
133        if !seen.insert(name) {
134            duplicates.insert(name);
135        }
136    }
137    if !duplicates.is_empty() {
138        let joined: Vec<&str> = duplicates.iter().copied().collect();
139        errors.add(
140            &path,
141            format!(
142                "Duplicate values for extension name are not allowed. Duplicate values: {}",
143                joined.join(",")
144            ),
145        );
146    }
147
148    // Support/recognition: a name is "supported" iff it's in the caller's
149    // allowlist AND is a recognized ModelExtension. Both checks collapse
150    // into a single "Unsupported extension names" message to match
151    // Python's wording and to avoid two near-identical errors for the
152    // common "caller didn't enable the extension" case.
153    let allowlist: std::collections::HashSet<&str> = supported_extensions
154        .unwrap_or(&[])
155        .iter()
156        .copied()
157        .collect();
158    let mut unsupported: std::collections::BTreeSet<&str> = std::collections::BTreeSet::new();
159    for ext in exts {
160        let name = ext.as_str();
161        match (
162            ModelExtension::from_str(name).ok(),
163            allowlist.contains(name),
164        ) {
165            (Some(known), true) => {
166                result.insert(known);
167            }
168            _ => {
169                unsupported.insert(name);
170            }
171        }
172    }
173    if !unsupported.is_empty() {
174        let joined: Vec<&str> = unsupported.iter().copied().collect();
175        errors.add(
176            &path,
177            format!("Unsupported extension names: {}", joined.join(", ")),
178        );
179    }
180
181    result
182}
183
184/// Decode and validate a job template from a YAML value.
185pub fn decode_job_template(
186    template: serde_json::Value,
187    supported_extensions: Option<&[&str]>,
188    caller_limits: &CallerLimits,
189) -> Result<JobTemplate, ModelError> {
190    // Extract specificationVersion
191    let version_str = template
192        .get("specificationVersion")
193        .and_then(|v| v.as_str())
194        .map(|s| s.to_string())
195        .ok_or_else(|| {
196            ModelError::DecodeValidation(
197                "Template is missing Open Job Description schema version key: specificationVersion"
198                    .to_string(),
199            )
200        })?;
201
202    let version = TemplateSpecificationVersion::from_str(&version_str)
203        .map_err(|_| {
204            let allowed = TemplateSpecificationVersion::JobTemplate2023_09.as_str();
205            ModelError::DecodeValidation(format!(
206                "Unknown template version: {version_str}. Values allowed for 'specificationVersion' in Job Templates are: {allowed}"
207            ))
208        })?;
209
210    if !version.is_job_template() {
211        let allowed = TemplateSpecificationVersion::JobTemplate2023_09.as_str();
212        return Err(ModelError::DecodeValidation(format!(
213            "Specification version '{version_str}' is not a Job Template version. \
214             Values allowed for 'specificationVersion' in Job Templates are: {allowed}"
215        )));
216    }
217
218    let jt: JobTemplate = match version.revision() {
219        // Future revisions may decode into a different struct layout.
220        // Making the match explicit now localizes the dispatch point.
221        SpecificationRevision::V2023_09 => serde_json::from_value(template).map_err(|e| {
222            ModelError::DecodeValidation(format!("'{version_str}' failed checks: {e}"))
223        })?,
224    };
225
226    // Build extension set with collect-all error reporting. Any problems
227    // (empty list, duplicates, unsupported names) are reported through
228    // `errors` with path `extensions` and aggregated messages.
229    let mut errors = ValidationErrors::default();
230    let extensions =
231        validate_extensions_list(jt.extensions.as_deref(), supported_extensions, &mut errors);
232    errors.into_result("JobTemplate")?;
233
234    // Route to the revision-specific validation pipeline via the
235    // revision-neutral dispatcher. The revision comes from the template's
236    // declared `specificationVersion`, not from a hardcoded constant.
237    let ctx = ValidationContext::with_extensions(version.revision(), extensions)
238        .with_caller_limits(caller_limits.clone());
239    validate::validate_job_template(&jt, &ctx)?;
240
241    Ok(jt)
242}
243
244/// Decode and validate an environment template from a YAML value.
245pub fn decode_environment_template(
246    template: serde_json::Value,
247    supported_extensions: Option<&[&str]>,
248) -> Result<EnvironmentTemplate, ModelError> {
249    let version_str = template
250        .get("specificationVersion")
251        .and_then(|v| v.as_str())
252        .map(|s| s.to_string())
253        .ok_or_else(|| {
254            ModelError::DecodeValidation(
255                "Template is missing Open Job Description schema version key: specificationVersion"
256                    .to_string(),
257            )
258        })?;
259
260    let version = TemplateSpecificationVersion::from_str(&version_str).map_err(|_| {
261        let allowed = TemplateSpecificationVersion::Environment2023_09.as_str();
262        ModelError::DecodeValidation(format!(
263            "Unknown template version: {version_str}. Allowed values are: {allowed}"
264        ))
265    })?;
266
267    if !version.is_environment_template() {
268        let allowed = TemplateSpecificationVersion::Environment2023_09.as_str();
269        return Err(ModelError::DecodeValidation(format!(
270            "Specification version '{version_str}' is not an Environment Template version. \
271             Allowed values for 'specificationVersion' are: {allowed}"
272        )));
273    }
274
275    let et: EnvironmentTemplate = match version.revision() {
276        // Future revisions may decode into a different struct layout.
277        // Making the match explicit now localizes the dispatch point,
278        // mirroring `decode_job_template`.
279        SpecificationRevision::V2023_09 => serde_json::from_value(template).map_err(|e| {
280            ModelError::DecodeValidation(format!("'{version_str}' failed checks: {e}"))
281        })?,
282    };
283
284    // Build extension set with collect-all error reporting. Same helper
285    // as decode_job_template; the error model name is different.
286    let mut errors = ValidationErrors::default();
287    let extensions =
288        validate_extensions_list(et.extensions.as_deref(), supported_extensions, &mut errors);
289    errors.into_result("EnvironmentTemplate")?;
290
291    let ctx = ValidationContext::with_extensions(version.revision(), extensions);
292    validate::validate_environment_template(&et, &ctx)?;
293
294    Ok(et)
295}
296
297/// Auto-detect template type and decode.
298// Both variants are large structs only used as return values, not stored in collections.
299#[allow(clippy::large_enum_variant)]
300#[derive(Debug)]
301pub enum DecodedTemplate {
302    Job(JobTemplate),
303    Environment(EnvironmentTemplate),
304}
305
306/// Auto-detect whether a template is a job or environment template and decode it.
307pub fn decode_template(
308    template: serde_json::Value,
309    supported_extensions: Option<&[&str]>,
310    caller_limits: &CallerLimits,
311) -> Result<DecodedTemplate, ModelError> {
312    let version_str = template
313        .get("specificationVersion")
314        .and_then(|v| v.as_str())
315        .map(|s| s.to_string())
316        .ok_or_else(|| {
317            ModelError::DecodeValidation(
318                "Template is missing Open Job Description schema version key: specificationVersion"
319                    .to_string(),
320            )
321        })?;
322
323    let version = version_str
324        .parse::<TemplateSpecificationVersion>()
325        .map_err(|_| {
326            ModelError::DecodeValidation(format!("Unknown template version: {version_str}"))
327        })?;
328
329    if version.is_job_template() {
330        decode_job_template(template, supported_extensions, caller_limits).map(DecodedTemplate::Job)
331    } else {
332        decode_environment_template(template, supported_extensions)
333            .map(DecodedTemplate::Environment)
334    }
335}
336
/// Unit tests for document parsing and template decoding.
#[cfg(test)]
mod tests {
    use super::*;

    /// Parse a JSON/YAML literal into a generic value for the decode tests.
    /// Panics on malformed input, which would be a bug in the test itself.
    fn yaml_val(s: &str) -> serde_json::Value {
        serde_saphyr::from_str(s).unwrap()
    }

    // -- document_string_to_object --

    #[test]
    fn test_doc_string_to_object_json() {
        let result = document_string_to_object(
            r#"{"key": "value"}"#,
            DocumentType::Json,
            &CallerLimits::default(),
        )
        .unwrap();
        assert_eq!(result["key"].as_str().unwrap(), "value");
    }

    #[test]
    fn test_doc_string_to_object_yaml() {
        let result =
            document_string_to_object("key: value\n", DocumentType::Yaml, &CallerLimits::default())
                .unwrap();
        assert_eq!(result["key"].as_str().unwrap(), "value");
    }

    #[test]
    fn test_doc_string_not_a_dict_json() {
        // A top-level array decodes fine but is not a key-value document.
        assert!(document_string_to_object(
            "[1, 2, 3]",
            DocumentType::Json,
            &CallerLimits::default()
        )
        .is_err());
    }

    #[test]
    fn test_doc_string_not_a_dict_yaml() {
        // A top-level sequence decodes fine but is not a key-value document.
        assert!(document_string_to_object(
            "- 1\n- 2\n",
            DocumentType::Yaml,
            &CallerLimits::default()
        )
        .is_err());
    }

    #[test]
    fn test_doc_string_bad_parse_json() {
        assert!(
            document_string_to_object("{", DocumentType::Json, &CallerLimits::default()).is_err()
        );
    }

    #[test]
    fn test_doc_string_bad_parse_yaml() {
        // An unterminated flow sequence is genuinely malformed YAML, so this
        // exercises the decode-failure path. (A lone "-" is a valid
        // one-element sequence and would only hit the not-a-mapping check,
        // which `test_doc_string_not_a_dict_yaml` already covers.)
        assert!(document_string_to_object(
            "key: [unclosed",
            DocumentType::Yaml,
            &CallerLimits::default()
        )
        .is_err());
    }

    // -- decode_job_template --

    #[test]
    fn test_decode_job_template_missing_spec_version() {
        let v = yaml_val(r#"{"notspecversion": "badvalue"}"#);
        assert!(decode_job_template(v, None, &CallerLimits::default()).is_err());
    }

    #[test]
    fn test_decode_job_template_unknown_version() {
        let v = yaml_val(r#"{"specificationVersion": "badvalue"}"#);
        assert!(decode_job_template(v, None, &CallerLimits::default()).is_err());
    }

    #[test]
    fn test_decode_job_template_not_job_version() {
        // A valid version string, but for the other template kind.
        let v = yaml_val(r#"{"specificationVersion": "environment-2023-09"}"#);
        assert!(decode_job_template(v, None, &CallerLimits::default()).is_err());
    }

    #[test]
    fn test_decode_job_template_success() {
        let v = yaml_val(
            r#"{
            "specificationVersion": "jobtemplate-2023-09",
            "name": "name",
            "steps": [{"name": "step", "script": {"actions": {"onRun": {"command": "do thing"}}}}]
        }"#,
        );
        let jt = decode_job_template(v, None, &CallerLimits::default()).unwrap();
        assert_eq!(jt.specification_version, "jobtemplate-2023-09");
    }

    // -- decode_environment_template --

    #[test]
    fn test_decode_env_template_missing_spec_version() {
        let v = yaml_val(r#"{"notspecversion": "badvalue"}"#);
        assert!(decode_environment_template(v, None).is_err());
    }

    #[test]
    fn test_decode_env_template_unknown_version() {
        let v = yaml_val(r#"{"specificationVersion": "badvalue"}"#);
        assert!(decode_environment_template(v, None).is_err());
    }

    #[test]
    fn test_decode_env_template_not_env_version() {
        // A valid version string, but for the other template kind.
        let v = yaml_val(r#"{"specificationVersion": "jobtemplate-2023-09"}"#);
        assert!(decode_environment_template(v, None).is_err());
    }

    #[test]
    fn test_decode_env_template_success() {
        let v = yaml_val(
            r#"{
            "specificationVersion": "environment-2023-09",
            "environment": {
                "name": "FooEnv",
                "description": "A description",
                "script": {"actions": {"onEnter": {"command": "echo", "args": ["Hello", "World"]}}}
            }
        }"#,
        );
        let et = decode_environment_template(v, None).unwrap();
        assert_eq!(et.specification_version, "environment-2023-09");
    }

    // -- decode_template (auto-detect) --

    #[test]
    fn test_decode_template_auto_detect_job() {
        let v = yaml_val(
            r#"{
            "specificationVersion": "jobtemplate-2023-09",
            "name": "name",
            "steps": [{"name": "step", "script": {"actions": {"onRun": {"command": "do thing"}}}}]
        }"#,
        );
        assert!(matches!(
            decode_template(v, None, &CallerLimits::default()).unwrap(),
            DecodedTemplate::Job(_)
        ));
    }

    #[test]
    fn test_decode_template_auto_detect_env() {
        let v = yaml_val(
            r#"{
            "specificationVersion": "environment-2023-09",
            "environment": {
                "name": "FooEnv",
                "description": "A description",
                "script": {"actions": {"onEnter": {"command": "echo", "args": ["Hello", "World"]}}}
            }
        }"#,
        );
        assert!(matches!(
            decode_template(v, None, &CallerLimits::default()).unwrap(),
            DecodedTemplate::Environment(_)
        ));
    }

    #[test]
    fn test_decode_template_missing_version() {
        let v = yaml_val(r#"{"name": "test"}"#);
        let err = decode_template(v, None, &CallerLimits::default()).unwrap_err();
        assert!(err.to_string().contains("specificationVersion"));
    }

    #[test]
    fn test_decode_template_unknown_version() {
        let v = yaml_val(r#"{"specificationVersion": "badvalue"}"#);
        let err = decode_template(v, None, &CallerLimits::default()).unwrap_err();
        assert!(err.to_string().contains("Unknown template version"));
    }

    // ══════════════════════════════════════════════════════════════
    // ModelValidation structured errors via decode_job_template
    // ══════════════════════════════════════════════════════════════
    #[test]
    fn validation_error_has_structured_paths() {
        // Step name exceeds 64 chars — triggers ModelValidation
        let long_name = "a".repeat(128);
        let v = yaml_val(&format!(
            r#"{{
            "specificationVersion": "jobtemplate-2023-09",
            "name": "test",
            "steps": [{{"name": "{long_name}", "script": {{"actions": {{"onRun": {{"command": "echo"}}}}}}}}]
        }}"#,
        ));
        let err = decode_job_template(v, None, &Default::default()).unwrap_err();
        let errors = match &err {
            crate::error::ModelError::ModelValidation(e) => e,
            other => panic!("expected ModelValidation, got: {other}"),
        };
        assert_eq!(errors.len(), 1);
        let e = &errors.errors[0];
        // The error must point at the offending field: steps[0].name
        assert_eq!(
            e.path,
            vec![
                crate::error::PathElement::Field("steps".into()),
                crate::error::PathElement::Index(0),
                crate::error::PathElement::Field("name".into()),
            ]
        );
        assert!(
            e.message.contains("64"),
            "expected message about 64-char limit, got: {}",
            e.message
        );
        // Display output matches the Pydantic-compatible format
        assert_eq!(
            err.to_string(),
            format!(
                "Model validation error: 1 validation error for JobTemplate\nsteps[0] -> name:\n\t{}",
                e.message
            )
        );
    }

    #[test]
    fn validation_error_paths_contain_steps() {
        // Missing 'script' — step has no actions
        let v = yaml_val(
            r#"{
            "specificationVersion": "jobtemplate-2023-09",
            "name": "test",
            "steps": [{"name": "s"}]
        }"#,
        );
        let err = decode_job_template(v, None, &Default::default()).unwrap_err();
        let errors = match &err {
            crate::error::ModelError::ModelValidation(e) => e,
            other => panic!("expected ModelValidation, got: {other}"),
        };
        assert!(!errors.is_empty());
        // Every error should reference steps[0]
        for e in &errors.errors {
            assert!(
                e.path.len() >= 2,
                "expected path with at least 2 elements, got: {:?}",
                e.path
            );
            assert_eq!(e.path[0], crate::error::PathElement::Field("steps".into()));
            assert_eq!(e.path[1], crate::error::PathElement::Index(0));
        }
    }
}