Skip to main content

simular/cli/
schema.rs

1//! EMC schema validation.
2//!
3//! This module contains the EMC YAML schema validation logic.
4//! Extracted to enable comprehensive testing of validation rules.
5
6/// Validate an EMC YAML document against the schema.
7///
8/// Returns a tuple of (errors, warnings).
9/// Errors indicate schema violations that must be fixed.
10/// Warnings indicate missing recommended fields.
11///
12/// # Arguments
13///
14/// * `yaml` - The parsed YAML document to validate
15///
16/// # Returns
17///
18/// A tuple of (errors, warnings) vectors.
19#[must_use]
20pub fn validate_emc_schema(yaml: &serde_yaml::Value) -> (Vec<String>, Vec<String>) {
21    let mut errors = Vec::new();
22    let mut warnings = Vec::new();
23
24    check_required_top_level_fields(yaml, &mut errors);
25    check_identity_section(yaml, &mut errors, &mut warnings);
26    check_governing_equation_section(yaml, &mut errors);
27    check_analytical_derivation_section(yaml, &mut errors);
28    check_edd_sections(yaml, &mut warnings);
29
30    (errors, warnings)
31}
32
33fn check_required_top_level_fields(yaml: &serde_yaml::Value, errors: &mut Vec<String>) {
34    let required = [
35        "emc_version",
36        "emc_id",
37        "identity",
38        "governing_equation",
39        "analytical_derivation",
40        "domain_of_validity",
41    ];
42    for field in required {
43        if yaml.get(field).is_none() {
44            errors.push(format!("Missing required field: {field}"));
45        }
46    }
47}
48
49fn check_identity_section(
50    yaml: &serde_yaml::Value,
51    errors: &mut Vec<String>,
52    warnings: &mut Vec<String>,
53) {
54    let Some(identity) = yaml.get("identity") else {
55        return;
56    };
57    if identity.get("name").is_none() {
58        errors.push("Missing required field: identity.name".to_string());
59    }
60    if identity.get("version").is_none() {
61        warnings.push("Missing recommended field: identity.version".to_string());
62    }
63}
64
65fn check_governing_equation_section(yaml: &serde_yaml::Value, errors: &mut Vec<String>) {
66    let Some(eq) = yaml.get("governing_equation") else {
67        return;
68    };
69    if eq.get("latex").is_none() && eq.get("plain_text").is_none() {
70        errors.push("governing_equation must have 'latex' or 'plain_text'".to_string());
71    }
72}
73
74fn check_analytical_derivation_section(yaml: &serde_yaml::Value, errors: &mut Vec<String>) {
75    let Some(deriv) = yaml.get("analytical_derivation") else {
76        return;
77    };
78    if deriv.get("primary_citation").is_none() {
79        errors.push("Missing: analytical_derivation.primary_citation".to_string());
80    }
81}
82
83fn check_edd_sections(yaml: &serde_yaml::Value, warnings: &mut Vec<String>) {
84    if yaml.get("verification_tests").is_none() {
85        warnings.push("Missing EDD-required section: verification_tests".to_string());
86    }
87    if yaml.get("falsification_criteria").is_none() {
88        warnings.push("Missing EDD-required section: falsification_criteria".to_string());
89    }
90}
91
#[cfg(test)]
mod tests {
    use super::*;
    use serde_yaml::Value;

    /// Parse a YAML fixture, failing the test immediately if the fixture
    /// itself is malformed.
    ///
    /// Falling back silently to `Value::Null` on a parse error would let
    /// every "missing field" test pass vacuously, because a null document
    /// is missing all fields.
    fn parse_fixture(src: &str) -> Value {
        let parsed: Result<Value, _> = serde_yaml::from_str(src);
        assert!(
            parsed.is_ok(),
            "test fixture is not valid YAML: {:?}",
            parsed.as_ref().err()
        );
        // Unreachable fallback after the assert above; written this way so the
        // helper does not need `unwrap`/`expect`.
        parsed.unwrap_or(Value::Null)
    }

    /// Smallest document that satisfies every required field and also
    /// carries the recommended `identity.version`.
    fn minimal_valid_emc() -> Value {
        parse_fixture(
            r#"
            emc_version: "1.0"
            emc_id: "test/example"
            identity:
              name: "Test EMC"
              version: "1.0.0"
            governing_equation:
              latex: "E = mc^2"
            analytical_derivation:
              primary_citation: "Einstein, A. (1905)"
            domain_of_validity:
              description: "All domains"
        "#,
        )
    }

    #[test]
    fn test_valid_emc_no_errors() {
        let yaml = minimal_valid_emc();
        let (errors, warnings) = validate_emc_schema(&yaml);
        assert!(errors.is_empty(), "Unexpected errors: {errors:?}");
        // Warnings for missing EDD sections are expected
        assert_eq!(warnings.len(), 2);
    }

    #[test]
    fn test_missing_emc_version() {
        let yaml = parse_fixture(
            r#"
            emc_id: "test/example"
            identity:
              name: "Test"
            governing_equation:
              latex: "x"
            analytical_derivation:
              primary_citation: "cite"
            domain_of_validity: {}
        "#,
        );

        let (errors, _) = validate_emc_schema(&yaml);
        assert!(errors.iter().any(|e| e.contains("emc_version")));
    }

    #[test]
    fn test_missing_emc_id() {
        let yaml = parse_fixture(
            r#"
            emc_version: "1.0"
            identity:
              name: "Test"
            governing_equation:
              latex: "x"
            analytical_derivation:
              primary_citation: "cite"
            domain_of_validity: {}
        "#,
        );

        let (errors, _) = validate_emc_schema(&yaml);
        assert!(errors.iter().any(|e| e.contains("emc_id")));
    }

    #[test]
    fn test_missing_identity() {
        let yaml = parse_fixture(
            r#"
            emc_version: "1.0"
            emc_id: "test/example"
            governing_equation:
              latex: "x"
            analytical_derivation:
              primary_citation: "cite"
            domain_of_validity: {}
        "#,
        );

        let (errors, _) = validate_emc_schema(&yaml);
        assert!(errors.iter().any(|e| e.contains("identity")));
    }

    #[test]
    fn test_missing_identity_name() {
        let yaml = parse_fixture(
            r#"
            emc_version: "1.0"
            emc_id: "test/example"
            identity:
              version: "1.0.0"
            governing_equation:
              latex: "x"
            analytical_derivation:
              primary_citation: "cite"
            domain_of_validity: {}
        "#,
        );

        let (errors, _) = validate_emc_schema(&yaml);
        assert!(errors.iter().any(|e| e.contains("identity.name")));
    }

    #[test]
    fn test_missing_identity_version_warning() {
        let yaml = parse_fixture(
            r#"
            emc_version: "1.0"
            emc_id: "test/example"
            identity:
              name: "Test"
            governing_equation:
              latex: "x"
            analytical_derivation:
              primary_citation: "cite"
            domain_of_validity: {}
        "#,
        );

        let (errors, warnings) = validate_emc_schema(&yaml);
        assert!(errors.is_empty(), "Unexpected errors: {errors:?}");
        assert!(warnings.iter().any(|w| w.contains("identity.version")));
    }

    #[test]
    fn test_missing_governing_equation() {
        let yaml = parse_fixture(
            r#"
            emc_version: "1.0"
            emc_id: "test/example"
            identity:
              name: "Test"
            analytical_derivation:
              primary_citation: "cite"
            domain_of_validity: {}
        "#,
        );

        let (errors, _) = validate_emc_schema(&yaml);
        assert!(errors.iter().any(|e| e.contains("governing_equation")));
    }

    #[test]
    fn test_governing_equation_no_latex_or_plaintext() {
        let yaml = parse_fixture(
            r#"
            emc_version: "1.0"
            emc_id: "test/example"
            identity:
              name: "Test"
            governing_equation:
              description: "some equation"
            analytical_derivation:
              primary_citation: "cite"
            domain_of_validity: {}
        "#,
        );

        let (errors, _) = validate_emc_schema(&yaml);
        assert!(errors
            .iter()
            .any(|e| e.contains("latex") || e.contains("plain_text")));
    }

    #[test]
    fn test_governing_equation_with_plain_text_only() {
        let yaml = parse_fixture(
            r#"
            emc_version: "1.0"
            emc_id: "test/example"
            identity:
              name: "Test"
              version: "1.0"
            governing_equation:
              plain_text: "E equals mc squared"
            analytical_derivation:
              primary_citation: "cite"
            domain_of_validity: {}
        "#,
        );

        let (errors, _) = validate_emc_schema(&yaml);
        // Should not have the latex/plain_text error
        assert!(!errors
            .iter()
            .any(|e| e.contains("latex") || e.contains("plain_text")));
    }

    #[test]
    fn test_missing_analytical_derivation() {
        let yaml = parse_fixture(
            r#"
            emc_version: "1.0"
            emc_id: "test/example"
            identity:
              name: "Test"
            governing_equation:
              latex: "x"
            domain_of_validity: {}
        "#,
        );

        let (errors, _) = validate_emc_schema(&yaml);
        assert!(errors.iter().any(|e| e.contains("analytical_derivation")));
    }

    #[test]
    fn test_missing_primary_citation() {
        let yaml = parse_fixture(
            r#"
            emc_version: "1.0"
            emc_id: "test/example"
            identity:
              name: "Test"
            governing_equation:
              latex: "x"
            analytical_derivation:
              secondary_sources: []
            domain_of_validity: {}
        "#,
        );

        let (errors, _) = validate_emc_schema(&yaml);
        assert!(errors.iter().any(|e| e.contains("primary_citation")));
    }

    #[test]
    fn test_missing_domain_of_validity() {
        let yaml = parse_fixture(
            r#"
            emc_version: "1.0"
            emc_id: "test/example"
            identity:
              name: "Test"
            governing_equation:
              latex: "x"
            analytical_derivation:
              primary_citation: "cite"
        "#,
        );

        let (errors, _) = validate_emc_schema(&yaml);
        assert!(errors.iter().any(|e| e.contains("domain_of_validity")));
    }

    #[test]
    fn test_edd_sections_warnings() {
        let yaml = minimal_valid_emc();
        let (_, warnings) = validate_emc_schema(&yaml);
        assert!(warnings.iter().any(|w| w.contains("verification_tests")));
        assert!(warnings
            .iter()
            .any(|w| w.contains("falsification_criteria")));
    }

    #[test]
    fn test_with_verification_tests_no_warning() {
        let yaml = parse_fixture(
            r#"
            emc_version: "1.0"
            emc_id: "test/example"
            identity:
              name: "Test"
              version: "1.0"
            governing_equation:
              latex: "x"
            analytical_derivation:
              primary_citation: "cite"
            domain_of_validity: {}
            verification_tests:
              - id: test1
                description: "Test"
        "#,
        );

        let (_, warnings) = validate_emc_schema(&yaml);
        assert!(!warnings.iter().any(|w| w.contains("verification_tests")));
    }

    #[test]
    fn test_with_falsification_criteria_no_warning() {
        let yaml = parse_fixture(
            r#"
            emc_version: "1.0"
            emc_id: "test/example"
            identity:
              name: "Test"
              version: "1.0"
            governing_equation:
              latex: "x"
            analytical_derivation:
              primary_citation: "cite"
            domain_of_validity: {}
            falsification_criteria:
              - id: crit1
                description: "Criteria"
        "#,
        );

        let (_, warnings) = validate_emc_schema(&yaml);
        assert!(!warnings
            .iter()
            .any(|w| w.contains("falsification_criteria")));
    }

    #[test]
    fn test_empty_yaml() {
        // A null document is built directly: it is the input under test,
        // not a fallback for a fixture that failed to parse.
        let yaml = Value::Null;
        let (errors, _) = validate_emc_schema(&yaml);
        // Should have errors for all required fields
        assert!(errors.len() >= 6);
    }

    #[test]
    fn test_multiple_errors() {
        let yaml = parse_fixture(
            r#"
            identity:
              version: "1.0"
        "#,
        );

        let (errors, _) = validate_emc_schema(&yaml);
        // Should have multiple errors
        assert!(errors.len() >= 5);
        assert!(errors.iter().any(|e| e.contains("emc_version")));
        assert!(errors.iter().any(|e| e.contains("emc_id")));
        assert!(errors.iter().any(|e| e.contains("governing_equation")));
        assert!(errors.iter().any(|e| e.contains("analytical_derivation")));
        assert!(errors.iter().any(|e| e.contains("domain_of_validity")));
        // `identity` is present but has no `name`, so the nested check fires too.
        assert!(errors.iter().any(|e| e.contains("identity.name")));
    }
}