Skip to main content

simular/cli/
schema.rs

1//! EMC schema validation.
2//!
3//! This module contains the EMC YAML schema validation logic.
4//! Extracted to enable comprehensive testing of validation rules.
5
6/// Validate an EMC YAML document against the schema.
7///
8/// Returns a tuple of (errors, warnings).
9/// Errors indicate schema violations that must be fixed.
10/// Warnings indicate missing recommended fields.
11///
12/// # Arguments
13///
14/// * `yaml` - The parsed YAML document to validate
15///
16/// # Returns
17///
18/// A tuple of (errors, warnings) vectors.
19#[must_use]
20pub fn validate_emc_schema(yaml: &serde_yaml::Value) -> (Vec<String>, Vec<String>) {
21    let mut errors = Vec::new();
22    let mut warnings = Vec::new();
23
24    // Required top-level fields
25    let required = [
26        "emc_version",
27        "emc_id",
28        "identity",
29        "governing_equation",
30        "analytical_derivation",
31        "domain_of_validity",
32    ];
33
34    for field in required {
35        if yaml.get(field).is_none() {
36            errors.push(format!("Missing required field: {field}"));
37        }
38    }
39
40    // Validate identity section
41    if let Some(identity) = yaml.get("identity") {
42        if identity.get("name").is_none() {
43            errors.push("Missing required field: identity.name".to_string());
44        }
45        if identity.get("version").is_none() {
46            warnings.push("Missing recommended field: identity.version".to_string());
47        }
48    }
49
50    // Validate governing_equation section
51    if let Some(eq) = yaml.get("governing_equation") {
52        if eq.get("latex").is_none() && eq.get("plain_text").is_none() {
53            errors.push("governing_equation must have 'latex' or 'plain_text'".to_string());
54        }
55    }
56
57    // Validate analytical_derivation section
58    if let Some(deriv) = yaml.get("analytical_derivation") {
59        if deriv.get("primary_citation").is_none() {
60            errors.push("Missing: analytical_derivation.primary_citation".to_string());
61        }
62    }
63
64    // EDD-required sections (warnings only, not hard errors)
65    if yaml.get("verification_tests").is_none() {
66        warnings.push("Missing EDD-required section: verification_tests".to_string());
67    }
68    if yaml.get("falsification_criteria").is_none() {
69        warnings.push("Missing EDD-required section: falsification_criteria".to_string());
70    }
71
72    (errors, warnings)
73}
74
#[cfg(test)]
mod tests {
    use super::*;
    use serde_yaml::Value;

    /// Outcome of a test that parses a YAML fixture.
    ///
    /// Returning the parse error (instead of substituting `Value::Null`) makes
    /// a malformed fixture fail the test rather than letting "missing field"
    /// assertions pass for the wrong reason.
    type TestResult = Result<(), serde_yaml::Error>;

    /// Parse a YAML fixture, handing any parse error back to the calling test.
    fn parse(source: &str) -> Result<Value, serde_yaml::Error> {
        serde_yaml::from_str(source)
    }

    /// Smallest document that satisfies every hard requirement; it omits the
    /// EDD sections, so validating it yields warnings but no errors.
    fn minimal_valid_emc() -> Result<Value, serde_yaml::Error> {
        parse(
            r#"
            emc_version: "1.0"
            emc_id: "test/example"
            identity:
              name: "Test EMC"
              version: "1.0.0"
            governing_equation:
              latex: "E = mc^2"
            analytical_derivation:
              primary_citation: "Einstein, A. (1905)"
            domain_of_validity:
              description: "All domains"
        "#,
        )
    }

    #[test]
    fn test_valid_emc_no_errors() -> TestResult {
        let yaml = minimal_valid_emc()?;
        let (errors, warnings) = validate_emc_schema(&yaml);
        assert!(errors.is_empty(), "Unexpected errors: {errors:?}");
        // Warnings for missing EDD sections are expected
        assert_eq!(warnings.len(), 2);
        Ok(())
    }

    #[test]
    fn test_missing_emc_version() -> TestResult {
        let yaml = parse(
            r#"
            emc_id: "test/example"
            identity:
              name: "Test"
            governing_equation:
              latex: "x"
            analytical_derivation:
              primary_citation: "cite"
            domain_of_validity: {}
        "#,
        )?;

        let (errors, _) = validate_emc_schema(&yaml);
        assert!(errors.iter().any(|e| e.contains("emc_version")));
        Ok(())
    }

    #[test]
    fn test_missing_emc_id() -> TestResult {
        let yaml = parse(
            r#"
            emc_version: "1.0"
            identity:
              name: "Test"
            governing_equation:
              latex: "x"
            analytical_derivation:
              primary_citation: "cite"
            domain_of_validity: {}
        "#,
        )?;

        let (errors, _) = validate_emc_schema(&yaml);
        assert!(errors.iter().any(|e| e.contains("emc_id")));
        Ok(())
    }

    #[test]
    fn test_missing_identity() -> TestResult {
        let yaml = parse(
            r#"
            emc_version: "1.0"
            emc_id: "test/example"
            governing_equation:
              latex: "x"
            analytical_derivation:
              primary_citation: "cite"
            domain_of_validity: {}
        "#,
        )?;

        let (errors, _) = validate_emc_schema(&yaml);
        assert!(errors.iter().any(|e| e.contains("identity")));
        Ok(())
    }

    #[test]
    fn test_missing_identity_name() -> TestResult {
        let yaml = parse(
            r#"
            emc_version: "1.0"
            emc_id: "test/example"
            identity:
              version: "1.0.0"
            governing_equation:
              latex: "x"
            analytical_derivation:
              primary_citation: "cite"
            domain_of_validity: {}
        "#,
        )?;

        let (errors, _) = validate_emc_schema(&yaml);
        assert!(errors.iter().any(|e| e.contains("identity.name")));
        Ok(())
    }

    #[test]
    fn test_missing_identity_version_warning() -> TestResult {
        let yaml = parse(
            r#"
            emc_version: "1.0"
            emc_id: "test/example"
            identity:
              name: "Test"
            governing_equation:
              latex: "x"
            analytical_derivation:
              primary_citation: "cite"
            domain_of_validity: {}
        "#,
        )?;

        let (errors, warnings) = validate_emc_schema(&yaml);
        assert!(errors.is_empty());
        assert!(warnings.iter().any(|w| w.contains("identity.version")));
        Ok(())
    }

    #[test]
    fn test_missing_governing_equation() -> TestResult {
        let yaml = parse(
            r#"
            emc_version: "1.0"
            emc_id: "test/example"
            identity:
              name: "Test"
            analytical_derivation:
              primary_citation: "cite"
            domain_of_validity: {}
        "#,
        )?;

        let (errors, _) = validate_emc_schema(&yaml);
        assert!(errors.iter().any(|e| e.contains("governing_equation")));
        Ok(())
    }

    #[test]
    fn test_governing_equation_no_latex_or_plaintext() -> TestResult {
        let yaml = parse(
            r#"
            emc_version: "1.0"
            emc_id: "test/example"
            identity:
              name: "Test"
            governing_equation:
              description: "some equation"
            analytical_derivation:
              primary_citation: "cite"
            domain_of_validity: {}
        "#,
        )?;

        let (errors, _) = validate_emc_schema(&yaml);
        assert!(errors
            .iter()
            .any(|e| e.contains("latex") || e.contains("plain_text")));
        Ok(())
    }

    #[test]
    fn test_governing_equation_with_plain_text_only() -> TestResult {
        let yaml = parse(
            r#"
            emc_version: "1.0"
            emc_id: "test/example"
            identity:
              name: "Test"
              version: "1.0"
            governing_equation:
              plain_text: "E equals mc squared"
            analytical_derivation:
              primary_citation: "cite"
            domain_of_validity: {}
        "#,
        )?;

        let (errors, _) = validate_emc_schema(&yaml);
        // Should not have the latex/plain_text error
        assert!(!errors
            .iter()
            .any(|e| e.contains("latex") || e.contains("plain_text")));
        Ok(())
    }

    #[test]
    fn test_missing_analytical_derivation() -> TestResult {
        let yaml = parse(
            r#"
            emc_version: "1.0"
            emc_id: "test/example"
            identity:
              name: "Test"
            governing_equation:
              latex: "x"
            domain_of_validity: {}
        "#,
        )?;

        let (errors, _) = validate_emc_schema(&yaml);
        assert!(errors.iter().any(|e| e.contains("analytical_derivation")));
        Ok(())
    }

    #[test]
    fn test_missing_primary_citation() -> TestResult {
        let yaml = parse(
            r#"
            emc_version: "1.0"
            emc_id: "test/example"
            identity:
              name: "Test"
            governing_equation:
              latex: "x"
            analytical_derivation:
              secondary_sources: []
            domain_of_validity: {}
        "#,
        )?;

        let (errors, _) = validate_emc_schema(&yaml);
        assert!(errors.iter().any(|e| e.contains("primary_citation")));
        Ok(())
    }

    #[test]
    fn test_missing_domain_of_validity() -> TestResult {
        let yaml = parse(
            r#"
            emc_version: "1.0"
            emc_id: "test/example"
            identity:
              name: "Test"
            governing_equation:
              latex: "x"
            analytical_derivation:
              primary_citation: "cite"
        "#,
        )?;

        let (errors, _) = validate_emc_schema(&yaml);
        assert!(errors.iter().any(|e| e.contains("domain_of_validity")));
        Ok(())
    }

    #[test]
    fn test_edd_sections_warnings() -> TestResult {
        let yaml = minimal_valid_emc()?;
        let (_, warnings) = validate_emc_schema(&yaml);
        assert!(warnings.iter().any(|w| w.contains("verification_tests")));
        assert!(warnings
            .iter()
            .any(|w| w.contains("falsification_criteria")));
        Ok(())
    }

    #[test]
    fn test_with_verification_tests_no_warning() -> TestResult {
        let yaml = parse(
            r#"
            emc_version: "1.0"
            emc_id: "test/example"
            identity:
              name: "Test"
              version: "1.0"
            governing_equation:
              latex: "x"
            analytical_derivation:
              primary_citation: "cite"
            domain_of_validity: {}
            verification_tests:
              - id: test1
                description: "Test"
        "#,
        )?;

        let (_, warnings) = validate_emc_schema(&yaml);
        assert!(!warnings.iter().any(|w| w.contains("verification_tests")));
        Ok(())
    }

    #[test]
    fn test_with_falsification_criteria_no_warning() -> TestResult {
        let yaml = parse(
            r#"
            emc_version: "1.0"
            emc_id: "test/example"
            identity:
              name: "Test"
              version: "1.0"
            governing_equation:
              latex: "x"
            analytical_derivation:
              primary_citation: "cite"
            domain_of_validity: {}
            falsification_criteria:
              - id: crit1
                description: "Criteria"
        "#,
        )?;

        let (_, warnings) = validate_emc_schema(&yaml);
        assert!(!warnings
            .iter()
            .any(|w| w.contains("falsification_criteria")));
        Ok(())
    }

    #[test]
    fn test_empty_yaml() {
        // A null document is built directly, so there is no fixture to parse.
        let yaml = Value::Null;
        let (errors, _) = validate_emc_schema(&yaml);
        // Should have errors for all required fields
        assert!(errors.len() >= 6);
    }

    #[test]
    fn test_multiple_errors() -> TestResult {
        let yaml = parse(
            r#"
            identity:
              version: "1.0"
        "#,
        )?;

        let (errors, _) = validate_emc_schema(&yaml);
        // Should have multiple errors
        assert!(errors.len() >= 5);
        assert!(errors.iter().any(|e| e.contains("emc_version")));
        assert!(errors.iter().any(|e| e.contains("emc_id")));
        assert!(errors.iter().any(|e| e.contains("governing_equation")));
        assert!(errors.iter().any(|e| e.contains("analytical_derivation")));
        assert!(errors.iter().any(|e| e.contains("domain_of_validity")));
        Ok(())
    }
}