Skip to main content

yaml_schema/
loader.rs

1//! The loader module loads the YAML schema from a file into the in-memory model
2
3use std::time::Duration;
4
5use reqwest::Url;
6use reqwest::blocking::Client;
7use saphyr::LoadableYamlNode;
8use saphyr::MarkedYaml;
9use saphyr::Scalar;
10use saphyr::YamlData;
11
12use crate::Error;
13use crate::Number;
14use crate::Result;
15use crate::RootSchema;
16use crate::schemas::BooleanOrSchema;
17use crate::schemas::YamlSchema;
18use crate::utils::format_marker;
19use crate::utils::try_unwrap_saphyr_scalar;
20
21/// Load a YAML schema from a file.
22/// Delegates to the `load_from_doc` function to load the schema from the first document.
23pub fn load_file<'f, S: AsRef<str>>(path: S) -> Result<RootSchema<'f>> {
24    let fs_metadata = std::fs::metadata(path.as_ref())?;
25    if !fs_metadata.is_file() {
26        return Err(Error::FileNotFound(path.as_ref().to_string()));
27    }
28    let s = std::fs::read_to_string(path.as_ref())?;
29    load_from_str(&s)
30}
31
32/// Load a YAML schema from a &str.
33pub fn load_from_str<'f>(s: &str) -> Result<RootSchema<'f>> {
34    let docs = MarkedYaml::load_from_str(s).map_err(Error::YamlParsingError)?;
35    load_from_docs(docs)
36}
37
38/// Load a RootSchema from Vec of docs.
39pub fn load_from_docs<'f>(docs: Vec<MarkedYaml<'f>>) -> Result<RootSchema<'f>> {
40    let Some(first_doc) = docs.first() else {
41        return Ok(RootSchema::empty());
42    };
43    load_from_doc(first_doc)
44}
45
46/// Load a YAML schema from a document. Basically just a wrapper around the TryFrom<&MarkedYaml<'_>> for RootSchema.
47pub fn load_from_doc<'f>(doc: &MarkedYaml<'f>) -> Result<RootSchema<'f>> {
48    RootSchema::try_from(doc)
49}
50
51/// Error type for URL loading operations
52#[derive(thiserror::Error, Debug)]
53pub enum UrlLoadError {
54    #[error("Failed to download from URL: {0}")]
55    DownloadError(#[from] reqwest::Error),
56
57    #[error("Failed to parse URL: {0}")]
58    ParseUrlError(#[from] url::ParseError),
59
60    #[error("Failed to parse YAML: {0}")]
61    ParseError(#[from] saphyr::ScanError),
62
63    #[error("No YAML documents found in the downloaded content")]
64    NoDocuments,
65}
66
67impl From<reqwest::Error> for crate::Error {
68    fn from(value: reqwest::Error) -> Self {
69        crate::Error::UrlLoadError(UrlLoadError::DownloadError(value))
70    }
71}
72
73/// Downloads a YAML schema from a URL and parses it into a YamlSchema
74///
75/// # Arguments
76/// * `url` - The URL to download the YAML schema from
77/// * `timeout_seconds` - Optional timeout in seconds for the HTTP request (default: 30 seconds)
78///
79/// # Returns
80/// A `Result` containing the parsed `YamlSchema` if successful, or an error if the download or parsing fails.
81///
82/// # Example
83/// ```no_run
84/// use yaml_schema::loader::download_from_url;
85///
86/// let schema = download_from_url("https://example.com/schema.yaml", None).unwrap();
87/// ```
88pub fn download_from_url(url_string: &str, timeout_seconds: Option<u64>) -> Result<RootSchema<'_>> {
89    // Create a new HTTP client with a custom timeout
90    let timeout = Duration::from_secs(timeout_seconds.unwrap_or(30));
91    let client = Client::builder()
92        .timeout(timeout)
93        .use_native_tls()
94        .build()?;
95
96    let url = Url::parse(url_string).map_err(|e| Error::UrlLoadError(e.into()))?;
97
98    // Download the YAML content
99    let response = client.get(url).send()?;
100    if !response.status().is_success() {
101        match response.error_for_status() {
102            Ok(_) => unreachable!(),
103            Err(e) => return Err(e.into()),
104        }
105    }
106
107    let yaml_content = response.text()?;
108
109    // Parse the YAML content
110    let docs = MarkedYaml::load_from_str(&yaml_content).map_err(UrlLoadError::ParseError)?;
111
112    match docs.first() {
113        Some(doc) => load_from_doc(doc),
114        None => Err(UrlLoadError::NoDocuments.into()),
115    }
116}
117
118pub fn marked_yaml_to_string<S: Into<String> + Copy>(yaml: &MarkedYaml, msg: S) -> Result<String> {
119    if let YamlData::Value(Scalar::String(s)) = &yaml.data {
120        Ok(s.to_string())
121    } else {
122        Err(Error::ExpectedScalar(msg.into()))
123    }
124}
125
126pub fn load_array_of_schemas_marked<'f>(value: &MarkedYaml<'f>) -> Result<Vec<YamlSchema<'f>>> {
127    if let YamlData::Sequence(values) = &value.data {
128        values
129            .iter()
130            .map(|v| {
131                if v.is_mapping() {
132                    v.try_into()
133                } else {
134                    Err(generic_error!("Expected a mapping, but got: {:?}", v))
135                }
136            })
137            .collect::<Result<Vec<YamlSchema>>>()
138    } else {
139        Err(generic_error!(
140            "{} Expected a sequence, but got: {:?}",
141            format_marker(&value.span.start),
142            value
143        ))
144    }
145}
146
147pub fn load_integer(value: &saphyr::Yaml) -> Result<i64> {
148    let scalar = try_unwrap_saphyr_scalar(value)?;
149    match scalar {
150        saphyr::Scalar::Integer(i) => Ok(*i),
151        _ => Err(unsupported_type!(
152            "Expected type: integer, but got: {:?}",
153            value
154        )),
155    }
156}
157
158pub fn load_integer_marked(value: &MarkedYaml) -> Result<i64> {
159    if let YamlData::Value(Scalar::Integer(i)) = &value.data {
160        Ok(*i)
161    } else {
162        Err(generic_error!(
163            "{} Expected integer value, got: {:?}",
164            format_marker(&value.span.start),
165            value
166        ))
167    }
168}
169
170pub fn load_number(value: &saphyr::Yaml) -> Result<Number> {
171    let scalar = try_unwrap_saphyr_scalar(value)?;
172    match scalar {
173        Scalar::Integer(i) => Ok(Number::integer(*i)),
174        Scalar::FloatingPoint(o) => Ok(Number::float(o.into_inner())),
175        _ => Err(unsupported_type!(
176            "Expected type: integer or float, but got: {:?}",
177            value
178        )),
179    }
180}
181
182pub fn load_array_items_marked<'input>(
183    value: &MarkedYaml<'input>,
184) -> Result<BooleanOrSchema<'input>> {
185    match &value.data {
186        YamlData::Value(scalar) => {
187            if let Scalar::Boolean(b) = scalar {
188                Ok(BooleanOrSchema::Boolean(*b))
189            } else {
190                Err(generic_error!(
191                    "array: boolean or mapping with type or $ref, but got: {:?}",
192                    value
193                ))
194            }
195        }
196        YamlData::Mapping(_mapping) => {
197            let schema: YamlSchema = value.try_into()?;
198            Ok(BooleanOrSchema::schema(schema))
199        }
200        _ => Err(generic_error!(
201            "array: boolean or mapping with type or $ref, but got: {:?}",
202            value
203        )),
204    }
205}
206
#[cfg(test)]
mod tests {
    use regex::Regex;
    use saphyr::LoadableYamlNode;
    use saphyr::MarkedYaml;

    use crate::ConstValue;
    use crate::Engine;
    use crate::Result;
    use crate::Validator as _;
    use crate::loader;
    use crate::schemas::EnumSchema;
    use crate::schemas::IntegerSchema;
    use crate::schemas::SchemaType;
    use crate::schemas::StringSchema;

    use super::*;

    /// A bare `true` document loads as a boolean literal schema.
    #[test]
    fn test_boolean_literal_true() {
        let doc = MarkedYaml::value_from_str("true");
        let root_schema = load_from_doc(&doc).unwrap();
        assert_eq!(root_schema.schema, YamlSchema::BooleanLiteral(true));
    }

    /// A bare `false` document loads as a boolean literal schema.
    #[test]
    fn test_boolean_literal_false() {
        let doc = MarkedYaml::value_from_str("false");
        let root_schema = load_from_doc(&doc).unwrap();
        assert_eq!(root_schema.schema, YamlSchema::BooleanLiteral(false));
    }

    /// `const` with a string value.
    #[test]
    fn test_const_string() {
        let docs = MarkedYaml::load_from_str("const: string value").unwrap();
        let first_doc = docs.first().unwrap();
        let root_schema = load_from_doc(first_doc).unwrap();
        match &root_schema.schema {
            YamlSchema::Subschema(subschema) => {
                assert_eq!(subschema.r#const, Some(ConstValue::string("string value")));
            }
            other => panic!("Expected Subschema, but got: {:?}", other),
        }
    }

    /// `const` with an integer value.
    #[test]
    fn test_const_integer() {
        let docs = MarkedYaml::load_from_str("const: 42").unwrap();
        let first_doc = docs.first().unwrap();
        let root_schema = load_from_doc(first_doc).unwrap();
        match &root_schema.schema {
            YamlSchema::Subschema(subschema) => {
                assert_eq!(subschema.r#const, Some(ConstValue::integer(42)));
            }
            other => panic!("Expected Subschema, but got: {:?}", other),
        }
    }

    /// `const` with a sequence value.
    #[test]
    fn test_const_array() {
        let docs = MarkedYaml::load_from_str("const: [1, 2]").unwrap();
        let first_doc = docs.first().unwrap();
        let root_schema = load_from_doc(first_doc).unwrap();
        let subschema = match &root_schema.schema {
            YamlSchema::Subschema(inner) => inner,
            other => panic!("Expected Subschema, but got: {:?}", other),
        };
        let expected = ConstValue::Array(vec![ConstValue::integer(1), ConstValue::integer(2)]);
        assert_eq!(subschema.r#const, Some(expected));
    }

    /// `const` with a mapping value.
    #[test]
    fn test_const_object() {
        let docs = MarkedYaml::load_from_str("const:\n  a: 1").unwrap();
        let first_doc = docs.first().unwrap();
        let root_schema = load_from_doc(first_doc).unwrap();
        let subschema = match &root_schema.schema {
            YamlSchema::Subschema(inner) => inner,
            other => panic!("Expected Subschema, but got: {:?}", other),
        };
        let mut expected_obj = hashlink::LinkedHashMap::new();
        expected_obj.insert("a".into(), ConstValue::integer(1));
        assert_eq!(subschema.r#const, Some(ConstValue::Object(expected_obj)));
    }

    /// An unknown `type` name is rejected with a descriptive error.
    #[test]
    fn test_type_foo_should_error() {
        let docs = MarkedYaml::load_from_str("type: foo").unwrap();
        let first_doc = docs.first().unwrap();
        let outcome = load_from_doc(first_doc);
        assert!(outcome.is_err());
        let message = outcome.unwrap_err().to_string();
        assert_eq!(
            message,
            "Unsupported type: Expected type: string, number, integer, object, array, boolean, or null, but got: foo"
        );
    }

    /// `type: string` is recorded on the subschema.
    #[test]
    fn test_type_string() {
        let docs = MarkedYaml::load_from_str("type: string").unwrap();
        let first_doc = docs.first().unwrap();
        let root_schema = load_from_doc(first_doc).unwrap();
        match &root_schema.schema {
            YamlSchema::Subschema(subschema) => {
                assert_eq!(subschema.r#type, SchemaType::new("string"));
            }
            other => panic!("Expected Subschema, but got: {:?}", other),
        }
    }

    /// An object schema with a described string property.
    #[test]
    fn test_type_object_with_string_with_description() {
        let yaml = r#"
            type: object
            properties:
                name:
                    type: string
                    description: This is a description
        "#;
        let root_schema = loader::load_from_str(yaml).expect("Failed to load schema");
        let subschema = match &root_schema.schema {
            YamlSchema::Subschema(inner) => inner,
            other => panic!("Expected Subschema, but got: {:?}", other),
        };
        let object_schema = subschema.object_schema.as_ref().unwrap_or_else(|| {
            panic!(
                "Expected ObjectSchema, but got: {:?}",
                &subschema.object_schema
            )
        });
        let properties = object_schema
            .properties
            .as_ref()
            .expect("Expected properties");
        let name_property = properties.get("name").expect("Expected `name` property");

        let name_property_schema = match name_property {
            YamlSchema::Subschema(inner) => inner,
            other => panic!(
                "Expected Subschema for `name` property, but got: {:?}",
                other
            ),
        };
        assert_eq!(name_property_schema.r#type, SchemaType::new("string"));
        assert_eq!(
            name_property_schema.string_schema,
            Some(StringSchema::default())
        );
        assert_eq!(
            name_property_schema.metadata_and_annotations.description,
            Some("This is a description".to_string())
        );
    }

    /// A string schema carrying a regex `pattern`.
    #[test]
    fn test_type_string_with_pattern() {
        let yaml = r#"
        type: string
        pattern: "^(\\([0-9]{3}\\))?[0-9]{3}-[0-9]{4}$"
        "#;
        let root_schema = loader::load_from_str(yaml).unwrap();
        let subschema = match &root_schema.schema {
            YamlSchema::Subschema(inner) => inner,
            other => panic!("Expected Subschema, but got: {:?}", other),
        };
        assert_eq!(subschema.r#type, SchemaType::new("string"));

        let pattern = Regex::new("^(\\([0-9]{3}\\))?[0-9]{3}-[0-9]{4}$").unwrap();
        let expected = StringSchema {
            pattern: Some(pattern),
            ..Default::default()
        };
        assert_eq!(subschema.string_schema, Some(expected));
    }

    /// `type: integer` yields a default integer schema.
    #[test]
    fn test_integer_schema() {
        let root_schema = loader::load_from_str("type: integer").unwrap();
        match &root_schema.schema {
            YamlSchema::Subschema(subschema) => {
                assert_eq!(subschema.integer_schema, Some(IntegerSchema::default()));
            }
            other => panic!("Expected Subschema, but got: {:?}", other),
        }
    }

    /// An `enum` of plain strings.
    #[test]
    fn test_enum() {
        let yaml = r#"
        enum:
          - foo
          - bar
          - baz
        "#;
        let root_schema = loader::load_from_str(yaml).unwrap();
        let enum_values: Vec<ConstValue> = ["foo", "bar", "baz"]
            .into_iter()
            .map(|name| ConstValue::string(name.to_string()))
            .collect();
        let subschema = match &root_schema.schema {
            YamlSchema::Subschema(inner) => inner,
            other => panic!("Expected Subschema, but got: {:?}", other),
        };
        let expected = EnumSchema {
            r#enum: enum_values,
        };
        assert_eq!(subschema.r#enum, Some(expected));
    }

    /// An `enum` mixing strings, null and an integer, with no `type` given.
    #[test]
    fn test_enum_without_type() {
        let yaml = r#"
            enum:
              - red
              - amber
              - green
              - null
              - 42
            "#;
        let root_schema = loader::load_from_str(yaml).unwrap();
        let enum_values = vec![
            ConstValue::string("red".to_string()),
            ConstValue::string("amber".to_string()),
            ConstValue::string("green".to_string()),
            ConstValue::null(),
            ConstValue::integer(42),
        ];
        let subschema = match &root_schema.schema {
            YamlSchema::Subschema(inner) => inner,
            other => panic!("Expected Subschema, but got: {:?}", other),
        };
        let expected = EnumSchema {
            r#enum: enum_values,
        };
        assert_eq!(subschema.r#enum, Some(expected));
    }

    /// `$defs` entries are loaded and retrievable by name.
    #[test]
    fn test_defs() {
        let yaml = r##"
            $defs:
              foo:
                type: boolean
            "##;
        let root_schema = loader::load_from_str(yaml).unwrap();
        let subschema = match &root_schema.schema {
            YamlSchema::Subschema(inner) => inner,
            other => panic!("Expected Subschema, but got: {:?}", other),
        };
        assert!(subschema.defs.is_some());
        let defs = subschema
            .defs
            .as_ref()
            .unwrap_or_else(|| panic!("Expected defs, but got: {:?}", &subschema.defs));
        assert_eq!(defs.len(), 1);
        assert_eq!(defs.get("foo"), Some(&YamlSchema::typed_boolean()));
    }

    /// `oneOf` containing a `$ref` into `$defs`, then validation of a value through it.
    #[test]
    fn test_one_of_with_ref() {
        let yaml = r##"
            $defs:
              foo:
                type: boolean
            oneOf:
              - type: string
              - $ref: "#/$defs/foo"
            "##;
        let root_schema = loader::load_from_str(yaml).unwrap();
        let subschema = match &root_schema.schema {
            YamlSchema::Subschema(inner) => inner,
            other => panic!("Expected Subschema, but got: {:?}", other),
        };
        assert!(subschema.one_of.is_some());
        let one_of = subschema
            .one_of
            .as_ref()
            .unwrap_or_else(|| panic!("Expected oneOf, but got: {:?}", &subschema.one_of));
        assert_eq!(one_of.one_of.len(), 2);
        assert_eq!(
            one_of.one_of[0],
            YamlSchema::typed_string(StringSchema::default()),
            "one_of[0] should be a string schema"
        );
        assert_eq!(
            one_of.one_of[1],
            YamlSchema::ref_str("#/$defs/foo"),
            "one_of[1] should be a reference to '#/$defs/foo'"
        );

        let s = r#"
        false
        "#;
        let docs = MarkedYaml::load_from_str(s).unwrap();
        let value = docs.first().unwrap();
        let context = crate::Context::with_root_schema(&root_schema, true);
        let result = root_schema.validate(&context, value);
        assert!(result.is_ok());
        assert!(!context.has_errors());
    }

    /// The project's own schema file validates against itself.
    #[test]
    fn test_self_validate() -> Result<()> {
        let schema_filename = "yaml-schema.yaml";
        let root_schema = loader::load_file(schema_filename).inspect_err(|e| {
            eprintln!("Failed to read YAML schema file: {schema_filename}");
            log::error!("{e}");
        })?;

        let yaml_contents = std::fs::read_to_string(schema_filename)?;

        let context = Engine::evaluate(&root_schema, &yaml_contents, false)?;
        if context.has_errors() {
            context
                .errors
                .borrow()
                .iter()
                .for_each(|error| eprintln!("{error}"));
        }
        assert!(!context.has_errors());

        Ok(())
    }

    /// Downloads the published schema and checks the local schema file against it.
    #[test]
    fn test_download_from_url() {
        // This is an integration test that requires internet access
        if std::env::var("CI").is_ok() {
            // Skip in CI environments if needed
            return;
        }

        let outcome = std::panic::catch_unwind(|| {
            let url = "https://yaml-schema.net/yaml-schema.yaml";

            // Verify the download and parse was successful
            let root_schema = download_from_url(url, Some(10))
                .expect("Failed to download and parse YAML schema from URL");

            // Verify we got a valid schema with expected properties
            let subschema = match &root_schema.schema {
                YamlSchema::Subschema(inner) => inner,
                other => panic!("Expected Subschema, but got: {:?}", other),
            };
            assert_eq!(subschema.r#type, SchemaType::new("object"));
            assert!(subschema.object_schema.is_some());

            // Verify the local schema is valid against the downloaded schema
            if let Ok(local_schema) = std::fs::read_to_string("yaml-schema.yaml") {
                match Engine::evaluate(&root_schema, &local_schema, false) {
                    Ok(ctx) => {
                        if ctx.has_errors() {
                            for error in ctx.errors.borrow().iter() {
                                eprintln!("Validation error: {}", error);
                            }
                            panic!("Downloaded schema failed validation against local schema");
                        }
                    }
                    Err(e) => panic!("Failed to validate downloaded schema: {}", e),
                }
            }
        });

        let Err(payload) = outcome else {
            return;
        };

        // If the test fails due to network issues, mark it as passed with a warning
        let network_failure = payload.downcast_ref::<String>().is_some_and(|message| {
            message.contains("Network is unreachable")
                || message.contains("failed to lookup address information")
        });
        if network_failure {
            eprintln!("Warning: Network unreachable, skipping download test");
            return;
        }

        // Re-panic if the failure wasn't network-related
        std::panic::resume_unwind(payload);
    }
}