yaml-schema 0.9.1

A YAML schema validator
Documentation
// A module to contain object type validation logic
use std::collections::HashSet;

use hashlink::LinkedHashMap;
use log::{debug, error};

use crate::Error;
use crate::Result;
use crate::Validator;
use crate::YamlSchema;
use crate::schemas::BooleanOrSchema;
use crate::schemas::ObjectSchema;
use crate::utils::{format_marker, format_yaml_data, scalar_to_string};
use crate::validation::Context;

impl Validator for ObjectSchema {
    /// Validates `value` as an object (a YAML mapping) against this schema.
    ///
    /// A mapping node is handed to `validate_object_mapping`. Any other node is
    /// logged and recorded as an error on `context`; the call still returns
    /// `Ok(())` in that case because the failure is reported through the context
    /// rather than through the return value.
    fn validate(&self, context: &Context, value: &saphyr::MarkedYaml) -> Result<()> {
        let data = &value.data;
        debug!("Validating object: {}", format_yaml_data(data));

        // Guard clause: anything that is not a mapping cannot be an object.
        let saphyr::YamlData::Mapping(mapping) = data else {
            let error_message = format!(
                "[ObjectSchema] {} Expected an object, but got: {data:?}",
                format_marker(&value.span.start)
            );
            error!("{error_message}");
            context.add_error(value, error_message);
            return Ok(());
        };

        self.validate_object_mapping(context, value, mapping)
    }
}

/// Validates `value` against the schema registered for `key` in `properties`, if any.
///
/// Returns `Ok(false)` when `properties` has no entry for `key`, so the caller can
/// fall through to other keywords. Returns `Ok(true)` when an entry exists and its
/// validation returned `Ok`; in that case `key` is recorded as an evaluated
/// property only if the number of errors on `context` did not grow during the call.
/// An `Err` from the property's schema is propagated unchanged.
pub fn try_validate_value_against_properties(
    context: &Context,
    key: &String,
    value: &saphyr::MarkedYaml,
    properties: &LinkedHashMap<String, YamlSchema>,
) -> Result<bool> {
    let sub_context = context.append_path(key);
    let Some(schema) = properties.get(key) else {
        return Ok(false);
    };

    debug!("Validating property '{key}' with schema: {schema}");
    let errors_before = context.errors.borrow().len();
    schema.validate(&sub_context, value)?;

    // Only a validation that added no errors counts as having evaluated the key.
    let errors_after = context.errors.borrow().len();
    if errors_after == errors_before {
        context.record_evaluated_property(key);
    }
    Ok(true)
}

/// Applies an object type's `additional_properties` rule to a single property.
///
/// Returns `Ok(false)` only when additional properties are forbidden
/// (`Boolean(false)`); an error naming `key` is then added to `context`.
/// Returns `Ok(true)` when additional properties are allowed outright
/// (`Boolean(true)`), or when validating `value` against the
/// additional-properties schema returned `Ok`. An `Err` from that schema is
/// propagated unchanged.
pub fn try_validate_value_against_additional_properties(
    context: &Context,
    key: &String,
    value: &saphyr::MarkedYaml,
    additional_properties: &BooleanOrSchema,
) -> Result<bool> {
    let sub_context = context.append_path(key);

    match additional_properties {
        // `additional_properties: true` accepts any extra property as-is.
        BooleanOrSchema::Boolean(true) => Ok(true),
        // `additional_properties: false` rejects every extra property;
        // the `false` result is the fail-fast signal for the caller.
        BooleanOrSchema::Boolean(false) => {
            context.add_error(
                value,
                format!("Additional property '{key}' is not allowed!"),
            );
            Ok(false)
        }
        // A schema means the extra property's value must validate against it.
        BooleanOrSchema::Schema(schema) => {
            schema.validate(&sub_context, value)?;
            Ok(true)
        }
    }
}

impl ObjectSchema {
    fn validate_object_mapping<'r>(
        &self,
        context: &Context<'r>,
        object: &saphyr::MarkedYaml,
        mapping: &saphyr::AnnotatedMapping<'r, saphyr::MarkedYaml<'r>>,
    ) -> Result<()> {
        for (k, value) in mapping {
            let key_string = match &k.data {
                saphyr::YamlData::Value(scalar) => scalar_to_string(scalar),
                v => {
                    return Err(expected_scalar!(
                        "[{}] Expected a scalar key, got: {:?}",
                        format_marker(&k.span.start),
                        v
                    ));
                }
            };
            let span = &k.span;
            debug!("validate_object_mapping: key: \"{key_string}\"");
            debug!(
                "validate_object_mapping: span.start: {:?}",
                format_marker(&span.start)
            );
            debug!(
                "validate_object_mapping: span.end: {:?}",
                format_marker(&span.end)
            );

            // Per JSON Schema spec (section 6), `$schema` is a meta-property
            // used by tooling to identify the schema. Skip it during validation.
            if key_string == "$schema" {
                continue;
            }

            // `properties` and `patternProperties` both apply when they match (JSON Schema 2020-12).
            let covered_by_properties = if let Some(properties) = &self.properties {
                try_validate_value_against_properties(context, &key_string, value, properties)?
            } else {
                false
            };

            let mut matched_pattern_property = false;
            if let Some(pattern_properties) = &self.pattern_properties {
                let pattern_context = context.append_path(&key_string);
                let err_before_patterns = context.errors.borrow().len();
                for pp in pattern_properties {
                    log::debug!("pattern: {}", pp.regex.as_str());
                    if pp.regex.is_match(key_string.as_ref()) {
                        matched_pattern_property = true;
                        pp.schema.validate(&pattern_context, value)?;
                    }
                }
                if matched_pattern_property && context.errors.borrow().len() == err_before_patterns
                {
                    context.record_evaluated_property(&key_string);
                }
            }

            // additionalProperties applies only when the name is not in `properties` and matches
            // no `patternProperties` regex (JSON Schema 2020-12).
            if !covered_by_properties
                && !matched_pattern_property
                && let Some(additional_properties) = &self.additional_properties
            {
                let err_before_add = context.errors.borrow().len();
                try_validate_value_against_additional_properties(
                    context,
                    &key_string,
                    value,
                    additional_properties,
                )?;
                if context.errors.borrow().len() == err_before_add {
                    context.record_evaluated_property(&key_string);
                }
            }
            // Finally, we check if it matches property_names
            if let Some(property_names) = &self.property_names {
                if let Some(re) = &property_names.pattern {
                    debug!("Regex for property names: {}", re.as_str());
                    if !re.is_match(key_string.as_ref()) {
                        context.add_error(
                            k,
                            format!(
                                "Property name '{}' does not match pattern '{}'",
                                key_string,
                                re.as_str()
                            ),
                        );
                        fail_fast!(context)
                    }
                } else {
                    return Err(Error::GenericError(
                        "Expected a pattern for `property_names`".to_string(),
                    ));
                }
            }
        }

        // Validate required properties
        if let Some(required) = &self.required {
            for required_property in required {
                if !mapping
                    .keys()
                    .filter_map(|k| k.data.as_str())
                    .any(|s| s == required_property)
                {
                    context.add_error(
                        object,
                        format!("Required property '{required_property}' is missing!"),
                    );
                    fail_fast!(context)
                }
            }
        }

        // Validate minProperties
        if let Some(min_properties) = &self.min_properties
            && mapping.len() < *min_properties
        {
            context.add_error(
                object,
                format!("Object has too few properties! Minimum is {min_properties}!"),
            );
            fail_fast!(context)
        }
        // Validate maxProperties
        if let Some(max_properties) = &self.max_properties
            && mapping.len() > *max_properties
        {
            context.add_error(
                object,
                format!("Object has too many properties! Maximum is {max_properties}!"),
            );
            fail_fast!(context)
        }

        // dependentRequired / dependentSchemas (JSON Schema 2020-12): after per-property and required/min/max.
        if self.dependent_required.is_some() || self.dependent_schemas.is_some() {
            let keys = Self::instance_property_keys(mapping)?;
            if let Some(dr) = &self.dependent_required {
                for (trigger, deps) in dr {
                    if keys.contains(trigger) {
                        for dep in deps {
                            if !keys.contains(dep) {
                                context.add_error(
                                    object,
                                    format!(
                                        "{} When property '{}' is present, property '{}' is required by dependentRequired",
                                        format_marker(&object.span.start),
                                        trigger,
                                        dep
                                    ),
                                );
                                fail_fast!(context)
                            }
                        }
                    }
                }
            }
            if let Some(ds) = &self.dependent_schemas {
                for (trigger, subschema) in ds {
                    if keys.contains(trigger) {
                        subschema.validate(context, object)?;
                    }
                }
            }
        }

        Ok(())
    }

    /// Property names present on the instance mapping (scalar keys only, same rules as the main validation loop).
    fn instance_property_keys<'r>(
        mapping: &saphyr::AnnotatedMapping<'r, saphyr::MarkedYaml<'r>>,
    ) -> Result<HashSet<String>> {
        let mut keys = HashSet::new();
        for (k, _) in mapping {
            let key_string = match &k.data {
                saphyr::YamlData::Value(scalar) => scalar_to_string(scalar),
                v => {
                    return Err(expected_scalar!(
                        "[{}] Expected a scalar key, got: {:?}",
                        format_marker(&k.span.start),
                        v
                    ));
                }
            };
            keys.insert(key_string);
        }
        Ok(keys)
    }
}

#[cfg(test)]
mod tests {
    use crate::RootSchema;
    use crate::YamlSchema;
    use crate::engine;
    use crate::loader;
    use crate::schemas::NumberSchema;
    use crate::schemas::StringSchema;
    use hashlink::LinkedHashMap;

    use super::*;

    /// `properties` accepts a conforming object and reports a type mismatch,
    /// with the property name as the error path, for a non-conforming one.
    #[test]
    fn test_should_validate_properties() {
        let mut properties = LinkedHashMap::new();
        properties.insert(
            "foo".to_string(),
            YamlSchema::typed_string(StringSchema::default()),
        );
        properties.insert(
            "bar".to_string(),
            YamlSchema::typed_number(NumberSchema::default()),
        );
        let object_schema = ObjectSchema {
            properties: Some(properties),
            ..Default::default()
        };
        let root_schema = RootSchema::new(YamlSchema::typed_object(object_schema));
        let value = r#"
            foo: "I'm a string"
            bar: 42
        "#;
        // `evaluate` returns `Ok` even when validation errors were recorded, so
        // checking `is_ok()` alone would not prove the document is valid.
        let valid_context = engine::Engine::evaluate(&root_schema, value, true).unwrap();
        assert!(!valid_context.has_errors());

        let value2 = r#"
            foo: 42
            baz: "I'm a string"
        "#;
        let context = engine::Engine::evaluate(&root_schema, value2, true).unwrap();
        assert!(context.has_errors());
        let errors = context.errors.borrow();
        let first_error = errors.first().unwrap();
        assert_eq!(first_error.path, "foo");
        assert_eq!(first_error.error, "Expected a string, but got: 42 (int)");
    }

    /// `dependentRequired`: when the trigger property is present, its listed
    /// dependencies must be present too.
    #[test]
    fn dependent_required_validation() {
        let yaml = r#"
        type: object
        dependentRequired:
          credit_card:
            - billing_address
        properties:
          credit_card:
            type: string
          billing_address:
            type: string
        "#;
        let root_schema = loader::load_from_str(yaml).unwrap();
        let ok = engine::Engine::evaluate(
            &root_schema,
            "credit_card: \"4111\"\nbilling_address: \"1 Main\"",
            false,
        )
        .unwrap();
        assert!(!ok.has_errors());

        let bad = engine::Engine::evaluate(&root_schema, "credit_card: \"4111\"", false).unwrap();
        assert!(bad.has_errors());
    }

    /// `dependentSchemas`: when the trigger property is present, the whole
    /// object must also satisfy the associated subschema.
    #[test]
    fn dependent_schemas_validation() {
        let yaml = r#"
        type: object
        dependentSchemas:
          credit_card:
            type: object
            required:
              - billing_address
        properties:
          credit_card:
            type: string
          billing_address:
            type: string
        "#;
        let root_schema = loader::load_from_str(yaml).unwrap();
        let ok = engine::Engine::evaluate(
            &root_schema,
            "credit_card: \"4111\"\nbilling_address: \"1 Main\"",
            false,
        )
        .unwrap();
        assert!(!ok.has_errors());

        let bad = engine::Engine::evaluate(&root_schema, "credit_card: \"4111\"", false).unwrap();
        assert!(bad.has_errors());
    }
}