jsonschema-schema 0.2.0

Typed Rust representation of JSON Schema draft 2020-12
Documentation
use alloc::collections::BTreeMap;

use combine_structs::combine_fields;
use indexmap::IndexMap;
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use serde_json::{Number, Value};
use url::Url;

use crate::extensions::IntellijSchemaExt;
use crate::extensions::LintelSchemaExt;
use crate::extensions::TaploInfoSchemaExt;
use crate::extensions::TaploSchemaExt;
use crate::extensions::TombiSchemaExt;

mod add;
mod navigate;
pub mod vocabularies;

#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests;

pub use navigate::{navigate_pointer, ref_name, resolve_ref};

/// Predicate for `#[serde(skip_serializing_if = "...")]` on `bool` fields.
///
/// Returns `true` exactly when the referenced flag is `false`, so that a
/// field using this predicate is left out of the serialized output while
/// it is unset.
#[allow(clippy::trivially_copy_pass_by_ref)] // serde skip_serializing_if requires &T
pub(crate) fn is_false(v: &bool) -> bool {
    !*v
}

/// A JSON Schema value — a boolean schema, an object schema, or (for
/// malformed input) an arbitrary JSON value.
///
/// A schema can be a JSON object or a JSON boolean. Boolean schemas are
/// equivalent to certain object schemas:
///
/// - `true` — always validates successfully (equivalent to `{}`).
/// - `false` — never validates successfully (equivalent to `{"not": {}}`).
///
/// The `Other` variant catches values that are neither booleans nor valid
/// schema objects (e.g. bare strings injected by buggy generators).
/// [`as_schema`](Self::as_schema) returns `None` for it, exactly as it
/// does for either boolean schema.
///
/// The enum is `#[serde(untagged)]`, so during deserialization serde tries
/// the variants in declaration order and keeps the first one that matches.
/// `Other` must therefore stay last: it is the fallback for anything that
/// is neither a boolean nor a deserializable [`Schema`].
///
/// See [JSON Schema Core §4.3.2](https://json-schema.org/draft/2020-12/json-schema-core#section-4.3.2).
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
#[serde(untagged)]
pub enum SchemaValue {
    /// A boolean schema: `true` accepts everything, `false` rejects everything.
    Bool(bool),
    /// An object schema with keyword-based constraints (heap-allocated).
    Schema(Box<Schema>),
    /// Catch-all for invalid schema values (strings, numbers, etc.).
    /// The raw JSON value is kept as-is.
    Other(Value),
}

/// Primitive type names defined by JSON Schema (`simpleTypes`).
///
/// String values MUST be one of the six primitive types (`"null"`,
/// `"boolean"`, `"object"`, `"array"`, `"number"`, or `"string"`), or
/// `"integer"` which matches any number with a zero fractional part.
///
/// Every variant name is a single word, so the serde `camelCase` renaming
/// and the strum `lowercase` renaming yield the same all-lowercase text:
/// the serialized form and the `Display` output agree (e.g. `Array` is
/// `"array"` in both).
///
/// See [JSON Schema Validation §6.1.1](https://json-schema.org/draft/2020-12/json-schema-validation#section-6.1.1).
#[derive(
    Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, JsonSchema, strum::Display,
)]
#[serde(rename_all = "camelCase")]
#[strum(serialize_all = "lowercase")]
pub enum SimpleType {
    /// A JSON array (ordered sequence of values).
    Array,
    /// A JSON `true` or `false` value.
    Boolean,
    /// A JSON number with a zero fractional part (subset of `Number`).
    Integer,
    /// The JSON `null` value.
    Null,
    /// A JSON number (any numeric value, including integers).
    Number,
    /// A JSON object (unordered set of name/value pairs).
    Object,
    /// A JSON string.
    String,
}

/// The value of the JSON Schema `type` keyword.
///
/// The value of this keyword MUST be either a string or an array. If it is
/// an array, elements of the array MUST be strings and MUST be unique.
///
/// The enum is `#[serde(untagged)]`: a JSON string naming a
/// [`SimpleType`] deserializes as [`Single`](Self::Single), a JSON array
/// of such names as [`Union`](Self::Union). The Rust type itself does not
/// enforce the uniqueness or non-emptiness of a union — it is a plain `Vec`.
///
/// See [JSON Schema Validation §6.1.1](https://json-schema.org/draft/2020-12/json-schema-validation#section-6.1.1).
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
#[serde(untagged)]
pub enum TypeValue {
    /// A single type constraint, e.g. `"type": "string"`.
    Single(SimpleType),
    /// A union of types, e.g. `"type": ["string", "null"]`.
    /// The array SHOULD have at least one element, and elements MUST be unique.
    Union(Vec<SimpleType>),
}

// ---------------------------------------------------------------------------
// Schema struct — generated by merging all vocabulary fields
// ---------------------------------------------------------------------------

/// A JSON Schema object (draft 2020-12).
///
/// Represents a single schema resource as defined by the
/// [JSON Schema Core](https://json-schema.org/draft/2020-12/json-schema-core) and
/// [JSON Schema Validation](https://json-schema.org/draft/2020-12/json-schema-validation)
/// specifications.
///
/// Fields are grouped by vocabulary:
///
/// - **Core** (`$schema`, `$id`, `$ref`, `$anchor`, `$dynamicRef`,
///   `$dynamicAnchor`, `$comment`, `$defs`, `$vocabulary`)
/// - **Metadata / Annotation** (`title`, `description`, `default`,
///   `deprecated`, `readOnly`, `writeOnly`, `examples`)
/// - **Validation — type** (`type`, `enum`, `const`)
/// - **Applicator — object** (`properties`, `patternProperties`,
///   `additionalProperties`, `propertyNames`, `unevaluatedProperties`)
/// - **Validation — object** (`required`, `minProperties`,
///   `maxProperties`, `dependentRequired`)
/// - **Applicator — array** (`items`, `prefixItems`, `contains`,
///   `unevaluatedItems`)
/// - **Validation — array** (`minItems`, `maxItems`, `uniqueItems`,
///   `minContains`, `maxContains`)
/// - **Validation — number** (`minimum`, `maximum`, `exclusiveMinimum`,
///   `exclusiveMaximum`, `multipleOf`)
/// - **Validation — string** (`minLength`, `maxLength`, `pattern`, `format`)
/// - **Applicator — composition** (`allOf`, `anyOf`, `oneOf`, `not`)
/// - **Applicator — conditional** (`if`, `then`, `else`,
///   `dependentSchemas`)
/// - **Content** (`contentMediaType`, `contentEncoding`, `contentSchema`)
///
/// Only the non-standard extension fields and the catch-all `extra` map
/// are written out in the struct body below.
///
/// NOTE(review): the standard keyword fields listed above are not declared
/// here; they are presumably injected by `#[combine_fields(...)]` from the
/// named vocabulary structs — confirm against the `combine_structs` docs
/// and the `vocabularies` module.
#[combine_fields(
    CoreVocabulary,
    ApplicatorVocabulary,
    UnevaluatedVocabulary,
    ValidationVocabulary,
    MetaDataVocabulary,
    FormatAnnotationVocabulary,
    ContentVocabulary
)]
#[allow(clippy::struct_excessive_bools)] // mirrors the JSON Schema spec
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
pub struct Schema {
    /// The `markdownDescription` keyword — Markdown-formatted
    /// description (VS Code / non-standard extension).
    ///
    /// Not part of the JSON Schema specification. When present, it is
    /// preferred over [`description`](Self::description) by editors
    /// that support Markdown rendering.
    ///
    /// Omitted from serialized output when `None`.
    #[serde(
        rename = "markdownDescription",
        skip_serializing_if = "Option::is_none"
    )]
    pub markdown_description: Option<String>,

    /// Per-enum-value Markdown descriptions (VS Code / non-standard extension).
    ///
    /// Individual entries are optional, so a `null` element in the JSON
    /// array is representable. Omitted from serialized output when `None`.
    #[serde(
        rename = "markdownEnumDescriptions",
        skip_serializing_if = "Option::is_none"
    )]
    pub markdown_enum_descriptions: Option<Vec<Option<String>>>,

    /// Lintel provenance metadata (`x-lintel`).
    #[serde(rename = "x-lintel", skip_serializing_if = "Option::is_none")]
    pub x_lintel: Option<LintelSchemaExt>,

    /// Taplo TOML toolkit extension (`x-taplo`).
    #[serde(rename = "x-taplo", skip_serializing_if = "Option::is_none")]
    pub x_taplo: Option<TaploSchemaExt>,
    /// Taplo informational metadata (`x-taplo-info`).
    #[serde(rename = "x-taplo-info", skip_serializing_if = "Option::is_none")]
    pub x_taplo_info: Option<TaploInfoSchemaExt>,
    /// Tombi TOML extensions (`x-tombi-*`).
    ///
    /// Flattened: the extension's own fields are read from and written to
    /// the schema object itself rather than a nested object.
    #[serde(flatten)]
    pub x_tombi: TombiSchemaExt,
    /// `IntelliJ` IDEA extensions (`x-intellij-*`).
    ///
    /// Flattened into the schema object in the same way as `x_tombi`.
    #[serde(flatten)]
    pub x_intellij: IntellijSchemaExt,

    /// Unknown or unsupported properties.
    ///
    /// Any JSON property that is not recognized as a standard keyword
    /// or known extension is captured here, preserving round-trip
    /// fidelity.
    ///
    /// Stored in a `BTreeMap`, so these entries are kept (and iterated)
    /// in key-sorted order rather than in their original document order.
    #[serde(flatten)]
    pub extra: BTreeMap<String, Value>,
}

// ---------------------------------------------------------------------------
// Impl blocks
// ---------------------------------------------------------------------------

impl SchemaValue {
    /// Borrow the wrapped [`Schema`] when this value is an object schema.
    ///
    /// Returns `None` for both boolean schemas and for invalid (`Other`)
    /// values, since neither carries a `Schema` to borrow.
    pub fn as_schema(&self) -> Option<&Schema> {
        match self {
            Self::Bool(_) | Self::Other(_) => None,
            Self::Schema(boxed) => Some(boxed.as_ref()),
        }
    }
}

impl Schema {
    /// Deserialize a `Schema` straight from a `serde_json::Value`, with no
    /// migration step applied.
    ///
    /// # Errors
    ///
    /// Returns the `serde_json::Error` raised when `value` cannot be
    /// deserialized into a `Schema`.
    pub fn from_value(value: Value) -> Result<Self, serde_json::Error> {
        serde_json::from_value::<Self>(value)
    }

    /// Best available description text.
    ///
    /// `markdownDescription` wins when present; otherwise the plain
    /// `description` is returned, and `None` when neither is set.
    pub fn description(&self) -> Option<&str> {
        if let Some(markdown) = self.markdown_description.as_deref() {
            return Some(markdown);
        }
        self.description.as_deref()
    }

    /// Names listed under `required`, or an empty slice when the keyword
    /// is absent.
    pub fn required_set(&self) -> &[String] {
        self.required.as_deref().unwrap_or(&[])
    }

    /// Value of the `deprecated` flag.
    pub fn is_deprecated(&self) -> bool {
        self.deprecated
    }

    /// Short human-readable type string for this schema, if one can be
    /// derived (delegates to `schema_type_str`).
    pub fn type_str(&self) -> Option<String> {
        schema_type_str(self)
    }

    /// Check the structural integrity of this schema.
    ///
    /// Delegates to `crate::validate::validate`, which walks the schema
    /// tree recursively and checks that every local `$ref` pointer
    /// (starting with `#/`) resolves to a valid target. The problems found
    /// are returned as a list.
    pub fn validate(&self) -> Vec<crate::validate::SchemaError> {
        crate::validate::validate(self)
    }

    /// Build a copy in which all local `$ref` pointers (`#/…`) are rewritten
    /// to absolute URLs, using the schema's `$id` as the base.
    ///
    /// When `$id` is absent the returned schema is unchanged.
    #[must_use]
    pub fn absolute(&self) -> Schema {
        crate::absolute::make_absolute(self)
    }

    /// Merge composition keywords (currently only `allOf`) into a single
    /// flattened schema.
    ///
    /// Properties contributed by `allOf` entries are merged into the root
    /// and `$defs` entries that are no longer referenced are pruned. The
    /// `allOf` array itself is kept so that provenance stays visible.
    #[must_use]
    pub fn flatten(&self, root: &SchemaValue) -> Schema {
        crate::flatten::flatten_all_of(self, root)
    }

    /// Fetch the sub-schema stored directly under a single-schema keyword,
    /// addressed by its JSON key name.
    ///
    /// Yields `None` both when the keyword is not set on this schema and
    /// when `key` is not one of the keywords handled here.
    pub fn get_keyword(&self, key: &str) -> Option<&SchemaValue> {
        match key {
            // Array applicators.
            "items" => self.items.as_deref(),
            "contains" => self.contains.as_deref(),
            "unevaluatedItems" => self.unevaluated_items.as_deref(),
            // Object applicators.
            "additionalProperties" => self.additional_properties.as_deref(),
            "propertyNames" => self.property_names.as_deref(),
            "unevaluatedProperties" => self.unevaluated_properties.as_deref(),
            // Composition and conditionals.
            "not" => self.not.as_deref(),
            "if" => self.if_.as_deref(),
            "then" => self.then_.as_deref(),
            "else" => self.else_.as_deref(),
            // Content.
            "contentSchema" => self.content_schema.as_deref(),
            _ => None,
        }
    }

    /// Fetch a named child from a keyword whose value is a map of schemas.
    ///
    /// For example, `get_map_entry("properties", "name")` yields the schema
    /// of the `name` property. Unknown keywords and missing keys both give
    /// `None`.
    pub fn get_map_entry(&self, keyword: &str, key: &str) -> Option<&SchemaValue> {
        match keyword {
            "$defs" => self.defs.as_ref().and_then(|defs| defs.get(key)),
            "properties" => self.properties.get(key),
            "patternProperties" => self.pattern_properties.get(key),
            "dependentSchemas" => self.dependent_schemas.get(key),
            _ => None,
        }
    }

    /// Fetch the child at `index` from a keyword whose value is an array of
    /// schemas (`allOf`, `anyOf`, `oneOf`, `prefixItems`).
    ///
    /// Gives `None` for an unknown keyword, an unset keyword, or an
    /// out-of-range index.
    pub fn get_array_entry(&self, keyword: &str, index: usize) -> Option<&SchemaValue> {
        let entries = match keyword {
            "allOf" => self.all_of.as_deref(),
            "anyOf" => self.any_of.as_deref(),
            "oneOf" => self.one_of.as_deref(),
            "prefixItems" => self.prefix_items.as_deref(),
            _ => None,
        };
        entries.and_then(|list| list.get(index))
    }

    /// Resolve a JSON pointer segment against the map-valued keywords.
    ///
    /// The segment is treated as a key and looked up in `properties`,
    /// `patternProperties`, `$defs` and `dependentSchemas`, in that order;
    /// the first map containing the key supplies the result.
    fn get_map_entry_by_pointer_segment(&self, segment: &str) -> Option<&SchemaValue> {
        if let Some(hit) = self.properties.get(segment) {
            return Some(hit);
        }
        if let Some(hit) = self.pattern_properties.get(segment) {
            return Some(hit);
        }
        if let Some(hit) = self.defs.as_ref().and_then(|defs| defs.get(segment)) {
            return Some(hit);
        }
        self.dependent_schemas.get(segment)
    }
}

/// Produce a short human-readable type string for a schema.
fn schema_type_str(schema: &Schema) -> Option<String> {
    // Explicit type field
    if let Some(ref ty) = schema.type_ {
        return match ty {
            TypeValue::Single(s) if *s == SimpleType::Array => {
                let item_ty = schema
                    .items
                    .as_ref()
                    .and_then(|sv| sv.as_schema())
                    .and_then(schema_type_str);
                match item_ty {
                    Some(item_ty) => Some(format!("{item_ty}[]")),
                    None => Some("array".to_string()),
                }
            }
            TypeValue::Single(s) => Some(s.to_string()),
            TypeValue::Union(arr) => Some(
                arr.iter()
                    .map(SimpleType::to_string)
                    .collect::<Vec<_>>()
                    .join(" | "),
            ),
        };
    }

    // $ref
    if let Some(ref r) = schema.ref_ {
        return Some(ref_name(r).to_string());
    }

    // oneOf/anyOf
    for variants in [&schema.one_of, &schema.any_of].into_iter().flatten() {
        let mut types: Vec<String> = variants
            .iter()
            .filter_map(|v| match v {
                SchemaValue::Schema(s) => {
                    schema_type_str(s).or_else(|| s.ref_.as_ref().map(|r| ref_name(r).to_string()))
                }
                SchemaValue::Bool(_) | SchemaValue::Other(_) => None,
            })
            .collect();
        types.dedup();
        if !types.is_empty() {
            return Some(types.join(" | "));
        }
    }

    // const
    if let Some(ref c) = schema.const_ {
        return Some(format!("const: {c}"));
    }

    // enum — single-value enums show the value (e.g. `"lf"`), multi-value show `enum`
    if let Some(ref values) = schema.enum_ {
        if values.len() == 1 {
            let val = &values[0];
            return Some(
                val.as_str()
                    .map_or_else(|| val.to_string(), |s| format!("\"{s}\"")),
            );
        }
        return Some("enum".to_string());
    }

    None
}