use std::{
collections::BTreeMap,
fmt::{self, Display, Formatter},
sync::OnceLock,
};
use chrono::{DateTime, NaiveDate, NaiveDateTime, NaiveTime};
use http::Uri;
use regex::Regex;
use serde_json::Value;
use uuid::Uuid;
/// Scalar formats that are available without any user registration.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum BuiltinScalar {
    /// Canonical name `UUID`.
    Uuid,
    /// Canonical name `EMAIL`.
    Email,
    /// Canonical name `NUMBER`.
    Number,
    /// Canonical name `DATE`.
    Date,
    /// Canonical name `DATE_TIME`.
    DateTime,
    /// Canonical name `TIME`.
    Time,
    /// Canonical name `URI`.
    Uri,
}

impl BuiltinScalar {
    /// Every built-in scalar, listed in declaration order.
    pub const ALL: [BuiltinScalar; 7] = [
        Self::Uuid,
        Self::Email,
        Self::Number,
        Self::Date,
        Self::DateTime,
        Self::Time,
        Self::Uri,
    ];

    /// Returns [`Self::ALL`] as a slice.
    pub fn all() -> &'static [BuiltinScalar] {
        let every: &'static [BuiltinScalar; 7] = &Self::ALL;
        every
    }

    /// Resolves a built-in scalar from its canonical name.
    ///
    /// Surrounding whitespace is trimmed and ASCII case is ignored, so
    /// `" date_time "` resolves to [`BuiltinScalar::DateTime`]. Returns `None`
    /// when no canonical name matches.
    pub fn parse(name: &str) -> Option<Self> {
        let wanted = name.trim();
        Self::ALL
            .iter()
            .copied()
            .find(|candidate| candidate.name().eq_ignore_ascii_case(wanted))
    }

    /// Returns the canonical upper-case name of this scalar.
    pub fn name(self) -> &'static str {
        match self {
            Self::Uuid => "UUID",
            Self::Email => "EMAIL",
            Self::Number => "NUMBER",
            Self::Date => "DATE",
            Self::DateTime => "DATE_TIME",
            Self::Time => "TIME",
            Self::Uri => "URI",
        }
    }
}
/// Lower-case keywords of the primitive types; `is_reserved_name` treats each of them as reserved.
pub const PRIMITIVE_TYPE_NAMES: [&str; 5] = ["string", "integer", "number", "boolean", "null"];
/// Reports whether `name` is a primitive keyword or the canonical name of a
/// built-in scalar. The comparison is exact (case-sensitive).
pub fn is_reserved_name(name: &str) -> bool {
    let is_primitive = PRIMITIVE_TYPE_NAMES.iter().any(|keyword| *keyword == name);
    if is_primitive {
        return true;
    }
    for builtin in BuiltinScalar::all() {
        if builtin.name() == name {
            return true;
        }
    }
    false
}
/// The primitive JSON-level types a schema can refer to directly.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PrimitiveType {
    /// Keyword `string`.
    String,
    /// Keyword `integer`.
    Integer,
    /// Keyword `number`.
    Number,
    /// Keyword `boolean`.
    Boolean,
    /// Keyword `null`.
    Null,
}

impl PrimitiveType {
    /// Returns the lower-case keyword for this primitive type.
    pub fn keyword(self) -> &'static str {
        match self {
            Self::String => "string",
            Self::Integer => "integer",
            Self::Number => "number",
            Self::Boolean => "boolean",
            Self::Null => "null",
        }
    }
}
/// A reference to a type together with a flag saying whether JSON `null` is
/// also accepted.
#[derive(Debug, Clone, PartialEq)]
pub struct TypeReference {
    /// What is being referred to.
    pub kind: TypeReferenceKind,
    /// When `true`, a `null` value is accepted in place of `kind`.
    pub nullable: bool,
}

impl TypeReference {
    /// Builds a non-nullable reference to a primitive type.
    pub fn primitive(kind: PrimitiveType) -> Self {
        let kind = TypeReferenceKind::Primitive(kind);
        Self {
            kind,
            nullable: false,
        }
    }

    /// Builds a non-nullable reference to a named registry entry.
    pub fn named(name: impl Into<String>) -> Self {
        let kind = TypeReferenceKind::Named(name.into());
        Self {
            kind,
            nullable: false,
        }
    }

    /// Builds a non-nullable array reference whose items are `item_type`.
    pub fn array(item_type: TypeReference) -> Self {
        let kind = TypeReferenceKind::Array(Box::new(item_type));
        Self {
            kind,
            nullable: false,
        }
    }

    /// Consumes the reference and returns it with `nullable` set to `true`.
    pub fn nullable(self) -> Self {
        Self {
            nullable: true,
            ..self
        }
    }
}
/// The target of a [`TypeReference`].
#[derive(Debug, Clone, PartialEq)]
pub enum TypeReferenceKind {
/// One of the primitive types.
Primitive(PrimitiveType),
/// A registry entry looked up by name.
Named(String),
/// An array whose items are each validated against the boxed reference.
Array(Box<TypeReference>),
}
/// The type a scalar expression builds on before its predicates are applied.
#[derive(Debug, Clone, PartialEq)]
pub enum ScalarBase {
/// A primitive type check.
Primitive(PrimitiveType),
/// Another scalar target, looked up in the registry by name.
Named(String),
}
/// An optional base type plus a list of predicates a scalar value must satisfy.
#[derive(Debug, Clone, PartialEq)]
pub struct ScalarExpression {
/// Base check that runs before the predicates; `None` means no base check.
pub base: Option<ScalarBase>,
/// Predicates evaluated in order; validation stops at the first failure.
pub predicates: Vec<ScalarPredicate>,
}
impl ScalarExpression {
/// Builds an expression from its parts without any validation.
pub fn new(base: Option<ScalarBase>, predicates: Vec<ScalarPredicate>) -> Self {
Self { base, predicates }
}
}
/// A single constraint applied to a scalar value.
#[derive(Debug, Clone, PartialEq)]
pub enum ScalarPredicate {
/// The value must match at least one of the listed literals.
Enum(Vec<ScalarLiteral>),
/// The value must satisfy the built-in scalar that this name parses to
/// (via `BuiltinScalar::parse`); an unparseable name is reported as an unknown format.
Format(String),
/// Bounds on the number of `char`s of a string value.
/// NOTE(review): bound inclusivity is decided by `usize_in_bounds`, defined outside this view.
Length {
min: Option<usize>,
max: Option<usize>,
},
/// The string value must contain a match of this regular expression.
Pattern(String),
/// Inclusive numeric bounds; a missing bound is unbounded.
Range {
min: Option<NumericValue>,
max: Option<NumericValue>,
},
}
/// A literal value used in enum predicates and discriminator tags.
#[derive(Debug, Clone, PartialEq)]
pub enum ScalarLiteral {
/// Matches a JSON string with identical contents.
String(String),
/// Matches a JSON number that compares equal to this integer.
Integer(i64),
/// Matches a JSON number that compares equal to this float.
Number(f64),
/// Matches a JSON boolean with the same value.
Boolean(bool),
/// Matches JSON `null`.
Null,
}
/// A number that is either an `i64` or an `f64`.
#[derive(Debug, Clone, PartialEq)]
pub enum NumericValue {
/// An integer representable as `i64`.
Integer(i64),
/// Any other number, stored as `f64`.
Number(f64),
}
/// A user-defined scalar: a name bound to a scalar expression.
#[derive(Debug, Clone, PartialEq)]
pub struct ScalarDefinition {
    /// Name under which the scalar is registered.
    pub name: String,
    /// Base type and predicates a value must satisfy.
    pub expression: ScalarExpression,
}

impl ScalarDefinition {
    /// Builds a definition; `name` is converted into an owned `String`.
    pub fn new(name: impl Into<String>, expression: ScalarExpression) -> Self {
        let name: String = name.into();
        Self { name, expression }
    }
}
/// A user-defined schema: a name bound to a structural shape.
#[derive(Debug, Clone, PartialEq)]
pub struct SchemaDefinition {
    /// Name under which the schema is registered.
    pub name: String,
    /// The structure a value must have.
    pub shape: SchemaShape,
}

impl SchemaDefinition {
    /// Builds a definition; `name` is converted into an owned `String`.
    pub fn new(name: impl Into<String>, shape: SchemaShape) -> Self {
        let name: String = name.into();
        Self { name, shape }
    }
}
/// One arm of a discriminated union.
#[derive(Debug, Clone, PartialEq)]
pub struct DiscriminatorBranch {
/// Literal compared against the discriminator field's value.
pub tag: ScalarLiteral,
/// Name of the registry entry the whole value is validated against when `tag` matches.
pub target: String,
}
/// The structural forms a schema definition can take.
#[derive(Debug, Clone, PartialEq)]
pub enum SchemaShape {
/// A JSON object with declared fields.
Object(ObjectSchema),
/// The value is validated against this type reference as a whole.
/// NOTE(review): handled identically to `Alias` by the validator — confirm the reference is
/// meant to be the array type itself rather than the item type.
Array(TypeReference),
/// The value is validated against this type reference as a whole.
Alias(TypeReference),
/// Every named target must accept the value.
AllOf(Vec<String>),
/// Exactly one named target must accept the value.
OneOf(Vec<String>),
/// At least one named target must accept the value.
AnyOf(Vec<String>),
/// The named target must reject the value.
Not(String),
/// The value must be an object whose `field` selects the branch to validate against.
Discriminator { field: String, branches: Vec<DiscriminatorBranch> },
}
/// Declared fields of an object-shaped schema.
#[derive(Debug, Clone, PartialEq)]
pub struct ObjectSchema {
/// Fields checked in order during validation.
pub fields: Vec<SchemaField>,
/// NOTE(review): presumably marks whether undeclared fields are allowed; the object
/// validation visible in this file never reads this flag — confirm the intended semantics.
pub open: bool,
}
impl ObjectSchema {
/// Builds an object schema with `open` set to `true`.
pub fn open(fields: Vec<SchemaField>) -> Self {
Self { fields, open: true }
}
}
/// A named field of an object schema.
#[derive(Debug, Clone, PartialEq)]
pub struct SchemaField {
    /// Key looked up in the JSON object.
    pub name: String,
    /// Type the field's value is validated against.
    pub value_type: TypeReference,
    /// Whether the key has to be present.
    pub presence: FieldPresence,
}

impl SchemaField {
    /// Builds a field that must be present in the object.
    pub fn required(name: impl Into<String>, value_type: TypeReference) -> Self {
        let name: String = name.into();
        let presence = FieldPresence::Required;
        Self {
            name,
            value_type,
            presence,
        }
    }

    /// Builds a field that may be absent from the object.
    pub fn optional(name: impl Into<String>, value_type: TypeReference) -> Self {
        let name: String = name.into();
        let presence = FieldPresence::Optional;
        Self {
            name,
            value_type,
            presence,
        }
    }
}
/// Whether an object field has to be present.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FieldPresence {
/// A missing key is a validation error.
Required,
/// A missing key is accepted.
Optional,
}
#[derive(Debug, Clone, PartialEq)]
pub enum RegistryEntry {
BuiltinScalar(BuiltinScalar),
Scalar(ScalarDefinition),
Schema(SchemaDefinition),
}
impl RegistryEntry {
pub fn name(&self) -> &str {
match self {
RegistryEntry::BuiltinScalar(builtin) => builtin.name(),
RegistryEntry::Scalar(definition) => definition.name.as_str(),
RegistryEntry::Schema(definition) => definition.name.as_str(),
}
}
}
/// Name-indexed collection of built-in scalars, user scalars and user schemas.
#[derive(Debug, Clone, PartialEq)]
pub struct SchemaRegistry {
// Keyed by entry name; a BTreeMap, so iteration is ordered by name.
entries: BTreeMap<String, RegistryEntry>,
}
impl SchemaRegistry {
pub fn with_builtins() -> Self {
let mut registry = Self {
entries: BTreeMap::new(),
};
for builtin in BuiltinScalar::all() {
registry.entries.insert(
builtin.name().to_string(),
RegistryEntry::BuiltinScalar(*builtin),
);
}
registry
}
/// Registers a user-defined scalar under `definition.name`.
///
/// # Errors
/// Returns whatever `register_user_entry` reports (reserved or duplicate name).
pub fn register_scalar(
    &mut self,
    definition: ScalarDefinition,
) -> Result<(), SchemaRegistryError> {
    let key = definition.name.clone();
    let entry = RegistryEntry::Scalar(definition);
    self.register_user_entry(key, entry)
}
/// Registers a user-defined schema under `definition.name`.
///
/// # Errors
/// Returns whatever `register_user_entry` reports (reserved or duplicate name).
pub fn register_schema(
    &mut self,
    definition: SchemaDefinition,
) -> Result<(), SchemaRegistryError> {
    let key = definition.name.clone();
    let entry = RegistryEntry::Schema(definition);
    self.register_user_entry(key, entry)
}
/// Looks up an entry by its exact name; `None` when nothing is registered under `name`.
pub fn get(&self, name: &str) -> Option<&RegistryEntry> {
self.entries.get(name)
}
/// Number of registered entries, built-ins included.
pub fn len(&self) -> usize {
self.entries.len()
}
/// Returns `true` when the registry holds no entries at all.
pub fn is_empty(&self) -> bool {
self.entries.is_empty()
}
/// Iterates over `(name, entry)` pairs in the map's key order.
pub fn iter(&self) -> impl Iterator<Item = (&str, &RegistryEntry)> {
self.entries.iter().map(|(name, entry)| (name.as_str(), entry))
}
/// Validates `value` against the scalar registered as `target`.
///
/// # Errors
/// Fails when `target` is not registered, when it names a schema rather than
/// a scalar, or when the scalar rejects `value`.
pub fn validate_scalar_target(
    &self,
    target: &str,
    value: &Value,
) -> Result<(), ScalarValidationError> {
    let Some(entry) = self.get(target) else {
        return Err(ScalarValidationError::unknown_target(target, value));
    };
    match entry {
        RegistryEntry::BuiltinScalar(builtin) => {
            validate_builtin_scalar(target, *builtin, builtin.name(), value)
        }
        RegistryEntry::Scalar(definition) => {
            self.validate_scalar_definition_value(definition, value)
        }
        RegistryEntry::Schema(_) => Err(ScalarValidationError::schema_target(target, value)),
    }
}
/// Validates `value` against the schema registered as `target`, reporting
/// error paths relative to the root path `"$"`.
///
/// # Errors
/// Fails when `target` is not registered, when it names a scalar rather than
/// a schema, or when the schema rejects `value`.
pub fn validate_schema_target(
    &self,
    target: &str,
    value: &Value,
) -> Result<(), SchemaValidationError> {
    let Some(entry) = self.get(target) else {
        return Err(SchemaValidationError::unknown_target(target, "$", value));
    };
    match entry {
        RegistryEntry::Schema(definition) => {
            self.validate_schema_definition_value(target, "$", definition, value)
        }
        RegistryEntry::BuiltinScalar(_) | RegistryEntry::Scalar(_) => {
            Err(SchemaValidationError::scalar_target(target, "$", value))
        }
    }
}
/// Checks that every reference made by a registered definition resolves, then
/// runs cycle detection.
///
/// # Errors
/// Returns the first reference error found while walking the entries in map
/// order, or the error reported by `detect_cycles`.
pub fn validate_references(&self) -> Result<(), SchemaRegistryError> {
    for entry in self.entries.values() {
        let outcome = match entry {
            // Built-ins reference nothing.
            RegistryEntry::BuiltinScalar(_) => Ok(()),
            RegistryEntry::Scalar(definition) => self.validate_scalar_definition(definition),
            RegistryEntry::Schema(definition) => self.validate_schema_definition(definition),
        };
        outcome?;
    }
    self.detect_cycles()
}
fn register_user_entry(
&mut self,
name: String,
entry: RegistryEntry,
) -> Result<(), SchemaRegistryError> {
if is_reserved_name(&name) {
return Err(SchemaRegistryError::ReservedName(name));
}
if self.entries.contains_key(&name) {
return Err(SchemaRegistryError::DuplicateName(name));
}
self.entries.insert(name, entry);
Ok(())
}
/// Checks that a scalar's named base, if any, resolves to another scalar.
///
/// # Errors
/// * `InvalidScalarBaseReference` when the base names a schema.
/// * `UnknownReference` when the base names nothing in the registry.
fn validate_scalar_definition(
    &self,
    definition: &ScalarDefinition,
) -> Result<(), SchemaRegistryError> {
    let reference = match definition.expression.base.as_ref() {
        Some(ScalarBase::Named(reference)) => reference,
        // Primitive or absent bases reference nothing.
        _ => return Ok(()),
    };
    let Some(entry) = self.entries.get(reference.as_str()) else {
        return Err(SchemaRegistryError::UnknownReference {
            owner: definition.name.clone(),
            reference: reference.clone(),
        });
    };
    match entry {
        RegistryEntry::Schema(_) => Err(SchemaRegistryError::InvalidScalarBaseReference {
            scalar: definition.name.clone(),
            reference: reference.clone(),
        }),
        RegistryEntry::BuiltinScalar(_) | RegistryEntry::Scalar(_) => Ok(()),
    }
}
/// Checks that every name referenced by a schema's shape is registered.
///
/// # Errors
/// Returns the first `UnknownReference` encountered, with the schema's name
/// as the owner.
fn validate_schema_definition(
    &self,
    definition: &SchemaDefinition,
) -> Result<(), SchemaRegistryError> {
    let owner = definition.name.as_str();
    match &definition.shape {
        SchemaShape::Object(object) => object
            .fields
            .iter()
            .try_for_each(|field| self.validate_type_reference(owner, &field.value_type)),
        SchemaShape::Alias(value_type) | SchemaShape::Array(value_type) => {
            self.validate_type_reference(owner, value_type)
        }
        SchemaShape::AllOf(targets)
        | SchemaShape::OneOf(targets)
        | SchemaShape::AnyOf(targets) => targets
            .iter()
            .try_for_each(|target| self.validate_named_target_reference(owner, target)),
        SchemaShape::Not(target) => self.validate_named_target_reference(owner, target),
        SchemaShape::Discriminator { branches, .. } => branches
            .iter()
            .try_for_each(|branch| self.validate_named_target_reference(owner, &branch.target)),
    }
}
/// Succeeds when `reference` is a registered name.
///
/// # Errors
/// Returns `UnknownReference { owner, reference }` otherwise.
fn validate_named_target_reference(
    &self,
    owner: &str,
    reference: &str,
) -> Result<(), SchemaRegistryError> {
    if !self.entries.contains_key(reference) {
        return Err(SchemaRegistryError::UnknownReference {
            owner: owner.to_string(),
            reference: reference.to_string(),
        });
    }
    Ok(())
}
/// Checks that the name at the core of a type reference is registered.
/// Array wrappers are peeled off until a primitive or a name is reached.
///
/// # Errors
/// Returns `UnknownReference` when the innermost named reference is not in
/// the registry.
fn validate_type_reference(
    &self,
    owner: &str,
    value_type: &TypeReference,
) -> Result<(), SchemaRegistryError> {
    let mut current = value_type;
    loop {
        match &current.kind {
            TypeReferenceKind::Primitive(_) => return Ok(()),
            // Descend into the item type instead of recursing.
            TypeReferenceKind::Array(item_type) => current = &**item_type,
            TypeReferenceKind::Named(reference) => {
                if self.entries.contains_key(reference.as_str()) {
                    return Ok(());
                }
                return Err(SchemaRegistryError::UnknownReference {
                    owner: owner.to_string(),
                    reference: reference.clone(),
                });
            }
        }
    }
}
/// Builds the dependency graph of user definitions and visits every node that has
/// no recorded visit state yet, propagating the first error.
/// NOTE(review): the traversal itself lives in `visit_node`, defined outside this view;
/// presumably it reports `CircularReference` — confirm there.
fn detect_cycles(&self) -> Result<(), SchemaRegistryError> {
let graph = self.user_dependency_graph()?;
// State and stack are shared across all roots so that nodes already handled are skipped.
let mut visit_state = BTreeMap::new();
let mut stack = Vec::new();
for name in graph.keys() {
if !visit_state.contains_key(name.as_str()) {
visit_node(name.as_str(), &graph, &mut visit_state, &mut stack)?;
}
}
Ok(())
}
/// Maps every user-defined entry name to the names it depends on.
///
/// Built-in scalars are neither nodes nor edges of the graph. A scalar depends on its
/// named base only when that base is another user scalar. Schema dependencies are
/// gathered by `collect_schema_dependencies` (defined outside this view), then sorted
/// and de-duplicated.
///
/// # Errors
/// * `InvalidScalarBaseReference` when a scalar's base names a schema.
/// * `UnknownReference` when a scalar's base names nothing.
/// * Any error returned by `collect_schema_dependencies`.
fn user_dependency_graph(&self) -> Result<BTreeMap<String, Vec<String>>, SchemaRegistryError> {
let mut graph = BTreeMap::new();
for (name, entry) in &self.entries {
match entry {
RegistryEntry::BuiltinScalar(_) => {}
RegistryEntry::Scalar(definition) => {
let mut dependencies = Vec::new();
if let Some(ScalarBase::Named(reference)) = definition.expression.base.as_ref() {
match self.entries.get(reference.as_str()) {
// A built-in base adds no edge.
Some(RegistryEntry::BuiltinScalar(_)) => {}
Some(RegistryEntry::Scalar(_)) => dependencies.push(reference.clone()),
Some(RegistryEntry::Schema(_)) => {
return Err(SchemaRegistryError::InvalidScalarBaseReference {
scalar: definition.name.clone(),
reference: reference.clone(),
});
}
None => {
return Err(SchemaRegistryError::UnknownReference {
owner: definition.name.clone(),
reference: reference.clone(),
});
}
}
}
graph.insert(name.clone(), dependencies);
}
RegistryEntry::Schema(definition) => {
let mut dependencies = Vec::new();
collect_schema_dependencies(self, &definition.shape, &mut dependencies)?;
// Sort first so that dedup removes every repeated name.
dependencies.sort();
dependencies.dedup();
graph.insert(name.clone(), dependencies);
}
}
}
Ok(graph)
}
fn validate_scalar_definition_value(
&self,
definition: &ScalarDefinition,
value: &Value,
) -> Result<(), ScalarValidationError> {
if let Some(base) = &definition.expression.base {
self.validate_scalar_base(base, value)?;
}
for predicate in &definition.expression.predicates {
validate_scalar_predicate(definition.name.as_str(), predicate, value)?;
}
Ok(())
}
/// Validates `value` against the base of a scalar expression: a primitive
/// type check, or a recursive check against another named scalar target.
fn validate_scalar_base(
    &self,
    base: &ScalarBase,
    value: &Value,
) -> Result<(), ScalarValidationError> {
    match base {
        ScalarBase::Named(name) => self.validate_scalar_target(name.as_str(), value),
        ScalarBase::Primitive(primitive) => {
            let primitive = *primitive;
            validate_primitive_scalar(primitive.keyword(), primitive, value)
        }
    }
}
/// Dispatches validation of `value` on the shape of `definition`.
///
/// `root_target` is the name the caller originally asked to validate and is
/// carried into every error; `path` locates `value` inside the root document.
fn validate_schema_definition_value(
    &self,
    root_target: &str,
    path: &str,
    definition: &SchemaDefinition,
    value: &Value,
) -> Result<(), SchemaValidationError> {
    match &definition.shape {
        SchemaShape::Object(object) => {
            self.validate_object_schema(root_target, path, object, value)
        }
        SchemaShape::Array(item_type) | SchemaShape::Alias(item_type) => {
            self.validate_type_reference_value(root_target, path, item_type, value)
        }
        // Every target has to accept the value; the first rejection wins.
        SchemaShape::AllOf(targets) => targets.iter().try_for_each(|target| {
            self.validate_named_target_value(root_target, path, target, value)
        }),
        SchemaShape::OneOf(targets) => self.validate_one_of(root_target, path, targets, value),
        SchemaShape::AnyOf(targets) => self.validate_any_of(root_target, path, targets, value),
        // Inverted outcome: any failure of the target counts as success here.
        SchemaShape::Not(target) => {
            match self.validate_named_target_value(root_target, path, target, value) {
                Ok(()) => Err(SchemaValidationError::not_matched(
                    root_target,
                    path,
                    value,
                    target,
                )),
                Err(_) => Ok(()),
            }
        }
        SchemaShape::Discriminator { field, branches } => {
            self.validate_discriminator(root_target, path, field, branches, value)
        }
    }
}
/// Succeeds when exactly one of `targets` accepts `value`.
///
/// # Errors
/// * `OneOfNoMatch` (listing all candidates) when none accepts.
/// * `OneOfMultipleMatches` (listing the accepting targets) when several do.
fn validate_one_of(
    &self,
    root_target: &str,
    path: &str,
    targets: &[String],
    value: &Value,
) -> Result<(), SchemaValidationError> {
    let matches: Vec<String> = targets
        .iter()
        .filter(|target| {
            self.validate_named_target_value(root_target, path, target.as_str(), value)
                .is_ok()
        })
        .cloned()
        .collect();
    match matches.as_slice() {
        [_] => Ok(()),
        [] => Err(SchemaValidationError::one_of_no_match(
            root_target,
            path,
            value,
            targets,
        )),
        _ => Err(SchemaValidationError::one_of_multiple_matches(
            root_target,
            path,
            value,
            &matches,
        )),
    }
}
/// Succeeds as soon as one of `targets` accepts `value`.
///
/// # Errors
/// Returns `AnyOfNoMatch` listing all candidates when none accepts (including
/// when `targets` is empty).
fn validate_any_of(
    &self,
    root_target: &str,
    path: &str,
    targets: &[String],
    value: &Value,
) -> Result<(), SchemaValidationError> {
    for target in targets {
        let accepted = self
            .validate_named_target_value(root_target, path, target, value)
            .is_ok();
        if accepted {
            return Ok(());
        }
    }
    Err(SchemaValidationError::any_of_no_match(
        root_target,
        path,
        value,
        targets,
    ))
}
/// Validates a discriminated union: reads `field` from the object, picks the
/// first branch whose tag matches it, and validates the whole object against
/// that branch's target.
///
/// # Errors
/// * `TypeMismatch` when `value` is not an object.
/// * `MissingRequiredField` when `field` is absent.
/// * `DiscriminatorUnknownTag` when no branch tag matches.
/// * Any error from validating against the selected target.
fn validate_discriminator(
    &self,
    root_target: &str,
    path: &str,
    field: &str,
    branches: &[DiscriminatorBranch],
    value: &Value,
) -> Result<(), SchemaValidationError> {
    let map = match value {
        Value::Object(map) => map,
        _ => {
            return Err(SchemaValidationError::type_mismatch(
                root_target,
                path,
                value,
                "object",
            ))
        }
    };
    let field_path = object_field_path(path, field);
    let tag_value = match map.get(field) {
        Some(tag_value) => tag_value,
        None => {
            return Err(SchemaValidationError::missing_required_field(
                root_target,
                field_path.as_str(),
                field,
            ))
        }
    };
    let selected = branches
        .iter()
        .find(|branch| scalar_literal_matches(&branch.tag, tag_value));
    match selected {
        // The whole object (not just the tag) is validated, at the object's own path.
        Some(branch) => self.validate_named_target_value(root_target, path, &branch.target, value),
        None => {
            let expected = branches.iter().map(|branch| branch.tag.clone()).collect();
            Err(SchemaValidationError::discriminator_unknown_tag(
                root_target,
                field_path.as_str(),
                tag_value,
                field,
                expected,
            ))
        }
    }
}
/// Validates `value` as an object against the declared fields, in order.
///
/// Keys that are present in the object but not declared are not inspected.
///
/// # Errors
/// * `TypeMismatch` when `value` is not an object.
/// * `MissingRequiredField` for the first absent non-optional field.
/// * Any error from validating a present field's value.
fn validate_object_schema(
    &self,
    root_target: &str,
    path: &str,
    object: &ObjectSchema,
    value: &Value,
) -> Result<(), SchemaValidationError> {
    let map = match value {
        Value::Object(map) => map,
        _ => {
            return Err(SchemaValidationError::type_mismatch(
                root_target,
                path,
                value,
                "object",
            ))
        }
    };
    for field in &object.fields {
        let key = field.name.as_str();
        let field_path = object_field_path(path, key);
        if let Some(field_value) = map.get(key) {
            self.validate_type_reference_value(
                root_target,
                field_path.as_str(),
                &field.value_type,
                field_value,
            )?;
        } else if !matches!(field.presence, FieldPresence::Optional) {
            return Err(SchemaValidationError::missing_required_field(
                root_target,
                field_path.as_str(),
                key,
            ));
        }
    }
    Ok(())
}
fn validate_type_reference_value(
&self,
root_target: &str,
path: &str,
value_type: &TypeReference,
value: &Value,
) -> Result<(), SchemaValidationError> {
if matches!(value, Value::Null) {
if value_type.nullable {
return Ok(());
}
return Err(SchemaValidationError::type_mismatch(
root_target,
path,
value,
type_reference_label(value_type).as_str(),
));
}
match &value_type.kind {
TypeReferenceKind::Primitive(primitive) => {
validate_primitive_scalar(primitive.keyword(), *primitive, value)
.map_err(|error| SchemaValidationError::from_scalar(path, root_target, primitive.keyword(), error))
}
TypeReferenceKind::Array(item_type) => {
let Value::Array(items) = value else {
return Err(SchemaValidationError::type_mismatch(
root_target,
path,
value,
type_reference_label(value_type).as_str(),
));
};
for (index, item) in items.iter().enumerate() {
let item_path = array_index_path(path, index);
self.validate_type_reference_value(
root_target,
item_path.as_str(),
item_type,
item,
)?;
}
Ok(())
}
TypeReferenceKind::Named(name) => match self.get(name.as_str()) {
Some(_) => self.validate_named_target_value(root_target, path, name.as_str(), value),
None => Err(SchemaValidationError::unknown_target(root_target, path, value)),
},
}
}
fn validate_named_target_value(
&self,
root_target: &str,
path: &str,
target: &str,
value: &Value,
) -> Result<(), SchemaValidationError> {
match self.get(target) {
Some(RegistryEntry::BuiltinScalar(builtin)) => validate_builtin_scalar(
target,
*builtin,
builtin.name(),
value,
)
.map_err(|error| SchemaValidationError::from_scalar(path, root_target, target, error)),
Some(RegistryEntry::Scalar(definition)) => self
.validate_scalar_definition_value(definition, value)
.map_err(|error| SchemaValidationError::from_scalar(path, root_target, target, error)),
Some(RegistryEntry::Schema(definition)) => {
self.validate_schema_definition_value(root_target, path, definition, value)
}
None => Err(SchemaValidationError::unknown_target(root_target, path, value)),
}
}
}
/// The default registry is the one pre-populated with the built-in scalars.
impl Default for SchemaRegistry {
fn default() -> Self {
Self::with_builtins()
}
}
/// Errors raised while registering definitions or checking their references.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SchemaRegistryError {
    /// The name is a primitive keyword or a built-in scalar name.
    ReservedName(String),
    /// An entry with this name already exists.
    DuplicateName(String),
    /// `owner` refers to `reference`, which is not registered.
    UnknownReference { owner: String, reference: String },
    /// `scalar` uses the schema `reference` as its scalar base.
    InvalidScalarBaseReference { scalar: String, reference: String },
    /// The listed names form a reference cycle.
    CircularReference(Vec<String>),
}

impl Display for SchemaRegistryError {
    /// Renders a one-line, human-readable description of the error.
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        match self {
            Self::ReservedName(name) => write!(f, "{name} is reserved and cannot be redefined"),
            Self::DuplicateName(name) => write!(f, "{name} is already defined"),
            Self::UnknownReference { owner, reference } => {
                write!(f, "{owner} references unknown validation target {reference}")
            }
            Self::InvalidScalarBaseReference { scalar, reference } => write!(
                f,
                "scalar {scalar} cannot use schema {reference} as a scalar base"
            ),
            Self::CircularReference(path) => {
                let chain = path.join(" -> ");
                write!(
                    f,
                    "schema declarations contain a circular reference: {}",
                    chain
                )
            }
        }
    }
}

impl std::error::Error for SchemaRegistryError {}
/// Describes why a value failed validation against a scalar target.
#[derive(Debug, Clone, PartialEq)]
pub struct ScalarValidationError {
/// Name of the scalar target the value was checked against.
pub target: String,
/// Copy of the offending value.
pub actual: Value,
/// Type name of `actual` as reported by `json_value_type_name`.
pub actual_type: String,
/// The specific failure.
pub reason: ScalarValidationErrorKind,
}
impl ScalarValidationError {
    /// Builds an error for `target`, cloning `value` and recording its JSON type name.
    fn new(target: &str, value: &Value, reason: ScalarValidationErrorKind) -> Self {
        let actual_type = json_value_type_name(value).to_string();
        Self {
            target: target.to_string(),
            actual: value.clone(),
            actual_type,
            reason,
        }
    }

    /// `target` is not registered.
    fn unknown_target(target: &str, value: &Value) -> Self {
        let reason = ScalarValidationErrorKind::UnknownTarget(target.to_string());
        Self::new(target, value, reason)
    }

    /// `target` names a schema instead of a scalar.
    fn schema_target(target: &str, value: &Value) -> Self {
        let reason = ScalarValidationErrorKind::TargetIsSchema(target.to_string());
        Self::new(target, value, reason)
    }

    /// The value does not have the `expected` JSON type.
    fn type_mismatch(target: &str, value: &Value, expected: impl Into<String>) -> Self {
        let expected = expected.into();
        Self::new(
            target,
            value,
            ScalarValidationErrorKind::TypeMismatch { expected },
        )
    }

    /// The value equals none of the `expected` literals.
    fn enum_mismatch(target: &str, value: &Value, expected: Vec<ScalarLiteral>) -> Self {
        let reason = ScalarValidationErrorKind::EnumMismatch { expected };
        Self::new(target, value, reason)
    }

    /// The value does not satisfy the named `format`.
    fn format_mismatch(target: &str, value: &Value, format: impl Into<String>) -> Self {
        let format = format.into();
        Self::new(
            target,
            value,
            ScalarValidationErrorKind::FormatMismatch { format },
        )
    }

    /// The predicate names a `format` that is not a built-in scalar.
    fn unknown_format(target: &str, value: &Value, format: impl Into<String>) -> Self {
        let reason = ScalarValidationErrorKind::UnknownFormat(format.into());
        Self::new(target, value, reason)
    }

    /// The string length `actual_length` lies outside `min`..`max`.
    fn length_out_of_range(
        target: &str,
        value: &Value,
        min: Option<usize>,
        max: Option<usize>,
        actual_length: usize,
    ) -> Self {
        let reason = ScalarValidationErrorKind::LengthOutOfRange {
            min,
            max,
            actual_length,
        };
        Self::new(target, value, reason)
    }

    /// The string does not match `pattern`.
    fn pattern_mismatch(target: &str, value: &Value, pattern: impl Into<String>) -> Self {
        let pattern = pattern.into();
        Self::new(
            target,
            value,
            ScalarValidationErrorKind::PatternMismatch { pattern },
        )
    }

    /// `pattern` failed to compile; `message` carries the compiler's explanation.
    fn invalid_pattern(
        target: &str,
        value: &Value,
        pattern: impl Into<String>,
        message: impl Into<String>,
    ) -> Self {
        let pattern = pattern.into();
        let message = message.into();
        Self::new(
            target,
            value,
            ScalarValidationErrorKind::InvalidPattern { pattern, message },
        )
    }

    /// The number `actual` lies outside `min`..`max`.
    fn range_out_of_range(
        target: &str,
        value: &Value,
        min: Option<NumericValue>,
        max: Option<NumericValue>,
        actual: NumericValue,
    ) -> Self {
        let reason = ScalarValidationErrorKind::RangeOutOfRange { min, max, actual };
        Self::new(target, value, reason)
    }
}
/// The specific reason a scalar validation failed.
#[derive(Debug, Clone, PartialEq)]
pub enum ScalarValidationErrorKind {
/// No entry is registered under this name.
UnknownTarget(String),
/// The name refers to a schema, not a scalar.
TargetIsSchema(String),
/// The value's JSON type differs from `expected`.
TypeMismatch { expected: String },
/// The value matched none of the `expected` literals.
EnumMismatch { expected: Vec<ScalarLiteral> },
/// The value does not satisfy the named format.
FormatMismatch { format: String },
/// The format name does not parse to a built-in scalar.
UnknownFormat(String),
/// The string's character count is outside the bounds.
LengthOutOfRange {
min: Option<usize>,
max: Option<usize>,
actual_length: usize,
},
/// The string does not match the regular expression.
PatternMismatch {
pattern: String,
},
/// The regular expression itself failed to compile.
InvalidPattern {
pattern: String,
message: String,
},
/// The number is outside the bounds.
RangeOutOfRange {
min: Option<NumericValue>,
max: Option<NumericValue>,
actual: NumericValue,
},
}
impl Display for ScalarValidationError {
    /// Renders a one-line message whose wording depends on the failure kind.
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        let Self {
            target: owner,
            actual: got,
            actual_type,
            reason,
        } = self;
        match reason {
            ScalarValidationErrorKind::UnknownTarget(target) => {
                write!(f, "Unknown scalar validation target {target}")
            }
            ScalarValidationErrorKind::TargetIsSchema(target) => {
                write!(f, "{target} is a schema, not a scalar validation target")
            }
            ScalarValidationErrorKind::TypeMismatch { expected } => write!(
                f,
                "{} expected {}, got {} ({})",
                owner, expected, actual_type, got
            ),
            ScalarValidationErrorKind::EnumMismatch { expected } => {
                let allowed = scalar_literal_list(expected);
                write!(f, "{} expected one of [{}], got {}", owner, allowed, got)
            }
            ScalarValidationErrorKind::FormatMismatch { format } => {
                write!(f, "{} does not match format {}", got, format)
            }
            ScalarValidationErrorKind::UnknownFormat(format) => {
                write!(f, "{} uses unknown format {}", owner, format)
            }
            ScalarValidationErrorKind::LengthOutOfRange {
                min,
                max,
                actual_length,
            } => {
                let bounds = render_usize_bounds(*min, *max);
                write!(
                    f,
                    "{} length {} is outside {}",
                    owner, actual_length, bounds
                )
            }
            ScalarValidationErrorKind::PatternMismatch { pattern } => {
                write!(f, "{} does not match pattern /{pattern}/", got)
            }
            ScalarValidationErrorKind::InvalidPattern { pattern, message } => {
                write!(f, "{} has invalid pattern /{pattern}/: {message}", owner)
            }
            ScalarValidationErrorKind::RangeOutOfRange { min, max, actual } => {
                let rendered = render_numeric_value(actual);
                let bounds = render_numeric_bounds(min.as_ref(), max.as_ref());
                write!(f, "{} value {} is outside {}", owner, rendered, bounds)
            }
        }
    }
}

impl std::error::Error for ScalarValidationError {}
/// Describes why a value failed validation against a schema target.
#[derive(Debug, Clone, PartialEq)]
pub struct SchemaValidationError {
/// Name of the root target the validation was started for.
pub target: String,
/// Location of the offending value inside the document; the root is `$`.
pub path: String,
/// Copy of the offending value (`Value::Null` for a missing required field).
pub actual: Value,
/// Type name of `actual`, or `"missing"` for a missing required field.
pub actual_type: String,
/// The specific failure.
pub reason: SchemaValidationErrorKind,
}
impl SchemaValidationError {
fn new(
target: &str,
path: &str,
value: &Value,
reason: SchemaValidationErrorKind,
) -> Self {
Self {
target: target.to_string(),
path: path.to_string(),
actual: value.clone(),
actual_type: json_value_type_name(value).to_string(),
reason,
}
}
fn unknown_target(target: &str, path: &str, value: &Value) -> Self {
Self::new(
target,
path,
value,
SchemaValidationErrorKind::UnknownTarget(target.to_string()),
)
}
fn scalar_target(target: &str, path: &str, value: &Value) -> Self {
Self::new(
target,
path,
value,
SchemaValidationErrorKind::TargetIsScalar(target.to_string()),
)
}
fn missing_required_field(target: &str, path: &str, field: &str) -> Self {
Self {
target: target.to_string(),
path: path.to_string(),
actual: Value::Null,
actual_type: "missing".to_string(),
reason: SchemaValidationErrorKind::MissingRequiredField {
field: field.to_string(),
},
}
}
fn type_mismatch(target: &str, path: &str, value: &Value, expected: &str) -> Self {
Self::new(
target,
path,
value,
SchemaValidationErrorKind::TypeMismatch {
expected: expected.to_string(),
},
)
}
fn from_scalar(
path: &str,
target: &str,
scalar_target: &str,
error: ScalarValidationError,
) -> Self {
Self {
target: target.to_string(),
path: path.to_string(),
actual: error.actual,
actual_type: error.actual_type,
reason: SchemaValidationErrorKind::ScalarViolation {
target: scalar_target.to_string(),
reason: error.reason,
},
}
}
fn one_of_no_match(target: &str, path: &str, value: &Value, candidates: &[String]) -> Self {
Self::new(
target,
path,
value,
SchemaValidationErrorKind::OneOfNoMatch {
candidates: candidates.to_vec(),
},
)
}
fn one_of_multiple_matches(target: &str, path: &str, value: &Value, matches: &[String]) -> Self {
Self::new(
target,
path,
value,
SchemaValidationErrorKind::OneOfMultipleMatches {
matches: matches.to_vec(),
},
)
}
fn any_of_no_match(target: &str, path: &str, value: &Value, candidates: &[String]) -> Self {
Self::new(
target,
path,
value,
SchemaValidationErrorKind::AnyOfNoMatch {
candidates: candidates.to_vec(),
},
)
}
fn not_matched(target: &str, path: &str, value: &Value, excluded_target: &str) -> Self {
Self::new(
target,
path,
value,
SchemaValidationErrorKind::NotMatched {
target: excluded_target.to_string(),
},
)
}
fn discriminator_unknown_tag(
target: &str,
path: &str,
value: &Value,
field: &str,
expected: Vec<ScalarLiteral>,
) -> Self {
Self::new(
target,
path,
value,
SchemaValidationErrorKind::DiscriminatorUnknownTag {
field: field.to_string(),
expected,
},
)
}
}
/// The specific reason a schema validation failed.
#[derive(Debug, Clone, PartialEq)]
pub enum SchemaValidationErrorKind {
/// No entry is registered under this name.
UnknownTarget(String),
/// The name refers to a scalar, not a schema.
TargetIsScalar(String),
/// A required object field is absent.
MissingRequiredField { field: String },
/// The value's type differs from `expected`.
TypeMismatch { expected: String },
/// No one-of candidate accepted the value.
OneOfNoMatch { candidates: Vec<String> },
/// Several one-of candidates accepted the value.
OneOfMultipleMatches { matches: Vec<String> },
/// No any-of candidate accepted the value.
AnyOfNoMatch { candidates: Vec<String> },
/// The value was accepted by a target it had to be rejected by.
NotMatched { target: String },
/// The discriminator field's value matches none of the branch tags.
DiscriminatorUnknownTag { field: String, expected: Vec<ScalarLiteral> },
/// A scalar target rejected the value; `reason` is the scalar-level failure.
ScalarViolation {
target: String,
reason: ScalarValidationErrorKind,
},
}
impl Display for SchemaValidationError {
    /// Renders a one-line message whose wording depends on the failure kind.
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        let Self {
            target: root,
            path,
            actual: got,
            actual_type,
            reason,
        } = self;
        match reason {
            SchemaValidationErrorKind::UnknownTarget(target) => {
                write!(f, "Unknown schema validation target {target}")
            }
            SchemaValidationErrorKind::TargetIsScalar(target) => {
                write!(f, "{target} is a scalar target, not an object or array schema")
            }
            SchemaValidationErrorKind::MissingRequiredField { field } => {
                write!(f, "{} is missing required field {} at {}", root, field, path)
            }
            SchemaValidationErrorKind::TypeMismatch { expected } => write!(
                f,
                "{} expected {} at {}, got {} ({})",
                root, expected, path, actual_type, got
            ),
            SchemaValidationErrorKind::OneOfNoMatch { candidates } => {
                let listed = candidates.join(", ");
                write!(
                    f,
                    "{} expected exactly one of [{}] at {}, but none matched",
                    root, listed, path
                )
            }
            SchemaValidationErrorKind::OneOfMultipleMatches { matches } => {
                let listed = matches.join(", ");
                write!(
                    f,
                    "{} expected exactly one branch at {}, but multiple matched: [{}]",
                    root, path, listed
                )
            }
            SchemaValidationErrorKind::AnyOfNoMatch { candidates } => {
                let listed = candidates.join(", ");
                write!(
                    f,
                    "{} expected one or more of [{}] at {}, but none matched",
                    root, listed, path
                )
            }
            SchemaValidationErrorKind::NotMatched { target } => write!(
                f,
                "{} expected {} to fail at {}, but it matched",
                root, target, path
            ),
            SchemaValidationErrorKind::DiscriminatorUnknownTag { field, expected } => {
                let listed = scalar_literal_list(expected);
                write!(
                    f,
                    "{} discriminator field {} at {} did not match any of [{}]",
                    root, field, path, listed
                )
            }
            SchemaValidationErrorKind::ScalarViolation { target, reason } => {
                let detail = render_scalar_reason(reason);
                write!(
                    f,
                    "{} failed scalar target {} at {}: {}",
                    root, target, path, detail
                )
            }
        }
    }
}

impl std::error::Error for SchemaValidationError {}
/// Per-node marker for a graph traversal.
/// NOTE(review): presumably used by `visit_node` (defined outside this view) for
/// cycle detection — confirm there.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum VisitState {
/// Presumably: the node is currently being traversed — confirm in `visit_node`.
Visiting,
/// Presumably: the node has been fully traversed — confirm in `visit_node`.
Visited,
}
/// Checks that `value` has the JSON type of `primitive`.
///
/// # Errors
/// Returns a `TypeMismatch` for `target` whose expected type is the
/// primitive's keyword.
fn validate_primitive_scalar(
    target: &str,
    primitive: PrimitiveType,
    value: &Value,
) -> Result<(), ScalarValidationError> {
    if !primitive_matches(primitive, value) {
        let expected = primitive.keyword();
        return Err(ScalarValidationError::type_mismatch(target, value, expected));
    }
    Ok(())
}
/// Evaluates a single predicate against `value` on behalf of scalar `target`.
///
/// # Errors
/// * `EnumMismatch` when no literal matches.
/// * `UnknownFormat` when the format name is not a built-in scalar, otherwise
///   whatever the built-in check reports.
/// * `TypeMismatch` when a length or pattern predicate meets a non-string, or a
///   range predicate meets a non-number.
/// * `LengthOutOfRange`, `PatternMismatch`, `InvalidPattern`, `RangeOutOfRange`
///   for the respective predicate failures.
fn validate_scalar_predicate(
    target: &str,
    predicate: &ScalarPredicate,
    value: &Value,
) -> Result<(), ScalarValidationError> {
    match predicate {
        ScalarPredicate::Enum(expected) => {
            let allowed = expected
                .iter()
                .any(|literal| scalar_literal_matches(literal, value));
            if !allowed {
                return Err(ScalarValidationError::enum_mismatch(
                    target,
                    value,
                    expected.clone(),
                ));
            }
            Ok(())
        }
        ScalarPredicate::Format(format) => match BuiltinScalar::parse(format.as_str()) {
            Some(builtin) => validate_builtin_scalar(target, builtin, format.as_str(), value),
            None => Err(ScalarValidationError::unknown_format(
                target,
                value,
                format.clone(),
            )),
        },
        ScalarPredicate::Length { min, max } => {
            // Length is counted in `char`s, not bytes.
            let actual_length = require_string_value(target, value)?.chars().count();
            if !usize_in_bounds(actual_length, *min, *max) {
                return Err(ScalarValidationError::length_out_of_range(
                    target,
                    value,
                    *min,
                    *max,
                    actual_length,
                ));
            }
            Ok(())
        }
        ScalarPredicate::Pattern(pattern) => {
            // The type check comes first, so a non-string is reported even if
            // the pattern itself is invalid.
            let text = require_string_value(target, value)?;
            let regex = match Regex::new(pattern) {
                Ok(regex) => regex,
                Err(err) => {
                    return Err(ScalarValidationError::invalid_pattern(
                        target,
                        value,
                        pattern.clone(),
                        err.to_string(),
                    ))
                }
            };
            if !regex.is_match(text) {
                return Err(ScalarValidationError::pattern_mismatch(
                    target,
                    value,
                    pattern.clone(),
                ));
            }
            Ok(())
        }
        ScalarPredicate::Range { min, max } => {
            let actual = match json_numeric_value(value) {
                Some(actual) => actual,
                None => return Err(ScalarValidationError::type_mismatch(target, value, "number")),
            };
            if !numeric_in_bounds(&actual, min.as_ref(), max.as_ref()) {
                return Err(ScalarValidationError::range_out_of_range(
                    target,
                    value,
                    min.clone(),
                    max.clone(),
                    actual,
                ));
            }
            Ok(())
        }
    }
}
/// Validates `value` against a built-in scalar.
///
/// `NUMBER` only requires a JSON number. Every other built-in requires a
/// string that `builtin_scalar_matches` accepts. `display_format` is the
/// format name reported in a `FormatMismatch`.
fn validate_builtin_scalar(
    target: &str,
    builtin: BuiltinScalar,
    display_format: &str,
    value: &Value,
) -> Result<(), ScalarValidationError> {
    if matches!(builtin, BuiltinScalar::Number) {
        return match value {
            Value::Number(_) => Ok(()),
            _ => Err(ScalarValidationError::type_mismatch(target, value, "number")),
        };
    }
    let text = require_string_value(target, value)?;
    if !builtin_scalar_matches(builtin, text) {
        return Err(ScalarValidationError::format_mismatch(
            target,
            value,
            display_format.to_string(),
        ));
    }
    Ok(())
}
/// Borrows the text of a JSON string.
///
/// # Errors
/// Returns a `TypeMismatch` expecting `"string"` for any other JSON type.
fn require_string_value<'a>(
    target: &str,
    value: &'a Value,
) -> Result<&'a str, ScalarValidationError> {
    if let Value::String(text) = value {
        return Ok(text.as_str());
    }
    Err(ScalarValidationError::type_mismatch(target, value, "string"))
}
/// Reports whether `value` has the JSON type denoted by `primitive`.
/// `Integer` defers to `is_integer_value`; `Number` accepts any JSON number.
fn primitive_matches(primitive: PrimitiveType, value: &Value) -> bool {
    match (primitive, value) {
        (PrimitiveType::Integer, _) => is_integer_value(value),
        (PrimitiveType::String, Value::String(_))
        | (PrimitiveType::Number, Value::Number(_))
        | (PrimitiveType::Boolean, Value::Bool(_))
        | (PrimitiveType::Null, Value::Null) => true,
        _ => false,
    }
}
/// Reports whether `value` is a JSON number representable as `i64` or `u64`.
fn is_integer_value(value: &Value) -> bool {
    match value {
        Value::Number(number) => number.as_i64().is_some() || number.as_u64().is_some(),
        _ => false,
    }
}
fn scalar_literal_matches(literal: &ScalarLiteral, value: &Value) -> bool {
match literal {
ScalarLiteral::String(expected) => matches!(value, Value::String(actual) if actual == expected),
ScalarLiteral::Integer(expected) => json_numeric_value(value)
.map(|actual| numeric_values_equal(&actual, &NumericValue::Integer(*expected)))
.unwrap_or(false),
ScalarLiteral::Number(expected) => json_numeric_value(value)
.map(|actual| numeric_values_equal(&actual, &NumericValue::Number(*expected)))
.unwrap_or(false),
ScalarLiteral::Boolean(expected) => matches!(value, Value::Bool(actual) if actual == expected),
ScalarLiteral::Null => matches!(value, Value::Null),
}
}
/// Converts a JSON value into a `NumericValue`, or returns `None` for non-numbers.
///
/// Integers that fit in `i64` become `NumericValue::Integer`. Unsigned integers above
/// `i64::MAX` and all other numbers become `NumericValue::Number`.
fn json_numeric_value(value: &Value) -> Option<NumericValue> {
    let Value::Number(number) = value else {
        return None;
    };
    if let Some(integer) = number.as_i64() {
        return Some(NumericValue::Integer(integer));
    }
    // `as_i64` already accepted every integer that fits in `i64`, so anything `as_u64`
    // still accepts here is above `i64::MAX` and can only be carried (lossily) as `f64`.
    if let Some(unsigned) = number.as_u64() {
        return Some(NumericValue::Number(unsigned as f64));
    }
    number.as_f64().map(NumericValue::Number)
}
fn numeric_values_equal(left: &NumericValue, right: &NumericValue) -> bool {
(numeric_value_to_f64(left) - numeric_value_to_f64(right)).abs() <= f64::EPSILON
}
fn numeric_in_bounds(
actual: &NumericValue,
min: Option<&NumericValue>,
max: Option<&NumericValue>,
) -> bool {
let actual = numeric_value_to_f64(actual);
if let Some(min) = min {
if actual < numeric_value_to_f64(min) {
return false;
}
}
if let Some(max) = max {
if actual > numeric_value_to_f64(max) {
return false;
}
}
true
}
/// Converts a numeric value to `f64`.
///
/// Integers are cast with `as`, so very large magnitudes may lose precision.
fn numeric_value_to_f64(value: &NumericValue) -> f64 {
    match *value {
        NumericValue::Number(number) => number,
        NumericValue::Integer(integer) => integer as f64,
    }
}
/// Returns the type name used in error reports for a JSON value.
///
/// Numbers representable as `i64` or `u64` are reported as `"integer"`; all other
/// numbers are reported as `"number"`.
fn json_value_type_name(value: &Value) -> &'static str {
    match value {
        Value::Number(number) => {
            let integral = number.as_i64().is_some() || number.as_u64().is_some();
            if integral {
                "integer"
            } else {
                "number"
            }
        }
        Value::String(_) => "string",
        Value::Bool(_) => "boolean",
        Value::Null => "null",
        Value::Array(_) => "array",
        Value::Object(_) => "object",
    }
}
/// Reports whether `actual` lies within the inclusive range given by `min` and `max`.
///
/// A bound of `None` leaves that side unconstrained.
fn usize_in_bounds(actual: usize, min: Option<usize>, max: Option<usize>) -> bool {
    let meets_min = min.map_or(true, |lower| actual >= lower);
    let meets_max = max.map_or(true, |upper| actual <= upper);
    meets_min && meets_max
}
/// Reports whether `text` is a valid textual form of the given built-in scalar.
///
/// `NUMBER` never matches here: `validate_builtin_scalar` checks it against JSON
/// numbers rather than strings. `URI` requires the parsed URI to carry a scheme.
fn builtin_scalar_matches(builtin: BuiltinScalar, text: &str) -> bool {
    // Date-times without an offset are tried after the RFC 3339 parse fails.
    const NAIVE_DATE_TIME_FORMATS: [&str; 2] = ["%Y-%m-%dT%H:%M:%S", "%Y-%m-%dT%H:%M:%S%.f"];
    const TIME_FORMATS: [&str; 3] = ["%H:%M", "%H:%M:%S", "%H:%M:%S%.f"];

    match builtin {
        BuiltinScalar::Number => false,
        BuiltinScalar::Uuid => Uuid::parse_str(text).is_ok(),
        BuiltinScalar::Email => email_regex().is_match(text),
        BuiltinScalar::Uri => matches!(text.parse::<Uri>(), Ok(uri) if uri.scheme().is_some()),
        BuiltinScalar::Date => NaiveDate::parse_from_str(text, "%Y-%m-%d").is_ok(),
        BuiltinScalar::Time => TIME_FORMATS
            .iter()
            .any(|format| NaiveTime::parse_from_str(text, format).is_ok()),
        BuiltinScalar::DateTime => {
            DateTime::parse_from_rfc3339(text).is_ok()
                || NAIVE_DATE_TIME_FORMATS
                    .iter()
                    .any(|format| NaiveDateTime::parse_from_str(text, format).is_ok())
        }
    }
}
/// Returns the shared email-matching regex, compiling it on first use.
fn email_regex() -> &'static Regex {
    const EMAIL_PATTERN: &str =
        r"^[A-Za-z0-9.!#$%&'*+/=?^_`{|}~-]+@[A-Za-z0-9-]+(?:\.[A-Za-z0-9-]+)+$";
    static EMAIL_REGEX: OnceLock<Regex> = OnceLock::new();

    // The pattern is a fixed literal, so a compile failure would be a programming error.
    EMAIL_REGEX.get_or_init(|| Regex::new(EMAIL_PATTERN).expect("email regex should compile"))
}
fn scalar_literal_list(values: &[ScalarLiteral]) -> String {
values.iter().map(render_scalar_literal).collect::<Vec<_>>().join(", ")
}
/// Renders a scalar literal for error messages.
///
/// Strings are wrapped in double quotes (their content is not escaped); every other
/// literal uses its plain textual form.
fn render_scalar_literal(value: &ScalarLiteral) -> String {
    match value {
        ScalarLiteral::Null => String::from("null"),
        ScalarLiteral::Boolean(flag) => flag.to_string(),
        ScalarLiteral::Integer(whole) => whole.to_string(),
        ScalarLiteral::Number(real) => real.to_string(),
        ScalarLiteral::String(text) => format!("\"{}\"", text),
    }
}
/// Renders a numeric value using its `Display` form.
fn render_numeric_value(value: &NumericValue) -> String {
    match *value {
        NumericValue::Number(real) => real.to_string(),
        NumericValue::Integer(whole) => whole.to_string(),
    }
}
/// Renders numeric bounds as `min..max`, leaving a missing bound empty.
fn render_numeric_bounds(min: Option<&NumericValue>, max: Option<&NumericValue>) -> String {
    let lower = min.map(render_numeric_value).unwrap_or_default();
    let upper = max.map(render_numeric_value).unwrap_or_default();
    format!("{}..{}", lower, upper)
}
/// Renders length bounds as `min..max`, leaving a missing bound empty.
fn render_usize_bounds(min: Option<usize>, max: Option<usize>) -> String {
    let render = |bound: Option<usize>| bound.map_or_else(String::new, |value| value.to_string());
    let lower = render(min);
    let upper = render(max);
    format!("{}..{}", lower, upper)
}
/// Builds the human-readable message for a scalar validation failure reason.
fn render_scalar_reason(reason: &ScalarValidationErrorKind) -> String {
    match reason {
        // Target resolution failures.
        ScalarValidationErrorKind::UnknownTarget(target) => {
            format!("unknown scalar validation target {target}")
        }
        ScalarValidationErrorKind::TargetIsSchema(target) => {
            format!("{target} is a schema, not a scalar validation target")
        }

        // Type and enum failures.
        ScalarValidationErrorKind::TypeMismatch { expected } => {
            format!("type mismatch, expected {expected}")
        }
        ScalarValidationErrorKind::EnumMismatch { expected } => {
            let allowed = scalar_literal_list(expected);
            format!("expected one of [{}]", allowed)
        }

        // Format and pattern failures.
        ScalarValidationErrorKind::FormatMismatch { format } => {
            format!("format mismatch for {format}")
        }
        ScalarValidationErrorKind::UnknownFormat(format) => {
            format!("unknown format {format}")
        }
        ScalarValidationErrorKind::PatternMismatch { pattern } => {
            format!("pattern mismatch /{pattern}/")
        }
        ScalarValidationErrorKind::InvalidPattern { pattern, message } => {
            format!("invalid pattern /{pattern}/: {message}")
        }

        // Length and range failures.
        ScalarValidationErrorKind::LengthOutOfRange {
            min,
            max,
            actual_length,
        } => {
            let bounds = render_usize_bounds(*min, *max);
            format!("length {} is outside {}", actual_length, bounds)
        }
        ScalarValidationErrorKind::RangeOutOfRange { min, max, actual } => {
            let observed = render_numeric_value(actual);
            let bounds = render_numeric_bounds(min.as_ref(), max.as_ref());
            format!("value {} is outside {}", observed, bounds)
        }
    }
}
/// Renders a type reference as a label.
///
/// Primitives use their keyword, named references use their name, arrays append `[]`
/// to the item label, and nullable references get a trailing `?`.
fn type_reference_label(value_type: &TypeReference) -> String {
    let base = match &value_type.kind {
        TypeReferenceKind::Named(name) => name.clone(),
        TypeReferenceKind::Primitive(primitive) => String::from(primitive.keyword()),
        TypeReferenceKind::Array(item_type) => {
            let mut item_label = type_reference_label(item_type);
            item_label.push_str("[]");
            item_label
        }
    };
    let suffix = if value_type.nullable { "?" } else { "" };
    base + suffix
}
/// Builds the path of `field` beneath `base` by joining them with a dot.
///
/// The root path `"$"` needs no special handling: `"$"` joined with `"id"` is `"$.id"`.
fn object_field_path(base: &str, field: &str) -> String {
    let mut path = String::with_capacity(base.len() + 1 + field.len());
    path.push_str(base);
    path.push('.');
    path.push_str(field);
    path
}
/// Builds the path of element `index` beneath `base`, e.g. `$.foods[1]`.
fn array_index_path(base: &str, index: usize) -> String {
    let mut path = String::from(base);
    path.push('[');
    path.push_str(&index.to_string());
    path.push(']');
    path
}
/// Appends the names that `shape` depends on to `dependencies`.
///
/// Object fields, array items and aliases are resolved through
/// `collect_type_dependencies`, which may fail. Composition targets (`AllOf`, `OneOf`,
/// `AnyOf`, `Not`) and discriminator branch targets are appended as-is, without a
/// registry lookup.
fn collect_schema_dependencies(
    registry: &SchemaRegistry,
    shape: &SchemaShape,
    dependencies: &mut Vec<String>,
) -> Result<(), SchemaRegistryError> {
    match shape {
        SchemaShape::Array(item_type) | SchemaShape::Alias(item_type) => {
            collect_type_dependencies(registry, item_type, dependencies)
        }
        SchemaShape::Object(object) => object.fields.iter().try_for_each(|field| {
            collect_type_dependencies(registry, &field.value_type, dependencies)
        }),
        SchemaShape::Not(target) => {
            dependencies.push(target.clone());
            Ok(())
        }
        SchemaShape::AllOf(targets) | SchemaShape::OneOf(targets) | SchemaShape::AnyOf(targets) => {
            for target in targets {
                dependencies.push(target.clone());
            }
            Ok(())
        }
        SchemaShape::Discriminator { branches, .. } => {
            for branch in branches {
                dependencies.push(branch.target.clone());
            }
            Ok(())
        }
    }
}
/// Appends the registry name referenced by `value_type`, if any, to `dependencies`.
///
/// Primitives and built-in scalars contribute nothing, and arrays recurse into their
/// item type. A named reference to a user scalar or schema is appended. A name missing
/// from the registry yields `SchemaRegistryError::UnknownReference`.
fn collect_type_dependencies(
    registry: &SchemaRegistry,
    value_type: &TypeReference,
    dependencies: &mut Vec<String>,
) -> Result<(), SchemaRegistryError> {
    let reference = match &value_type.kind {
        TypeReferenceKind::Primitive(_) => return Ok(()),
        TypeReferenceKind::Array(item_type) => {
            return collect_type_dependencies(registry, item_type, dependencies);
        }
        TypeReferenceKind::Named(reference) => reference,
    };

    match registry.get(reference.as_str()) {
        // NOTE(review): the owning definition is not known here, so `owner` repeats the
        // reference name — confirm callers reject unknown references before this runs.
        None => Err(SchemaRegistryError::UnknownReference {
            owner: reference.clone(),
            reference: reference.clone(),
        }),
        Some(RegistryEntry::BuiltinScalar(_)) => Ok(()),
        Some(RegistryEntry::Scalar(_) | RegistryEntry::Schema(_)) => {
            dependencies.push(reference.clone());
            Ok(())
        }
    }
}
/// Depth-first visit of `name` in the dependency `graph`, failing on the first cycle.
///
/// `stack` holds the names on the current path. When a neighbor is already marked
/// `Visiting`, the path from that neighbor's position on the stack, followed by the
/// neighbor again, is returned as `SchemaRegistryError::CircularReference`.
fn visit_node(
    name: &str,
    graph: &BTreeMap<String, Vec<String>>,
    visit_state: &mut BTreeMap<String, VisitState>,
    stack: &mut Vec<String>,
) -> Result<(), SchemaRegistryError> {
    visit_state.insert(name.to_string(), VisitState::Visiting);
    stack.push(name.to_string());

    // A node without a graph entry simply has no neighbors to visit.
    for neighbor in graph.get(name).into_iter().flatten() {
        match visit_state.get(neighbor.as_str()) {
            None => visit_node(neighbor.as_str(), graph, visit_state, stack)?,
            Some(VisitState::Visited) => {}
            Some(VisitState::Visiting) => {
                let cycle_start = stack
                    .iter()
                    .position(|entry| entry == neighbor)
                    .unwrap_or(0);
                let cycle: Vec<String> = stack[cycle_start..]
                    .iter()
                    .cloned()
                    .chain(std::iter::once(neighbor.clone()))
                    .collect();
                return Err(SchemaRegistryError::CircularReference(cycle));
            }
        }
    }

    stack.pop();
    visit_state.insert(name.to_string(), VisitState::Visited);
    Ok(())
}
#[cfg(test)]
mod tests {
use serde_json::json;
use super::{
BuiltinScalar, FieldPresence, NumericValue, ObjectSchema, PrimitiveType, ScalarBase,
ScalarDefinition, ScalarExpression, ScalarLiteral, ScalarPredicate, ScalarValidationErrorKind,
SchemaDefinition, SchemaField, SchemaRegistry, SchemaRegistryError, SchemaShape,
SchemaValidationErrorKind, TypeReference, TypeReferenceKind, is_reserved_name,
};
/// A default registry holds exactly the built-in scalars, resolvable by name.
#[test]
fn registry_starts_with_builtin_scalars() {
    let registry = SchemaRegistry::default();
    let builtin_count = BuiltinScalar::all().len();
    assert_eq!(registry.len(), builtin_count);

    let uuid_entry = registry.get("UUID");
    assert!(matches!(
        uuid_entry,
        Some(super::RegistryEntry::BuiltinScalar(BuiltinScalar::Uuid))
    ));

    let email_entry = registry.get("EMAIL");
    assert!(matches!(
        email_entry,
        Some(super::RegistryEntry::BuiltinScalar(BuiltinScalar::Email))
    ));
}
/// Built-in scalar names and primitive keywords are reserved; ordinary names are not.
#[test]
fn reserved_names_cover_builtin_and_primitive_types() {
    for reserved in ["UUID", "NUMBER", "DATE_TIME", "string", "boolean"] {
        assert!(is_reserved_name(reserved), "{reserved} should be reserved");
    }
    assert!(!is_reserved_name("User"));
}
/// Registering a user scalar under a built-in name fails with `ReservedName`.
#[test]
fn registry_rejects_reserved_names() {
    let enum_predicate = ScalarPredicate::Enum(vec![ScalarLiteral::String("value".to_string())]);
    let expression = ScalarExpression::new(
        Some(ScalarBase::Primitive(PrimitiveType::String)),
        vec![enum_predicate],
    );
    let shadowing_definition = ScalarDefinition::new("UUID", expression);

    let mut registry = SchemaRegistry::default();
    let outcome = registry.register_scalar(shadowing_definition);

    assert_eq!(
        outcome.unwrap_err(),
        SchemaRegistryError::ReservedName("UUID".to_string())
    );
}
/// Registering the same scalar name twice fails with `DuplicateName` the second time.
#[test]
fn registry_rejects_duplicate_user_names() {
    let states = vec![
        ScalarLiteral::String("queued".to_string()),
        ScalarLiteral::String("done".to_string()),
    ];
    let expression = ScalarExpression::new(
        Some(ScalarBase::Primitive(PrimitiveType::String)),
        vec![ScalarPredicate::Enum(states)],
    );
    let status = ScalarDefinition::new("Status", expression);

    let mut registry = SchemaRegistry::default();
    registry.register_scalar(status.clone()).unwrap();
    let second_attempt = registry.register_scalar(status);

    assert_eq!(
        second_attempt.unwrap_err(),
        SchemaRegistryError::DuplicateName("Status".to_string())
    );
}
/// An object schema keeps each field's presence, nullability and type as constructed.
#[test]
fn schema_model_preserves_field_presence_and_nullability() {
    let fields = vec![
        SchemaField::required("id", TypeReference::named("UUID")),
        SchemaField::optional("birthday", TypeReference::named("DATE").nullable()),
        SchemaField::required(
            "favoriteFoods",
            TypeReference::array(TypeReference::named("Food")),
        ),
    ];
    let schema = SchemaDefinition::new("User", SchemaShape::Object(ObjectSchema::open(fields)));

    let object = match schema.shape {
        SchemaShape::Object(object) => object,
        _ => panic!("expected object schema"),
    };

    assert!(object.open);

    let id = &object.fields[0];
    assert_eq!(id.presence, FieldPresence::Required);

    let birthday = &object.fields[1];
    assert_eq!(birthday.presence, FieldPresence::Optional);
    assert!(birthday.value_type.nullable);

    let favorite_foods = &object.fields[2];
    let expected_kind = TypeReferenceKind::Array(Box::new(TypeReference::named("Food")));
    assert_eq!(favorite_foods.value_type.kind, expected_kind);
}
/// A scalar may name a base scalar that is only registered afterwards.
#[test]
fn validation_allows_forward_scalar_references() {
    // HANDLE builds on WORD, but is registered first.
    let handle = ScalarDefinition::new(
        "HANDLE",
        ScalarExpression::new(
            Some(ScalarBase::Named("WORD".to_string())),
            vec![ScalarPredicate::Length {
                min: Some(3),
                max: Some(24),
            }],
        ),
    );
    let word = ScalarDefinition::new(
        "WORD",
        ScalarExpression::new(
            Some(ScalarBase::Primitive(PrimitiveType::String)),
            vec![ScalarPredicate::Pattern("^[a-z]+$".to_string())],
        ),
    );

    let mut registry = SchemaRegistry::default();
    registry.register_scalar(handle).unwrap();
    registry.register_scalar(word).unwrap();

    assert!(registry.validate_references().is_ok());
}
/// A field that names an unregistered type is reported with its owning schema.
#[test]
fn validation_rejects_unknown_named_references() {
    let id_field = SchemaField::required("id", TypeReference::named("AccountId"));
    let user = SchemaDefinition::new(
        "User",
        SchemaShape::Object(ObjectSchema::open(vec![id_field])),
    );

    let mut registry = SchemaRegistry::default();
    registry.register_schema(user).unwrap();

    let expected = SchemaRegistryError::UnknownReference {
        owner: "User".to_string(),
        reference: "AccountId".to_string(),
    };
    assert_eq!(registry.validate_references().unwrap_err(), expected);
}
/// A scalar whose base names a schema is rejected during reference validation.
#[test]
fn validation_rejects_scalar_base_reference_to_schema() {
    let id_field = SchemaField::required("id", TypeReference::named("UUID"));
    let user_schema = SchemaDefinition::new(
        "User",
        SchemaShape::Object(ObjectSchema::open(vec![id_field])),
    );
    let user_id_scalar = ScalarDefinition::new(
        "UserId",
        ScalarExpression::new(Some(ScalarBase::Named("User".to_string())), vec![]),
    );

    let mut registry = SchemaRegistry::default();
    registry.register_schema(user_schema).unwrap();
    registry.register_scalar(user_id_scalar).unwrap();

    let expected = SchemaRegistryError::InvalidScalarBaseReference {
        scalar: "UserId".to_string(),
        reference: "User".to_string(),
    };
    assert_eq!(registry.validate_references().unwrap_err(), expected);
}
/// Two schemas that reference each other are reported as a circular reference.
#[test]
fn validation_rejects_cyclic_schema_references() {
    let address = SchemaDefinition::new(
        "Address",
        SchemaShape::Object(ObjectSchema::open(vec![SchemaField::required(
            "user",
            TypeReference::named("User"),
        )])),
    );
    let user = SchemaDefinition::new(
        "User",
        SchemaShape::Object(ObjectSchema::open(vec![SchemaField::required(
            "address",
            TypeReference::named("Address"),
        )])),
    );

    let mut registry = SchemaRegistry::default();
    registry.register_schema(address).unwrap();
    registry.register_schema(user).unwrap();

    let expected_cycle = vec![
        "Address".to_string(),
        "User".to_string(),
        "Address".to_string(),
    ];
    assert_eq!(
        registry.validate_references().unwrap_err(),
        SchemaRegistryError::CircularReference(expected_cycle)
    );
}
/// Each built-in scalar accepts a representative well-formed value.
#[test]
fn built_in_scalar_validation_accepts_common_formats() {
    let registry = SchemaRegistry::default();
    let accepted = [
        ("UUID", json!("550e8400-e29b-41d4-a716-446655440000")),
        ("EMAIL", json!("alice@example.com")),
        ("NUMBER", json!(1)),
        ("NUMBER", json!(3.45)),
        ("NUMBER", json!(-3.2)),
        ("NUMBER", json!(-5)),
        ("DATE", json!("2026-05-02")),
        ("DATE_TIME", json!("2026-05-02T10:15:30Z")),
        ("TIME", json!("10:15:30")),
        ("URI", json!("https://example.com/users/1")),
    ];

    for (target, value) in accepted {
        assert!(
            registry.validate_scalar_target(target, &value).is_ok(),
            "{target} should accept {value}"
        );
    }
}
/// Built-in scalars report the actual and expected JSON types on a type mismatch.
#[test]
fn built_in_scalar_validation_rejects_wrong_types() {
    let registry = SchemaRegistry::default();

    // A string-backed scalar given an integer.
    let uuid_failure = registry
        .validate_scalar_target("UUID", &json!(123))
        .unwrap_err();
    let expects_string = ScalarValidationErrorKind::TypeMismatch {
        expected: "string".to_string(),
    };
    assert_eq!(uuid_failure.actual_type, "integer");
    assert_eq!(uuid_failure.reason, expects_string);

    // NUMBER given a numeric-looking string.
    let number_failure = registry
        .validate_scalar_target("NUMBER", &json!("3.45"))
        .unwrap_err();
    let expects_number = ScalarValidationErrorKind::TypeMismatch {
        expected: "number".to_string(),
    };
    assert_eq!(number_failure.actual_type, "string");
    assert_eq!(number_failure.reason, expects_number);
}
/// A `Format("uuid")` predicate accepts UUID text and reports a format mismatch otherwise.
#[test]
fn format_predicate_reuses_builtin_scalar_validation() {
    let user_id = ScalarDefinition::new(
        "UserId",
        ScalarExpression::new(
            Some(ScalarBase::Primitive(PrimitiveType::String)),
            vec![ScalarPredicate::Format("uuid".to_string())],
        ),
    );

    let mut registry = SchemaRegistry::default();
    registry.register_scalar(user_id).unwrap();
    registry.validate_references().unwrap();

    let well_formed = json!("550e8400-e29b-41d4-a716-446655440000");
    assert!(registry.validate_scalar_target("UserId", &well_formed).is_ok());

    let malformed = json!("not-a-uuid");
    let failure = registry
        .validate_scalar_target("UserId", &malformed)
        .unwrap_err();
    assert_eq!(
        failure.reason,
        ScalarValidationErrorKind::FormatMismatch {
            format: "uuid".to_string(),
        }
    );
}
/// Enum, length, pattern and range predicates accept valid values and report violations.
#[test]
fn scalar_predicates_validate_enum_length_pattern_and_range() {
    let status = ScalarDefinition::new(
        "Status",
        ScalarExpression::new(
            None,
            vec![ScalarPredicate::Enum(vec![
                ScalarLiteral::String("queued".to_string()),
                ScalarLiteral::String("running".to_string()),
                ScalarLiteral::String("done".to_string()),
            ])],
        ),
    );
    let handle = ScalarDefinition::new(
        "Handle",
        ScalarExpression::new(
            Some(ScalarBase::Primitive(PrimitiveType::String)),
            vec![
                ScalarPredicate::Length {
                    min: Some(3),
                    max: Some(24),
                },
                ScalarPredicate::Pattern("^[a-z][a-z0-9_]*$".to_string()),
            ],
        ),
    );
    let age = ScalarDefinition::new(
        "Age",
        ScalarExpression::new(
            Some(ScalarBase::Primitive(PrimitiveType::Integer)),
            vec![ScalarPredicate::Range {
                min: Some(NumericValue::Integer(0)),
                max: Some(NumericValue::Integer(130)),
            }],
        ),
    );

    let mut registry = SchemaRegistry::default();
    for definition in [status, handle, age] {
        registry.register_scalar(definition).unwrap();
    }
    registry.validate_references().unwrap();

    // Valid values pass.
    assert!(registry.validate_scalar_target("Status", &json!("queued")).is_ok());
    assert!(registry.validate_scalar_target("Handle", &json!("alice_1")).is_ok());
    assert!(registry.validate_scalar_target("Age", &json!(42)).is_ok());

    // A value outside the enum.
    let status_failure = registry
        .validate_scalar_target("Status", &json!("blocked"))
        .unwrap_err();
    assert!(matches!(
        status_failure.reason,
        ScalarValidationErrorKind::EnumMismatch { .. }
    ));

    // A handle shorter than the minimum length.
    let handle_failure = registry
        .validate_scalar_target("Handle", &json!("ab"))
        .unwrap_err();
    let expected_length_failure = ScalarValidationErrorKind::LengthOutOfRange {
        min: Some(3),
        max: Some(24),
        actual_length: 2,
    };
    assert_eq!(handle_failure.reason, expected_length_failure);

    // An age above the maximum.
    let age_failure = registry
        .validate_scalar_target("Age", &json!(131))
        .unwrap_err();
    let expected_range_failure = ScalarValidationErrorKind::RangeOutOfRange {
        min: Some(NumericValue::Integer(0)),
        max: Some(NumericValue::Integer(130)),
        actual: NumericValue::Integer(131),
    };
    assert_eq!(age_failure.reason, expected_range_failure);
}
/// A format name that is not a built-in scalar is reported as `UnknownFormat`.
#[test]
fn scalar_validation_reports_unknown_format_predicates() {
    let token = ScalarDefinition::new(
        "Token",
        ScalarExpression::new(
            Some(ScalarBase::Primitive(PrimitiveType::String)),
            vec![ScalarPredicate::Format("custom".to_string())],
        ),
    );

    let mut registry = SchemaRegistry::default();
    registry.register_scalar(token).unwrap();
    registry.validate_references().unwrap();

    let failure = registry
        .validate_scalar_target("Token", &json!("value"))
        .unwrap_err();
    let expected = ScalarValidationErrorKind::UnknownFormat("custom".to_string());
    assert_eq!(failure.reason, expected);
}
/// A pattern that fails to compile surfaces as `InvalidPattern` when a value is validated.
#[test]
fn scalar_validation_reports_invalid_pattern_predicates() {
    let broken_word = ScalarDefinition::new(
        "BrokenWord",
        ScalarExpression::new(
            Some(ScalarBase::Primitive(PrimitiveType::String)),
            vec![ScalarPredicate::Pattern("[".to_string())],
        ),
    );

    let mut registry = SchemaRegistry::default();
    registry.register_scalar(broken_word).unwrap();
    registry.validate_references().unwrap();

    let failure = registry
        .validate_scalar_target("BrokenWord", &json!("alice"))
        .unwrap_err();
    assert!(matches!(
        failure.reason,
        ScalarValidationErrorKind::InvalidPattern { .. }
    ));
}
/// An open object schema accepts null for nullable fields, extra fields, and omitted
/// optional fields.
#[test]
fn schema_validation_accepts_required_optional_nullable_and_extra_fields() {
    let food = ScalarDefinition::new(
        "Food",
        ScalarExpression::new(
            None,
            vec![ScalarPredicate::Enum(vec![
                ScalarLiteral::String("pizza".to_string()),
                ScalarLiteral::String("taco".to_string()),
            ])],
        ),
    );
    let user = SchemaDefinition::new(
        "User",
        SchemaShape::Object(ObjectSchema::open(vec![
            SchemaField::required("id", TypeReference::named("UUID")),
            SchemaField::optional("birthday", TypeReference::named("DATE").nullable()),
            SchemaField::optional("favoriteFood", TypeReference::named("Food")),
        ])),
    );

    let mut registry = SchemaRegistry::default();
    registry.register_scalar(food).unwrap();
    registry.register_schema(user).unwrap();
    registry.validate_references().unwrap();

    // Every declared field present, plus an undeclared "nickname".
    let full_user = json!({
        "id": "550e8400-e29b-41d4-a716-446655440000",
        "birthday": null,
        "favoriteFood": "pizza",
        "nickname": "ben"
    });
    assert!(registry.validate_schema_target("User", &full_user).is_ok());

    // Only the required field present.
    let minimal_user = json!({
        "id": "550e8400-e29b-41d4-a716-446655440000"
    });
    assert!(registry.validate_schema_target("User", &minimal_user).is_ok());
}
/// An object lacking a required field is rejected at that field's path.
#[test]
fn schema_validation_rejects_missing_required_field() {
    let user = SchemaDefinition::new(
        "User",
        SchemaShape::Object(ObjectSchema::open(vec![SchemaField::required(
            "id",
            TypeReference::named("UUID"),
        )])),
    );

    let mut registry = SchemaRegistry::default();
    registry.register_schema(user).unwrap();
    registry.validate_references().unwrap();

    let empty_object = json!({});
    let failure = registry
        .validate_schema_target("User", &empty_object)
        .unwrap_err();

    assert_eq!(failure.path, "$.id");
    assert_eq!(failure.actual_type, "missing");
    assert_eq!(
        failure.reason,
        SchemaValidationErrorKind::MissingRequiredField {
            field: "id".to_string(),
        }
    );
}
/// A null value in a non-nullable field is reported as a type mismatch.
#[test]
fn schema_validation_rejects_non_nullable_null_fields() {
    let user = SchemaDefinition::new(
        "User",
        SchemaShape::Object(ObjectSchema::open(vec![SchemaField::required(
            "id",
            TypeReference::named("UUID"),
        )])),
    );

    let mut registry = SchemaRegistry::default();
    registry.register_schema(user).unwrap();
    registry.validate_references().unwrap();

    let null_id = json!({ "id": null });
    let failure = registry
        .validate_schema_target("User", &null_id)
        .unwrap_err();

    assert_eq!(failure.path, "$.id");
    assert!(matches!(
        failure.reason,
        SchemaValidationErrorKind::TypeMismatch { .. }
    ));
}
/// Fields that name another schema are validated recursively, with nested error paths.
#[test]
fn schema_validation_validates_nested_named_schemas() {
    let address = SchemaDefinition::new(
        "Address",
        SchemaShape::Object(ObjectSchema::open(vec![
            SchemaField::required("city", TypeReference::primitive(PrimitiveType::String)),
            SchemaField::required("postalCode", TypeReference::primitive(PrimitiveType::String)),
        ])),
    );
    let user = SchemaDefinition::new(
        "User",
        SchemaShape::Object(ObjectSchema::open(vec![
            SchemaField::required("id", TypeReference::named("UUID")),
            SchemaField::required("address", TypeReference::named("Address")),
        ])),
    );

    let mut registry = SchemaRegistry::default();
    registry.register_schema(address).unwrap();
    registry.register_schema(user).unwrap();
    registry.validate_references().unwrap();

    let complete = json!({
        "id": "550e8400-e29b-41d4-a716-446655440000",
        "address": {
            "city": "Austin",
            "postalCode": "78701"
        }
    });
    assert!(registry.validate_schema_target("User", &complete).is_ok());

    // The nested address is missing its postal code.
    let incomplete = json!({
        "id": "550e8400-e29b-41d4-a716-446655440000",
        "address": {
            "city": "Austin"
        }
    });
    let failure = registry
        .validate_schema_target("User", &incomplete)
        .unwrap_err();

    assert_eq!(failure.path, "$.address.postalCode");
    assert_eq!(
        failure.reason,
        SchemaValidationErrorKind::MissingRequiredField {
            field: "postalCode".to_string(),
        }
    );
}
/// A root array schema validates each element and reports the failing index in the path.
#[test]
fn schema_validation_validates_root_array_schemas() {
    let user = SchemaDefinition::new(
        "User",
        SchemaShape::Object(ObjectSchema::open(vec![SchemaField::required(
            "id",
            TypeReference::named("UUID"),
        )])),
    );
    let users = SchemaDefinition::new(
        "Users",
        SchemaShape::Array(TypeReference::array(TypeReference::named("User"))),
    );

    let mut registry = SchemaRegistry::default();
    registry.register_schema(user).unwrap();
    registry.register_schema(users).unwrap();
    registry.validate_references().unwrap();

    let all_valid = json!([
        { "id": "550e8400-e29b-41d4-a716-446655440000" },
        { "id": "123e4567-e89b-12d3-a456-426614174000" }
    ]);
    assert!(registry.validate_schema_target("Users", &all_valid).is_ok());

    // The second element carries an id that is not a UUID.
    let second_invalid = json!([
        { "id": "550e8400-e29b-41d4-a716-446655440000" },
        { "id": "invalid" }
    ]);
    let failure = registry
        .validate_schema_target("Users", &second_invalid)
        .unwrap_err();

    assert_eq!(failure.path, "$[1].id");
    assert!(matches!(
        failure.reason,
        SchemaValidationErrorKind::ScalarViolation { .. }
    ));
}
/// Array fields of a named scalar validate every element against that scalar.
#[test]
fn schema_validation_validates_arrays_of_named_scalars() {
    let food = ScalarDefinition::new(
        "Food",
        ScalarExpression::new(
            None,
            vec![ScalarPredicate::Enum(vec![
                ScalarLiteral::String("pizza".to_string()),
                ScalarLiteral::String("taco".to_string()),
            ])],
        ),
    );
    let meal_plan = SchemaDefinition::new(
        "MealPlan",
        SchemaShape::Object(ObjectSchema::open(vec![SchemaField::required(
            "foods",
            TypeReference::array(TypeReference::named("Food")),
        )])),
    );

    let mut registry = SchemaRegistry::default();
    registry.register_scalar(food).unwrap();
    registry.register_schema(meal_plan).unwrap();
    registry.validate_references().unwrap();

    let known_foods = json!({ "foods": ["pizza", "taco"] });
    assert!(registry.validate_schema_target("MealPlan", &known_foods).is_ok());

    // "sushi" is not one of the Food enum values.
    let unknown_food = json!({ "foods": ["pizza", "sushi"] });
    let failure = registry
        .validate_schema_target("MealPlan", &unknown_food)
        .unwrap_err();

    assert_eq!(failure.path, "$.foods[1]");
    assert!(matches!(
        failure.reason,
        SchemaValidationErrorKind::ScalarViolation { .. }
    ));
}
}