Skip to main content

jtd/
validate.rs

1use crate::{Schema, Type};
2use chrono::DateTime;
3use serde_json::Value;
4use std::borrow::Cow;
5use thiserror::Error;
6
/// Options you can pass to [`validate()`].
///
/// Build one with [`ValidateOptions::new`] and refine it with the chainable
/// `with_*` methods. Every limit defaults to `0`, which means "no limit".
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub struct ValidateOptions {
    /// Limit on how deeply `ref`s are followed; `0` disables the limit.
    max_depth: usize,
    /// Limit on how many error indicators are collected; `0` disables the
    /// limit.
    max_errors: usize,
}

impl ValidateOptions {
    /// Construct a new set of options with all default values.
    ///
    /// Equivalent to [`Default::default()`] or calling `with_max_depth(0)` and
    /// `with_max_errors(0)`.
    #[must_use]
    pub fn new() -> Self {
        Self::default()
    }

    /// Sets the maximum "depth" of references to follow in [`validate()`].
    ///
    /// This option exists to handle the possibility of an infinite loop in a
    /// schema. For instance, this is a valid schema:
    ///
    /// ```json
    /// { "ref": "loop", "definitions": { "loop": { "ref": "loop" }}}
    /// ```
    ///
    /// There are good reasons to sometimes have self-referential schemas -- for
    /// instance, to describe a recursive data structure. What `with_max_depth`
    /// does is limit how many recursive `ref` nodes will be followed before
    /// [`validate()`] errors with [`ValidateError::MaxDepthExceeded`].
    ///
    /// The default max depth of `0` indicates that no max depth should be
    /// implemented. An infinite `ref` loop will eventually overflow the stack
    /// during [`validate()`].
    ///
    /// This method consumes `self` and returns the updated options, so the
    /// result must be used.
    #[must_use]
    pub fn with_max_depth(mut self, max_depth: usize) -> Self {
        self.max_depth = max_depth;
        self
    }

    /// Sets the maximum number of validation errors to return from
    /// [`validate()`].
    ///
    /// This option exists as an optimization for [`validate()`]. If all you
    /// care about is whether an input is valid, then consider using
    /// `with_max_errors(1)` to have [`validate()`] immediately return after
    /// finding a validation error.
    ///
    /// The default max errors of `0` indicates that all errors will be
    /// returned.
    ///
    /// This method consumes `self` and returns the updated options, so the
    /// result must be used.
    #[must_use]
    pub fn with_max_errors(mut self, max_errors: usize) -> Self {
        self.max_errors = max_errors;
        self
    }
}
60
61/// Errors that may arise from [`validate()`].
62#[derive(Clone, Debug, PartialEq, Eq, Error)]
63pub enum ValidateError {
64    /// The maximum depth, as specified by [`ValidateOptions::with_max_depth`],
65    /// was exceeded.
66    ///
67    /// ```
68    /// use serde_json::json;
69    /// use jtd::{Schema, ValidateError, ValidateOptions};
70    ///
71    /// let schema = Schema::from_serde_schema(
72    ///     serde_json::from_value(json!({
73    ///         "definitions": {
74    ///             "loop": { "ref": "loop" },
75    ///         },
76    ///         "ref": "loop",
77    ///     }))
78    ///     .unwrap(),
79    /// )
80    /// .unwrap();
81    ///
82    /// assert_eq!(
83    ///     ValidateError::MaxDepthExceeded,
84    ///     jtd::validate(
85    ///         &schema,
86    ///         &json!(null),
87    ///         ValidateOptions::new().with_max_depth(3)
88    ///     )
89    ///     .unwrap_err()
90    /// )
91    /// ```
92    #[error("max depth exceeded")]
93    MaxDepthExceeded,
94}
95
/// One validation error reported by [`validate()`].
///
/// The *Indicator* suffix is deliberate: this is *not* a Rust error type. It
/// is a plain struct modelling the "validation error indicator" concept from
/// the JSON Typedef specification. See
/// [RFC8927, Section 3.2](https://tools.ietf.org/html/rfc8927#section-3.2).
///
/// To avoid unnecessary allocations, path segments are stored as
/// [`std::borrow::Cow`] rather than [`String`]. If you would rather work with
/// owned strings and do not mind copying the data, call
/// [`into_owned_paths`][`ValidationErrorIndicator::into_owned_paths`] to get a
/// pair of plain `Vec<String>`s.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ValidationErrorIndicator<'a> {
    /// Path to the part of the instance that was rejected.
    pub instance_path: Vec<Cow<'a, str>>,

    /// Path to the part of the schema that rejected the instance.
    pub schema_path: Vec<Cow<'a, str>>,
}

impl<'a> ValidationErrorIndicator<'a> {
    /// Consumes the indicator and returns `(instance_path, schema_path)` as
    /// owned strings.
    ///
    /// Convenience for callers who do not want to deal with
    /// [`std::borrow::Cow`]. Borrowed segments are copied; owned segments are
    /// moved out as-is.
    ///
    /// ```
    /// use std::borrow::Cow;
    ///
    /// let indicator = jtd::ValidationErrorIndicator {
    ///     instance_path: vec![Cow::Borrowed("foo")],
    ///     schema_path: vec![Cow::Owned("bar".to_owned())],
    /// };
    ///
    /// let (instance_path, schema_path) = indicator.into_owned_paths();
    /// assert_eq!(vec!["foo".to_owned()], instance_path);
    /// assert_eq!(vec!["bar".to_owned()], schema_path);
    /// ```
    pub fn into_owned_paths(self) -> (Vec<String>, Vec<String>) {
        let Self {
            instance_path,
            schema_path,
        } = self;

        // Both paths are converted the same way, so share one closure.
        let own_segments = |segments: Vec<Cow<'a, str>>| -> Vec<String> {
            segments.into_iter().map(Cow::into_owned).collect()
        };

        (own_segments(instance_path), own_segments(schema_path))
    }
}
149
150/// Validates a schema against an instance, returning a set of error indicators.
151///
152/// In keeping with the conventions of RFC8927, the "input" JSON -- the second
153/// argument to this function -- is called an *instance*.
154///
155/// The set of error indicators returned is specified by the JSON Typedef
156/// specification. The ordering of those errors is not defined by the JSON
157/// Typedef specification, and is subject to change in a future version of this
158/// crate.
159///
160/// ```
161/// use jtd::{Schema, ValidationErrorIndicator, ValidateOptions};
162/// use serde_json::json;
163///
164/// let schema = Schema::from_serde_schema(
165///     serde_json::from_value(json!({
166///         "elements": {
167///             "type": "uint8"
168///         }
169///     })).unwrap()).unwrap();
170///
171/// let instance = serde_json::json!([ "a", "b", "c" ]);
172///
173/// // By default, jtd::validate() will return all errors in the input.
174/// let validate_options = ValidateOptions::new();
175/// let errors = jtd::validate(&schema, &instance, validate_options).unwrap();
176/// assert_eq!(
177///     vec![
178///         ValidationErrorIndicator {
179///             instance_path: vec!["0".to_owned().into()],
180///             schema_path: vec!["elements".into(), "type".into()],
181///         },
182///         ValidationErrorIndicator {
183///             instance_path: vec!["1".to_owned().into()],
184///             schema_path: vec!["elements".into(), "type".into()],
185///         },
186///         ValidationErrorIndicator {
187///             instance_path: vec!["2".to_owned().into()],
188///             schema_path: vec!["elements".into(), "type".into()],
189///         },
190///     ],
191///     errors,
192/// );
193///
194/// // If you don't care about validation errors beyond a certain amount of
195/// // errors, use with_max_errors on the ValidateOptions you pass to validate.
196/// let validate_options = ValidateOptions::new().with_max_errors(1);
197/// let errors = jtd::validate(&schema, &instance, validate_options).unwrap();
198/// assert_eq!(
199///     vec![
200///         ValidationErrorIndicator {
201///             instance_path: vec!["0".to_owned().into()],
202///             schema_path: vec!["elements".into(), "type".into()],
203///         },
204///     ],
205///     errors,
206/// );
207/// ```
208///
209/// # Security considerations
210///
211/// (This note is copied from [the top-level documentation][`crate`], because
212/// it's important.)
213///
214/// If you're running [`validate()`] with untrusted schemas (untrusted inputs is
215/// fine), then be aware of this security consideration from RFC 8927:
216///
217/// > Implementations that evaluate user-inputted schemas SHOULD implement
218/// > mechanisms to detect and abort circular references that might cause a
219/// > naive implementation to go into an infinite loop.  Without such
220/// > mechanisms, implementations may be vulnerable to denial-of-service
221/// > attacks.
222///
223/// This crate supports that "detect and abort" mechanism via
224/// [`ValidateOptions::with_max_depth`]. Please see that documentation if you're
225/// validating data against untrusted schemas.
226pub fn validate<'a>(
227    schema: &'a Schema,
228    instance: &'a Value,
229    options: ValidateOptions,
230) -> Result<Vec<ValidationErrorIndicator<'a>>, ValidateError> {
231    let mut vm = Vm::new(schema, options);
232
233    match vm.validate(schema, None, instance) {
234        Ok(()) | Err(VmValidateError::MaxErrorsReached) => Ok(vm.into_errors()),
235        Err(VmValidateError::MaxDepthExceeded) => Err(ValidateError::MaxDepthExceeded),
236    }
237}
238
239struct Vm<'a> {
240    root: &'a Schema,
241    options: ValidateOptions,
242    instance_tokens: Vec<Cow<'a, str>>,
243    schema_tokens: Vec<Vec<Cow<'a, str>>>,
244    errors: Vec<ValidationErrorIndicator<'a>>,
245}
246
247enum VmValidateError {
248    MaxErrorsReached,
249    MaxDepthExceeded,
250}
251
252impl<'a> Vm<'a> {
253    pub fn new(schema: &'a Schema, options: ValidateOptions) -> Self {
254        Self {
255            root: schema,
256            options,
257            instance_tokens: vec![],
258            schema_tokens: vec![vec![]],
259            errors: vec![],
260        }
261    }
262
263    pub fn into_errors(self) -> Vec<ValidationErrorIndicator<'a>> {
264        self.errors
265    }
266
267    pub fn validate(
268        &mut self,
269        schema: &'a Schema,
270        parent_tag: Option<&'a str>,
271        instance: &'a Value,
272    ) -> Result<(), VmValidateError> {
273        if instance.is_null() && schema.nullable() {
274            return Ok(());
275        }
276
277        match schema {
278            Schema::Empty { .. } => {}
279            Schema::Ref { ref_, .. } => {
280                self.schema_tokens
281                    .push(vec!["definitions".into(), ref_.into()]);
282
283                if self.schema_tokens.len() == self.options.max_depth {
284                    return Err(VmValidateError::MaxDepthExceeded);
285                }
286
287                self.validate(&self.root.definitions()[ref_], None, instance)?;
288                self.schema_tokens.pop();
289            }
290            Schema::Type { type_, .. } => {
291                self.push_schema_token("type");
292
293                match type_ {
294                    Type::Boolean => {
295                        if !instance.is_boolean() {
296                            self.push_error()?;
297                        }
298                    }
299                    Type::Float32 | Type::Float64 => {
300                        if !instance.is_f64() && !instance.is_i64() {
301                            self.push_error()?;
302                        }
303                    }
304                    Type::Int8 => self.validate_int(instance, -128.0, 127.0)?,
305                    Type::Uint8 => self.validate_int(instance, 0.0, 255.0)?,
306                    Type::Int16 => self.validate_int(instance, -32768.0, 32767.0)?,
307                    Type::Uint16 => self.validate_int(instance, 0.0, 65535.0)?,
308                    Type::Int32 => self.validate_int(instance, -2147483648.0, 2147483647.0)?,
309                    Type::Uint32 => self.validate_int(instance, 0.0, 4294967295.0)?,
310                    Type::String => {
311                        if !instance.is_string() {
312                            self.push_error()?;
313                        }
314                    }
315                    Type::Timestamp => {
316                        if let Some(s) = instance.as_str() {
317                            if DateTime::parse_from_rfc3339(s).is_err() {
318                                self.push_error()?;
319                            }
320                        } else {
321                            self.push_error()?;
322                        }
323                    }
324                };
325
326                self.pop_schema_token();
327            }
328            Schema::Enum { enum_, .. } => {
329                self.push_schema_token("enum");
330                if let Some(s) = instance.as_str() {
331                    if !enum_.contains(s) {
332                        self.push_error()?;
333                    }
334                } else {
335                    self.push_error()?;
336                }
337                self.pop_schema_token();
338            }
339            Schema::Elements { elements, .. } => {
340                self.push_schema_token("elements");
341
342                if let Some(arr) = instance.as_array() {
343                    for (i, sub_instance) in arr.iter().enumerate() {
344                        // This is the only case where we push a non-Borrowed
345                        // instance token. We handle pushing to instance_tokens
346                        // manually here, to keep push_instance_token simpler.
347                        self.instance_tokens.push(Cow::Owned(i.to_string()));
348
349                        self.validate(elements, None, sub_instance)?;
350                        self.pop_instance_token();
351                    }
352                } else {
353                    self.push_error()?;
354                }
355
356                self.pop_schema_token();
357            }
358            Schema::Properties {
359                properties,
360                optional_properties,
361                properties_is_present,
362                additional_properties,
363                ..
364            } => {
365                if let Some(obj) = instance.as_object() {
366                    self.push_schema_token("properties");
367                    for (name, sub_schema) in properties {
368                        self.push_schema_token(name);
369                        if let Some(sub_instance) = obj.get(name) {
370                            self.push_instance_token(name);
371                            self.validate(sub_schema, None, sub_instance)?;
372                            self.pop_instance_token();
373                        } else {
374                            self.push_error()?;
375                        }
376                        self.pop_schema_token();
377                    }
378                    self.pop_schema_token();
379
380                    self.push_schema_token("optionalProperties");
381                    for (name, sub_schema) in optional_properties {
382                        self.push_schema_token(name);
383                        if let Some(sub_instance) = obj.get(name) {
384                            self.push_instance_token(name);
385                            self.validate(sub_schema, None, sub_instance)?;
386                            self.pop_instance_token();
387                        }
388                        self.pop_schema_token();
389                    }
390                    self.pop_schema_token();
391
392                    if !*additional_properties {
393                        for name in obj.keys() {
394                            if parent_tag != Some(name)
395                                && !properties.contains_key(name)
396                                && !optional_properties.contains_key(name)
397                            {
398                                self.push_instance_token(name);
399                                self.push_error()?;
400                                self.pop_instance_token();
401                            }
402                        }
403                    }
404                } else {
405                    self.push_schema_token(if *properties_is_present {
406                        "properties"
407                    } else {
408                        "optionalProperties"
409                    });
410                    self.push_error()?;
411                    self.pop_schema_token();
412                }
413            }
414            Schema::Values { values, .. } => {
415                self.push_schema_token("values");
416
417                if let Some(obj) = instance.as_object() {
418                    for (name, sub_instance) in obj {
419                        self.push_instance_token(name);
420                        self.validate(values, None, sub_instance)?;
421                        self.pop_instance_token();
422                    }
423                } else {
424                    self.push_error()?;
425                }
426
427                self.pop_schema_token();
428            }
429            Schema::Discriminator {
430                discriminator,
431                mapping,
432                ..
433            } => {
434                if let Some(obj) = instance.as_object() {
435                    if let Some(tag) = obj.get(discriminator) {
436                        if let Some(tag) = tag.as_str() {
437                            if let Some(schema) = mapping.get(tag) {
438                                self.push_schema_token("mapping");
439                                self.push_schema_token(tag);
440                                self.validate(schema, Some(discriminator), instance)?;
441                                self.pop_schema_token();
442                                self.pop_schema_token();
443                            } else {
444                                self.push_schema_token("mapping");
445                                self.push_instance_token(discriminator);
446                                self.push_error()?;
447                                self.pop_instance_token();
448                                self.pop_schema_token();
449                            }
450                        } else {
451                            self.push_schema_token("discriminator");
452                            self.push_instance_token(discriminator);
453                            self.push_error()?;
454                            self.pop_instance_token();
455                            self.pop_schema_token();
456                        }
457                    } else {
458                        self.push_schema_token("discriminator");
459                        self.push_error()?;
460                        self.pop_schema_token();
461                    }
462                } else {
463                    self.push_schema_token("discriminator");
464                    self.push_error()?;
465                    self.pop_schema_token();
466                }
467            }
468        };
469
470        Ok(())
471    }
472
473    fn validate_int(
474        &mut self,
475        instance: &Value,
476        min: f64,
477        max: f64,
478    ) -> Result<(), VmValidateError> {
479        if let Some(val) = instance.as_f64() {
480            if val.fract() != 0.0 || val < min || val > max {
481                self.push_error()
482            } else {
483                Ok(())
484            }
485        } else {
486            self.push_error()
487        }
488    }
489
490    fn push_error(&mut self) -> Result<(), VmValidateError> {
491        self.errors.push(ValidationErrorIndicator {
492            instance_path: self.instance_tokens.clone(),
493            schema_path: self.schema_tokens.last().unwrap().clone(),
494        });
495
496        if self.options.max_errors == self.errors.len() {
497            Err(VmValidateError::MaxErrorsReached)
498        } else {
499            Ok(())
500        }
501    }
502
503    fn push_schema_token(&mut self, token: &'a str) {
504        self.schema_tokens.last_mut().unwrap().push(token.into());
505    }
506
507    fn pop_schema_token(&mut self) {
508        self.schema_tokens.last_mut().unwrap().pop().unwrap();
509    }
510
511    fn push_instance_token(&mut self, token: &'a str) {
512        self.instance_tokens.push(token.into());
513    }
514
515    fn pop_instance_token(&mut self) {
516        self.instance_tokens.pop().unwrap();
517    }
518}
519
#[cfg(test)]
mod tests {
    use super::{validate, ValidateError, ValidateOptions};
    use crate::Schema;
    use serde_json::{json, Value};

    /// Builds a [`Schema`] from raw JSON, panicking if it is malformed.
    fn schema_from(raw: Value) -> Schema {
        let serde_schema: crate::SerdeSchema = serde_json::from_value(raw).unwrap();
        Schema::from_serde_schema(serde_schema).unwrap()
    }

    /// A self-referential schema must trip the configured depth limit.
    #[test]
    fn max_depth() {
        let schema = schema_from(json!({
            "definitions": {
                "loop": { "ref": "loop" },
            },
            "ref": "loop",
        }));
        let instance = json!(null);
        let options = ValidateOptions::new().with_max_depth(3);

        let outcome = validate(&schema, &instance, options);

        assert_eq!(ValidateError::MaxDepthExceeded, outcome.unwrap_err())
    }

    /// No more than `max_errors` indicators are returned.
    #[test]
    fn max_errors() {
        let schema = schema_from(json!({
            "elements": { "type": "string" }
        }));
        let instance = json!([null, null, null, null, null]);
        let options = ValidateOptions::new().with_max_errors(3);

        let indicators = validate(&schema, &instance, options).unwrap();

        assert_eq!(3, indicators.len())
    }

    /// Runs every case of the JSON Typedef validation test suite and compares
    /// the returned indicators (as a set) with the expected ones.
    #[test]
    fn validation_spec() {
        use std::collections::{BTreeMap, HashSet};

        #[derive(serde::Deserialize, PartialEq, Debug, Eq, Hash)]
        #[serde(rename_all = "camelCase")]
        struct TestCaseError {
            instance_path: Vec<String>,
            schema_path: Vec<String>,
        }

        #[derive(serde::Deserialize)]
        struct TestCase {
            schema: crate::SerdeSchema,
            instance: serde_json::Value,
            errors: Vec<TestCaseError>,
        }

        let suite = include_str!("../json-typedef-spec/tests/validation.json");
        let test_cases: BTreeMap<String, TestCase> =
            serde_json::from_str(suite).expect("parse validation.json");

        for (name, case) in test_cases {
            let TestCase {
                schema,
                instance,
                errors,
            } = case;

            let schema = Schema::from_serde_schema(schema).expect(&name);
            schema.validate().expect(&name);

            let actual: HashSet<TestCaseError> =
                validate(&schema, &instance, ValidateOptions::new())
                    .expect(&name)
                    .into_iter()
                    .map(|indicator| {
                        let (instance_path, schema_path) = indicator.into_owned_paths();
                        TestCaseError {
                            instance_path,
                            schema_path,
                        }
                    })
                    .collect();

            let expected: HashSet<TestCaseError> = errors.into_iter().collect();

            assert_eq!(
                expected, actual,
                "wrong validation errors returned: {}",
                &name
            );
        }
    }
}