jtd/validate.rs
1use crate::{Schema, Type};
2use chrono::DateTime;
3use serde_json::Value;
4use std::borrow::Cow;
5use thiserror::Error;
6
/// Options you can pass to [`validate()`].
///
/// Construct a value with [`ValidateOptions::new`] (or [`Default::default()`])
/// and refine it with the chainable `with_*` methods. Each of those methods
/// takes the options by value and returns the updated options, so the result
/// must be used.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub struct ValidateOptions {
    /// Limit on `ref` nesting during validation; `0` means no limit.
    max_depth: usize,

    /// Limit on the number of errors returned; `0` means return all errors.
    max_errors: usize,
}

impl ValidateOptions {
    /// Construct a new set of options with all default values.
    ///
    /// Equivalent to [`Default::default()`] or calling `with_max_depth(0)` and
    /// `with_max_errors(0)`.
    #[must_use]
    pub fn new() -> Self {
        Self::default()
    }

    /// Sets the maximum "depth" of references to follow in [`validate()`].
    ///
    /// This option exists to handle the possibility of an infinite loop in a
    /// schema. For instance, this is a valid schema:
    ///
    /// ```json
    /// { "ref": "loop", "definitions": { "loop": { "ref": "loop" }}}
    /// ```
    ///
    /// There are good reasons to sometimes have self-referential schemas -- for
    /// instance, to describe a recursive data structure. What `with_max_depth`
    /// does is limit how many recursive `ref` nodes will be followed before
    /// [`validate()`] errors with [`ValidateError::MaxDepthExceeded`].
    ///
    /// The default max depth of `0` indicates that no max depth should be
    /// implemented. An infinite `ref` loop will eventually overflow the stack
    /// during [`validate()`].
    #[must_use]
    pub fn with_max_depth(mut self, max_depth: usize) -> Self {
        self.max_depth = max_depth;
        self
    }

    /// Sets the maximum number of validation errors to return from
    /// [`validate()`].
    ///
    /// This option exists as an optimization for [`validate()`]. If all you
    /// care about is whether an input is valid, then consider using
    /// `with_max_errors(1)` to have [`validate()`] immediately return after
    /// finding a validation error.
    ///
    /// The default max errors of `0` indicates that all errors will be
    /// returned.
    #[must_use]
    pub fn with_max_errors(mut self, max_errors: usize) -> Self {
        self.max_errors = max_errors;
        self
    }
}
60
61/// Errors that may arise from [`validate()`].
62#[derive(Clone, Debug, PartialEq, Eq, Error)]
63pub enum ValidateError {
64 /// The maximum depth, as specified by [`ValidateOptions::with_max_depth`],
65 /// was exceeded.
66 ///
67 /// ```
68 /// use serde_json::json;
69 /// use jtd::{Schema, ValidateError, ValidateOptions};
70 ///
71 /// let schema = Schema::from_serde_schema(
72 /// serde_json::from_value(json!({
73 /// "definitions": {
74 /// "loop": { "ref": "loop" },
75 /// },
76 /// "ref": "loop",
77 /// }))
78 /// .unwrap(),
79 /// )
80 /// .unwrap();
81 ///
82 /// assert_eq!(
83 /// ValidateError::MaxDepthExceeded,
84 /// jtd::validate(
85 /// &schema,
86 /// &json!(null),
87 /// ValidateOptions::new().with_max_depth(3)
88 /// )
89 /// .unwrap_err()
90 /// )
91 /// ```
92 #[error("max depth exceeded")]
93 MaxDepthExceeded,
94}
95
/// A single validation error returned by [`validate()`].
///
/// This type has *Indicator* at the end of its name to emphasize that it is
/// *not* a Rust error. It is an ordinary struct, and corresponds to the concept
/// of a validation error indicator in the JSON Typedef specification. See
/// [RFC8927, Section 3.2](https://tools.ietf.org/html/rfc8927#section-3.2).
///
/// In order to avoid unnecessary allocations, this struct uses
/// [`std::borrow::Cow`] instead of [`String`] directly. If you would prefer not
/// to have to deal with that, and are OK with copying all the data out of this
/// struct, then use
/// [`into_owned_paths`][`ValidationErrorIndicator::into_owned_paths`] to
/// convert instances of this type into a pair of plain old `Vec<String>`s.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct ValidationErrorIndicator<'a> {
    /// A path to the part of the instance that was rejected.
    pub instance_path: Vec<Cow<'a, str>>,

    /// A path to the part of the schema that rejected the instance.
    pub schema_path: Vec<Cow<'a, str>>,
}

impl<'a> ValidationErrorIndicator<'a> {
    /// Converts this struct into an `(instance_path, schema_path)` pair of
    /// owned strings.
    ///
    /// This is a convenience function for those who don't want to manipulate
    /// [`std::borrow::Cow`]. Borrowed tokens are copied into new `String`s;
    /// tokens that are already owned are moved out as they are.
    ///
    /// ```
    /// use std::borrow::Cow;
    ///
    /// let indicator = jtd::ValidationErrorIndicator {
    ///     instance_path: vec![Cow::Borrowed("foo")],
    ///     schema_path: vec![Cow::Owned("bar".to_owned())],
    /// };
    ///
    /// let (instance_path, schema_path) = indicator.into_owned_paths();
    /// assert_eq!(vec!["foo".to_owned()], instance_path);
    /// assert_eq!(vec!["bar".to_owned()], schema_path);
    /// ```
    #[must_use]
    pub fn into_owned_paths(self) -> (Vec<String>, Vec<String>) {
        (
            self.instance_path
                .into_iter()
                .map(Cow::into_owned)
                .collect(),
            self.schema_path.into_iter().map(Cow::into_owned).collect(),
        )
    }
}
149
150/// Validates a schema against an instance, returning a set of error indicators.
151///
152/// In keeping with the conventions of RFC8927, the "input" JSON -- the second
153/// argument to this function -- is called an *instance*.
154///
155/// The set of error indicators returned is specified by the JSON Typedef
156/// specification. The ordering of those errors is not defined by the JSON
157/// Typedef specification, and is subject to change in a future version of this
158/// crate.
159///
160/// ```
161/// use jtd::{Schema, ValidationErrorIndicator, ValidateOptions};
162/// use serde_json::json;
163///
164/// let schema = Schema::from_serde_schema(
165/// serde_json::from_value(json!({
166/// "elements": {
167/// "type": "uint8"
168/// }
169/// })).unwrap()).unwrap();
170///
171/// let instance = serde_json::json!([ "a", "b", "c" ]);
172///
173/// // By default, jtd::validate() will return all errors in the input.
174/// let validate_options = ValidateOptions::new();
175/// let errors = jtd::validate(&schema, &instance, validate_options).unwrap();
176/// assert_eq!(
177/// vec![
178/// ValidationErrorIndicator {
179/// instance_path: vec!["0".to_owned().into()],
180/// schema_path: vec!["elements".into(), "type".into()],
181/// },
182/// ValidationErrorIndicator {
183/// instance_path: vec!["1".to_owned().into()],
184/// schema_path: vec!["elements".into(), "type".into()],
185/// },
186/// ValidationErrorIndicator {
187/// instance_path: vec!["2".to_owned().into()],
188/// schema_path: vec!["elements".into(), "type".into()],
189/// },
190/// ],
191/// errors,
192/// );
193///
194/// // If you don't care about validation errors beyond a certain amount of
195/// // errors, use with_max_errors on the ValidateOptions you pass to validate.
196/// let validate_options = ValidateOptions::new().with_max_errors(1);
197/// let errors = jtd::validate(&schema, &instance, validate_options).unwrap();
198/// assert_eq!(
199/// vec![
200/// ValidationErrorIndicator {
201/// instance_path: vec!["0".to_owned().into()],
202/// schema_path: vec!["elements".into(), "type".into()],
203/// },
204/// ],
205/// errors,
206/// );
207/// ```
208///
209/// # Security considerations
210///
211/// (This note is copied from [the top-level documentation][`crate`], because
212/// it's important.)
213///
214/// If you're running [`validate()`] with untrusted schemas (untrusted inputs is
215/// fine), then be aware of this security consideration from RFC 8927:
216///
217/// > Implementations that evaluate user-inputted schemas SHOULD implement
218/// > mechanisms to detect and abort circular references that might cause a
219/// > naive implementation to go into an infinite loop. Without such
220/// > mechanisms, implementations may be vulnerable to denial-of-service
221/// > attacks.
222///
223/// This crate supports that "detect and abort" mechanism via
224/// [`ValidateOptions::with_max_depth`]. Please see that documentation if you're
225/// validating data against untrusted schemas.
226pub fn validate<'a>(
227 schema: &'a Schema,
228 instance: &'a Value,
229 options: ValidateOptions,
230) -> Result<Vec<ValidationErrorIndicator<'a>>, ValidateError> {
231 let mut vm = Vm::new(schema, options);
232
233 match vm.validate(schema, None, instance) {
234 Ok(()) | Err(VmValidateError::MaxErrorsReached) => Ok(vm.into_errors()),
235 Err(VmValidateError::MaxDepthExceeded) => Err(ValidateError::MaxDepthExceeded),
236 }
237}
238
239struct Vm<'a> {
240 root: &'a Schema,
241 options: ValidateOptions,
242 instance_tokens: Vec<Cow<'a, str>>,
243 schema_tokens: Vec<Vec<Cow<'a, str>>>,
244 errors: Vec<ValidationErrorIndicator<'a>>,
245}
246
/// Reasons a validation run stops before the whole instance has been visited.
enum VmValidateError {
    /// The configured maximum number of errors has been recorded.
    MaxErrorsReached,

    /// The configured maximum reference depth has been reached.
    MaxDepthExceeded,
}
251
252impl<'a> Vm<'a> {
253 pub fn new(schema: &'a Schema, options: ValidateOptions) -> Self {
254 Self {
255 root: schema,
256 options,
257 instance_tokens: vec![],
258 schema_tokens: vec![vec![]],
259 errors: vec![],
260 }
261 }
262
263 pub fn into_errors(self) -> Vec<ValidationErrorIndicator<'a>> {
264 self.errors
265 }
266
267 pub fn validate(
268 &mut self,
269 schema: &'a Schema,
270 parent_tag: Option<&'a str>,
271 instance: &'a Value,
272 ) -> Result<(), VmValidateError> {
273 if instance.is_null() && schema.nullable() {
274 return Ok(());
275 }
276
277 match schema {
278 Schema::Empty { .. } => {}
279 Schema::Ref { ref_, .. } => {
280 self.schema_tokens
281 .push(vec!["definitions".into(), ref_.into()]);
282
283 if self.schema_tokens.len() == self.options.max_depth {
284 return Err(VmValidateError::MaxDepthExceeded);
285 }
286
287 self.validate(&self.root.definitions()[ref_], None, instance)?;
288 self.schema_tokens.pop();
289 }
290 Schema::Type { type_, .. } => {
291 self.push_schema_token("type");
292
293 match type_ {
294 Type::Boolean => {
295 if !instance.is_boolean() {
296 self.push_error()?;
297 }
298 }
299 Type::Float32 | Type::Float64 => {
300 if !instance.is_f64() && !instance.is_i64() {
301 self.push_error()?;
302 }
303 }
304 Type::Int8 => self.validate_int(instance, -128.0, 127.0)?,
305 Type::Uint8 => self.validate_int(instance, 0.0, 255.0)?,
306 Type::Int16 => self.validate_int(instance, -32768.0, 32767.0)?,
307 Type::Uint16 => self.validate_int(instance, 0.0, 65535.0)?,
308 Type::Int32 => self.validate_int(instance, -2147483648.0, 2147483647.0)?,
309 Type::Uint32 => self.validate_int(instance, 0.0, 4294967295.0)?,
310 Type::String => {
311 if !instance.is_string() {
312 self.push_error()?;
313 }
314 }
315 Type::Timestamp => {
316 if let Some(s) = instance.as_str() {
317 if DateTime::parse_from_rfc3339(s).is_err() {
318 self.push_error()?;
319 }
320 } else {
321 self.push_error()?;
322 }
323 }
324 };
325
326 self.pop_schema_token();
327 }
328 Schema::Enum { enum_, .. } => {
329 self.push_schema_token("enum");
330 if let Some(s) = instance.as_str() {
331 if !enum_.contains(s) {
332 self.push_error()?;
333 }
334 } else {
335 self.push_error()?;
336 }
337 self.pop_schema_token();
338 }
339 Schema::Elements { elements, .. } => {
340 self.push_schema_token("elements");
341
342 if let Some(arr) = instance.as_array() {
343 for (i, sub_instance) in arr.iter().enumerate() {
344 // This is the only case where we push a non-Borrowed
345 // instance token. We handle pushing to instance_tokens
346 // manually here, to keep push_instance_token simpler.
347 self.instance_tokens.push(Cow::Owned(i.to_string()));
348
349 self.validate(elements, None, sub_instance)?;
350 self.pop_instance_token();
351 }
352 } else {
353 self.push_error()?;
354 }
355
356 self.pop_schema_token();
357 }
358 Schema::Properties {
359 properties,
360 optional_properties,
361 properties_is_present,
362 additional_properties,
363 ..
364 } => {
365 if let Some(obj) = instance.as_object() {
366 self.push_schema_token("properties");
367 for (name, sub_schema) in properties {
368 self.push_schema_token(name);
369 if let Some(sub_instance) = obj.get(name) {
370 self.push_instance_token(name);
371 self.validate(sub_schema, None, sub_instance)?;
372 self.pop_instance_token();
373 } else {
374 self.push_error()?;
375 }
376 self.pop_schema_token();
377 }
378 self.pop_schema_token();
379
380 self.push_schema_token("optionalProperties");
381 for (name, sub_schema) in optional_properties {
382 self.push_schema_token(name);
383 if let Some(sub_instance) = obj.get(name) {
384 self.push_instance_token(name);
385 self.validate(sub_schema, None, sub_instance)?;
386 self.pop_instance_token();
387 }
388 self.pop_schema_token();
389 }
390 self.pop_schema_token();
391
392 if !*additional_properties {
393 for name in obj.keys() {
394 if parent_tag != Some(name)
395 && !properties.contains_key(name)
396 && !optional_properties.contains_key(name)
397 {
398 self.push_instance_token(name);
399 self.push_error()?;
400 self.pop_instance_token();
401 }
402 }
403 }
404 } else {
405 self.push_schema_token(if *properties_is_present {
406 "properties"
407 } else {
408 "optionalProperties"
409 });
410 self.push_error()?;
411 self.pop_schema_token();
412 }
413 }
414 Schema::Values { values, .. } => {
415 self.push_schema_token("values");
416
417 if let Some(obj) = instance.as_object() {
418 for (name, sub_instance) in obj {
419 self.push_instance_token(name);
420 self.validate(values, None, sub_instance)?;
421 self.pop_instance_token();
422 }
423 } else {
424 self.push_error()?;
425 }
426
427 self.pop_schema_token();
428 }
429 Schema::Discriminator {
430 discriminator,
431 mapping,
432 ..
433 } => {
434 if let Some(obj) = instance.as_object() {
435 if let Some(tag) = obj.get(discriminator) {
436 if let Some(tag) = tag.as_str() {
437 if let Some(schema) = mapping.get(tag) {
438 self.push_schema_token("mapping");
439 self.push_schema_token(tag);
440 self.validate(schema, Some(discriminator), instance)?;
441 self.pop_schema_token();
442 self.pop_schema_token();
443 } else {
444 self.push_schema_token("mapping");
445 self.push_instance_token(discriminator);
446 self.push_error()?;
447 self.pop_instance_token();
448 self.pop_schema_token();
449 }
450 } else {
451 self.push_schema_token("discriminator");
452 self.push_instance_token(discriminator);
453 self.push_error()?;
454 self.pop_instance_token();
455 self.pop_schema_token();
456 }
457 } else {
458 self.push_schema_token("discriminator");
459 self.push_error()?;
460 self.pop_schema_token();
461 }
462 } else {
463 self.push_schema_token("discriminator");
464 self.push_error()?;
465 self.pop_schema_token();
466 }
467 }
468 };
469
470 Ok(())
471 }
472
473 fn validate_int(
474 &mut self,
475 instance: &Value,
476 min: f64,
477 max: f64,
478 ) -> Result<(), VmValidateError> {
479 if let Some(val) = instance.as_f64() {
480 if val.fract() != 0.0 || val < min || val > max {
481 self.push_error()
482 } else {
483 Ok(())
484 }
485 } else {
486 self.push_error()
487 }
488 }
489
490 fn push_error(&mut self) -> Result<(), VmValidateError> {
491 self.errors.push(ValidationErrorIndicator {
492 instance_path: self.instance_tokens.clone(),
493 schema_path: self.schema_tokens.last().unwrap().clone(),
494 });
495
496 if self.options.max_errors == self.errors.len() {
497 Err(VmValidateError::MaxErrorsReached)
498 } else {
499 Ok(())
500 }
501 }
502
503 fn push_schema_token(&mut self, token: &'a str) {
504 self.schema_tokens.last_mut().unwrap().push(token.into());
505 }
506
507 fn pop_schema_token(&mut self) {
508 self.schema_tokens.last_mut().unwrap().pop().unwrap();
509 }
510
511 fn push_instance_token(&mut self, token: &'a str) {
512 self.instance_tokens.push(token.into());
513 }
514
515 fn pop_instance_token(&mut self) {
516 self.instance_tokens.pop().unwrap();
517 }
518}
519
#[cfg(test)]
mod tests {
    /// A schema whose definition refers to itself must trip the depth limit.
    #[test]
    fn max_depth() {
        use serde_json::json;

        let serde_schema: crate::SerdeSchema = serde_json::from_value(json!({
            "definitions": {
                "loop": { "ref": "loop" },
            },
            "ref": "loop",
        }))
        .unwrap();
        let schema = crate::Schema::from_serde_schema(serde_schema).unwrap();

        let instance = json!(null);
        let options = super::ValidateOptions::new().with_max_depth(3);
        let outcome = super::validate(&schema, &instance, options);

        assert_eq!(super::ValidateError::MaxDepthExceeded, outcome.unwrap_err())
    }

    /// With five invalid elements and a limit of three, exactly three errors
    /// are returned.
    #[test]
    fn max_errors() {
        use serde_json::json;

        let serde_schema: crate::SerdeSchema = serde_json::from_value(json!({
            "elements": { "type": "string" }
        }))
        .unwrap();
        let schema = crate::Schema::from_serde_schema(serde_schema).unwrap();

        let instance = json!([null, null, null, null, null]);
        let options = super::ValidateOptions::new().with_max_errors(3);
        let indicators = super::validate(&schema, &instance, options).unwrap();

        assert_eq!(3, indicators.len())
    }

    /// Runs every case of the JSON Typedef validation test suite and compares
    /// the returned error indicators, as sets, with the expected ones.
    #[test]
    fn validation_spec() {
        use std::collections::{BTreeMap, HashSet};

        #[derive(serde::Deserialize, PartialEq, Debug, Eq, Hash)]
        struct TestCaseError {
            #[serde(rename = "instancePath")]
            instance_path: Vec<String>,

            #[serde(rename = "schemaPath")]
            schema_path: Vec<String>,
        }

        #[derive(serde::Deserialize)]
        struct TestCase {
            schema: crate::SerdeSchema,
            instance: serde_json::Value,
            errors: Vec<TestCaseError>,
        }

        let raw_suite = include_str!("../json-typedef-spec/tests/validation.json");
        let suite: BTreeMap<String, TestCase> =
            serde_json::from_str(raw_suite).expect("parse validation.json");

        for (test_case_name, test_case) in suite {
            let TestCase {
                schema: serde_schema,
                instance,
                errors: expected,
            } = test_case;

            let schema = crate::Schema::from_serde_schema(serde_schema).expect(&test_case_name);
            schema.validate().expect(&test_case_name);

            let indicators = super::validate(&schema, &instance, super::ValidateOptions::new())
                .expect(&test_case_name);

            // Order is unspecified, so compare as sets of owned paths.
            let mut errors = HashSet::new();
            for indicator in indicators {
                let (instance_path, schema_path) = indicator.into_owned_paths();
                errors.insert(TestCaseError {
                    instance_path,
                    schema_path,
                });
            }

            let test_case_errors: HashSet<_> = expected.into_iter().collect();

            assert_eq!(
                test_case_errors, errors,
                "wrong validation errors returned: {}",
                &test_case_name
            );
        }
    }
}