json_threat_protection/
validator.rs

1use thiserror::Error;
2
3use crate::lexer::Lexer;
4use crate::lexer::LexerError;
5use crate::lexer::Token;
6use crate::read::{Position, Read};
7use std::collections::HashSet;
8
/// Expands to the value of the `allow_duplicate_object_entry_name` flag of
/// the given validator binding.
///
/// `$validator` is the identifier (typically `self`) of a value that has an
/// `allow_duplicate_object_entry_name: bool` field.
// Not invoked anywhere in the visible part of this file, hence the lint
// exemption; it is kept as the counterpart of the negated macro.
#[allow(unused_macros)]
macro_rules! allow_duplicate_object_entry_name {
    ($validator:ident) => {{
        $validator.allow_duplicate_object_entry_name
    }};
}
15
/// Expands to `true` when duplicate object entry names must be rejected,
/// i.e. to the negation of the `allow_duplicate_object_entry_name` flag of
/// the given validator binding.
macro_rules! disallow_duplicate_object_entry_name {
    ($validator:ident) => {
        !($validator.allow_duplicate_object_entry_name)
    };
}
21
/// Enters one nesting level.
///
/// Increments `cur_depth` first and then compares it with `max_depth`; when
/// the limit is exceeded the *enclosing function* returns
/// `Err(ValidatorError::MaxDepthExceeded { .. })`. The counter stays
/// incremented in that case.
macro_rules! try_inc_depth {
    ($validator:ident, $pos:ident) => {{
        $validator.cur_depth += 1;

        let limit = $validator.max_depth;
        if limit < $validator.cur_depth {
            return Err(ValidatorError::MaxDepthExceeded {
                position: $pos,
                limit,
            });
        }
    }};
}
34
/// Leaves one nesting level.
///
/// Makes the *enclosing function* return `Err(ValidatorError::InvalidJSON)`
/// when the depth is already zero (a closer without a matching opener).
/// Otherwise decrements `cur_depth` and, as a defensive re-check, returns
/// `Err(ValidatorError::MaxDepthExceeded { .. })` if the depth is still above
/// `max_depth` afterwards.
macro_rules! try_dec_depth {
    ($validator:ident, $pos:ident) => {{
        match $validator.cur_depth.checked_sub(1) {
            Some(depth) => $validator.cur_depth = depth,
            None => return Err(ValidatorError::InvalidJSON($pos)),
        }

        let limit = $validator.max_depth;
        if limit < $validator.cur_depth {
            return Err(ValidatorError::MaxDepthExceeded {
                position: $pos,
                limit,
            });
        }
    }};
}
51
/// Opens a new array: pushes a fresh entry counter (starting at zero) onto
/// the `entires` stack and then enters one nesting level via
/// `try_inc_depth!`, which may make the enclosing function return a
/// depth-limit error.
macro_rules! try_active_array {
    ($validator:ident, $pos:ident) => {{
        $validator.entires.push(0);
        try_inc_depth!($validator, $pos);
    }};
}
58
/// Counts one more entry for the innermost open container, treating it as an
/// array.
///
/// Makes the *enclosing function* return `Err(ValidatorError::InvalidJSON)`
/// when no container is open, and
/// `Err(ValidatorError::MaxArrayEntriesExceeded { .. })` when the incremented
/// count is greater than `max_array_entries`.
macro_rules! try_add_array_entry {
    ($validator:ident, $pos:ident) => {{
        let limit = $validator.max_array_entries;

        let count = match $validator.entires.last_mut() {
            Some(count) => count,
            None => return Err(ValidatorError::InvalidJSON($pos)),
        };
        *count += 1;

        if limit < *count {
            return Err(ValidatorError::MaxArrayEntriesExceeded {
                position: $pos,
                limit,
            });
        }
    }};
}
75
/// Closes the innermost array and evaluates to the number of entries that
/// were counted for it.
///
/// Pops the top entry counter (the enclosing function returns
/// `Err(ValidatorError::InvalidJSON)` when there is none) and then leaves one
/// nesting level via `try_dec_depth!`.
macro_rules! try_finalize_array {
    ($validator:ident, $pos:ident) => {{
        let count = match $validator.entires.pop() {
            Some(count) => count,
            None => return Err(ValidatorError::InvalidJSON($pos)),
        };
        try_dec_depth!($validator, $pos);
        count
    }};
}
86
/// Opens a new object.
///
/// Pushes a fresh entry counter onto `entires`; when duplicate entry names
/// are disallowed it also pushes an empty key set onto `keys` for the new
/// object. Finally enters one nesting level via `try_inc_depth!`, which may
/// make the enclosing function return a depth-limit error.
macro_rules! try_active_object {
    ($validator:ident, $pos:ident) => {{
        $validator.entires.push(0);
        if disallow_duplicate_object_entry_name!($validator) {
            // Key sets are only tracked when they are actually needed.
            $validator.keys.push(HashSet::with_capacity(8));
        }
        try_inc_depth!($validator, $pos);
    }};
}
96
/// Checks a freshly read object entry name.
///
/// * The name's byte length (`len()`) must not exceed
///   `max_object_entry_name_length`, otherwise the *enclosing function*
///   returns `Err(ValidatorError::MaxObjectEntryNameLengthExceeded { .. })`.
/// * When duplicate names are disallowed, the name is recorded in the key set
///   of the innermost object; a name that is already present yields
///   `Err(ValidatorError::DuplicateObjectEntryName { .. })`, and a missing
///   key set yields `Err(ValidatorError::InvalidJSON)`.
macro_rules! try_add_object_key {
    ($validator:ident, $name:ident, $pos:ident) => {{
        let limit = $validator.max_object_entry_name_length;
        if limit < $name.len() {
            return Err(ValidatorError::MaxObjectEntryNameLengthExceeded {
                position: $pos,
                limit,
                name: $name.to_string(),
            });
        }

        if disallow_duplicate_object_entry_name!($validator) {
            let seen = match $validator.keys.last_mut() {
                Some(seen) => seen,
                None => return Err(ValidatorError::InvalidJSON($pos)),
            };

            // `insert` reports `false` when the name was already recorded.
            let is_new = seen.insert($name.to_string());
            if !is_new {
                return Err(ValidatorError::DuplicateObjectEntryName {
                    position: $pos,
                    key: $name.to_string(),
                });
            }
        }
    }};
}
122
/// Counts one more entry for the innermost open container, treating it as an
/// object.
///
/// Makes the *enclosing function* return `Err(ValidatorError::InvalidJSON)`
/// when no container is open, and
/// `Err(ValidatorError::MaxObjectEntriesExceeded { .. })` when the incremented
/// count is greater than `max_object_entries`.
macro_rules! try_add_object_value {
    ($validator:ident, $pos:ident) => {{
        let limit = $validator.max_object_entries;

        let count = match $validator.entires.last_mut() {
            Some(count) => count,
            None => return Err(ValidatorError::InvalidJSON($pos)),
        };
        *count += 1;

        if limit < *count {
            return Err(ValidatorError::MaxObjectEntriesExceeded {
                position: $pos,
                limit,
            });
        }
    }};
}
139
/// Closes the innermost object and evaluates to the number of entries that
/// were counted for it.
///
/// When duplicate names are disallowed the object's key set is discarded
/// first. Then the top entry counter is popped and one nesting level is left
/// via `try_dec_depth!`. A missing key set or counter makes the *enclosing
/// function* return `Err(ValidatorError::InvalidJSON)`.
macro_rules! try_finalize_object {
    ($validator:ident, $pos:ident) => {{
        if disallow_duplicate_object_entry_name!($validator) {
            if $validator.keys.pop().is_none() {
                return Err(ValidatorError::InvalidJSON($pos));
            }
        }

        let count = match $validator.entires.pop() {
            Some(count) => count,
            None => return Err(ValidatorError::InvalidJSON($pos)),
        };
        try_dec_depth!($validator, $pos);
        count
    }};
}
156
/// One entry of the validator's state stack.
///
/// The top of the stack describes what the next token is allowed to be.
#[derive(Clone, Debug, Eq, PartialEq)]
enum State {
    /// An object is open; further entries of it may follow.
    ProcessingObject,

    /// An array is open; further entries of it may follow.
    ProcessingArray,

    /// The next token must start an element.
    RequireElement,

    /// The next token must be a colon (`:`).
    RequireColon,

    /// The next token must be an object key.
    RequireObjectKey,

    /// The next token may be a comma (`,`) terminating the current entry.
    OptionalComma,

    /// The next token may be an object key.
    OptionalObjectKey,

    /// The next token may start an element.
    OptionalElement,
}
183
184#[derive(Error, Debug)]
185/// Error occurred during JSON validation
186pub enum ValidatorError {
187    /// Error occurred during lexing
188    #[error("lexer error: {0}")]
189    LexerError(#[from] LexerError),
190
191    /// Error occurred once the JSON is invalid, such as unclosed object or array
192    #[error("invalid JSON ({0})")]
193    InvalidJSON(Position),
194
195    /// Error occurred when there is trailing data after the valid JSON
196    #[error("trailing data (position {0} bytes)")]
197    TrailingData(Position),
198
199    /// Error occurred when the maximum depth is exceeded
200    #[error("maximum depth exceeded (limit: {limit}, {position})")]
201    MaxDepthExceeded {
202        /// Position where the error occurred
203        position: Position,
204
205        /// Maximum depth allowed
206        limit: usize,
207    },
208
209    /// Error occurred when the maximum string length is exceeded
210    #[error("maximum string length exceeded (limit: {limit}, {position})")]
211    MaxStringLengthExceeded {
212        /// Position where the error occurred
213        position: Position,
214
215        /// Maximum string length allowed
216        limit: usize,
217
218        /// String that exceeds the limit
219        str: String,
220    },
221
222    /// Error occurred when the maximum array entries is exceeded
223    #[error("maximum array entries exceeded (limit: {limit}, {position})")]
224    MaxArrayEntriesExceeded {
225        /// Position where the error occurred
226        position: Position,
227
228        /// Maximum array entries allowed
229        limit: usize,
230    },
231
232    /// Error occurred when the maximum object entries is exceeded
233    #[error("maximum object entries exceeded (limit: {limit}, {position})")]
234    MaxObjectEntriesExceeded {
235        /// Position where the error occurred
236        position: Position,
237
238        /// Maximum object entries allowed
239        limit: usize,
240    },
241
242    /// Error occurred when the maximum object entry name length is exceeded
243    #[error("maximum object entry name length exceeded (limit: {limit}, {position})")]
244    MaxObjectEntryNameLengthExceeded {
245        /// Position where the error occurred
246        position: Position,
247
248        /// Maximum object entry name length allowed
249        limit: usize,
250
251        /// Object entry name that exceeds the limit
252        name: String,
253    },
254
255    /// Error occurred when there is a duplicate object entry name
256    #[error("duplicate object entry name (key: {key}, {position})")]
257    DuplicateObjectEntryName {
258        /// Position where the error occurred
259        position: Position,
260
261        /// Duplicate object entry name
262        key: String,
263    },
264
265    /// Error occurred when running into unexpected state, please report this issue to the maintainer
266    #[error("running into unexpected state, please report this issue to the maintainer, ({msg}) ({position})")]
267    Bug {
268        /// Diagnostic message
269        msg: String,
270
271        /// Position where the error occurred
272        position: Position,
273    },
274}
275
276/// Interal JSON validator
277pub struct Validator<R: Read> {
278    lexer: Lexer<R>,
279
280    /// Stack of states, keep track of the current state of the validator
281    states: Vec<State>,
282
283    /// Stack of entries, keep track of the number of entries in the current array or object
284    entires: Vec<usize>,
285
286    /// Stack of keys, keep track of the keys in the current object
287    keys: Vec<HashSet<String>>,
288
289    /// Current depth of the JSON
290    cur_depth: usize,
291
292    max_depth: usize,
293    max_string_length: usize,
294    max_array_entries: usize,
295    max_object_entries: usize,
296    max_object_entry_name_length: usize,
297    allow_duplicate_object_entry_name: bool,
298}
299
300impl<R: Read> Validator<R> {
301    pub fn new(
302        read: R,
303        max_depth: usize,
304        max_string_length: usize,
305        max_array_entries: usize,
306        max_object_entries: usize,
307        max_object_entry_name_length: usize,
308        allow_duplicate_object_entry_name: bool,
309    ) -> Self {
310        let mut states = Vec::with_capacity(32);
311        states.push(State::RequireElement);
312
313        Validator {
314            lexer: Lexer::new(read),
315            states,
316            entires: Vec::with_capacity(32),
317            keys: Vec::with_capacity(32),
318            cur_depth: 0,
319
320            max_depth,
321            max_string_length,
322            max_array_entries,
323            max_object_entries,
324            max_object_entry_name_length,
325            allow_duplicate_object_entry_name,
326        }
327    }
328
329    pub fn with_max_depth(mut self, max_depth: usize) -> Self {
330        self.max_depth = max_depth;
331        self
332    }
333
334    pub fn with_max_string_length(mut self, max_string_length: usize) -> Self {
335        self.max_string_length = max_string_length;
336        self
337    }
338
339    pub fn with_max_array_entries(mut self, max_array_entries: usize) -> Self {
340        self.max_array_entries = max_array_entries;
341        self
342    }
343
344    pub fn with_max_object_entries(mut self, max_object_entries: usize) -> Self {
345        self.max_object_entries = max_object_entries;
346        self
347    }
348
349    pub fn with_max_object_entry_name_length(
350        mut self,
351        max_object_entry_name_length: usize,
352    ) -> Self {
353        self.max_object_entry_name_length = max_object_entry_name_length;
354        self
355    }
356
357    pub fn allow_duplicate_object_entry_name(mut self) -> Self {
358        self.allow_duplicate_object_entry_name = true;
359        self
360    }
361
362    pub fn disallow_duplicate_object_entry_name(mut self) -> Self {
363        self.allow_duplicate_object_entry_name = false;
364        self
365    }
366
367    pub fn validate(mut self) -> Result<(), ValidatorError> {
368        loop {
369            match self.validate_with_steps(usize::MAX)? {
370                true => return Ok(()),
371                false => (),
372            }
373        }
374    }
375
376    pub fn validate_with_steps(&mut self, steps: usize) -> Result<bool, ValidatorError> {
377        match self.inner_validate(steps) {
378            Ok(finished) => Ok(finished),
379            Err(e) => Err(self.correct_position(e)),
380        }
381    }
382
383    /// Correct the position of the error
384    fn correct_position(&self, err: ValidatorError) -> ValidatorError {
385        match err {
386            ValidatorError::InvalidJSON(_) => ValidatorError::InvalidJSON(self.lexer.position()),
387            ValidatorError::TrailingData(_) => ValidatorError::TrailingData(self.lexer.position()),
388            ValidatorError::MaxDepthExceeded { position: _, limit } => {
389                ValidatorError::MaxDepthExceeded {
390                    position: self.lexer.position(),
391                    limit,
392                }
393            }
394            ValidatorError::MaxStringLengthExceeded {
395                position: _,
396                limit,
397                str,
398            } => ValidatorError::MaxStringLengthExceeded {
399                position: self.lexer.position(),
400                limit,
401                str,
402            },
403            ValidatorError::MaxArrayEntriesExceeded { position: _, limit } => {
404                ValidatorError::MaxArrayEntriesExceeded {
405                    position: self.lexer.position(),
406                    limit,
407                }
408            }
409            ValidatorError::MaxObjectEntriesExceeded { position: _, limit } => {
410                ValidatorError::MaxObjectEntriesExceeded {
411                    position: self.lexer.position(),
412                    limit,
413                }
414            }
415            ValidatorError::MaxObjectEntryNameLengthExceeded {
416                position: _,
417                limit,
418                name,
419            } => ValidatorError::MaxObjectEntryNameLengthExceeded {
420                position: self.lexer.position(),
421                limit,
422                name,
423            },
424            ValidatorError::DuplicateObjectEntryName { position: _, key } => {
425                ValidatorError::DuplicateObjectEntryName {
426                    position: self.lexer.position(),
427                    key,
428                }
429            }
430            ValidatorError::LexerError(e) => ValidatorError::LexerError(e),
431            ValidatorError::Bug { msg, position: _ } => ValidatorError::Bug {
432                msg,
433                position: self.lexer.position(),
434            },
435        }
436    }
437
438    fn inner_validate(&mut self, steps: usize) -> Result<bool, ValidatorError> {
439        let mut remaining_steps = steps;
440        let mut str_buf = Vec::with_capacity(64);
441
442        // Dummy position for constructing `ValidatorError`
443        // so that we can keep the error reason once the error occurs,
444        // and the real position will be updated by the caller of this method.
445        // Since the `self.lexer.next()` will borrow the `self.lexer` mutably,
446        // and its return value has the same lifetime of this borrow,
447        // so we can't borrow `self.lexer` imuutably to get the position
448        // after the invoking of `self.lexer.next()`.
449        // This is a workaround to make the borrow checker happy.
450        let dummy_position = Position::default();
451
452        while let Some(state) = self.states.pop() {
453            let token = self.lexer.next(&mut str_buf)?;
454
455            if token.is_none() {
456                return Err(ValidatorError::InvalidJSON(dummy_position));
457            }
458
459            // unwrap is safe here since we have checked the token is not None.
460            match token.unwrap() {
461                Token::LBrace => match state {
462                    State::RequireElement | State::OptionalElement => {
463                        self.states.push(State::ProcessingObject);
464                        self.states.push(State::OptionalObjectKey);
465                        try_active_object!(self, dummy_position);
466                    }
467                    _ => return Err(ValidatorError::InvalidJSON(dummy_position)),
468                },
469                Token::RBrace => {
470                    match state {
471                        State::OptionalComma | State::OptionalObjectKey => {
472                            let state = self
473                                .states
474                                .pop()
475                                .ok_or(ValidatorError::InvalidJSON(dummy_position))?;
476                            if state != State::ProcessingObject {
477                                return Err(ValidatorError::InvalidJSON(dummy_position));
478                            }
479
480                            let entires = try_finalize_object!(self, dummy_position);
481                            if state == State::OptionalObjectKey && entires != 0 {
482                                return Err(ValidatorError::InvalidJSON(dummy_position));
483                            }
484                        }
485                        State::ProcessingObject => {
486                            if try_finalize_object!(self, dummy_position) == 0 {
487                                return Err(ValidatorError::InvalidJSON(dummy_position));
488                            }
489                        }
490                        _ => return Err(ValidatorError::InvalidJSON(dummy_position)),
491                    }
492
493                    if matches!(
494                        self.states.last(),
495                        Some(State::ProcessingArray | State::ProcessingObject)
496                    ) {
497                        self.states.push(State::OptionalComma);
498                    }
499                }
500                Token::LBracket => match state {
501                    State::RequireElement | State::OptionalElement => {
502                        self.states.push(State::ProcessingArray);
503                        self.states.push(State::OptionalElement);
504                        try_active_array!(self, dummy_position);
505                    }
506                    _ => return Err(ValidatorError::InvalidJSON(dummy_position)),
507                },
508                Token::RBracket => {
509                    match state {
510                        State::OptionalComma | State::OptionalElement => {
511                            let state = self
512                                .states
513                                .pop()
514                                .ok_or(ValidatorError::InvalidJSON(dummy_position))?;
515                            if state != State::ProcessingArray {
516                                return Err(ValidatorError::InvalidJSON(dummy_position));
517                            }
518
519                            let entries = try_finalize_array!(self, dummy_position);
520                            if state == State::OptionalElement && entries != 0 {
521                                return Err(ValidatorError::InvalidJSON(dummy_position));
522                            }
523                        }
524                        State::ProcessingArray => {
525                            if try_finalize_array!(self, dummy_position) == 0 {
526                                return Err(ValidatorError::InvalidJSON(dummy_position));
527                            }
528                        }
529                        _ => return Err(ValidatorError::InvalidJSON(dummy_position)),
530                    }
531
532                    if matches!(
533                        self.states.last(),
534                        Some(State::ProcessingArray | State::ProcessingObject)
535                    ) {
536                        self.states.push(State::OptionalComma);
537                    }
538                }
539                Token::Colon => match state {
540                    State::RequireColon => self.states.push(State::RequireElement),
541                    _ => return Err(ValidatorError::InvalidJSON(dummy_position)),
542                },
543                Token::Comma => match state {
544                    State::OptionalComma => match self.states.last() {
545                        Some(State::ProcessingObject) => self.states.push(State::RequireObjectKey),
546                        Some(State::ProcessingArray) => self.states.push(State::RequireElement),
547                        _ => return Err(ValidatorError::InvalidJSON(dummy_position)),
548                    },
549                    _ => return Err(ValidatorError::InvalidJSON(dummy_position)),
550                },
551                Token::String => match state {
552                    State::OptionalObjectKey | State::RequireObjectKey => {
553                        let str = unsafe { std::str::from_utf8_unchecked(str_buf.as_slice()) };
554                        try_add_object_key!(self, str, dummy_position);
555                        self.states.push(State::RequireColon);
556                    }
557                    State::OptionalElement | State::RequireElement => {
558                        let str = unsafe { std::str::from_utf8_unchecked(str_buf.as_slice()) };
559                        if str.len() > self.max_string_length {
560                            return Err(ValidatorError::MaxStringLengthExceeded {
561                                position: dummy_position,
562                                limit: self.max_string_length,
563                                str: str.to_string(),
564                            });
565                        }
566
567                        let last_state = self.states.last_mut();
568                        match last_state {
569                            Some(State::ProcessingObject) => {
570                                try_add_object_value!(self, dummy_position);
571                                self.states.push(State::OptionalComma);
572                            }
573                            Some(State::ProcessingArray) => {
574                                try_add_array_entry!(self, dummy_position);
575                                self.states.push(State::OptionalComma);
576                            }
577                            _ => (),
578                        }
579                    }
580                    _ => return Err(ValidatorError::InvalidJSON(dummy_position)),
581                },
582                Token::Number | Token::True | Token::False | Token::Null => match state {
583                    State::OptionalElement | State::RequireElement => {
584                        let last_state = self.states.last_mut();
585                        match last_state {
586                            Some(State::ProcessingObject) => {
587                                try_add_object_value!(self, dummy_position);
588                                self.states.push(State::OptionalComma);
589                            }
590                            Some(State::ProcessingArray) => {
591                                try_add_array_entry!(self, dummy_position);
592                                self.states.push(State::OptionalComma);
593                            }
594                            _ => (),
595                        }
596                    }
597                    _ => return Err(ValidatorError::InvalidJSON(dummy_position)),
598                },
599            }
600
601            remaining_steps -= 1;
602            if remaining_steps == 0 {
603                break;
604            }
605        }
606
607        // At this point, there are four possible cases:
608        // * The JSON is valid and there is no more token to process.
609        //     * `self.states.is_empty() && self.cur_depth == 0 && self.lexer.peek()?.is_none()`.
610        // * The JSON is valid but there is trailing data.
611        //     * `self.states.is_empty() && self.cur_depth == 0 && self.lexer.peek()?.is_some()`.
612        // * The JSON is invalid.
613        //     * `(!self.states.is_empty() || self.cur_depth != 0) && self.lexer.peek()?.is_none()`.
614        // * The JSON is still being processed, and the `remaining_steps`` are exhausted.
615        //     * `(!self.states.is_empty() || self.cur_depth != 0) && self.lexer.peek()?.is_some()`.
616
617        let has_states = !self.states.is_empty();
618        let no_depth = self.cur_depth == 0;
619        let has_more_token = self.lexer.peek(&mut str_buf)?.is_some();
620
621        if has_states || !no_depth {
622            if has_more_token {
623                if remaining_steps != 0 {
624                    return Err(ValidatorError::Bug {
625                        msg: format!(
626                            "Validator.inner_validate: remaining_steps should be 0, but got {}",
627                            remaining_steps
628                        ),
629                        position: dummy_position,
630                    });
631                }
632                return Ok(false);
633            }
634
635            return Err(ValidatorError::InvalidJSON(dummy_position));
636        }
637
638        if !no_depth {
639            return Err(ValidatorError::Bug {
640                msg: "Validator.inner_validate: current depth should be 0".to_string(),
641                position: dummy_position,
642            });
643        }
644
645        if !has_states && !has_more_token {
646            return Ok(true);
647        }
648
649        if has_states {
650            return Err(ValidatorError::InvalidJSON(dummy_position));
651        }
652
653        Err(ValidatorError::TrailingData(dummy_position))
654    }
655}