formatjson 0.3.1

Formats JSON files
Documentation
//! Validates the tokens generated by the [tokenizer][crate::tokenizer].
use std::{iter::Peekable, slice::Iter};

use thiserror::Error;

use crate::tokenizer::{Token, TokenType};

/// Error class raised by the validator.
///
/// The first field of every variant is a byte offset into the input, which
/// can be retrieved uniformly through [`ValidationError::byte_offset`].
#[derive(Error, Debug)]
pub enum ValidationError {
    /// A value was requested but no tokens remained in the stream.
    ///
    /// The payload is the byte offset of the last token that was yielded
    /// (0 if nothing was yielded yet).
    #[error("The file is empty")]
    EmptyFile(usize),
    /// A token appeared in a position where it is not allowed.
    ///
    /// Carries the byte offset of the offending token and its string
    /// rendering; the message prints that string with `Debug` formatting.
    #[error("Unexpected token: {1:?}")]
    UnexpectedToken(usize, String),
    /// The token stream ended while further tokens were still required.
    ///
    /// The payload is the byte offset of the last token that was yielded.
    #[error("Unexpected end of file")]
    UnexpectedEOF(usize),
}

impl ValidationError {
    pub fn byte_offset(&self) -> usize {
        match self {
            ValidationError::EmptyFile(x)
            | ValidationError::UnexpectedToken(x, _)
            | ValidationError::UnexpectedEOF(x) => *x,
        }
    }
}

/// Yields tokens, while maintaining the byte offset of the latest token.
///
/// The offset is what end-of-input errors report, so that they can point at
/// the last token that was actually read.
pub struct TokenIterator<'a> {
    /// Peekable iterator over the borrowed token slice.
    ///
    /// Advancing this field directly bypasses the offset bookkeeping done by
    /// the [`Iterator`] implementation of this type.
    pub tokens: Peekable<Iter<'a, Token<'a>>>,
    /// Byte offset of the most recently yielded token (0 until one is yielded).
    pub last_byte_offset: usize,
}

impl<'a> Iterator for TokenIterator<'a> {
    type Item = &'a Token<'a>;

    /// Advances the underlying iterator and remembers the byte offset of the
    /// token that is handed out. The stored offset is left untouched once the
    /// stream is exhausted.
    fn next(&mut self) -> Option<Self::Item> {
        let yielded = self.tokens.next();
        if let Some(token) = yielded {
            self.last_byte_offset = token.byte_offset;
        }
        yielded
    }
}

impl<'a> TokenIterator<'a> {
    /// Creates an iterator over `tokens` with the tracked byte offset set to 0.
    pub fn new(tokens: Iter<'a, Token<'a>>) -> Self {
        Self {
            tokens: tokens.peekable(),
            last_byte_offset: 0,
        }
    }

    /// Consumes and returns the next token.
    ///
    /// The returned reference borrows from the underlying token slice (`'a`),
    /// not from `self`, so the iterator stays usable while the token is held.
    ///
    /// # Errors
    ///
    /// Returns [`ValidationError::UnexpectedEOF`], carrying the byte offset of
    /// the last yielded token, if no tokens remain.
    fn next_or_eof(&mut self) -> Result<&'a Token<'a>, ValidationError> {
        self.next()
            .ok_or(ValidationError::UnexpectedEOF(self.last_byte_offset))
    }

    /// Returns the next token without consuming it.
    ///
    /// The returned reference borrows from the underlying token slice (`'a`),
    /// not from `self`, so the iterator stays usable while the token is held.
    ///
    /// # Errors
    ///
    /// Returns [`ValidationError::UnexpectedEOF`], carrying the byte offset of
    /// the last yielded token, if no tokens remain.
    pub fn peek(&mut self) -> Result<&'a Token<'a>, ValidationError> {
        self.tokens
            .peek()
            .copied()
            .ok_or(ValidationError::UnexpectedEOF(self.last_byte_offset))
    }
}

/// Checks that `tokens` form exactly one well-formed JSON value.
///
/// Validation is delegated to the recursive value validator; afterwards the
/// stream must be exhausted.
///
/// # Errors
///
/// Returns a [ValidationError] if the value itself is malformed, or
/// [`ValidationError::UnexpectedToken`] for the first token that follows a
/// complete value.
pub fn validate(tokens: &[Token]) -> Result<(), ValidationError> {
    let mut stream = TokenIterator::new(tokens.iter());
    validate_recursive(&mut stream)?;

    // Anything left over after the top-level value is trailing garbage.
    match stream.next() {
        None => Ok(()),
        Some(extra) => Err(ValidationError::UnexpectedToken(
            extra.byte_offset,
            extra.to_string(),
        )),
    }
}

/// Validates a single JSON value starting at the next token.
///
/// Scalars (string, number, boolean, null) are accepted on their own; an
/// opening bracket or brace hands over to the array or object validator, which
/// consumes everything up to and including the matching closing token.
///
/// # Errors
///
/// * [`ValidationError::EmptyFile`] if the stream is already exhausted.
/// * [`ValidationError::UnexpectedToken`] if the next token cannot start a
///   value (closing bracket/brace, comma or colon).
/// * Any error produced while validating a nested array or object.
fn validate_recursive(tokens: &mut TokenIterator<'_>) -> Result<(), ValidationError> {
    let token = match tokens.next() {
        Some(token) => token,
        None => return Err(ValidationError::EmptyFile(tokens.last_byte_offset)),
    };

    match token.token_type {
        // Structural tokens that can never begin a value.
        TokenType::RightBracket
        | TokenType::RightBrace
        | TokenType::Comma
        | TokenType::Colon => Err(ValidationError::UnexpectedToken(
            token.byte_offset,
            token.to_string(),
        )),

        // Scalars are complete values by themselves.
        TokenType::String(_)
        | TokenType::Number(_)
        | TokenType::Boolean(_)
        | TokenType::Null => Ok(()),

        // Containers: the opening token is consumed, validate the rest.
        TokenType::LeftBracket => validate_array(tokens),
        TokenType::LeftBrace => validate_object(tokens),
    }
}

/// Validates the remainder of an array whose opening `[` was already consumed.
///
/// Accepts either an immediate `]` or one or more values separated by commas
/// and terminated by `]`. The closing bracket is consumed.
///
/// # Errors
///
/// * [`ValidationError::UnexpectedEOF`] if the stream ends before the array
///   is closed, including directly after a comma.
/// * [`ValidationError::UnexpectedToken`] if an element is followed by
///   anything other than `,` or `]`.
/// * Any error produced while validating an element.
fn validate_array(tokens: &mut TokenIterator<'_>) -> Result<(), ValidationError> {
    // Special case: in an empty array we see the closing bracket immediately.
    // In that case, consume the bracket and exit early.
    if matches!(tokens.peek()?.token_type, TokenType::RightBracket) {
        tokens
            .next()
            .expect("We peeked, this should not have failed.");
        return Ok(());
    }

    loop {
        // The value validator reports an exhausted stream as `EmptyFile`,
        // which is misleading inside an array (e.g. for `[1,`). Peeking first
        // turns truncated input into `UnexpectedEOF` instead.
        tokens.peek()?;

        // Expect a value, followed by a comma or a final closing bracket.
        validate_recursive(tokens)?;
        let token = tokens.next_or_eof()?;
        match token.token_type {
            TokenType::Comma => (),
            TokenType::RightBracket => return Ok(()),
            _ => {
                return Err(ValidationError::UnexpectedToken(
                    token.byte_offset,
                    token.to_string(),
                ))
            }
        }
    }
}

/// Validates the remainder of an object whose opening `{` was already consumed.
///
/// Accepts either an immediate `}` or one or more `key : value` members, with
/// string keys, separated by commas and terminated by `}`. The closing brace
/// is consumed.
///
/// # Errors
///
/// * [`ValidationError::UnexpectedEOF`] if the stream ends before the object
///   is closed, including directly after a colon.
/// * [`ValidationError::UnexpectedToken`] if a key is not a string, a key is
///   not followed by `:`, or a member is followed by anything other than `,`
///   or `}`.
/// * Any error produced while validating a member value.
fn validate_object(tokens: &mut TokenIterator<'_>) -> Result<(), ValidationError> {
    // Special case: in an empty object we see the closing brace immediately.
    // In that case, consume the brace and exit early.
    if matches!(tokens.peek()?.token_type, TokenType::RightBrace) {
        tokens
            .next()
            .expect("We peeked, this should not have failed.");
        return Ok(());
    }

    loop {
        // Expect a string key, followed by a colon.
        let key = tokens.next_or_eof()?;
        if !matches!(key.token_type, TokenType::String(_)) {
            return Err(ValidationError::UnexpectedToken(
                key.byte_offset,
                key.to_string(),
            ));
        }
        let separator = tokens.next_or_eof()?;
        if !matches!(separator.token_type, TokenType::Colon) {
            return Err(ValidationError::UnexpectedToken(
                separator.byte_offset,
                separator.to_string(),
            ));
        }

        // The value validator reports an exhausted stream as `EmptyFile`,
        // which is misleading inside an object (e.g. for `{"a":`). Peeking
        // first turns truncated input into `UnexpectedEOF` instead.
        tokens.peek()?;

        // Then expect a value, followed by a comma or a final closing brace.
        validate_recursive(tokens)?;
        let token = tokens.next_or_eof()?;
        match token.token_type {
            TokenType::Comma => (),
            TokenType::RightBrace => return Ok(()),
            _ => {
                return Err(ValidationError::UnexpectedToken(
                    token.byte_offset,
                    token.to_string(),
                ))
            }
        }
    }
}