//! json-parser 1.0.2
//!
//! A JSON parser; this module implements the tokenizer.
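//!
//! A minimal usage sketch (the import path below is illustrative, not a
//! documented API path):
//!
//! ```ignore
//! use json_parser::tokenizer::{Token, Tokenizer};
//!
//! let mut tokenizer = Tokenizer::new(r#"{"key": 123}"#);
//! let tokens = tokenizer.tokenize().unwrap();
//! assert_eq!(tokens[0], Token::CurlyOpen);
//! ```
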
use crate::error::Error;
use crate::result::Result;
use std::str::FromStr;
use std::vec::IntoIter;
use std::iter::Peekable;

pub type PeekableIter<T> = Peekable<IntoIter<T>>;

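/// Lexical tokens produced by [`Tokenizer`].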
#[derive(Debug, PartialEq)]
pub enum Token {
    Comma,
    Colon,
    CurlyOpen,
    CurlyClose,
    SquareOpen,
    SquareClose,
    StringValue(String),
    NumberValue(f64),
    BoolValue(bool),
    NullValue,
}

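/// A streaming tokenizer over the characters of a JSON document.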
pub struct Tokenizer {
    char_stream: PeekableIter<char>,
}

impl Tokenizer {
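    /// Creates a tokenizer over the characters of `s`.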
    pub fn new(s: &str) -> Tokenizer {
        let vec: Vec<char> = s.chars().collect();
        let char_stream = vec.into_iter().peekable();
        Tokenizer { char_stream }
    }

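    /// Consumes characters until `predicate` matches. Everything before the
    /// match is returned; the matching character itself is consumed and
    /// dropped. Running out of input before a match is a tokenize error.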
    fn take_until(&mut self, predicate: fn(char) -> bool) -> Result<Vec<char>> {
        let mut res: Vec<char> = vec![];
        while let Some(c) = self.char_stream.next() {
            if !predicate(c) {
                res.push(c);
            } else {
                return Ok(res);
            }
        }
        let s: String = res.iter().collect();
        Err(Error::Tokenize(format!("unterminated token `{}`", s)))
    }

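    /// Consumes characters as long as `predicate` holds, leaving the first
    /// non-matching character in the stream. Unlike `take_until`, reaching
    /// the end of input is not an error.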
    fn take_while(&mut self, predicate: fn(char) -> bool) -> Result<Vec<char>> {
        let mut res = vec![];
        while let Some(&c) = self.char_stream.peek() {
            if predicate(c) {
                self.char_stream.next();
                res.push(c);
            } else {
                return Ok(res);
            }
        }
        Ok(res)
    }

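    /// Consumes the next character, which must be exactly `ch`.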
    fn skip(&mut self, ch: char) -> Result<()> {
        match self.char_stream.next() {
            Some(c) if c == ch => Ok(()),
            _ => Err(Error::Tokenize(format!("expected token `{}`", ch))),
        }
    }

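    /// Reads a string token delimited by double quotes. Note that escape
    /// sequences (`\"`, `\n`, `\uXXXX`, ...) are not handled, so an escaped
    /// quote terminates the string early.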
    fn string_token(&mut self) -> Result<Token> {
        self.skip('"')?;
        let chars = self.take_until(|c| c == '"')?;
        Ok(Token::StringValue(chars.iter().collect()))
    }

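    /// Reads a number token and parses it as `f64`. Only unsigned integer
    /// literals are recognized; minus signs, fractions, and exponents are
    /// not handled.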
    fn number_token(&mut self) -> Result<Token> {
        // `is_ascii_digit` avoids recompiling a regex for every character.
        let chars = self.take_while(|c| c.is_ascii_digit())?;
        let num_string: String = chars.iter().collect();
        match num_string.parse() {
            Ok(num) => Ok(Token::NumberValue(num)),
            Err(pfe) => Err(Error::Tokenize(pfe.to_string())),
        }
    }

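    /// Reads an identifier-like run of characters and maps it to one of the
    /// JSON keywords `true`, `false`, or `null`.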
    fn keyword_token(&mut self) -> Result<Token> {
        // Equivalent to the character class `[a-zA-Z0-9_]`, without the cost
        // of building a regex per character.
        let chars = self.take_while(|c| c.is_ascii_alphanumeric() || c == '_')?;
        let token: String = chars.iter().collect();
        match &token[..] {
            "true" => Ok(Token::BoolValue(true)),
            "false" => Ok(Token::BoolValue(false)),
            "null" => Ok(Token::NullValue),
            _ => Err(Error::Tokenize(format!("unrecognized token `{}`", token))),
        }
    }

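    /// Tokenizes the entire input, skipping insignificant whitespace between
    /// tokens.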
    pub fn tokenize(&mut self) -> Result<Vec<Token>> {
        let mut v: Vec<Token> = vec![];
        while let Some(c) = self.char_stream.peek() {
            match c {
                ' ' | '\n' | '\t' | '\r' => {
                    self.char_stream.next();
                }
                '{' => {
                    v.push(Token::CurlyOpen);
                    self.char_stream.next();
                }
                '}' => {
                    v.push(Token::CurlyClose);
                    self.char_stream.next();
                }
                '[' => {
                    v.push(Token::SquareOpen);
                    self.char_stream.next();
                }
                ']' => {
                    v.push(Token::SquareClose);
                    self.char_stream.next();
                }
                ',' => {
                    v.push(Token::Comma);
                    self.char_stream.next();
                }
                ':' => {
                    v.push(Token::Colon);
                    self.char_stream.next();
                }
                '"' => v.push(self.string_token()?),
                '0'..='9' => v.push(self.number_token()?),
                _ => v.push(self.keyword_token()?),
            }
        }
        Ok(v)
    }
}

impl FromStr for Tokenizer {
    type Err = Error;
    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
        Ok(Tokenizer::new(s))
    }
}

#[test]
fn test_string_token() {
    let mut tokenizer: Tokenizer = (r#""hello""#).parse().unwrap();
    let result = tokenizer.string_token();
    assert_eq!(result.unwrap(), Token::StringValue("hello".into()));
}

#[test]
fn test_number_token() {
    let mut tokenizer = Tokenizer::new(r#"123"#);
    let result = tokenizer.number_token();
    assert_eq!(result.unwrap(), Token::NumberValue(123.0));
}

#[test]
fn test_true_token() {
    let mut tokenizer = Tokenizer::new(r#"true"#);
    let result = tokenizer.keyword_token();
    assert_eq!(result.unwrap(), Token::BoolValue(true));
}

#[test]
fn test_false_token() {
    let mut tokenizer = Tokenizer::new(r#"false"#);
    let result = tokenizer.keyword_token();
    assert_eq!(result.unwrap(), Token::BoolValue(false));
}

#[test]
fn test_null_token() {
    let mut tokenizer = Tokenizer::new(r#"null"#);
    let result = tokenizer.keyword_token();
    assert_eq!(result.unwrap(), Token::NullValue);
}

#[test]
fn test_tokenize_token() {
    let mut tokenizer =
        Tokenizer::new(r#"{"str": "hello", "num": 123, "array":[true, false, null]}"#);
    let result = tokenizer.tokenize();
    assert_eq!(
        result.unwrap(),
        [
            Token::CurlyOpen,
            Token::StringValue("str".into()),
            Token::Colon,
            Token::StringValue("hello".into()),
            Token::Comma,
            Token::StringValue("num".into()),
            Token::Colon,
            Token::NumberValue(123.0),
            Token::Comma,
            Token::StringValue("array".into()),
            Token::Colon,
            Token::SquareOpen,
            Token::BoolValue(true),
            Token::Comma,
            Token::BoolValue(false),
            Token::Comma,
            Token::NullValue,
            Token::SquareClose,
            Token::CurlyClose
        ]
    );
}