1use crate::error::Error;
2use crate::result::Result;
3use regex::Regex;
4use std::str::FromStr;
5use std::vec::IntoIter;
6use std::iter::{Iterator, Peekable};
7
/// Peekable iterator over owned items; used here to stream input `char`s
/// with one-character lookahead.
pub type PeekableIter<T> = Peekable<IntoIter<T>>;
9
/// A single lexical token of the JSON-like input language.
#[derive(Debug, PartialEq)]
pub enum Token {
    // `,` separator. NOTE(review): name is a typo for "Comma"; kept as-is
    // because renaming a public variant would break external callers.
    Coma,
    // `:` separator between an object key and its value.
    Colon,
    // `{`
    CurlyOpen,
    // `}`
    CurlyClose,
    // `[`
    SquareOpen,
    // `]`
    SquareClose,
    // Quoted string, stored without the surrounding `"` quotes.
    StringValue(String),
    // Numeric literal; parsed as `f64` (integer digits only — see tokenizer).
    NumberValue(f64),
    // `true` / `false`.
    BoolValue(bool),
    // `null`.
    NullValue,
}
23
/// Streaming tokenizer that turns an input string into a sequence of
/// [`Token`]s via [`Tokenizer::tokenize`].
pub struct Tokenizer {
    // Remaining input characters; `peek()` provides one-char lookahead.
    char_stream: PeekableIter<char>,
}
27
28impl Tokenizer {
29 pub fn new(s: &str) -> Tokenizer {
30 let vec: Vec<char> = s.chars().collect();
31 let char_stream = vec.into_iter().peekable();
32 Tokenizer { char_stream }
33 }
34
35 fn take_until(&mut self, predicate: fn(char) -> bool) -> Result<Vec<char>> {
36 let mut res: Vec<char> = vec![];
37 while let Some(c) = self.char_stream.next() {
38 if !predicate(c) {
39 res.push(c);
40 } else {
41 return Ok(res);
42 }
43 }
44 let s: String = res.iter().collect();
45 Err(Error::Tokenize(
46 format!("unterminated token `{}`", s).into(),
47 ))
48 }
49
50 fn take_while(&mut self, predicate: fn(char) -> bool) -> Result<Vec<char>> {
51 let mut res = vec![];
52 while let Some(&c) = self.char_stream.peek() {
53 if predicate(c) {
54 self.char_stream.next();
55 res.push(c);
56 } else {
57 return Ok(res);
58 }
59 }
60 Ok(res)
61 }
62
63 fn skip(&mut self, ch: char) -> Result<()> {
64 match self.char_stream.next() {
65 Some(c) if c == ch => Ok(()),
66 _ => Err(Error::Tokenize(format!("expected token `{}`", ch).into())),
67 }
68 }
69
70 fn string_token(&mut self) -> Result<Token> {
71 self.skip('"')?;
72 let chars = self.take_until(|c| c == '"')?;
73 Ok(Token::StringValue(chars.iter().collect()))
74 }
75
76 fn number_token(&mut self) -> Result<Token> {
77 let chars = self.take_while(|c| Regex::new(r"^\d$").unwrap().is_match(&c.to_string()))?;
78 let num_string: String = chars.iter().collect();
79 match num_string.parse() {
80 Ok(num) => Ok(Token::NumberValue(num)),
81 Err(pfe) => Err(Error::Tokenize(pfe.to_string())),
82 }
83 }
84
85 fn keyword_token(&mut self) -> Result<Token> {
86 let chars = self.take_while(|c| {
87 Regex::new(r"^[a-zA-Z_\d]$")
88 .unwrap()
89 .is_match(&c.to_string())
90 })?;
91 let token: String = chars.iter().collect();
92 match &token[..] {
93 "true" => Ok(Token::BoolValue(true)),
94 "false" => Ok(Token::BoolValue(false)),
95 "null" => Ok(Token::NullValue),
96 _ => Err(Error::Tokenize(
97 format!("unrecognized token {}", token).into(),
98 )),
99 }
100 }
101
102 pub fn tokenize(&mut self) -> Result<Vec<Token>> {
103 let mut v: Vec<Token> = vec![];
104 while let Some(c) = self.char_stream.peek() {
105 match c {
106 ' ' | '\n' | '\t' => {
107 self.char_stream.next();
108 }
109 '{' => {
110 v.push(Token::CurlyOpen);
111 self.char_stream.next();
112 }
113 '}' => {
114 v.push(Token::CurlyClose);
115 self.char_stream.next();
116 }
117 '[' => {
118 v.push(Token::SquareOpen);
119 self.char_stream.next();
120 }
121 ']' => {
122 v.push(Token::SquareClose);
123 self.char_stream.next();
124 }
125 ',' => {
126 v.push(Token::Coma);
127 self.char_stream.next();
128 }
129 ':' => {
130 v.push(Token::Colon);
131 self.char_stream.next();
132 }
133 '"' => v.push(self.string_token()?),
134 '0'...'9' => v.push(self.number_token()?),
135 _ => v.push(self.keyword_token()?),
136 }
137 }
138 Ok(v)
139 }
140}
141
142impl FromStr for Tokenizer {
143 type Err = Error;
144 fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
145 Ok(Tokenizer::new(s))
146 }
147}
148
/// A quoted literal yields its inner text, quotes stripped.
#[test]
fn test_string_token() {
    let mut t: Tokenizer = (r#""hello""#).parse().unwrap();
    let token = t.string_token().unwrap();
    assert_eq!(token, Token::StringValue(String::from("hello")));
}
155
/// A digit run parses to an `f64` number token.
#[test]
fn test_number_token() {
    let mut t = Tokenizer::new("123");
    assert_eq!(t.number_token().unwrap(), Token::NumberValue(123.0));
}
162
/// The bare word `true` maps to `BoolValue(true)`.
#[test]
fn test_true_token() {
    let mut t = Tokenizer::new("true");
    assert_eq!(t.keyword_token().unwrap(), Token::BoolValue(true));
}
169
/// The bare word `false` maps to `BoolValue(false)`.
#[test]
fn test_false_token() {
    let mut t = Tokenizer::new("false");
    assert_eq!(t.keyword_token().unwrap(), Token::BoolValue(false));
}
176
/// The bare word `null` maps to `NullValue`.
#[test]
fn test_null_token() {
    let mut t = Tokenizer::new("null");
    assert_eq!(t.keyword_token().unwrap(), Token::NullValue);
}
183
/// End-to-end: a small JSON document covering every token variant.
#[test]
fn test_tokenize_token() {
    let input = r#"{"str": "hello", "num": 123, "array":[true, false, null]}"#;
    let tokens = Tokenizer::new(input).tokenize().unwrap();
    let expected = vec![
        Token::CurlyOpen,
        Token::StringValue("str".into()),
        Token::Colon,
        Token::StringValue("hello".into()),
        Token::Coma,
        Token::StringValue("num".into()),
        Token::Colon,
        Token::NumberValue(123.0),
        Token::Coma,
        Token::StringValue("array".into()),
        Token::Colon,
        Token::SquareOpen,
        Token::BoolValue(true),
        Token::Coma,
        Token::BoolValue(false),
        Token::Coma,
        Token::NullValue,
        Token::SquareClose,
        Token::CurlyClose,
    ];
    assert_eq!(tokens, expected);
}