jsode/
parser.rs

1use std::collections::{HashMap, VecDeque};
2
3use jsode_macro::reflection;
4
5use crate::{
6    common, constant::msg, core::{
7        JsonBlock, JsonOutput, JsonToken,
8        JsonType, JsonValue, Punct, Span,
9    }, error::JsonError, lexer::Tokenizer
10};
11
12#[derive(PartialEq, Debug)]
13pub struct JsonParser<'tk> {
14    iter: Tokenizer<'tk>,
15}
16
17impl<'tk> JsonParser<'tk> {
18    #[inline]
19    pub fn new(src: &'tk str) -> Self {
20        Self {
21            iter: Tokenizer::from(src),
22        }
23    }
24}
25
26impl<'tk> JsonParser<'tk> {
27    pub fn parse(&'_ mut self) -> crate::Result<JsonOutput<'_>> {
28        let mut cursor = JsonCursor::init(self)?;
29
30        let init_block = match cursor.roots.back() {
31            Some(State::Object(_, _)) => JsonBlock::new(0, JsonValue::Object(HashMap::with_capacity(10), Span::default())),
32            Some(State::Array(_, _)) => JsonBlock::new(0, JsonValue::Array(Vec::with_capacity(10), Span::default())),
33            Some(State::Value(JsonType::Ident, span)) => return Err(JsonError::custom("Invalid JSON", span.clone())),
34            Some(State::Value(_, value_span)) => {
35                if self.next_token().is_some() {
36                    return Err(JsonError::custom("Invalid JSON", value_span.clone()));
37                }
38                let State::Value(ty, span) = cursor.roots.pop_back().unwrap() else {
39                    return Err(JsonError::custom("Invalid JSON", Span::default())); 
40                };
41
42                return Ok(JsonOutput::new(self, Vec::<JsonBlock>::from_iter([
43                    JsonBlock::new(0, JsonValue::Value(ty, span))
44                ])));
45            },
46            None => return Err(JsonError::custom("Invalid JSON", Span::default())),
47        };
48        cursor.level += 1;
49
50        let mut ast = Vec::<JsonBlock>::from_iter([init_block]);
51
52        while let Some(state) = cursor.roots.back() {
53            let block = match state {
54                State::Object(_,_) => cursor.parse_object_prop(self, ast.as_mut()),
55                State::Array(_,_) => cursor.parse_array_item(self, ast.as_mut()),
56                State::Value(_,_) => cursor.parse_value(self),
57            }?;
58
59            let next_token = match (!cursor.roots.is_empty(), self.next_token()) {
60                (true, token @ Some(_)) => token,
61                (false, None) => None,
62                _ => return Err(JsonError::custom("Invalid JSON", Span::default())),
63            };
64
65            let Some(block_value) = block else { match &next_token {
66                Some(JsonToken::Punct(Punct::Comma, _)) => 0,
67                None => 0,
68                Some(JsonToken::Punct(Punct::CloseCurly | Punct::CloseSquare, span)) => self.iter.step_back_nth(span.gap()),
69                Some(other) => return Err(JsonError::custom("expect comma or close-square, found other", other.get_span())),
70            }; continue; };
71
72            match (&block_value.value, next_token) {
73                (JsonValue::Prop(_,_,_) | JsonValue::Value(_,_), Some(JsonToken::Punct(Punct::Comma, _))) => 0,
74                (JsonValue::Prop(_,_,_) | JsonValue::Value(_,_), Some(JsonToken::Punct(Punct::CloseCurly | Punct::CloseSquare, span))) => self.iter.step_back_nth(span.gap()),
75                (JsonValue::Prop(_,_,_) | JsonValue::Value(_,_), Some(other)) => return Err(JsonError::custom("expect comma or close-square, found other", other.get_span())),
76                (JsonValue::Prop(_,_,_) | JsonValue::Value(_,_), None) => return Err(JsonError::custom("parsing prop value but reaching None", Span::default())),
77                (_, Some(other)) => self.iter.step_back_nth(other.get_span().gap()),
78                (_, None) => return Err(JsonError::custom("expect comma or close-square, found other", Span::default())),
79            };
80
81            ast.push(block_value);
82        }
83
84        Ok(JsonOutput::new(self, ast))
85    }
86}
87
88impl<'tk> JsonParser<'tk> {
89    #[inline]
90    pub const fn take_raw(&self, span: Span) -> &[u8] {
91        self.iter.take_raw(span)
92    }
93
94    #[inline]
95    pub fn take_slice(&self, span: Span) -> Result<&str, JsonError> {
96        self.iter.take_slice(span)
97    }
98
99    // fetching next token, skip all 'whitespace'
100    #[inline]
101    pub fn next_token(&mut self) -> Option<JsonToken> {
102        loop {
103            let token = self.iter.next()?;
104            match token {
105                JsonToken::Punct(Punct::WhiteSpace, _) => continue,
106                _ => break Some(token),
107            };
108        }
109    }
110
111    #[inline]
112    pub fn next_token_skip(&mut self, predicate: impl Fn(&JsonToken) -> bool) -> Option<JsonToken> {
113        loop {
114            let token = self.iter.next()?;
115            if (predicate)(&token) {
116                continue;
117            }
118            return Some(token);
119        }
120    }
121}
122
123// state represent for the parent's type
124#[derive(Debug)]
125pub(crate) enum State {
126    // the `usize` is the position in ast
127    // the `HashMap` is indexes of their children
128    // everytime a new property parsed successfully, `HashMap` will inserted a new key-value
129    Object(usize, HashMap<usize, usize>),
130    // the first `usize` is the position in ast,
131    // the second `Vec<usize>` are position of each item
132    // everytime a new item parsed successfully, second `usize` will increased by one
133    Array(usize, Vec<usize>),
134    // Prop,
135    Value(JsonType, Span),
136    // EOF,
137}
138
139#[derive(Debug)]
140pub(crate) struct JsonCursor {
141    level: usize,
142    roots: VecDeque<State>,
143}
144
145impl JsonCursor {
146    pub fn new(state: State) -> Self {
147        Self {
148            level: 0,
149            roots: VecDeque::from_iter([state]),
150        }
151    }
152
153    /// going up a level
154    /// pop back latest state out of stack
155    fn pop_state(&mut self) -> Option<State> {
156        self.level = self.level.saturating_sub(1);
157        self.roots.pop_back()
158    }
159
160    /// add new prop's index to PARENT
161    /// note: PARENT should be an object
162    #[inline]
163    #[reflection]
164    fn update_prop_index(&mut self, key: Span, parser: &JsonParser<'_>, block_pos: usize) -> crate::Result<()> {
165        let Some(State::Object(anchor, ref mut prop_indexes)) = self.roots.back_mut() else {
166            return Err(JsonError::custom(format!("[{__fn_ident}] {}", msg::SOON_EOS), Span::default()));
167        };
168        // insert new item to object indexes
169        let key_slice = parser.take_slice(key)?;
170        let key_hashed = common::hash_str(key_slice) as usize;
171        // we should use relative instead absolute position here
172        // because lately when we index value, the origin size of ast is hard to trace
173        prop_indexes.insert(key_hashed, block_pos - *anchor);
174
175        Ok(())
176    }
177
178    /// increase PARENT length by 1
179    /// note: PARENT should be an array
180    #[inline]
181    #[reflection]
182    fn update_array_length(&mut self, pos: usize) -> crate::Result<()> {
183        let Some(State::Array(anchor, ref mut item_indexes)) = self.roots.back_mut() else {
184            return Err(JsonError::custom(format!("[{__fn_ident}] {}", msg::SOON_EOS), Span::default()));
185        };
186        // push array's item related position
187        item_indexes.push(pos - *anchor);
188
189        Ok(())
190    }
191
192    #[inline]
193    fn create_object_block(&mut self, position: usize, span: Span) -> JsonBlock {
194        let block = JsonBlock {
195            level: self.level,
196            value: JsonValue::Object(HashMap::with_capacity(10), span),
197        };
198        self.level += 1;
199        self.roots.push_back(State::Object(position, HashMap::with_capacity(10)));
200        block
201    }
202
203    #[inline]
204    fn create_array_block(&mut self, position: usize, span: Span) -> JsonBlock {
205        let block = JsonBlock {
206            level: self.level,
207            value: JsonValue::Array(Vec::with_capacity(10), span),
208        };
209        self.level += 1;
210        self.roots.push_back(State::Array(position, Vec::with_capacity(10)));
211        block
212    }
213
214    #[reflection]
215    fn create_prop_block(&mut self, key: Span, value: JsonType, value_span: Span, parser: &JsonParser<'_>, block_pos: usize) -> crate::Result<JsonBlock> {
216        let Some(State::Object(anchor, ref mut prop_indexes)) = self.roots.back_mut() else {
217            return Err(JsonError::custom(format!("[{__fn_ident}] {}", msg::SOON_EOS), Span::default()));
218        };
219        // insert new item to object indexes
220        let key_slice = parser.take_slice(key.clone())?;
221        let key_hashed = common::hash_str(key_slice) as usize;
222
223        prop_indexes.insert(key_hashed, block_pos - *anchor);
224
225        Ok(JsonBlock {
226            level: self.level,
227            value: JsonValue::Prop(value, value_span.clone(), key.extend(value_span)),
228        })
229    }
230
231    #[reflection]
232    fn create_item_block(&mut self, pos: usize, value: JsonType, value_span: Span) -> crate::Result<JsonBlock> {
233        let Some(State::Array(anchor, ref mut item_indexes)) = self.roots.back_mut() else {
234            return Err(JsonError::custom(format!("[{__fn_ident}] {}", msg::SOON_EOS), Span::default()));
235        };
236        // increase length of parent array by `1`
237        item_indexes.push(pos - *anchor);
238
239        Ok(JsonBlock {
240            level: self.level,
241            value: JsonValue::Value(value, value_span),
242        })
243    }
244
245    #[inline]
246    const fn create_value_block(&self, value: JsonType, value_span: Span) -> JsonBlock {
247        JsonBlock {
248            level: self.level,
249            value: JsonValue::Value(value, value_span),
250        }
251    }
252
253    // jump to higher level and update it's indexes
254    // it also mean jump to the block that represent for the parent of those items,
255    // tell him that all your children were born and you need to know their name (index).
256    #[reflection]
257    fn rollup_indexes(&mut self, ast: &mut [JsonBlock], end: usize) -> crate::Result<()> {
258        // the `state` holding the position of the parent object/array
259        let Some(state) = self.pop_state() else {
260            return Err(JsonError::custom(format!("[{__fn_ident}] {}", msg::SOON_EOS), Span::default()));
261        };
262
263        match state {
264            // take the block locate at `pos`
265            // if block's type is an Array, then process update its indexes
266            State::Array(pos, indexes) => match ast.get_mut(pos) {
267                Some(block) => if let JsonValue::Array(item_indexes, array_span) = &mut block.value {
268                    item_indexes.extend(indexes);
269                    array_span.end = end;
270                },
271                _ => return Err(JsonError::custom(format!("[{__fn_ident}] the JsonBlock at index {pos} is not an Array, cannot update indexes"), Span::default())),
272            },
273            // take the block locate at `pos`
274            // if block's type is an Object, then process update its indexes
275            State::Object(pos, indexes) => match ast.get_mut(pos) {
276                Some(block) => if let JsonValue::Object(prop_indexes, obj_span) = &mut block.value {
277                    prop_indexes.extend(indexes);
278                    obj_span.end = end;
279                },
280                _ => return Err(JsonError::custom(format!("[{__fn_ident}] the JsonBlock at index {pos} is not an Object, cannot update indexes"), Span::default())),
281            },
282            // only Object and Array is allow to have items and nested children
283            // Value should be one of primitive JSON supported's type
284            _ => return Err(JsonError::custom(format!("[{__fn_ident}] not allow State::Value when rollup indexes"), Span::default())),
285        };
286
287        Ok(())
288    }
289}
290
291impl JsonCursor {
292    pub fn init(parser: &mut JsonParser<'_>) -> crate::Result<Self> {
293        let Some(token) = parser.next_token() else {
294            return Err(JsonError::custom("Reach the end of token stream, soon EOF", Span::default()));
295        };
296
297        match token {
298            JsonToken::Punct(Punct::OpenCurly, _) => Ok(Self::new(State::Object(0, HashMap::new()))),
299            JsonToken::Punct(Punct::OpenSquare, _) => Ok(Self::new(State::Array(0, Vec::new()))),
300            JsonToken::Data(ty, span) => Ok(Self::new(State::Value(ty, span))),
301            other_type => Err(JsonError::custom("Invalid JSON, should be comment, value, open-curly, open-square", other_type.get_span())),
302        }
303    }
304
305    #[reflection]
306    pub fn parse_object_prop(&mut self, parser: &mut JsonParser<'_>, ast: &mut [JsonBlock]) -> crate::Result<Option<JsonBlock>> {
307        let key_span = match parser.next_token_skip(|tk| matches!(tk, JsonToken::Punct(Punct::WhiteSpace, _) | JsonToken::Comment(_))) {
308            Some(JsonToken::Data(JsonType::Str(_), span)) => span.collapse(1),
309            Some(JsonToken::Data(JsonType::Ident, span)) => span,
310            // hitting the end of this object
311            Some(JsonToken::Punct(Punct::CloseCurly, span)) => {
312                self.rollup_indexes(ast, span.end)?;
313                return Ok(None);
314            }
315            Some(JsonToken::Error(err, span)) => return Err(JsonError::custom(format!("[{__fn_ident}] {}", err), span)),
316            Some(tk) => return Err(JsonError::custom(format!("[{__fn_ident}] expect JSON's key is a str/ident, found other"), tk.get_span())),
317            None => return Err(JsonError::custom(format!("[{__fn_ident}] `key` should not be None"), Span::default()))
318        };
319
320        let _colon = match parser.next_token() {
321            Some(JsonToken::Punct(Punct::Colon, cspan)) => cspan,
322            Some(JsonToken::Error(err, span)) => return Err(JsonError::custom(format!("[{__fn_ident}] {}", err), span)),
323            Some(tk) => return Err(JsonError::custom(format!("[{__fn_ident}] expect next token is a colon, found other"), tk.get_span())),
324            None => return Err(JsonError::custom(format!("[{__fn_ident}] `colon` should not be None"), Span::default()))
325        };
326
327        let value = match parser.next_token_skip(|tk| matches!(tk, JsonToken::Punct(Punct::WhiteSpace | Punct::Plus | Punct::Minus, _))) {
328            Some(JsonToken::Punct(Punct::OpenCurly, span)) => {
329                self.update_prop_index(key_span, parser, ast.len())?;
330                self.create_object_block(ast.len(), span)
331            },
332            Some(JsonToken::Punct(Punct::OpenSquare, span)) => {
333                self.update_prop_index(key_span, parser, ast.len())?;
334                self.create_array_block(ast.len(), span)
335            },
336            Some(JsonToken::Data(data @ JsonType::Str(_), data_span)) => self.create_prop_block(key_span, data, data_span.collapse(1), parser, ast.len())?,
337            Some(JsonToken::Data(data, data_span)) => self.create_prop_block(key_span, data, data_span, parser, ast.len())?,
338            Some(JsonToken::Error(err, span)) => return Err(JsonError::custom(format!("[{__fn_ident}] {}", err), span)),
339            Some(tk) => return Err(JsonError::custom(format!("[{__fn_ident}] expect next token is primitive value, open-curly or open-square, found other"), tk.get_span())),
340            None => return Err(JsonError::custom(format!("[{__fn_ident}] parsing prop value but reaching None"), Span::default())),
341        };
342
343        Ok(Some(value))
344    }
345
346    #[reflection]
347    pub fn parse_array_item(&mut self, parser: &mut JsonParser<'_>, ast: &mut [JsonBlock]) -> crate::Result<Option<JsonBlock>> {
348        let item_value = match parser.next_token_skip(|tk| matches!(tk, JsonToken::Punct(Punct::WhiteSpace | Punct::Plus | Punct::Minus, _) | JsonToken::Comment(_))) {
349            Some(JsonToken::Data(data, data_span)) => self.create_item_block(ast.len(), data, data_span)?,
350            Some(JsonToken::Punct(Punct::OpenCurly, span)) => {
351                self.update_array_length(ast.len())?;
352                self.create_object_block(ast.len(), span)
353            },
354            Some(JsonToken::Punct(Punct::OpenSquare, span)) => {
355                self.update_array_length(ast.len())?;
356                self.create_array_block(ast.len(), span)
357            },
358            // hitting the end of this array
359            Some(JsonToken::Punct(Punct::CloseSquare, span)) =>  {
360                self.rollup_indexes(ast, span.end)?;
361                return Ok(None)
362            },
363            Some(JsonToken::Punct(_, span)) => return Err(JsonError::invalid_array(span)),
364            Some(JsonToken::Error(err, span)) => return Err(JsonError::custom(format!("[{__fn_ident}] {}", err), span)),
365            Some(JsonToken::Comment(span)) => return Err(JsonError::custom(format!("[{__fn_ident}] should not reaching this state, because all comments must be stripped all"), span)),
366            None => return Err(JsonError::custom(format!("[{__fn_ident}] reaching None when parsing"), Span::default()))
367        };
368
369        Ok(Some(item_value))
370    }
371
372    // the whole source is single-value
373    #[reflection]
374    pub fn parse_value(&mut self, parser: &mut JsonParser<'_>) -> crate::Result<Option<JsonBlock>> {
375        let next_item = parser.next_token_skip(|tk| matches!(tk, JsonToken::Punct(Punct::WhiteSpace | Punct::Plus | Punct::Minus, _) | JsonToken::Comment(_)));
376        let item_value = match next_item {
377            Some(JsonToken::Data(data, data_span)) => self.create_value_block(data, data_span),
378            _ => return Err(JsonError::custom(format!("[{__fn_ident}] invalid json value"), Span::default())),
379        };
380        Ok(Some(item_value))
381    }
382}
383
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `src`, prints the error (if any) to stderr, and asserts success.
    fn assert_parses(src: &str) {
        let mut parser = JsonParser::new(src);
        let out = parser.parse();

        if let Err(err) = &out {
            eprintln!("{err}");
        }
        assert!(out.is_ok());
    }

    /// Flat props, a nested object, an empty string, a boolean and nested arrays.
    #[test]
    fn parse_simple_json() {
        assert_parses("{ a: 1, b: { c: 0x0F }, d: \"\", f: true, g: [1,[2,3],{h:1}]}");
    }

    /// Objects nested inside arrays inside objects, plus a string containing
    /// a newline and a tab.
    #[test]
    fn parse_complex_json() {
        assert_parses("{ a: 1, b: [1,2, { d: { e: [{f:1}] } }], g: \"\n\t\", h: 0x9F }");
    }
}