Skip to main content

pipa/builtins/
json_parser.rs

1use crate::object::array_obj::JSArrayObject;
2use crate::object::object::JSObject;
3use crate::runtime::context::JSContext;
4use crate::util::memchr::memchr2;
5use crate::value::JSValue;
6
7#[inline(always)]
8fn find_string_end(input: &[u8], pos: usize) -> usize {
9    memchr2(b'"', b'\\', &input[pos..])
10        .map(|o| pos + o)
11        .unwrap_or(input.len())
12}
13
/// Returns the index of the first byte at or after `pos` that is not JSON
/// whitespace (space, tab, carriage return, line feed).
///
/// If only whitespace remains, or `pos` is already at or beyond the end of
/// `input`, the result is `input.len()`.
#[inline(always)]
fn skip_ws_bulk(input: &[u8], pos: usize) -> usize {
    let len = input.len();
    match input.get(pos..) {
        // `pos` lies past the end of the buffer: clamp to the input length.
        None => len,
        Some(rest) => rest
            .iter()
            .position(|&byte| !matches!(byte, b' ' | b'\t' | b'\r' | b'\n'))
            .map_or(len, |offset| pos + offset),
    }
}
27
/// Cursor-based JSON parser over a borrowed byte buffer.
///
/// `Debug` is derived so parser state can be inspected in logs and assertion
/// output; `Clone` is cheap (one slice reference plus one index) and lets a
/// caller snapshot the cursor.
#[derive(Debug, Clone)]
pub struct JsonParser<'a> {
    /// The bytes being parsed; `new` fills this from the UTF-8 bytes of a `&str`.
    input: &'a [u8],
    /// Byte offset into `input` of the next unread byte.
    pos: usize,
}
32
33impl<'a> JsonParser<'a> {
34    pub fn new(input: &'a str) -> Self {
35        JsonParser {
36            input: input.as_bytes(),
37            pos: 0,
38        }
39    }
40
41    #[inline]
42    fn peek(&self) -> Option<u8> {
43        self.input.get(self.pos).copied()
44    }
45
46    #[inline]
47    fn advance(&mut self) -> Option<u8> {
48        let ch = self.input.get(self.pos).copied();
49        self.pos += 1;
50        ch
51    }
52
53    #[inline]
54    fn skip_whitespace(&mut self) {
55        self.pos = skip_ws_bulk(self.input, self.pos);
56    }
57
58    #[inline]
59    fn expect_byte(&mut self, expected: u8) -> Result<(), String> {
60        match self.advance() {
61            Some(b) if b == expected => Ok(()),
62            Some(b) => Err(format!(
63                "JSON: expected '{}', got '{}' at pos {}",
64                expected as char,
65                b as char,
66                self.pos - 1
67            )),
68            None => Err("JSON: unexpected end of input".to_string()),
69        }
70    }
71
72    pub fn parse_value(&mut self, ctx: &mut JSContext) -> Result<JSValue, String> {
73        self.skip_whitespace();
74        let ch = self.peek().ok_or("JSON: unexpected end of input")?;
75        match ch {
76            b'"' => self.parse_string(ctx),
77            b'{' => self.parse_object(ctx),
78            b'[' => self.parse_array(ctx),
79            b't' => self.parse_literal(b"true", JSValue::bool(true)),
80            b'f' => self.parse_literal(b"false", JSValue::bool(false)),
81            b'n' => self.parse_literal(b"null", JSValue::null()),
82            b'-' | b'0'..=b'9' => self.parse_number(),
83            _ => Err(format!(
84                "JSON: unexpected character '{}' at pos {}",
85                ch as char, self.pos
86            )),
87        }
88    }
89
90    pub fn parse_root(&mut self, ctx: &mut JSContext) -> Result<JSValue, String> {
91        let val = self.parse_value(ctx)?;
92        self.skip_whitespace();
93        if self.pos != self.input.len() {
94            return Err(format!(
95                "JSON: unexpected trailing content at pos {}",
96                self.pos
97            ));
98        }
99        Ok(val)
100    }
101
102    fn parse_literal(&mut self, expected: &[u8], value: JSValue) -> Result<JSValue, String> {
103        let end = self.pos + expected.len();
104        if end <= self.input.len() && &self.input[self.pos..end] == expected {
105            self.pos = end;
106            Ok(value)
107        } else {
108            Err(format!("JSON: invalid literal at pos {}", self.pos))
109        }
110    }
111
112    fn parse_number(&mut self) -> Result<JSValue, String> {
113        let start = self.pos;
114
115        if self.peek() == Some(b'-') {
116            self.pos += 1;
117        }
118
119        if self.peek() == Some(b'0') {
120            self.pos += 1;
121
122            if let Some(b'0'..=b'9') = self.peek() {
123                return Err("JSON: leading zeros not allowed".to_string());
124            }
125        } else {
126            while let Some(b'0'..=b'9') = self.peek() {
127                self.pos += 1;
128            }
129        }
130
131        let has_fraction;
132        let has_exponent;
133
134        if self.peek() == Some(b'.') {
135            self.pos += 1;
136            if !matches!(self.peek(), Some(b'0'..=b'9')) {
137                return Err("JSON: expected digit after decimal point".to_string());
138            }
139            while let Some(b'0'..=b'9') = self.peek() {
140                self.pos += 1;
141            }
142            has_fraction = true;
143        } else {
144            has_fraction = false;
145        }
146
147        if matches!(self.peek(), Some(b'e') | Some(b'E')) {
148            self.pos += 1;
149            if matches!(self.peek(), Some(b'+') | Some(b'-')) {
150                self.pos += 1;
151            }
152            if !matches!(self.peek(), Some(b'0'..=b'9')) {
153                return Err("JSON: expected digit in exponent".to_string());
154            }
155            while let Some(b'0'..=b'9') = self.peek() {
156                self.pos += 1;
157            }
158            has_exponent = true;
159        } else {
160            has_exponent = false;
161        }
162
163        if !has_fraction && !has_exponent {
164            let bytes = &self.input[start..self.pos];
165            let (negative, digits) = if bytes[0] == b'-' {
166                (true, &bytes[1..])
167            } else {
168                (false, bytes)
169            };
170
171            if digits.len() <= 18 {
172                let mut n: u64 = 0;
173                for &b in digits {
174                    n = n * 10 + (b - b'0') as u64;
175                }
176                let i = if negative {
177                    if n == 0 {
178                        return Ok(JSValue::new_float(-0.0f64));
179                    }
180                    if n <= i64::MAX as u64 + 1 {
181                        n.wrapping_neg() as i64
182                    } else {
183                        let s = unsafe { std::str::from_utf8_unchecked(bytes) };
184                        let f: f64 = s.parse().map_err(|_| "JSON: invalid number".to_string())?;
185                        return Ok(JSValue::new_float(f));
186                    }
187                } else if n <= i64::MAX as u64 {
188                    n as i64
189                } else {
190                    let s = unsafe { std::str::from_utf8_unchecked(bytes) };
191                    let f: f64 = s.parse().map_err(|_| "JSON: invalid number".to_string())?;
192                    return Ok(JSValue::new_float(f));
193                };
194                return Ok(JSValue::new_int(i));
195            }
196
197            let num_str = unsafe { std::str::from_utf8_unchecked(&self.input[start..self.pos]) };
198            if let Ok(i) = num_str.parse::<i64>() {
199                return Ok(JSValue::new_int(i));
200            }
201            let f: f64 = num_str
202                .parse()
203                .map_err(|_| "JSON: invalid number".to_string())?;
204            return Ok(JSValue::new_float(f));
205        }
206
207        let num_str = unsafe { std::str::from_utf8_unchecked(&self.input[start..self.pos]) };
208        let f: f64 = num_str
209            .parse()
210            .map_err(|_| "JSON: invalid number".to_string())?;
211        Ok(JSValue::new_float(f))
212    }
213
214    fn parse_string(&mut self, ctx: &mut JSContext) -> Result<JSValue, String> {
215        self.expect_byte(b'"')?;
216        let start = self.pos;
217
218        let first_special = find_string_end(self.input, self.pos);
219
220        if first_special < self.input.len() && self.input[first_special] == b'"' {
221            let slice = &self.input[start..first_special];
222            for &b in slice {
223                if b < 0x20 {
224                    return Err(format!(
225                        "JSON: unescaped control character U+{:04X} at pos {}",
226                        b, start
227                    ));
228                }
229            }
230            let s = unsafe { std::str::from_utf8_unchecked(slice) };
231            let atom = ctx.intern(s);
232            self.pos = first_special + 1;
233            return Ok(JSValue::new_string(atom));
234        }
235
236        let mut buf = if first_special > start {
237            let slice = &self.input[start..first_special];
238            for &b in slice {
239                if b < 0x20 {
240                    return Err(format!(
241                        "JSON: unescaped control character U+{:04X} at pos {}",
242                        b, start
243                    ));
244                }
245            }
246            let s = unsafe { std::str::from_utf8_unchecked(slice) };
247            s.to_string()
248        } else {
249            String::new()
250        };
251        self.pos = first_special;
252
253        loop {
254            let b = self.advance().ok_or("JSON: unterminated string")?;
255            match b {
256                b'"' => {
257                    let atom = ctx.intern(&buf);
258                    return Ok(JSValue::new_string(atom));
259                }
260                b'\\' => {
261                    let escaped = self.advance().ok_or("JSON: unterminated escape")?;
262                    match escaped {
263                        b'"' => buf.push('"'),
264                        b'\\' => buf.push('\\'),
265                        b'/' => buf.push('/'),
266                        b'b' => buf.push('\x08'),
267                        b'f' => buf.push('\x0c'),
268                        b'n' => buf.push('\n'),
269                        b'r' => buf.push('\r'),
270                        b't' => buf.push('\t'),
271                        b'u' => {
272                            let hex = self.parse_hex_escape()?;
273                            buf.push(hex);
274                        }
275                        _ => return Err(format!("JSON: invalid escape '\\{}'", escaped as char)),
276                    }
277
278                    let next_special = find_string_end(self.input, self.pos);
279                    let span = &self.input[self.pos..next_special];
280                    for &b in span {
281                        if b < 0x20 {
282                            return Err(format!(
283                                "JSON: unescaped control character U+{:04X} at pos {}",
284                                b, self.pos
285                            ));
286                        }
287                    }
288
289                    buf.push_str(unsafe { std::str::from_utf8_unchecked(span) });
290                    self.pos = next_special;
291                }
292                _ => unreachable!(
293                    "find_string_end guarantees self.pos stops at '\"' or '\\', got 0x{:02X}",
294                    b
295                ),
296            }
297        }
298    }
299
300    fn parse_hex_escape(&mut self) -> Result<char, String> {
301        let mut code = 0u32;
302        for _ in 0..4 {
303            let b = self.advance().ok_or("JSON: unterminated unicode escape")?;
304            let digit = match b {
305                b'0'..=b'9' => (b - b'0') as u32,
306                b'a'..=b'f' => (b - b'a') as u32 + 10,
307                b'A'..=b'F' => (b - b'A') as u32 + 10,
308                _ => return Err(format!("JSON: invalid hex digit '{}'", b as char)),
309            };
310            code = (code << 4) | digit;
311        }
312
313        if (0xD800..=0xDBFF).contains(&code) {
314            if self.advance() != Some(b'\\') || self.advance() != Some(b'u') {
315                return Err("JSON: expected low surrogate after high surrogate".to_string());
316            }
317            let mut low = 0u32;
318            for _ in 0..4 {
319                let b = self
320                    .advance()
321                    .ok_or("JSON: unterminated surrogate escape")?;
322                let digit = match b {
323                    b'0'..=b'9' => (b - b'0') as u32,
324                    b'a'..=b'f' => (b - b'a') as u32 + 10,
325                    b'A'..=b'F' => (b - b'A') as u32 + 10,
326                    _ => {
327                        return Err(format!(
328                            "JSON: invalid hex digit in surrogate '{}'",
329                            b as char
330                        ));
331                    }
332                };
333                low = (low << 4) | digit;
334            }
335            if !(0xDC00..=0xDFFF).contains(&low) {
336                return Err("JSON: invalid low surrogate".to_string());
337            }
338            let combined = 0x10000 + ((code - 0xD800) << 10) + (low - 0xDC00);
339            char::from_u32(combined)
340                .ok_or("JSON: invalid codepoint from surrogate pair".to_string())
341        } else {
342            char::from_u32(code).ok_or("JSON: invalid unicode codepoint".to_string())
343        }
344    }
345
346    fn parse_array(&mut self, ctx: &mut JSContext) -> Result<JSValue, String> {
347        self.expect_byte(b'[')?;
348        self.skip_whitespace();
349
350        let mut elements = Vec::new();
351
352        if self.peek() != Some(b']') {
353            loop {
354                self.skip_whitespace();
355                let val = self.parse_value(ctx)?;
356                elements.push(val);
357                self.skip_whitespace();
358                match self.peek() {
359                    Some(b',') => {
360                        self.pos += 1;
361                    }
362                    Some(b']') => break,
363                    _ => return Err("JSON: expected ',' or ']' in array".to_string()),
364                }
365            }
366        }
367        self.expect_byte(b']')?;
368
369        let len = elements.len();
370        let mut arr = JSArrayObject::from_elements(elements);
371        if let Some(proto_ptr) = ctx.get_array_prototype() {
372            arr.header.set_prototype_raw(proto_ptr);
373        }
374        let len_atom = ctx.common_atoms.length;
375        arr.header.set(len_atom, JSValue::new_int(len as i64));
376
377        let ptr = Box::into_raw(Box::new(arr)) as usize;
378        ctx.runtime_mut().gc_heap_mut().track_array(ptr);
379        Ok(JSValue::new_object(ptr))
380    }
381
382    fn parse_object(&mut self, ctx: &mut JSContext) -> Result<JSValue, String> {
383        self.expect_byte(b'{')?;
384        self.skip_whitespace();
385
386        let mut obj = JSObject::new();
387        if let Some(obj_proto_ptr) = ctx.get_object_prototype() {
388            obj.prototype = Some(obj_proto_ptr);
389        }
390
391        if self.peek() != Some(b'}') {
392            loop {
393                self.skip_whitespace();
394
395                if self.peek() != Some(b'"') {
396                    return Err("JSON: expected string key in object".to_string());
397                }
398                let key_val = self.parse_string(ctx)?;
399                let key_atom = key_val.get_atom();
400
401                self.skip_whitespace();
402                self.expect_byte(b':')?;
403
404                self.skip_whitespace();
405                let val = self.parse_value(ctx)?;
406                obj.set(key_atom, val);
407
408                self.skip_whitespace();
409                match self.peek() {
410                    Some(b',') => {
411                        self.pos += 1;
412                    }
413                    Some(b'}') => break,
414                    _ => return Err("JSON: expected ',' or '}' in object".to_string()),
415                }
416            }
417        }
418        self.expect_byte(b'}')?;
419
420        let ptr = Box::into_raw(Box::new(obj)) as usize;
421        Ok(JSValue::new_object(ptr))
422    }
423}