Skip to main content

pipa/builtins/
json_parser.rs

1use crate::object::array_obj::JSArrayObject;
2use crate::object::object::JSObject;
3use crate::runtime::context::JSContext;
4use crate::util::memchr::memchr2;
5use crate::value::JSValue;
6
7#[inline(always)]
8fn find_string_end(input: &[u8], pos: usize) -> usize {
9    memchr2(b'"', b'\\', &input[pos..])
10        .map(|o| pos + o)
11        .unwrap_or(input.len())
12}
13
/// Skips a run of JSON whitespace (space, tab, carriage return, line feed).
///
/// Returns the index of the first byte at or after `pos` that is not
/// whitespace. If the rest of the input is entirely whitespace, or `pos` is
/// already at or beyond the end, `input.len()` is returned.
#[inline(always)]
fn skip_ws_bulk(input: &[u8], pos: usize) -> usize {
    match input.get(pos..) {
        Some(rest) => {
            let run = rest
                .iter()
                .take_while(|&&byte| matches!(byte, b' ' | b'\t' | b'\r' | b'\n'))
                .count();
            pos + run
        }
        // `pos` lies past the end of the input: clamp to the input length.
        None => input.len(),
    }
}
27
/// Cursor-based JSON parser over a borrowed byte slice.
///
/// The parser never copies the source text: it keeps a reference to the
/// input bytes and an index to the next unread byte.
///
/// `Debug` and `Clone` are derived so the parser state can be inspected in
/// diagnostics and cheaply snapshotted (both fields are trivially copyable).
#[derive(Debug, Clone)]
pub struct JsonParser<'a> {
    /// Bytes of the JSON text being parsed.
    input: &'a [u8],
    /// Index into `input` of the next byte to read.
    pos: usize,
}
32
33impl<'a> JsonParser<'a> {
34    pub fn new(input: &'a str) -> Self {
35        JsonParser {
36            input: input.as_bytes(),
37            pos: 0,
38        }
39    }
40
41    #[inline]
42    fn peek(&self) -> Option<u8> {
43        self.input.get(self.pos).copied()
44    }
45
46    #[inline]
47    fn advance(&mut self) -> Option<u8> {
48        let ch = self.input.get(self.pos).copied();
49        self.pos += 1;
50        ch
51    }
52
53    #[inline]
54    fn skip_whitespace(&mut self) {
55        self.pos = skip_ws_bulk(self.input, self.pos);
56    }
57
58    #[inline]
59    fn expect_byte(&mut self, expected: u8) -> Result<(), String> {
60        match self.advance() {
61            Some(b) if b == expected => Ok(()),
62            Some(b) => Err(format!(
63                "JSON: expected '{}', got '{}' at pos {}",
64                expected as char,
65                b as char,
66                self.pos - 1
67            )),
68            None => Err("JSON: unexpected end of input".to_string()),
69        }
70    }
71
72    pub fn parse_value(&mut self, ctx: &mut JSContext) -> Result<JSValue, String> {
73        self.skip_whitespace();
74        let ch = self.peek().ok_or("JSON: unexpected end of input")?;
75        match ch {
76            b'"' => self.parse_string(ctx),
77            b'{' => self.parse_object(ctx),
78            b'[' => self.parse_array(ctx),
79            b't' => self.parse_literal(b"true", JSValue::bool(true)),
80            b'f' => self.parse_literal(b"false", JSValue::bool(false)),
81            b'n' => self.parse_literal(b"null", JSValue::null()),
82            b'-' | b'0'..=b'9' => self.parse_number(),
83            _ => Err(format!(
84                "JSON: unexpected character '{}' at pos {}",
85                ch as char, self.pos
86            )),
87        }
88    }
89
90    fn parse_literal(&mut self, expected: &[u8], value: JSValue) -> Result<JSValue, String> {
91        let end = self.pos + expected.len();
92        if end <= self.input.len() && &self.input[self.pos..end] == expected {
93            self.pos = end;
94            Ok(value)
95        } else {
96            Err(format!("JSON: invalid literal at pos {}", self.pos))
97        }
98    }
99
100    fn parse_number(&mut self) -> Result<JSValue, String> {
101        let start = self.pos;
102
103        if self.peek() == Some(b'-') {
104            self.pos += 1;
105        }
106
107        if self.peek() == Some(b'0') {
108            self.pos += 1;
109
110            if let Some(b'0'..=b'9') = self.peek() {
111                return Err("JSON: leading zeros not allowed".to_string());
112            }
113        } else {
114            while let Some(b'0'..=b'9') = self.peek() {
115                self.pos += 1;
116            }
117        }
118
119        let has_fraction;
120        let has_exponent;
121
122        if self.peek() == Some(b'.') {
123            self.pos += 1;
124            if !matches!(self.peek(), Some(b'0'..=b'9')) {
125                return Err("JSON: expected digit after decimal point".to_string());
126            }
127            while let Some(b'0'..=b'9') = self.peek() {
128                self.pos += 1;
129            }
130            has_fraction = true;
131        } else {
132            has_fraction = false;
133        }
134
135        if matches!(self.peek(), Some(b'e') | Some(b'E')) {
136            self.pos += 1;
137            if matches!(self.peek(), Some(b'+') | Some(b'-')) {
138                self.pos += 1;
139            }
140            if !matches!(self.peek(), Some(b'0'..=b'9')) {
141                return Err("JSON: expected digit in exponent".to_string());
142            }
143            while let Some(b'0'..=b'9') = self.peek() {
144                self.pos += 1;
145            }
146            has_exponent = true;
147        } else {
148            has_exponent = false;
149        }
150
151        if !has_fraction && !has_exponent {
152            let bytes = &self.input[start..self.pos];
153            let (negative, digits) = if bytes[0] == b'-' {
154                (true, &bytes[1..])
155            } else {
156                (false, bytes)
157            };
158
159            if digits.len() <= 18 {
160                let mut n: u64 = 0;
161                for &b in digits {
162                    n = n * 10 + (b - b'0') as u64;
163                }
164                let i = if negative {
165                    if n <= i64::MAX as u64 + 1 {
166                        n.wrapping_neg() as i64
167                    } else {
168                        let s = unsafe { std::str::from_utf8_unchecked(bytes) };
169                        let f: f64 = s.parse().map_err(|_| "JSON: invalid number".to_string())?;
170                        return Ok(JSValue::new_float(f));
171                    }
172                } else if n <= i64::MAX as u64 {
173                    n as i64
174                } else {
175                    let s = unsafe { std::str::from_utf8_unchecked(bytes) };
176                    let f: f64 = s.parse().map_err(|_| "JSON: invalid number".to_string())?;
177                    return Ok(JSValue::new_float(f));
178                };
179                return Ok(JSValue::new_int(i));
180            }
181
182            let num_str = unsafe { std::str::from_utf8_unchecked(&self.input[start..self.pos]) };
183            if let Ok(i) = num_str.parse::<i64>() {
184                return Ok(JSValue::new_int(i));
185            }
186            let f: f64 = num_str
187                .parse()
188                .map_err(|_| "JSON: invalid number".to_string())?;
189            return Ok(JSValue::new_float(f));
190        }
191
192        let num_str = unsafe { std::str::from_utf8_unchecked(&self.input[start..self.pos]) };
193        let f: f64 = num_str
194            .parse()
195            .map_err(|_| "JSON: invalid number".to_string())?;
196        Ok(JSValue::new_float(f))
197    }
198
199    fn parse_string(&mut self, ctx: &mut JSContext) -> Result<JSValue, String> {
200        self.expect_byte(b'"')?;
201        let start = self.pos;
202
203        let first_special = find_string_end(self.input, self.pos);
204
205        if first_special < self.input.len() && self.input[first_special] == b'"' {
206            let slice = &self.input[start..first_special];
207            let s = unsafe { std::str::from_utf8_unchecked(slice) };
208            let atom = ctx.intern(s);
209            self.pos = first_special + 1;
210            return Ok(JSValue::new_string(atom));
211        }
212
213        let mut buf = if first_special > start {
214            let slice = &self.input[start..first_special];
215            let s = unsafe { std::str::from_utf8_unchecked(slice) };
216            s.to_string()
217        } else {
218            String::new()
219        };
220        self.pos = first_special;
221
222        loop {
223            let b = self.advance().ok_or("JSON: unterminated string")?;
224            match b {
225                b'"' => {
226                    let atom = ctx.intern(&buf);
227                    return Ok(JSValue::new_string(atom));
228                }
229                b'\\' => {
230                    let escaped = self.advance().ok_or("JSON: unterminated escape")?;
231                    match escaped {
232                        b'"' => buf.push('"'),
233                        b'\\' => buf.push('\\'),
234                        b'/' => buf.push('/'),
235                        b'b' => buf.push('\x08'),
236                        b'f' => buf.push('\x0c'),
237                        b'n' => buf.push('\n'),
238                        b'r' => buf.push('\r'),
239                        b't' => buf.push('\t'),
240                        b'u' => {
241                            let hex = self.parse_hex_escape()?;
242                            buf.push(hex);
243                        }
244                        _ => return Err(format!("JSON: invalid escape '\\{}'", escaped as char)),
245                    }
246
247                    let next_special = find_string_end(self.input, self.pos);
248                    let span = &self.input[self.pos..next_special];
249
250                    buf.push_str(unsafe { std::str::from_utf8_unchecked(span) });
251                    self.pos = next_special;
252                }
253                _ => unreachable!(
254                    "find_string_end guarantees self.pos stops at '\"' or '\\', got 0x{:02X}",
255                    b
256                ),
257            }
258        }
259    }
260
261    fn parse_hex_escape(&mut self) -> Result<char, String> {
262        let mut code = 0u32;
263        for _ in 0..4 {
264            let b = self.advance().ok_or("JSON: unterminated unicode escape")?;
265            let digit = match b {
266                b'0'..=b'9' => (b - b'0') as u32,
267                b'a'..=b'f' => (b - b'a') as u32 + 10,
268                b'A'..=b'F' => (b - b'A') as u32 + 10,
269                _ => return Err(format!("JSON: invalid hex digit '{}'", b as char)),
270            };
271            code = (code << 4) | digit;
272        }
273
274        if (0xD800..=0xDBFF).contains(&code) {
275            if self.advance() != Some(b'\\') || self.advance() != Some(b'u') {
276                return Err("JSON: expected low surrogate after high surrogate".to_string());
277            }
278            let mut low = 0u32;
279            for _ in 0..4 {
280                let b = self
281                    .advance()
282                    .ok_or("JSON: unterminated surrogate escape")?;
283                let digit = match b {
284                    b'0'..=b'9' => (b - b'0') as u32,
285                    b'a'..=b'f' => (b - b'a') as u32 + 10,
286                    b'A'..=b'F' => (b - b'A') as u32 + 10,
287                    _ => {
288                        return Err(format!(
289                            "JSON: invalid hex digit in surrogate '{}'",
290                            b as char
291                        ));
292                    }
293                };
294                low = (low << 4) | digit;
295            }
296            if !(0xDC00..=0xDFFF).contains(&low) {
297                return Err("JSON: invalid low surrogate".to_string());
298            }
299            let combined = 0x10000 + ((code - 0xD800) << 10) + (low - 0xDC00);
300            char::from_u32(combined)
301                .ok_or("JSON: invalid codepoint from surrogate pair".to_string())
302        } else {
303            char::from_u32(code).ok_or("JSON: invalid unicode codepoint".to_string())
304        }
305    }
306
307    fn parse_array(&mut self, ctx: &mut JSContext) -> Result<JSValue, String> {
308        self.expect_byte(b'[')?;
309        self.skip_whitespace();
310
311        let mut elements = Vec::new();
312
313        if self.peek() != Some(b']') {
314            loop {
315                self.skip_whitespace();
316                let val = self.parse_value(ctx)?;
317                elements.push(val);
318                self.skip_whitespace();
319                match self.peek() {
320                    Some(b',') => {
321                        self.pos += 1;
322                    }
323                    Some(b']') => break,
324                    _ => return Err("JSON: expected ',' or ']' in array".to_string()),
325                }
326            }
327        }
328        self.expect_byte(b']')?;
329
330        let len = elements.len();
331        let mut arr = JSArrayObject::from_elements(elements);
332        if let Some(proto_ptr) = ctx.get_array_prototype() {
333            arr.header.set_prototype_raw(proto_ptr);
334        }
335        let len_atom = ctx.common_atoms.length;
336        arr.header.set(len_atom, JSValue::new_int(len as i64));
337
338        let ptr = Box::into_raw(Box::new(arr)) as usize;
339        ctx.runtime_mut().gc_heap_mut().track_array(ptr);
340        Ok(JSValue::new_object(ptr))
341    }
342
343    fn parse_object(&mut self, ctx: &mut JSContext) -> Result<JSValue, String> {
344        self.expect_byte(b'{')?;
345        self.skip_whitespace();
346
347        let mut obj = JSObject::new();
348
349        if self.peek() != Some(b'}') {
350            loop {
351                self.skip_whitespace();
352
353                if self.peek() != Some(b'"') {
354                    return Err("JSON: expected string key in object".to_string());
355                }
356                let key_val = self.parse_string(ctx)?;
357                let key_atom = key_val.get_atom();
358
359                self.skip_whitespace();
360                self.expect_byte(b':')?;
361
362                self.skip_whitespace();
363                let val = self.parse_value(ctx)?;
364                obj.set(key_atom, val);
365
366                self.skip_whitespace();
367                match self.peek() {
368                    Some(b',') => {
369                        self.pos += 1;
370                    }
371                    Some(b'}') => break,
372                    _ => return Err("JSON: expected ',' or '}' in object".to_string()),
373                }
374            }
375        }
376        self.expect_byte(b'}')?;
377
378        let ptr = Box::into_raw(Box::new(obj)) as usize;
379        Ok(JSValue::new_object(ptr))
380    }
381}