1use crate::object::array_obj::JSArrayObject;
2use crate::object::object::JSObject;
3use crate::runtime::context::JSContext;
4use crate::util::memchr::memchr2;
5use crate::value::JSValue;
6
7#[inline(always)]
8fn find_string_end(input: &[u8], pos: usize) -> usize {
9 memchr2(b'"', b'\\', &input[pos..])
10 .map(|o| pos + o)
11 .unwrap_or(input.len())
12}
13
/// Returns the index of the first byte at or after `pos` that is not JSON
/// whitespace (space, tab, carriage return, line feed).
///
/// If `pos` is at or beyond the end of `input`, or only whitespace remains,
/// `input.len()` is returned.
#[inline(always)]
fn skip_ws_bulk(input: &[u8], pos: usize) -> usize {
    let total = input.len();
    if pos >= total {
        return total;
    }
    // Count the leading whitespace run of the remaining tail.
    let run = input[pos..]
        .iter()
        .take_while(|&&byte| matches!(byte, b' ' | b'\t' | b'\r' | b'\n'))
        .count();
    pos + run
}
27
/// Cursor-based JSON parser over a borrowed byte buffer.
///
/// The parser does not copy its input; it walks `input` with a byte offset
/// and produces runtime values through the `parse_*` methods.
pub struct JsonParser<'a> {
    /// Raw bytes of the JSON text being parsed (taken from a `&str` in `new`).
    input: &'a [u8],
    /// Byte offset of the next unread byte in `input`.
    pos: usize,
}
32
33impl<'a> JsonParser<'a> {
34 pub fn new(input: &'a str) -> Self {
35 JsonParser {
36 input: input.as_bytes(),
37 pos: 0,
38 }
39 }
40
41 #[inline]
42 fn peek(&self) -> Option<u8> {
43 self.input.get(self.pos).copied()
44 }
45
46 #[inline]
47 fn advance(&mut self) -> Option<u8> {
48 let ch = self.input.get(self.pos).copied();
49 self.pos += 1;
50 ch
51 }
52
53 #[inline]
54 fn skip_whitespace(&mut self) {
55 self.pos = skip_ws_bulk(self.input, self.pos);
56 }
57
58 #[inline]
59 fn expect_byte(&mut self, expected: u8) -> Result<(), String> {
60 match self.advance() {
61 Some(b) if b == expected => Ok(()),
62 Some(b) => Err(format!(
63 "JSON: expected '{}', got '{}' at pos {}",
64 expected as char,
65 b as char,
66 self.pos - 1
67 )),
68 None => Err("JSON: unexpected end of input".to_string()),
69 }
70 }
71
72 pub fn parse_value(&mut self, ctx: &mut JSContext) -> Result<JSValue, String> {
73 self.skip_whitespace();
74 let ch = self.peek().ok_or("JSON: unexpected end of input")?;
75 match ch {
76 b'"' => self.parse_string(ctx),
77 b'{' => self.parse_object(ctx),
78 b'[' => self.parse_array(ctx),
79 b't' => self.parse_literal(b"true", JSValue::bool(true)),
80 b'f' => self.parse_literal(b"false", JSValue::bool(false)),
81 b'n' => self.parse_literal(b"null", JSValue::null()),
82 b'-' | b'0'..=b'9' => self.parse_number(),
83 _ => Err(format!(
84 "JSON: unexpected character '{}' at pos {}",
85 ch as char, self.pos
86 )),
87 }
88 }
89
90 fn parse_literal(&mut self, expected: &[u8], value: JSValue) -> Result<JSValue, String> {
91 let end = self.pos + expected.len();
92 if end <= self.input.len() && &self.input[self.pos..end] == expected {
93 self.pos = end;
94 Ok(value)
95 } else {
96 Err(format!("JSON: invalid literal at pos {}", self.pos))
97 }
98 }
99
100 fn parse_number(&mut self) -> Result<JSValue, String> {
101 let start = self.pos;
102
103 if self.peek() == Some(b'-') {
104 self.pos += 1;
105 }
106
107 if self.peek() == Some(b'0') {
108 self.pos += 1;
109
110 if let Some(b'0'..=b'9') = self.peek() {
111 return Err("JSON: leading zeros not allowed".to_string());
112 }
113 } else {
114 while let Some(b'0'..=b'9') = self.peek() {
115 self.pos += 1;
116 }
117 }
118
119 let has_fraction;
120 let has_exponent;
121
122 if self.peek() == Some(b'.') {
123 self.pos += 1;
124 if !matches!(self.peek(), Some(b'0'..=b'9')) {
125 return Err("JSON: expected digit after decimal point".to_string());
126 }
127 while let Some(b'0'..=b'9') = self.peek() {
128 self.pos += 1;
129 }
130 has_fraction = true;
131 } else {
132 has_fraction = false;
133 }
134
135 if matches!(self.peek(), Some(b'e') | Some(b'E')) {
136 self.pos += 1;
137 if matches!(self.peek(), Some(b'+') | Some(b'-')) {
138 self.pos += 1;
139 }
140 if !matches!(self.peek(), Some(b'0'..=b'9')) {
141 return Err("JSON: expected digit in exponent".to_string());
142 }
143 while let Some(b'0'..=b'9') = self.peek() {
144 self.pos += 1;
145 }
146 has_exponent = true;
147 } else {
148 has_exponent = false;
149 }
150
151 if !has_fraction && !has_exponent {
152 let bytes = &self.input[start..self.pos];
153 let (negative, digits) = if bytes[0] == b'-' {
154 (true, &bytes[1..])
155 } else {
156 (false, bytes)
157 };
158
159 if digits.len() <= 18 {
160 let mut n: u64 = 0;
161 for &b in digits {
162 n = n * 10 + (b - b'0') as u64;
163 }
164 let i = if negative {
165 if n <= i64::MAX as u64 + 1 {
166 n.wrapping_neg() as i64
167 } else {
168 let s = unsafe { std::str::from_utf8_unchecked(bytes) };
169 let f: f64 = s.parse().map_err(|_| "JSON: invalid number".to_string())?;
170 return Ok(JSValue::new_float(f));
171 }
172 } else if n <= i64::MAX as u64 {
173 n as i64
174 } else {
175 let s = unsafe { std::str::from_utf8_unchecked(bytes) };
176 let f: f64 = s.parse().map_err(|_| "JSON: invalid number".to_string())?;
177 return Ok(JSValue::new_float(f));
178 };
179 return Ok(JSValue::new_int(i));
180 }
181
182 let num_str = unsafe { std::str::from_utf8_unchecked(&self.input[start..self.pos]) };
183 if let Ok(i) = num_str.parse::<i64>() {
184 return Ok(JSValue::new_int(i));
185 }
186 let f: f64 = num_str
187 .parse()
188 .map_err(|_| "JSON: invalid number".to_string())?;
189 return Ok(JSValue::new_float(f));
190 }
191
192 let num_str = unsafe { std::str::from_utf8_unchecked(&self.input[start..self.pos]) };
193 let f: f64 = num_str
194 .parse()
195 .map_err(|_| "JSON: invalid number".to_string())?;
196 Ok(JSValue::new_float(f))
197 }
198
199 fn parse_string(&mut self, ctx: &mut JSContext) -> Result<JSValue, String> {
200 self.expect_byte(b'"')?;
201 let start = self.pos;
202
203 let first_special = find_string_end(self.input, self.pos);
204
205 if first_special < self.input.len() && self.input[first_special] == b'"' {
206 let slice = &self.input[start..first_special];
207 let s = unsafe { std::str::from_utf8_unchecked(slice) };
208 let atom = ctx.intern(s);
209 self.pos = first_special + 1;
210 return Ok(JSValue::new_string(atom));
211 }
212
213 let mut buf = if first_special > start {
214 let slice = &self.input[start..first_special];
215 let s = unsafe { std::str::from_utf8_unchecked(slice) };
216 s.to_string()
217 } else {
218 String::new()
219 };
220 self.pos = first_special;
221
222 loop {
223 let b = self.advance().ok_or("JSON: unterminated string")?;
224 match b {
225 b'"' => {
226 let atom = ctx.intern(&buf);
227 return Ok(JSValue::new_string(atom));
228 }
229 b'\\' => {
230 let escaped = self.advance().ok_or("JSON: unterminated escape")?;
231 match escaped {
232 b'"' => buf.push('"'),
233 b'\\' => buf.push('\\'),
234 b'/' => buf.push('/'),
235 b'b' => buf.push('\x08'),
236 b'f' => buf.push('\x0c'),
237 b'n' => buf.push('\n'),
238 b'r' => buf.push('\r'),
239 b't' => buf.push('\t'),
240 b'u' => {
241 let hex = self.parse_hex_escape()?;
242 buf.push(hex);
243 }
244 _ => return Err(format!("JSON: invalid escape '\\{}'", escaped as char)),
245 }
246
247 let next_special = find_string_end(self.input, self.pos);
248 let span = &self.input[self.pos..next_special];
249
250 buf.push_str(unsafe { std::str::from_utf8_unchecked(span) });
251 self.pos = next_special;
252 }
253 _ => unreachable!(
254 "find_string_end guarantees self.pos stops at '\"' or '\\', got 0x{:02X}",
255 b
256 ),
257 }
258 }
259 }
260
261 fn parse_hex_escape(&mut self) -> Result<char, String> {
262 let mut code = 0u32;
263 for _ in 0..4 {
264 let b = self.advance().ok_or("JSON: unterminated unicode escape")?;
265 let digit = match b {
266 b'0'..=b'9' => (b - b'0') as u32,
267 b'a'..=b'f' => (b - b'a') as u32 + 10,
268 b'A'..=b'F' => (b - b'A') as u32 + 10,
269 _ => return Err(format!("JSON: invalid hex digit '{}'", b as char)),
270 };
271 code = (code << 4) | digit;
272 }
273
274 if (0xD800..=0xDBFF).contains(&code) {
275 if self.advance() != Some(b'\\') || self.advance() != Some(b'u') {
276 return Err("JSON: expected low surrogate after high surrogate".to_string());
277 }
278 let mut low = 0u32;
279 for _ in 0..4 {
280 let b = self
281 .advance()
282 .ok_or("JSON: unterminated surrogate escape")?;
283 let digit = match b {
284 b'0'..=b'9' => (b - b'0') as u32,
285 b'a'..=b'f' => (b - b'a') as u32 + 10,
286 b'A'..=b'F' => (b - b'A') as u32 + 10,
287 _ => {
288 return Err(format!(
289 "JSON: invalid hex digit in surrogate '{}'",
290 b as char
291 ));
292 }
293 };
294 low = (low << 4) | digit;
295 }
296 if !(0xDC00..=0xDFFF).contains(&low) {
297 return Err("JSON: invalid low surrogate".to_string());
298 }
299 let combined = 0x10000 + ((code - 0xD800) << 10) + (low - 0xDC00);
300 char::from_u32(combined)
301 .ok_or("JSON: invalid codepoint from surrogate pair".to_string())
302 } else {
303 char::from_u32(code).ok_or("JSON: invalid unicode codepoint".to_string())
304 }
305 }
306
307 fn parse_array(&mut self, ctx: &mut JSContext) -> Result<JSValue, String> {
308 self.expect_byte(b'[')?;
309 self.skip_whitespace();
310
311 let mut elements = Vec::new();
312
313 if self.peek() != Some(b']') {
314 loop {
315 self.skip_whitespace();
316 let val = self.parse_value(ctx)?;
317 elements.push(val);
318 self.skip_whitespace();
319 match self.peek() {
320 Some(b',') => {
321 self.pos += 1;
322 }
323 Some(b']') => break,
324 _ => return Err("JSON: expected ',' or ']' in array".to_string()),
325 }
326 }
327 }
328 self.expect_byte(b']')?;
329
330 let len = elements.len();
331 let mut arr = JSArrayObject::from_elements(elements);
332 if let Some(proto_ptr) = ctx.get_array_prototype() {
333 arr.header.set_prototype_raw(proto_ptr);
334 }
335 let len_atom = ctx.common_atoms.length;
336 arr.header.set(len_atom, JSValue::new_int(len as i64));
337
338 let ptr = Box::into_raw(Box::new(arr)) as usize;
339 ctx.runtime_mut().gc_heap_mut().track_array(ptr);
340 Ok(JSValue::new_object(ptr))
341 }
342
343 fn parse_object(&mut self, ctx: &mut JSContext) -> Result<JSValue, String> {
344 self.expect_byte(b'{')?;
345 self.skip_whitespace();
346
347 let mut obj = JSObject::new();
348
349 if self.peek() != Some(b'}') {
350 loop {
351 self.skip_whitespace();
352
353 if self.peek() != Some(b'"') {
354 return Err("JSON: expected string key in object".to_string());
355 }
356 let key_val = self.parse_string(ctx)?;
357 let key_atom = key_val.get_atom();
358
359 self.skip_whitespace();
360 self.expect_byte(b':')?;
361
362 self.skip_whitespace();
363 let val = self.parse_value(ctx)?;
364 obj.set(key_atom, val);
365
366 self.skip_whitespace();
367 match self.peek() {
368 Some(b',') => {
369 self.pos += 1;
370 }
371 Some(b'}') => break,
372 _ => return Err("JSON: expected ',' or '}' in object".to_string()),
373 }
374 }
375 }
376 self.expect_byte(b'}')?;
377
378 let ptr = Box::into_raw(Box::new(obj)) as usize;
379 Ok(JSValue::new_object(ptr))
380 }
381}