1use alloc::collections::BTreeSet;
4use alloc::string::{String, ToString};
5use alloc::vec::Vec;
6
7use crate::error::{JsonError, JsonErrorKind, JsonLimitKind, JsonPath, JsonPathSegment};
8use crate::limits::JsonLimits;
9use crate::number::{is_valid_json_number, JsonNumber};
10use crate::value::{JsonObject, JsonValue};
11
12pub fn parse(input: &[u8]) -> Result<JsonValue, JsonError> {
15 parse_with_limits(input, JsonLimits::new())
16}
17
18pub fn parse_str(input: &str) -> Result<JsonValue, JsonError> {
20 parse_with_limits(input.as_bytes(), JsonLimits::new())
21}
22
23pub fn parse_with_limits(input: &[u8], limits: JsonLimits) -> Result<JsonValue, JsonError> {
25 if input.len() > limits.max_input_bytes {
26 return Err(JsonError::new(
27 JsonErrorKind::LimitExceeded(JsonLimitKind::InputBytes),
28 0,
29 1,
30 1,
31 )
32 .with_path(JsonPath::default()));
33 }
34
35 if let Err(e) = core::str::from_utf8(input) {
36 let offset = e.valid_up_to();
37 let (line, column) = line_column(input, offset);
38 return Err(
39 JsonError::new(JsonErrorKind::InvalidUtf8, offset, line, column)
40 .with_path(JsonPath::default()),
41 );
42 }
43
44 if input.starts_with(&[0xEF, 0xBB, 0xBF]) {
46 return Err(
47 JsonError::new(JsonErrorKind::InvalidUtf8, 0, 1, 1).with_path(JsonPath::default())
48 );
49 }
50
51 let mut parser = Parser::new(input, limits);
52 parser.skip_ws();
53 let value = parser.parse_value(0)?;
54 parser.skip_ws();
55 if parser.pos != parser.input.len() {
56 return Err(parser.error(JsonErrorKind::TrailingData));
57 }
58 Ok(value)
59}
60
/// Computes the 1-based `(line, column)` of byte `offset` within `input`.
///
/// Every `\n` before `offset` starts a new line and resets the column to 1;
/// every other byte advances the column by one, so columns count bytes rather
/// than characters. An `offset` past the end of `input` is clamped to its
/// length.
fn line_column(input: &[u8], offset: usize) -> (usize, usize) {
    let end = offset.min(input.len());
    input[..end]
        .iter()
        .fold((1, 1), |(line, column), &byte| {
            if byte == b'\n' {
                (line + 1, 1)
            } else {
                (line, column + 1)
            }
        })
}
74
75struct Parser<'a> {
76 input: &'a [u8],
77 pos: usize,
78 line: usize,
79 column: usize,
80 limits: JsonLimits,
81 nodes: usize,
82 decoded_string_bytes: usize,
83 path: Vec<JsonPathSegment>,
84}
85
86impl<'a> Parser<'a> {
87 fn new(input: &'a [u8], limits: JsonLimits) -> Self {
88 Self {
89 input,
90 pos: 0,
91 line: 1,
92 column: 1,
93 limits,
94 nodes: 0,
95 decoded_string_bytes: 0,
96 path: Vec::new(),
97 }
98 }
99
100 fn peek(&self) -> Option<u8> {
101 self.input.get(self.pos).copied()
102 }
103
104 fn bump(&mut self) -> u8 {
105 let b = self.input[self.pos];
106 self.pos += 1;
107 if b == b'\n' {
108 self.line += 1;
109 self.column = 1;
110 } else {
111 self.column += 1;
112 }
113 b
114 }
115
116 fn skip_ws(&mut self) {
117 while let Some(b) = self.peek() {
118 if matches!(b, b' ' | b'\t' | b'\n' | b'\r') {
119 self.bump();
120 } else {
121 break;
122 }
123 }
124 }
125
126 fn error(&self, kind: JsonErrorKind) -> JsonError {
127 JsonError::new(kind, self.pos, self.line, self.column)
128 .with_path(JsonPath::from_segments(self.path.clone()))
129 }
130
131 fn error_at(&self, kind: JsonErrorKind, pos: usize, line: usize, column: usize) -> JsonError {
132 JsonError::new(kind, pos, line, column)
133 .with_path(JsonPath::from_segments(self.path.clone()))
134 }
135
136 fn limit(&self, kind: JsonLimitKind) -> JsonError {
137 self.error(JsonErrorKind::LimitExceeded(kind))
138 }
139
140 fn parse_value(&mut self, depth: usize) -> Result<JsonValue, JsonError> {
141 self.nodes += 1;
142 if self.nodes > self.limits.max_total_nodes {
143 return Err(self.limit(JsonLimitKind::TotalNodes));
144 }
145 match self.peek() {
146 Some(b'{') => {
147 let d = depth + 1;
148 if d > self.limits.max_depth {
149 return Err(self.limit(JsonLimitKind::Depth));
150 }
151 self.parse_object(d)
152 }
153 Some(b'[') => {
154 let d = depth + 1;
155 if d > self.limits.max_depth {
156 return Err(self.limit(JsonLimitKind::Depth));
157 }
158 self.parse_array(d)
159 }
160 Some(b'"') => {
161 let s =
162 self.parse_string(self.limits.max_string_bytes, JsonLimitKind::StringBytes)?;
163 Ok(JsonValue::String(s))
164 }
165 Some(b't') => self.parse_literal(b"true", JsonValue::Bool(true)),
166 Some(b'f') => self.parse_literal(b"false", JsonValue::Bool(false)),
167 Some(b'n') => self.parse_literal(b"null", JsonValue::Null),
168 Some(b'-') | Some(b'0'..=b'9') => self.parse_number(),
169 Some(_) => Err(self.error(JsonErrorKind::UnexpectedByte)),
170 None => Err(self.error(JsonErrorKind::UnexpectedEof)),
171 }
172 }
173
174 fn parse_literal(&mut self, word: &[u8], value: JsonValue) -> Result<JsonValue, JsonError> {
175 for &expected in word {
176 match self.peek() {
177 Some(b) if b == expected => {
178 self.bump();
179 }
180 Some(_) => return Err(self.error(JsonErrorKind::UnexpectedByte)),
181 None => return Err(self.error(JsonErrorKind::UnexpectedEof)),
182 }
183 }
184 Ok(value)
185 }
186
187 fn parse_object(&mut self, depth: usize) -> Result<JsonValue, JsonError> {
188 self.bump(); let mut object = JsonObject::new();
190 let mut seen: BTreeSet<String> = BTreeSet::new();
191
192 self.skip_ws();
193 if self.peek() == Some(b'}') {
194 self.bump();
195 return Ok(JsonValue::Object(object));
196 }
197
198 loop {
199 self.skip_ws();
200 if self.peek() != Some(b'"') {
202 return match self.peek() {
203 None => Err(self.error(JsonErrorKind::UnexpectedEof)),
204 _ => Err(self.error(JsonErrorKind::UnexpectedByte)),
205 };
206 }
207
208 let key_pos = self.pos;
209 let key_line = self.line;
210 let key_column = self.column;
211 let key = self.parse_string(self.limits.max_key_bytes, JsonLimitKind::KeyBytes)?;
212
213 if !seen.insert(key.clone()) {
214 return Err(self.error_at(
215 JsonErrorKind::DuplicateKey,
216 key_pos,
217 key_line,
218 key_column,
219 ));
220 }
221 if seen.len() > self.limits.max_object_members {
222 return Err(self.limit(JsonLimitKind::ObjectMembers));
223 }
224
225 self.skip_ws();
226 if self.peek() != Some(b':') {
227 return match self.peek() {
228 None => Err(self.error(JsonErrorKind::UnexpectedEof)),
229 _ => Err(self.error(JsonErrorKind::UnexpectedByte)),
230 };
231 }
232 self.bump(); self.skip_ws();
234
235 self.path.push(JsonPathSegment::Key(key.clone()));
236 let value = self.parse_value(depth)?;
237 self.path.pop();
238 object.push_unique(key, value);
239
240 self.skip_ws();
241 match self.peek() {
242 Some(b',') => {
243 self.bump();
244 }
245 Some(b'}') => {
246 self.bump();
247 return Ok(JsonValue::Object(object));
248 }
249 None => return Err(self.error(JsonErrorKind::UnexpectedEof)),
250 _ => return Err(self.error(JsonErrorKind::UnexpectedByte)),
251 }
252 }
253 }
254
255 fn parse_array(&mut self, depth: usize) -> Result<JsonValue, JsonError> {
256 self.bump(); let mut items: Vec<JsonValue> = Vec::new();
258
259 self.skip_ws();
260 if self.peek() == Some(b']') {
261 self.bump();
262 return Ok(JsonValue::Array(items));
263 }
264
265 loop {
266 self.skip_ws();
267 if items.len() >= self.limits.max_array_items {
268 return Err(self.limit(JsonLimitKind::ArrayItems));
269 }
270
271 self.path.push(JsonPathSegment::Index(items.len()));
272 let value = self.parse_value(depth)?;
273 self.path.pop();
274 items.push(value);
275
276 self.skip_ws();
277 match self.peek() {
278 Some(b',') => {
279 self.bump();
280 }
281 Some(b']') => {
282 self.bump();
283 return Ok(JsonValue::Array(items));
284 }
285 None => return Err(self.error(JsonErrorKind::UnexpectedEof)),
286 _ => return Err(self.error(JsonErrorKind::UnexpectedByte)),
287 }
288 }
289 }
290
291 fn parse_string(
292 &mut self,
293 max_bytes: usize,
294 limit_kind: JsonLimitKind,
295 ) -> Result<String, JsonError> {
296 self.bump(); let mut out = String::new();
298
299 loop {
300 match self.peek() {
301 None => return Err(self.error(JsonErrorKind::UnexpectedEof)),
302 Some(b'"') => {
303 self.bump();
304 self.decoded_string_bytes = self.decoded_string_bytes.saturating_add(out.len());
305 if self.decoded_string_bytes > self.limits.max_total_decoded_string_bytes {
306 return Err(self.limit(JsonLimitKind::TotalDecodedStringBytes));
307 }
308 return Ok(out);
309 }
310 Some(b'\\') => {
311 self.bump();
312 self.parse_escape(&mut out)?;
313 }
314 Some(b) if b < 0x20 => {
315 return Err(self.error(JsonErrorKind::UnescapedControlCharacter));
316 }
317 Some(b) => {
318 let len = utf8_len(b);
319 let scalar = &self.input[self.pos..self.pos + len];
321 out.push_str(core::str::from_utf8(scalar).expect("validated UTF-8"));
322 for _ in 0..len {
323 self.bump();
324 }
325 }
326 }
327
328 if out.len() > max_bytes {
329 return Err(self.limit(limit_kind));
330 }
331 }
332 }
333
334 fn parse_escape(&mut self, out: &mut String) -> Result<(), JsonError> {
335 match self.peek() {
336 None => Err(self.error(JsonErrorKind::UnexpectedEof)),
337 Some(b'"') => {
338 out.push('"');
339 self.bump();
340 Ok(())
341 }
342 Some(b'\\') => {
343 out.push('\\');
344 self.bump();
345 Ok(())
346 }
347 Some(b'/') => {
348 out.push('/');
349 self.bump();
350 Ok(())
351 }
352 Some(b'b') => {
353 out.push('\u{08}');
354 self.bump();
355 Ok(())
356 }
357 Some(b'f') => {
358 out.push('\u{0C}');
359 self.bump();
360 Ok(())
361 }
362 Some(b'n') => {
363 out.push('\n');
364 self.bump();
365 Ok(())
366 }
367 Some(b'r') => {
368 out.push('\r');
369 self.bump();
370 Ok(())
371 }
372 Some(b't') => {
373 out.push('\t');
374 self.bump();
375 Ok(())
376 }
377 Some(b'u') => {
378 self.bump();
379 let hi = self.parse_hex4()?;
380 if (0xD800..=0xDBFF).contains(&hi) {
381 if self.peek() != Some(b'\\') {
383 return Err(self.error(JsonErrorKind::LoneSurrogate));
384 }
385 self.bump();
386 if self.peek() != Some(b'u') {
387 return Err(self.error(JsonErrorKind::LoneSurrogate));
388 }
389 self.bump();
390 let lo = self.parse_hex4()?;
391 if !(0xDC00..=0xDFFF).contains(&lo) {
392 return Err(self.error(JsonErrorKind::LoneSurrogate));
393 }
394 let scalar = 0x10000 + (((hi as u32) - 0xD800) << 10) + ((lo as u32) - 0xDC00);
395 out.push(char::from_u32(scalar).expect("valid scalar from surrogate pair"));
396 Ok(())
397 } else if (0xDC00..=0xDFFF).contains(&hi) {
398 Err(self.error(JsonErrorKind::LoneSurrogate))
399 } else {
400 out.push(char::from_u32(hi as u32).expect("non-surrogate is a valid scalar"));
401 Ok(())
402 }
403 }
404 Some(_) => Err(self.error(JsonErrorKind::InvalidEscape)),
405 }
406 }
407
408 fn parse_hex4(&mut self) -> Result<u16, JsonError> {
409 let mut value: u16 = 0;
410 for _ in 0..4 {
411 match self.peek() {
412 None => return Err(self.error(JsonErrorKind::UnexpectedEof)),
413 Some(b) => match hex_value(b) {
414 Some(digit) => {
415 value = (value << 4) | digit;
416 self.bump();
417 }
418 None => return Err(self.error(JsonErrorKind::InvalidUnicodeEscape)),
419 },
420 }
421 }
422 Ok(value)
423 }
424
425 fn parse_number(&mut self) -> Result<JsonValue, JsonError> {
426 let start = self.pos;
427 let start_line = self.line;
428 let start_column = self.column;
429 while let Some(b) = self.peek() {
430 if matches!(b, b'-' | b'+' | b'.' | b'e' | b'E' | b'0'..=b'9') {
431 self.bump();
432 } else {
433 break;
434 }
435 }
436 let token = &self.input[start..self.pos];
437 if token.len() > self.limits.max_number_bytes {
438 return Err(self.error_at(
439 JsonErrorKind::LimitExceeded(JsonLimitKind::NumberBytes),
440 start,
441 start_line,
442 start_column,
443 ));
444 }
445 let text = core::str::from_utf8(token).expect("validated UTF-8");
446 if !is_valid_json_number(text) {
447 return Err(self.error_at(
448 JsonErrorKind::InvalidNumber,
449 start,
450 start_line,
451 start_column,
452 ));
453 }
454 Ok(JsonValue::Number(JsonNumber::from_validated(
455 text.to_string(),
456 )))
457 }
458}
459
/// Returns the byte length of the UTF-8 sequence introduced by `lead`.
///
/// The classification is by value range only: bytes below `0x80` give 1,
/// below `0xE0` give 2, below `0xF0` give 3, and everything else gives 4.
/// Continuation bytes are not rejected, so the result is only meaningful for
/// the first byte of a scalar in already-validated UTF-8.
fn utf8_len(lead: u8) -> usize {
    match lead {
        0x00..=0x7F => 1,
        0x80..=0xDF => 2,
        0xE0..=0xEF => 3,
        0xF0..=0xFF => 4,
    }
}
471
/// Returns the numeric value (0-15) of an ASCII hexadecimal digit.
///
/// Accepts `0-9`, `a-f` and `A-F`; any other byte yields `None`.
fn hex_value(b: u8) -> Option<u16> {
    // `to_digit(16)` only ever yields 0..=15, so narrowing to `u16` is lossless.
    char::from(b).to_digit(16).map(|digit| digit as u16)
}