1use std::borrow::Cow;
2use std::collections::HashMap;
3use std::rc::Rc;
4
5use super::ast::*;
6use super::common::Range;
7use super::errors::*;
8use super::scanner::Scanner;
9use super::scanner::ScannerOptions;
10use super::tokens::Token;
11use super::tokens::TokenAndRange;
12
/// Map from a position in the source text to the comments adjacent to that position.
///
/// Each run of comments is stored under two keys (the end of the token scanned
/// before the comments and the scanner's token start after them), and both
/// entries share one `Rc` allocation.
pub type CommentMap<'a> = HashMap<usize, Rc<Vec<Comment<'a>>>>;
17
/// How comments encountered while parsing are surfaced to the caller.
#[derive(Default, Debug, PartialEq, Clone)]
pub enum CommentCollectionStrategy {
  /// Comments are not collected (the default).
  #[default]
  Off,
  /// Comments are gathered into the `CommentMap` returned in `ParseResult::comments`.
  Separate,
  /// Comments are appended to the token list in scan order. The list only exists
  /// when `CollectOptions::tokens` is `true`; otherwise the comments are dropped.
  AsTokens,
}
39
40#[derive(Default, Clone)]
42pub struct CollectOptions {
43 pub comments: CommentCollectionStrategy,
45 pub tokens: bool,
47}
48
/// Switches controlling which relaxations of strict JSON the parser accepts.
///
/// `ParseOptions::default()` enables every relaxation.
#[derive(Debug, Clone)]
pub struct ParseOptions {
  /// Accept line and block comments; when `false` a comment is a parse error.
  pub allow_comments: bool,
  /// Accept unquoted words and numbers as object property names.
  pub allow_loose_object_property_names: bool,
  /// Accept a comma directly before a closing `}` or `]`.
  pub allow_trailing_commas: bool,
  /// Accept object properties that are not separated by a comma.
  pub allow_missing_commas: bool,
  /// Accept single-quoted strings (forwarded to the scanner).
  pub allow_single_quoted_strings: bool,
  /// Accept hexadecimal number literals (forwarded to the scanner).
  pub allow_hexadecimal_numbers: bool,
  /// Accept numbers with a leading unary plus (forwarded to the scanner).
  pub allow_unary_plus_numbers: bool,
}
67
68impl Default for ParseOptions {
69 fn default() -> Self {
70 Self {
71 allow_comments: true,
72 allow_loose_object_property_names: true,
73 allow_trailing_commas: true,
74 allow_missing_commas: true,
75 allow_single_quoted_strings: true,
76 allow_hexadecimal_numbers: true,
77 allow_unary_plus_numbers: true,
78 }
79 }
80}
81
/// Everything produced by a successful call to `parse_to_ast`.
pub struct ParseResult<'a> {
  /// Collected comments. `Some` only when `CommentCollectionStrategy::Separate`
  /// was requested.
  pub comments: Option<CommentMap<'a>>,
  /// The root value, or `None` when the scanner produced no value token at all.
  pub value: Option<Value<'a>>,
  /// Scanned tokens in order. `Some` only when `CollectOptions::tokens` was `true`.
  pub tokens: Option<Vec<TokenAndRange<'a>>>,
}
97
/// Mutable state threaded through the recursive-descent parsing functions.
struct Context<'a> {
  /// Tokenizer over the input text.
  scanner: Scanner<'a>,
  /// Finished comment map; `Some` only when comments are collected separately.
  comments: Option<CommentMap<'a>>,
  /// Comments seen since the last non-comment token that have not yet been
  /// filed into `comments`.
  current_comments: Option<Vec<Comment<'a>>>,
  /// Value of `scanner.token_end()` recorded by the most recent `scan` call.
  last_token_end: usize,
  /// Ranges of the nodes currently being parsed, innermost last. Its length is
  /// also what the nesting-depth limit is checked against.
  range_stack: Vec<Range>,
  /// Recorded tokens; `Some` only when token collection was requested.
  tokens: Option<Vec<TokenAndRange<'a>>>,
  /// When `true`, comments are pushed onto `tokens` instead of `comments`.
  collect_comments_as_tokens: bool,
  /// Mirrors `ParseOptions::allow_comments`.
  allow_comments: bool,
  /// Mirrors `ParseOptions::allow_trailing_commas`.
  allow_trailing_commas: bool,
  /// Mirrors `ParseOptions::allow_missing_commas`.
  allow_missing_commas: bool,
  /// Mirrors `ParseOptions::allow_loose_object_property_names`.
  allow_loose_object_property_names: bool,
  /// Largest `range_stack` length at which `parse_value` still accepts a value.
  maximum_nesting_depth: usize,
}
112
113impl<'a> Context<'a> {
114 pub fn scan(&mut self) -> Result<Option<Token<'a>>, ParseError> {
115 let previous_end = self.last_token_end;
116 let token = self.scan_handling_comments()?;
117 self.last_token_end = self.scanner.token_end();
118
119 if let Some(comments) = self.comments.as_mut()
121 && let Some(current_comments) = self.current_comments.take()
122 {
123 let current_comments = Rc::new(current_comments);
124 comments.insert(previous_end, current_comments.clone());
125 comments.insert(self.scanner.token_start(), current_comments);
126 }
127
128 if let Some(token) = &token
129 && self.tokens.is_some()
130 {
131 self.capture_token(token.clone());
132 }
133
134 Ok(token)
135 }
136
137 pub fn token(&self) -> Option<Token<'a>> {
138 self.scanner.token()
139 }
140
141 pub fn start_range(&mut self) {
142 self.range_stack.push(Range {
143 start: self.scanner.token_start(),
144 end: 0,
145 });
146 }
147
148 pub fn end_range(&mut self) -> Range {
149 let mut range = self
150 .range_stack
151 .pop()
152 .expect("Range was popped from the stack, but the stack was empty.");
153 range.end = self.scanner.token_end();
154 range
155 }
156
157 pub fn create_range_from_last_token(&self) -> Range {
158 Range {
159 start: self.scanner.token_start(),
160 end: self.scanner.token_end(),
161 }
162 }
163
164 pub fn create_error(&self, kind: ParseErrorKind) -> ParseError {
165 self.scanner.create_error_for_current_token(kind)
166 }
167
168 pub fn create_error_for_current_range(&mut self, kind: ParseErrorKind) -> ParseError {
169 let range = self.end_range();
170 self.create_error_for_range(range, kind)
171 }
172
173 pub fn create_error_for_range(&self, range: Range, kind: ParseErrorKind) -> ParseError {
174 self.scanner.create_error_for_range(range, kind)
175 }
176
177 fn scan_handling_comments(&mut self) -> Result<Option<Token<'a>>, ParseError> {
178 loop {
179 let token = self.scanner.scan()?;
180 match token {
181 Some(token @ Token::CommentLine(_) | token @ Token::CommentBlock(_)) if self.collect_comments_as_tokens => {
182 self.capture_token(token);
183 }
184 Some(Token::CommentLine(text)) => {
185 self.handle_comment(Comment::Line(CommentLine {
186 range: self.create_range_from_last_token(),
187 text,
188 }))?;
189 }
190 Some(Token::CommentBlock(text)) => {
191 self.handle_comment(Comment::Block(CommentBlock {
192 range: self.create_range_from_last_token(),
193 text,
194 }))?;
195 }
196 _ => return Ok(token),
197 }
198 }
199 }
200
201 fn capture_token(&mut self, token: Token<'a>) {
202 let range = self.create_range_from_last_token();
203 if let Some(tokens) = self.tokens.as_mut() {
204 tokens.push(TokenAndRange {
205 token: token.clone(),
206 range,
207 });
208 }
209 }
210
211 fn handle_comment(&mut self, comment: Comment<'a>) -> Result<(), ParseError> {
212 if !self.allow_comments {
213 return Err(self.create_error(ParseErrorKind::CommentsNotAllowed));
214 }
215
216 if self.comments.is_some() {
217 if let Some(comments) = self.current_comments.as_mut() {
218 comments.push(comment);
219 } else {
220 self.current_comments = Some(vec![comment]);
221 }
222 }
223
224 Ok(())
225 }
226}
227
228pub fn parse_to_ast<'a>(
245 text: &'a str,
246 collect_options: &CollectOptions,
247 parse_options: &ParseOptions,
248) -> Result<ParseResult<'a>, ParseError> {
249 let mut context = Context {
250 scanner: Scanner::new(
251 text,
252 &ScannerOptions {
253 allow_single_quoted_strings: parse_options.allow_single_quoted_strings,
254 allow_hexadecimal_numbers: parse_options.allow_hexadecimal_numbers,
255 allow_unary_plus_numbers: parse_options.allow_unary_plus_numbers,
256 },
257 ),
258 comments: match collect_options.comments {
259 CommentCollectionStrategy::Separate => Some(Default::default()),
260 CommentCollectionStrategy::Off | CommentCollectionStrategy::AsTokens => None,
261 },
262 current_comments: None,
263 last_token_end: 0,
264 range_stack: Vec::new(),
265 tokens: if collect_options.tokens { Some(Vec::new()) } else { None },
266 collect_comments_as_tokens: collect_options.comments == CommentCollectionStrategy::AsTokens,
267 allow_comments: parse_options.allow_comments,
268 allow_trailing_commas: parse_options.allow_trailing_commas,
269 allow_missing_commas: parse_options.allow_missing_commas,
270 allow_loose_object_property_names: parse_options.allow_loose_object_property_names,
271 maximum_nesting_depth: 512,
272 };
273 context.scan()?;
274 let value = parse_value(&mut context)?;
275
276 if context.scan()?.is_some() {
277 return Err(context.create_error(ParseErrorKind::MultipleRootJsonValues));
278 }
279
280 debug_assert!(context.range_stack.is_empty());
281
282 Ok(ParseResult {
283 comments: context.comments,
284 tokens: context.tokens,
285 value,
286 })
287}
288
289fn parse_value<'a>(context: &mut Context<'a>) -> Result<Option<Value<'a>>, ParseError> {
290 if context.range_stack.len() > context.maximum_nesting_depth {
291 return Err(context.create_error_for_current_range(ParseErrorKind::NestingDepthExceeded));
292 }
293
294 match context.token() {
295 None => Ok(None),
296 Some(token) => match token {
297 Token::OpenBrace => Ok(Some(Value::Object(parse_object(context)?))),
298 Token::OpenBracket => Ok(Some(Value::Array(parse_array(context)?))),
299 Token::String(value) => Ok(Some(Value::StringLit(create_string_lit(context, value)))),
300 Token::Boolean(value) => Ok(Some(Value::BooleanLit(create_boolean_lit(context, value)))),
301 Token::Number(value) => Ok(Some(Value::NumberLit(create_number_lit(context, value)))),
302 Token::Null => Ok(Some(Value::NullKeyword(create_null_keyword(context)))),
303 Token::CloseBracket => Err(context.create_error(ParseErrorKind::UnexpectedCloseBracket)),
304 Token::CloseBrace => Err(context.create_error(ParseErrorKind::UnexpectedCloseBrace)),
305 Token::Comma => Err(context.create_error(ParseErrorKind::UnexpectedComma)),
306 Token::Colon => Err(context.create_error(ParseErrorKind::UnexpectedColon)),
307 Token::Word(_) => Err(context.create_error(ParseErrorKind::UnexpectedWord)),
308 Token::CommentLine(_) => unreachable!(),
309 Token::CommentBlock(_) => unreachable!(),
310 },
311 }
312}
313
314fn parse_object<'a>(context: &mut Context<'a>) -> Result<Object<'a>, ParseError> {
315 debug_assert!(context.token() == Some(Token::OpenBrace));
316 let mut properties = Vec::new();
317
318 context.start_range();
319 context.scan()?;
320
321 loop {
322 match context.token() {
323 Some(Token::CloseBrace) => break,
324 Some(Token::String(prop_name)) => {
325 properties.push(parse_object_property(context, PropName::String(prop_name))?);
326 }
327 Some(Token::Word(prop_name)) | Some(Token::Number(prop_name)) => {
328 properties.push(parse_object_property(context, PropName::Word(prop_name))?);
329 }
330 None => return Err(context.create_error_for_current_range(ParseErrorKind::UnterminatedObject)),
331 _ => return Err(context.create_error(ParseErrorKind::UnexpectedTokenInObject)),
332 }
333
334 let after_value_end = context.last_token_end;
336 match context.scan()? {
337 Some(Token::Comma) => {
338 let comma_range = context.create_range_from_last_token();
339 if let Some(Token::CloseBrace) = context.scan()?
340 && !context.allow_trailing_commas
341 {
342 return Err(context.create_error_for_range(comma_range, ParseErrorKind::TrailingCommasNotAllowed));
343 }
344 }
345 Some(Token::String(_) | Token::Word(_) | Token::Number(_)) if !context.allow_missing_commas => {
346 let range = Range {
347 start: after_value_end,
348 end: after_value_end,
349 };
350 return Err(context.create_error_for_range(range, ParseErrorKind::ExpectedComma));
351 }
352 _ => {}
353 }
354 }
355
356 Ok(Object {
357 range: context.end_range(),
358 properties,
359 })
360}
361
/// Raw text of an object property name, tagged by the kind of token it came from.
enum PropName<'a> {
  /// Name taken from a string token.
  String(Cow<'a, str>),
  /// Name taken from a word or number token.
  Word(&'a str),
}
366
367fn parse_object_property<'a>(context: &mut Context<'a>, prop_name: PropName<'a>) -> Result<ObjectProp<'a>, ParseError> {
368 context.start_range();
369
370 let name = match prop_name {
371 PropName::String(prop_name) => ObjectPropName::String(create_string_lit(context, prop_name)),
372 PropName::Word(prop_name) => {
373 if context.allow_loose_object_property_names {
374 ObjectPropName::Word(create_word(context, prop_name))
375 } else {
376 return Err(context.create_error(ParseErrorKind::ExpectedStringObjectProperty));
377 }
378 }
379 };
380
381 match context.scan()? {
382 Some(Token::Colon) => {}
383 _ => return Err(context.create_error(ParseErrorKind::ExpectedColonAfterObjectKey)),
384 }
385
386 context.scan()?;
387 let value = parse_value(context)?;
388
389 match value {
390 Some(value) => Ok(ObjectProp {
391 range: context.end_range(),
392 name,
393 value,
394 }),
395 None => Err(context.create_error(ParseErrorKind::ExpectedObjectValue)),
396 }
397}
398
399fn parse_array<'a>(context: &mut Context<'a>) -> Result<Array<'a>, ParseError> {
400 debug_assert!(context.token() == Some(Token::OpenBracket));
401 let mut elements = Vec::new();
402
403 context.start_range();
404 context.scan()?;
405
406 loop {
407 match context.token() {
408 Some(Token::CloseBracket) => break,
409 None => return Err(context.create_error_for_current_range(ParseErrorKind::UnterminatedArray)),
410 _ => match parse_value(context)? {
411 Some(value) => elements.push(value),
412 None => return Err(context.create_error_for_current_range(ParseErrorKind::UnterminatedArray)),
413 },
414 }
415
416 if let Some(Token::Comma) = context.scan()? {
418 let comma_range = context.create_range_from_last_token();
419 if let Some(Token::CloseBracket) = context.scan()?
420 && !context.allow_trailing_commas
421 {
422 return Err(context.create_error_for_range(comma_range, ParseErrorKind::TrailingCommasNotAllowed));
423 }
424 }
425 }
426
427 Ok(Array {
428 range: context.end_range(),
429 elements,
430 })
431}
432
433fn create_string_lit<'a>(context: &Context<'a>, value: Cow<'a, str>) -> StringLit<'a> {
436 StringLit {
437 range: context.create_range_from_last_token(),
438 value,
439 }
440}
441
442fn create_word<'a>(context: &Context<'a>, value: &'a str) -> WordLit<'a> {
443 WordLit {
444 range: context.create_range_from_last_token(),
445 value,
446 }
447}
448
449fn create_boolean_lit(context: &Context, value: bool) -> BooleanLit {
450 BooleanLit {
451 range: context.create_range_from_last_token(),
452 value,
453 }
454}
455
456fn create_number_lit<'a>(context: &Context<'a>, value: &'a str) -> NumberLit<'a> {
457 NumberLit {
458 range: context.create_range_from_last_token(),
459 value,
460 }
461}
462
463fn create_null_keyword(context: &Context) -> NullKeyword {
464 NullKeyword {
465 range: context.create_range_from_last_token(),
466 }
467}
468
#[cfg(test)]
mod tests {
  use super::*;
  use pretty_assertions::assert_eq;

  /// Asserts that `result` is an error whose display text equals `message`.
  ///
  /// `#[track_caller]` makes a failure point at the calling test.
  #[track_caller]
  fn assert_error_message<T, E: std::fmt::Display>(result: Result<T, E>, message: &str) {
    match result {
      Ok(_) => panic!("Expected error, but did not find one."),
      Err(err) => assert_eq!(err.to_string(), message),
    }
  }

  /// Parses `text` with the default (fully permissive) options and expects `message`.
  #[track_caller]
  fn assert_has_error(text: &str, message: &str) {
    let result = parse_to_ast(text, &Default::default(), &Default::default());
    assert_error_message(result, message);
  }

  /// Parses `text` with every relaxation disabled and expects `message`.
  #[track_caller]
  fn assert_has_strict_error(text: &str, message: &str) {
    let result = parse_to_ast(
      text,
      &Default::default(),
      &ParseOptions {
        allow_comments: false,
        allow_loose_object_property_names: false,
        allow_trailing_commas: false,
        allow_missing_commas: false,
        allow_single_quoted_strings: false,
        allow_hexadecimal_numbers: false,
        allow_unary_plus_numbers: false,
      },
    );
    assert_error_message(result, message);
  }

  #[test]
  fn it_should_error_when_has_multiple_values() {
    assert_has_error(
      "[][]",
      "Text cannot contain more than one JSON value on line 1 column 3",
    );
  }

  #[test]
  fn it_should_error_when_object_is_not_terminated() {
    assert_has_error("{", "Unterminated object on line 1 column 1");
  }

  #[test]
  fn it_should_error_when_object_has_unexpected_token() {
    assert_has_error("{ [] }", "Unexpected token in object on line 1 column 3");
  }

  #[test]
  fn it_should_error_when_object_has_two_non_string_tokens() {
    assert_has_error(
      "{ asdf asdf: 5 }",
      "Expected colon after the string or word in object property on line 1 column 8",
    );
  }

  #[test]
  fn it_should_error_when_array_is_not_terminated() {
    assert_has_error("[", "Unterminated array on line 1 column 1");
  }

  #[test]
  fn it_should_error_when_array_has_unexpected_token() {
    assert_has_error("[:]", "Unexpected colon on line 1 column 2");
  }

  #[test]
  fn it_should_error_when_comment_block_not_closed() {
    assert_has_error("/* test", "Unterminated comment block on line 1 column 1");
  }

  #[test]
  fn it_should_error_when_string_lit_not_closed() {
    assert_has_error("\" test", "Unterminated string literal on line 1 column 1");
  }

  #[test]
  fn strict_should_error_object_trailing_comma() {
    assert_has_strict_error(
      r#"{ "test": 5, }"#,
      "Trailing commas are not allowed on line 1 column 12",
    );
  }

  #[test]
  fn strict_should_error_array_trailing_comma() {
    assert_has_strict_error(r#"[ "test", ]"#, "Trailing commas are not allowed on line 1 column 9");
  }

  #[test]
  fn strict_should_error_comment_line() {
    assert_has_strict_error(r#"[ "test" ] // 1"#, "Comments are not allowed on line 1 column 12");
  }

  #[test]
  fn strict_should_error_comment_block() {
    assert_has_strict_error(r#"[ "test" /* 1 */]"#, "Comments are not allowed on line 1 column 10");
  }

  #[test]
  fn strict_should_error_word_property() {
    assert_has_strict_error(
      r#"{ word: 5 }"#,
      "Expected string for object property on line 1 column 3",
    );
  }

  #[test]
  fn strict_should_error_single_quoted_string() {
    assert_has_strict_error(
      r#"{ "key": 'value' }"#,
      "Single-quoted strings are not allowed on line 1 column 10",
    );
  }

  #[test]
  fn strict_should_error_hexadecimal_number() {
    assert_has_strict_error(
      r#"{ "key": 0xFF }"#,
      "Hexadecimal numbers are not allowed on line 1 column 10",
    );
  }

  #[test]
  fn strict_should_error_unary_plus_number() {
    assert_has_strict_error(
      r#"{ "key": +42 }"#,
      "Unary plus on numbers is not allowed on line 1 column 10",
    );
  }

  #[test]
  fn it_should_not_include_tokens_by_default() {
    let result = parse_to_ast("{}", &Default::default(), &Default::default()).unwrap();
    assert!(result.tokens.is_none());
  }

  #[test]
  fn it_should_include_tokens_when_specified() {
    let result = parse_to_ast(
      "{}",
      &CollectOptions {
        tokens: true,
        ..Default::default()
      },
      &Default::default(),
    )
    .unwrap();
    let tokens = result.tokens.unwrap();
    assert_eq!(tokens.len(), 2);
  }

  #[test]
  fn it_should_not_include_comments_by_default() {
    let result = parse_to_ast("{}", &Default::default(), &Default::default()).unwrap();
    assert!(result.comments.is_none());
  }

  #[test]
  fn it_should_include_comments_when_specified() {
    let result = parse_to_ast(
      "{} // 2",
      &CollectOptions {
        comments: CommentCollectionStrategy::Separate,
        ..Default::default()
      },
      &Default::default(),
    )
    .unwrap();
    let comments = result.comments.unwrap();
    // One comment, stored under two keys: the end of the previous token and
    // the position where the following scan stopped.
    assert_eq!(comments.len(), 2);
  }

  // The emoji below is a zero-width-joiner sequence. The joiner is written as an
  // explicit escape because it is invisible in source and changes the column.
  #[cfg(not(feature = "error_unicode_width"))]
  #[test]
  fn error_correct_line_column_unicode_width() {
    assert_has_strict_error("[\"🧑\u{200D}🦰\", [", "Unterminated array on line 1 column 9");
  }

  #[cfg(feature = "error_unicode_width")]
  #[test]
  fn error_correct_line_column_unicode_width() {
    assert_has_strict_error("[\"🧑\u{200D}🦰\", [", "Unterminated array on line 1 column 10");
  }

  #[test]
  fn it_should_parse_unquoted_keys_with_hex_and_trailing_comma() {
    let text = r#"{
      CP_CanFuncReqId: 0x7DF, // 2015
    }"#;
    {
      let parse_result = parse_to_ast(text, &Default::default(), &Default::default()).unwrap();

      let value = parse_result.value.unwrap();
      let obj = value.as_object().unwrap();
      assert_eq!(obj.properties.len(), 1);
      assert_eq!(obj.properties[0].name.as_str(), "CP_CanFuncReqId");

      // The AST keeps the literal's source text; no numeric conversion is done.
      let number_value = obj.properties[0].value.as_number_lit().unwrap();
      assert_eq!(number_value.value, "0x7DF");
    }
    #[cfg(feature = "serde")]
    {
      let value: serde_json::Value = crate::parse_to_serde_value(text, &Default::default()).unwrap();
      assert_eq!(
        value,
        serde_json::json!({
          "CP_CanFuncReqId": 2015
        })
      );
    }
  }

  #[test]
  fn it_should_parse_unary_plus_numbers() {
    let result = parse_to_ast(r#"{ "test": +42 }"#, &Default::default(), &Default::default()).unwrap();

    let value = result.value.unwrap();
    let obj = value.as_object().unwrap();
    assert_eq!(obj.properties.len(), 1);
    assert_eq!(obj.properties[0].name.as_str(), "test");

    let number_value = obj.properties[0].value.as_number_lit().unwrap();
    assert_eq!(number_value.value, "+42");
  }

  #[test]
  fn missing_comma_between_properties() {
    let text = r#"{
  "name": "alice"
  "age": 25
}"#;
    let result = parse_to_ast(text, &Default::default(), &Default::default()).unwrap();
    assert_eq!(
      result
        .value
        .unwrap()
        .as_object()
        .unwrap()
        .get_number("age")
        .unwrap()
        .value,
      "25"
    );

    // In strict mode the error points just past the value that lacks the comma.
    assert_has_strict_error(text, "Expected comma on line 2 column 18");
  }

  #[test]
  fn missing_comma_with_comment_between_properties() {
    // The error still points just past the value, not past the comment after it.
    let result = parse_to_ast(
      r#"{
  "name": "alice" // comment here
  "age": 25
}"#,
      &Default::default(),
      &ParseOptions {
        allow_comments: true,
        allow_missing_commas: false,
        ..Default::default()
      },
    );
    assert_error_message(result, "Expected comma on line 2 column 18");
  }

  #[test]
  fn it_should_error_when_arrays_are_deeply_nested() {
    // Far deeper than the parser's nesting limit.
    let depth = 30_000;
    let json = "[".repeat(depth) + &"]".repeat(depth);

    let result = parse_to_ast(&json, &Default::default(), &ParseOptions::default());

    assert_error_message(result, "Maximum nesting depth exceeded on line 1 column 513");
  }

  #[test]
  fn it_should_error_when_objects_are_deeply_nested() {
    // Far deeper than the parser's nesting limit.
    let depth = 30_000;
    let json = "{\"q\":".repeat(depth) + &"}".repeat(depth);

    let result = parse_to_ast(&json, &Default::default(), &ParseOptions::default());

    assert_error_message(result, "Maximum nesting depth exceeded on line 1 column 1282");
  }

  #[test]
  fn it_should_parse_large_shallow_objects() {
    // Many siblings but little nesting: the depth limit must not trigger.
    let size = 1_000;
    let mut json = String::from("{\"q\":[");
    json.push_str(&"{\"q\":[{}]}, [\"hello\"], ".repeat(size));
    json.push_str("]}");

    let result = parse_to_ast(&json, &Default::default(), &ParseOptions::default());

    assert!(result.is_ok(), "Expected Ok, but did not find one.");
  }
}