1use std::borrow::Cow;
2use std::collections::HashMap;
3use std::rc::Rc;
4
5use super::ast::*;
6use super::common::Range;
7use super::errors::*;
8use super::scanner::Scanner;
9use super::scanner::ScannerOptions;
10use super::tokens::Token;
11use super::tokens::TokenAndRange;
12
/// Map from a position in the source text to the comments found next to that position.
///
/// `Context::scan` registers each run of comments under two keys: the end of the token that
/// preceded the run and the start reported by the scanner once the run has been skipped.
/// Both entries hold the same `Rc`, so the underlying vector is shared rather than copied.
pub type CommentMap<'a> = HashMap<usize, Rc<Vec<Comment<'a>>>>;
17
/// Selects what `parse_to_ast` does with the comments it scans past.
#[derive(Default, Debug, PartialEq, Clone)]
pub enum CommentCollectionStrategy {
    /// Comments are not collected (the default); `ParseResult::comments` stays `None`.
    #[default]
    Off,
    /// Comments are gathered into a `CommentMap` that is returned in `ParseResult::comments`.
    Separate,
    /// Comment tokens are pushed onto the collected token list instead of a comment map.
    /// Nothing is stored unless `CollectOptions::tokens` is enabled as well.
    AsTokens,
}
39
40#[derive(Default, Clone)]
42pub struct CollectOptions {
43 pub comments: CommentCollectionStrategy,
45 pub tokens: bool,
47}
48
/// Switches for the syntax extensions the parser tolerates.
///
/// The `Default` implementation enables every flag; set a flag to `false` to reject that
/// extension with a parse error.
#[derive(Debug, Clone)]
pub struct ParseOptions {
    /// Accept line and block comments; otherwise a comment yields `CommentsNotAllowed`.
    pub allow_comments: bool,
    /// Accept unquoted words (and numbers) as object property names.
    pub allow_loose_object_property_names: bool,
    /// Accept a comma directly before a closing `}` or `]`.
    pub allow_trailing_commas: bool,
    /// Accept object properties that are not separated by a comma.
    pub allow_missing_commas: bool,
    /// Forwarded to the scanner: accept single-quoted string literals.
    pub allow_single_quoted_strings: bool,
    /// Forwarded to the scanner: accept hexadecimal number literals.
    pub allow_hexadecimal_numbers: bool,
    /// Forwarded to the scanner: accept a leading unary plus on numbers.
    pub allow_unary_plus_numbers: bool,
}
67
/// The default configuration is the most permissive one: every `allow_*` flag is enabled.
impl Default for ParseOptions {
    /// Returns options with all leniency flags set to `true`.
    fn default() -> Self {
        Self {
            allow_comments: true,
            allow_loose_object_property_names: true,
            allow_trailing_commas: true,
            allow_missing_commas: true,
            allow_single_quoted_strings: true,
            allow_hexadecimal_numbers: true,
            allow_unary_plus_numbers: true,
        }
    }
}
81
/// Everything produced by a successful call to `parse_to_ast`.
pub struct ParseResult<'a> {
    /// Collected comments; `None` unless `CommentCollectionStrategy::Separate` was requested.
    pub comments: Option<CommentMap<'a>>,
    /// The root value; `None` when the scanner produced no value token for the text.
    pub value: Option<Value<'a>>,
    /// Captured tokens in scan order; `None` unless `CollectOptions::tokens` was set.
    pub tokens: Option<Vec<TokenAndRange<'a>>>,
}
97
/// Mutable state shared by the recursive-descent parsing functions in this file.
struct Context<'a> {
    /// Tokenizer over the input text.
    scanner: Scanner<'a>,
    /// Comment map being filled; `Some` only for `CommentCollectionStrategy::Separate`.
    comments: Option<CommentMap<'a>>,
    /// Comments seen since the last flush in `scan`, waiting to be moved into `comments`.
    current_comments: Option<Vec<Comment<'a>>>,
    /// Value of `scanner.token_end()` after the most recent call to `scan`.
    last_token_end: usize,
    /// Ranges opened by `start_range` and not yet closed by `end_range`.
    range_stack: Vec<Range>,
    /// Captured tokens; `Some` only when token collection was requested.
    tokens: Option<Vec<TokenAndRange<'a>>>,
    /// `true` for `CommentCollectionStrategy::AsTokens`.
    collect_comments_as_tokens: bool,
    /// Copied from `ParseOptions::allow_comments`.
    allow_comments: bool,
    /// Copied from `ParseOptions::allow_trailing_commas`.
    allow_trailing_commas: bool,
    /// Copied from `ParseOptions::allow_missing_commas`.
    allow_missing_commas: bool,
    /// Copied from `ParseOptions::allow_loose_object_property_names`.
    allow_loose_object_property_names: bool,
}
111
112impl<'a> Context<'a> {
113 pub fn scan(&mut self) -> Result<Option<Token<'a>>, ParseError> {
114 let previous_end = self.last_token_end;
115 let token = self.scan_handling_comments()?;
116 self.last_token_end = self.scanner.token_end();
117
118 if let Some(comments) = self.comments.as_mut()
120 && let Some(current_comments) = self.current_comments.take()
121 {
122 let current_comments = Rc::new(current_comments);
123 comments.insert(previous_end, current_comments.clone());
124 comments.insert(self.scanner.token_start(), current_comments);
125 }
126
127 if let Some(token) = &token
128 && self.tokens.is_some()
129 {
130 self.capture_token(token.clone());
131 }
132
133 Ok(token)
134 }
135
136 pub fn token(&self) -> Option<Token<'a>> {
137 self.scanner.token()
138 }
139
140 pub fn start_range(&mut self) {
141 self.range_stack.push(Range {
142 start: self.scanner.token_start(),
143 end: 0,
144 });
145 }
146
147 pub fn end_range(&mut self) -> Range {
148 let mut range = self
149 .range_stack
150 .pop()
151 .expect("Range was popped from the stack, but the stack was empty.");
152 range.end = self.scanner.token_end();
153 range
154 }
155
156 pub fn create_range_from_last_token(&self) -> Range {
157 Range {
158 start: self.scanner.token_start(),
159 end: self.scanner.token_end(),
160 }
161 }
162
163 pub fn create_error(&self, kind: ParseErrorKind) -> ParseError {
164 self.scanner.create_error_for_current_token(kind)
165 }
166
167 pub fn create_error_for_current_range(&mut self, kind: ParseErrorKind) -> ParseError {
168 let range = self.end_range();
169 self.create_error_for_range(range, kind)
170 }
171
172 pub fn create_error_for_range(&self, range: Range, kind: ParseErrorKind) -> ParseError {
173 self.scanner.create_error_for_range(range, kind)
174 }
175
176 fn scan_handling_comments(&mut self) -> Result<Option<Token<'a>>, ParseError> {
177 loop {
178 let token = self.scanner.scan()?;
179 match token {
180 Some(token @ Token::CommentLine(_) | token @ Token::CommentBlock(_)) if self.collect_comments_as_tokens => {
181 self.capture_token(token);
182 }
183 Some(Token::CommentLine(text)) => {
184 self.handle_comment(Comment::Line(CommentLine {
185 range: self.create_range_from_last_token(),
186 text,
187 }))?;
188 }
189 Some(Token::CommentBlock(text)) => {
190 self.handle_comment(Comment::Block(CommentBlock {
191 range: self.create_range_from_last_token(),
192 text,
193 }))?;
194 }
195 _ => return Ok(token),
196 }
197 }
198 }
199
200 fn capture_token(&mut self, token: Token<'a>) {
201 let range = self.create_range_from_last_token();
202 if let Some(tokens) = self.tokens.as_mut() {
203 tokens.push(TokenAndRange {
204 token: token.clone(),
205 range,
206 });
207 }
208 }
209
210 fn handle_comment(&mut self, comment: Comment<'a>) -> Result<(), ParseError> {
211 if !self.allow_comments {
212 return Err(self.create_error(ParseErrorKind::CommentsNotAllowed));
213 }
214
215 if self.comments.is_some() {
216 if let Some(comments) = self.current_comments.as_mut() {
217 comments.push(comment);
218 } else {
219 self.current_comments = Some(vec![comment]);
220 }
221 }
222
223 Ok(())
224 }
225}
226
227pub fn parse_to_ast<'a>(
244 text: &'a str,
245 collect_options: &CollectOptions,
246 parse_options: &ParseOptions,
247) -> Result<ParseResult<'a>, ParseError> {
248 let mut context = Context {
249 scanner: Scanner::new(
250 text,
251 &ScannerOptions {
252 allow_single_quoted_strings: parse_options.allow_single_quoted_strings,
253 allow_hexadecimal_numbers: parse_options.allow_hexadecimal_numbers,
254 allow_unary_plus_numbers: parse_options.allow_unary_plus_numbers,
255 },
256 ),
257 comments: match collect_options.comments {
258 CommentCollectionStrategy::Separate => Some(Default::default()),
259 CommentCollectionStrategy::Off | CommentCollectionStrategy::AsTokens => None,
260 },
261 current_comments: None,
262 last_token_end: 0,
263 range_stack: Vec::new(),
264 tokens: if collect_options.tokens { Some(Vec::new()) } else { None },
265 collect_comments_as_tokens: collect_options.comments == CommentCollectionStrategy::AsTokens,
266 allow_comments: parse_options.allow_comments,
267 allow_trailing_commas: parse_options.allow_trailing_commas,
268 allow_missing_commas: parse_options.allow_missing_commas,
269 allow_loose_object_property_names: parse_options.allow_loose_object_property_names,
270 };
271 context.scan()?;
272 let value = parse_value(&mut context)?;
273
274 if context.scan()?.is_some() {
275 return Err(context.create_error(ParseErrorKind::MultipleRootJsonValues));
276 }
277
278 debug_assert!(context.range_stack.is_empty());
279
280 Ok(ParseResult {
281 comments: context.comments,
282 tokens: context.tokens,
283 value,
284 })
285}
286
287fn parse_value<'a>(context: &mut Context<'a>) -> Result<Option<Value<'a>>, ParseError> {
288 match context.token() {
289 None => Ok(None),
290 Some(token) => match token {
291 Token::OpenBrace => Ok(Some(Value::Object(parse_object(context)?))),
292 Token::OpenBracket => Ok(Some(Value::Array(parse_array(context)?))),
293 Token::String(value) => Ok(Some(Value::StringLit(create_string_lit(context, value)))),
294 Token::Boolean(value) => Ok(Some(Value::BooleanLit(create_boolean_lit(context, value)))),
295 Token::Number(value) => Ok(Some(Value::NumberLit(create_number_lit(context, value)))),
296 Token::Null => Ok(Some(Value::NullKeyword(create_null_keyword(context)))),
297 Token::CloseBracket => Err(context.create_error(ParseErrorKind::UnexpectedCloseBracket)),
298 Token::CloseBrace => Err(context.create_error(ParseErrorKind::UnexpectedCloseBrace)),
299 Token::Comma => Err(context.create_error(ParseErrorKind::UnexpectedComma)),
300 Token::Colon => Err(context.create_error(ParseErrorKind::UnexpectedColon)),
301 Token::Word(_) => Err(context.create_error(ParseErrorKind::UnexpectedWord)),
302 Token::CommentLine(_) => unreachable!(),
303 Token::CommentBlock(_) => unreachable!(),
304 },
305 }
306}
307
308fn parse_object<'a>(context: &mut Context<'a>) -> Result<Object<'a>, ParseError> {
309 debug_assert!(context.token() == Some(Token::OpenBrace));
310 let mut properties = Vec::new();
311
312 context.start_range();
313 context.scan()?;
314
315 loop {
316 match context.token() {
317 Some(Token::CloseBrace) => break,
318 Some(Token::String(prop_name)) => {
319 properties.push(parse_object_property(context, PropName::String(prop_name))?);
320 }
321 Some(Token::Word(prop_name)) | Some(Token::Number(prop_name)) => {
322 properties.push(parse_object_property(context, PropName::Word(prop_name))?);
323 }
324 None => return Err(context.create_error_for_current_range(ParseErrorKind::UnterminatedObject)),
325 _ => return Err(context.create_error(ParseErrorKind::UnexpectedTokenInObject)),
326 }
327
328 let after_value_end = context.last_token_end;
330 match context.scan()? {
331 Some(Token::Comma) => {
332 let comma_range = context.create_range_from_last_token();
333 if let Some(Token::CloseBrace) = context.scan()?
334 && !context.allow_trailing_commas
335 {
336 return Err(context.create_error_for_range(comma_range, ParseErrorKind::TrailingCommasNotAllowed));
337 }
338 }
339 Some(Token::String(_) | Token::Word(_) | Token::Number(_)) if !context.allow_missing_commas => {
340 let range = Range {
341 start: after_value_end,
342 end: after_value_end,
343 };
344 return Err(context.create_error_for_range(range, ParseErrorKind::ExpectedComma));
345 }
346 _ => {}
347 }
348 }
349
350 Ok(Object {
351 range: context.end_range(),
352 properties,
353 })
354}
355
/// Property name as read from the token stream, before it becomes an `ObjectPropName`.
enum PropName<'a> {
    /// Name taken from a string token.
    String(Cow<'a, str>),
    /// Name taken from a word or number token; only valid when loose property names are allowed.
    Word(&'a str),
}
360
361fn parse_object_property<'a>(context: &mut Context<'a>, prop_name: PropName<'a>) -> Result<ObjectProp<'a>, ParseError> {
362 context.start_range();
363
364 let name = match prop_name {
365 PropName::String(prop_name) => ObjectPropName::String(create_string_lit(context, prop_name)),
366 PropName::Word(prop_name) => {
367 if context.allow_loose_object_property_names {
368 ObjectPropName::Word(create_word(context, prop_name))
369 } else {
370 return Err(context.create_error(ParseErrorKind::ExpectedStringObjectProperty));
371 }
372 }
373 };
374
375 match context.scan()? {
376 Some(Token::Colon) => {}
377 _ => return Err(context.create_error(ParseErrorKind::ExpectedColonAfterObjectKey)),
378 }
379
380 context.scan()?;
381 let value = parse_value(context)?;
382
383 match value {
384 Some(value) => Ok(ObjectProp {
385 range: context.end_range(),
386 name,
387 value,
388 }),
389 None => Err(context.create_error(ParseErrorKind::ExpectedObjectValue)),
390 }
391}
392
393fn parse_array<'a>(context: &mut Context<'a>) -> Result<Array<'a>, ParseError> {
394 debug_assert!(context.token() == Some(Token::OpenBracket));
395 let mut elements = Vec::new();
396
397 context.start_range();
398 context.scan()?;
399
400 loop {
401 match context.token() {
402 Some(Token::CloseBracket) => break,
403 None => return Err(context.create_error_for_current_range(ParseErrorKind::UnterminatedArray)),
404 _ => match parse_value(context)? {
405 Some(value) => elements.push(value),
406 None => return Err(context.create_error_for_current_range(ParseErrorKind::UnterminatedArray)),
407 },
408 }
409
410 if let Some(Token::Comma) = context.scan()? {
412 let comma_range = context.create_range_from_last_token();
413 if let Some(Token::CloseBracket) = context.scan()?
414 && !context.allow_trailing_commas
415 {
416 return Err(context.create_error_for_range(comma_range, ParseErrorKind::TrailingCommasNotAllowed));
417 }
418 }
419 }
420
421 Ok(Array {
422 range: context.end_range(),
423 elements,
424 })
425}
426
427fn create_string_lit<'a>(context: &Context<'a>, value: Cow<'a, str>) -> StringLit<'a> {
430 StringLit {
431 range: context.create_range_from_last_token(),
432 value,
433 }
434}
435
436fn create_word<'a>(context: &Context<'a>, value: &'a str) -> WordLit<'a> {
437 WordLit {
438 range: context.create_range_from_last_token(),
439 value,
440 }
441}
442
443fn create_boolean_lit(context: &Context, value: bool) -> BooleanLit {
444 BooleanLit {
445 range: context.create_range_from_last_token(),
446 value,
447 }
448}
449
450fn create_number_lit<'a>(context: &Context<'a>, value: &'a str) -> NumberLit<'a> {
451 NumberLit {
452 range: context.create_range_from_last_token(),
453 value,
454 }
455}
456
457fn create_null_keyword(context: &Context) -> NullKeyword {
458 NullKeyword {
459 range: context.create_range_from_last_token(),
460 }
461}
462
#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;

    /// Asserts that `result` is an error whose display text equals `message`.
    ///
    /// Shared by all error assertions so that failures are reported at the calling test.
    #[track_caller]
    fn assert_error_message(result: Result<ParseResult<'_>, ParseError>, message: &str) {
        match result {
            Ok(_) => panic!("Expected error, but did not find one."),
            Err(err) => assert_eq!(err.to_string(), message),
        }
    }

    /// Parses `text` with default (permissive) options and expects the error `message`.
    #[track_caller]
    fn assert_has_error(text: &str, message: &str) {
        let result = parse_to_ast(text, &Default::default(), &Default::default());
        assert_error_message(result, message);
    }

    /// Parses `text` with every leniency flag disabled and expects the error `message`.
    #[track_caller]
    fn assert_has_strict_error(text: &str, message: &str) {
        let result = parse_to_ast(
            text,
            &Default::default(),
            &ParseOptions {
                allow_comments: false,
                allow_loose_object_property_names: false,
                allow_trailing_commas: false,
                allow_missing_commas: false,
                allow_single_quoted_strings: false,
                allow_hexadecimal_numbers: false,
                allow_unary_plus_numbers: false,
            },
        );
        assert_error_message(result, message);
    }

    #[test]
    fn it_should_error_when_has_multiple_values() {
        assert_has_error(
            "[][]",
            "Text cannot contain more than one JSON value on line 1 column 3",
        );
    }

    #[test]
    fn it_should_error_when_object_is_not_terminated() {
        assert_has_error("{", "Unterminated object on line 1 column 1");
    }

    #[test]
    fn it_should_error_when_object_has_unexpected_token() {
        assert_has_error("{ [] }", "Unexpected token in object on line 1 column 3");
    }

    #[test]
    fn it_should_error_when_object_has_two_non_string_tokens() {
        assert_has_error(
            "{ asdf asdf: 5 }",
            "Expected colon after the string or word in object property on line 1 column 8",
        );
    }

    #[test]
    fn it_should_error_when_array_is_not_terminated() {
        assert_has_error("[", "Unterminated array on line 1 column 1");
    }

    #[test]
    fn it_should_error_when_array_has_unexpected_token() {
        assert_has_error("[:]", "Unexpected colon on line 1 column 2");
    }

    #[test]
    fn it_should_error_when_comment_block_not_closed() {
        assert_has_error("/* test", "Unterminated comment block on line 1 column 1");
    }

    #[test]
    fn it_should_error_when_string_lit_not_closed() {
        assert_has_error("\" test", "Unterminated string literal on line 1 column 1");
    }

    #[test]
    fn strict_should_error_object_trailing_comma() {
        assert_has_strict_error(
            r#"{ "test": 5, }"#,
            "Trailing commas are not allowed on line 1 column 12",
        );
    }

    #[test]
    fn strict_should_error_array_trailing_comma() {
        assert_has_strict_error(
            r#"[ "test", ]"#,
            "Trailing commas are not allowed on line 1 column 9",
        );
    }

    #[test]
    fn strict_should_error_comment_line() {
        assert_has_strict_error(
            r#"[ "test" ] // 1"#,
            "Comments are not allowed on line 1 column 12",
        );
    }

    #[test]
    fn strict_should_error_comment_block() {
        assert_has_strict_error(
            r#"[ "test" /* 1 */]"#,
            "Comments are not allowed on line 1 column 10",
        );
    }

    #[test]
    fn strict_should_error_word_property() {
        assert_has_strict_error(
            r#"{ word: 5 }"#,
            "Expected string for object property on line 1 column 3",
        );
    }

    #[test]
    fn strict_should_error_single_quoted_string() {
        assert_has_strict_error(
            r#"{ "key": 'value' }"#,
            "Single-quoted strings are not allowed on line 1 column 10",
        );
    }

    #[test]
    fn strict_should_error_hexadecimal_number() {
        assert_has_strict_error(
            r#"{ "key": 0xFF }"#,
            "Hexadecimal numbers are not allowed on line 1 column 10",
        );
    }

    #[test]
    fn strict_should_error_unary_plus_number() {
        assert_has_strict_error(
            r#"{ "key": +42 }"#,
            "Unary plus on numbers is not allowed on line 1 column 10",
        );
    }

    #[test]
    fn it_should_not_include_tokens_by_default() {
        let result = parse_to_ast("{}", &Default::default(), &Default::default()).unwrap();
        assert!(result.tokens.is_none());
    }

    #[test]
    fn it_should_include_tokens_when_specified() {
        let result = parse_to_ast(
            "{}",
            &CollectOptions {
                tokens: true,
                ..Default::default()
            },
            &Default::default(),
        )
        .unwrap();
        let tokens = result.tokens.unwrap();
        assert_eq!(tokens.len(), 2);
    }

    #[test]
    fn it_should_not_include_comments_by_default() {
        let result = parse_to_ast("{}", &Default::default(), &Default::default()).unwrap();
        assert!(result.comments.is_none());
    }

    #[test]
    fn it_should_include_comments_when_specified() {
        let result = parse_to_ast(
            "{} // 2",
            &CollectOptions {
                comments: CommentCollectionStrategy::Separate,
                ..Default::default()
            },
            &Default::default(),
        )
        .unwrap();
        let comments = result.comments.unwrap();
        // One comment, but it is registered under two positions in the map.
        assert_eq!(comments.len(), 2);
    }

    // NOTE(review): the expected columns below (9 vs 10) appear to require the emoji literal to
    // be the three-code-point sequence U+1F9D1 U+200D U+1F9B0 (zero-width joiner in the
    // middle). Confirm the joiner is still present in the literal after any copy/paste.
    #[cfg(not(feature = "error_unicode_width"))]
    #[test]
    fn error_correct_line_column_unicode_width() {
        assert_has_strict_error(r#"["🧑‍🦰", ["#, "Unterminated array on line 1 column 9");
    }

    #[cfg(feature = "error_unicode_width")]
    #[test]
    fn error_correct_line_column_unicode_width() {
        assert_has_strict_error(r#"["🧑‍🦰", ["#, "Unterminated array on line 1 column 10");
    }

    #[test]
    fn it_should_parse_unquoted_keys_with_hex_and_trailing_comma() {
        let text = r#"{
  CP_CanFuncReqId: 0x7DF, // 2015
}"#;
        {
            let parse_result =
                parse_to_ast(text, &Default::default(), &Default::default()).unwrap();

            let value = parse_result.value.unwrap();
            let obj = value.as_object().unwrap();
            assert_eq!(obj.properties.len(), 1);
            assert_eq!(obj.properties[0].name.as_str(), "CP_CanFuncReqId");

            // The AST keeps the number as its raw source text.
            let number_value = obj.properties[0].value.as_number_lit().unwrap();
            assert_eq!(number_value.value, "0x7DF");
        }
        #[cfg(feature = "serde")]
        {
            let value = crate::parse_to_serde_value(text, &Default::default())
                .unwrap()
                .unwrap();
            assert_eq!(
                value,
                serde_json::json!({
                    "CP_CanFuncReqId": 2015
                })
            );
        }
    }

    #[test]
    fn it_should_parse_unary_plus_numbers() {
        let result = parse_to_ast(
            r#"{ "test": +42 }"#,
            &Default::default(),
            &Default::default(),
        )
        .unwrap();

        let value = result.value.unwrap();
        let obj = value.as_object().unwrap();
        assert_eq!(obj.properties.len(), 1);
        assert_eq!(obj.properties[0].name.as_str(), "test");

        let number_value = obj.properties[0].value.as_number_lit().unwrap();
        assert_eq!(number_value.value, "+42");
    }

    #[test]
    fn missing_comma_between_properties() {
        // The two-space indent inside the JSON matters: the asserted column below is the
        // position right after `"alice"` on line 2.
        let text = r#"{
  "name": "alice"
  "age": 25
}"#;
        let result = parse_to_ast(text, &Default::default(), &Default::default()).unwrap();
        assert_eq!(
            result
                .value
                .unwrap()
                .as_object()
                .unwrap()
                .get_number("age")
                .unwrap()
                .value,
            "25"
        );

        assert_has_strict_error(text, "Expected comma on line 2 column 18");
    }

    #[test]
    fn missing_comma_with_comment_between_properties() {
        // The error must point right after the value, not after the trailing comment.
        let result = parse_to_ast(
            r#"{
  "name": "alice" // comment here
  "age": 25
}"#,
            &Default::default(),
            &ParseOptions {
                allow_comments: true,
                allow_missing_commas: false,
                ..Default::default()
            },
        );
        assert_error_message(result, "Expected comma on line 2 column 18");
    }
}