1use std::borrow::Cow;
2use std::collections::HashMap;
3use std::rc::Rc;
4
5use super::ast::*;
6use super::common::Range;
7use super::errors::*;
8use super::scanner::Scanner;
9use super::scanner::ScannerOptions;
10use super::tokens::Token;
11use super::tokens::TokenAndRange;
12
13pub type CommentMap<'a> = HashMap<usize, Rc<Vec<Comment<'a>>>>;
17
/// How comments encountered during parsing should be collected.
#[derive(Default, Debug, PartialEq, Clone)]
pub enum CommentCollectionStrategy {
    /// Comments are not collected; `ParseResult::comments` is `None`.
    #[default]
    Off,
    /// Comments are gathered into a separate `CommentMap` that is returned in
    /// `ParseResult::comments`.
    Separate,
    /// Comments are captured into the token list alongside the other tokens
    /// (only when token collection is enabled) instead of a comment map.
    AsTokens,
}
39
40#[derive(Default, Clone)]
42pub struct CollectOptions {
43 pub comments: CommentCollectionStrategy,
45 pub tokens: bool,
47}
48
/// Options controlling which JSON extensions the parser accepts.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct ParseOptions {
    /// Allow line and block comments; when `false` a comment is a parse error.
    pub allow_comments: bool,
    /// Allow object property names that are not quoted strings (words or
    /// numbers); when `false` such a name is a parse error.
    pub allow_loose_object_property_names: bool,
    /// Allow a comma directly before a closing `}` or `]`.
    pub allow_trailing_commas: bool,
    /// Allow single-quoted strings (forwarded to the scanner).
    pub allow_single_quoted_strings: bool,
    /// Allow hexadecimal numbers such as `0xFF` (forwarded to the scanner).
    pub allow_hexadecimal_numbers: bool,
    /// Allow numbers with a leading unary plus such as `+42` (forwarded to
    /// the scanner).
    pub allow_unary_plus_numbers: bool,
}
65
66impl Default for ParseOptions {
67 fn default() -> Self {
68 Self {
69 allow_comments: true,
70 allow_loose_object_property_names: true,
71 allow_trailing_commas: true,
72 allow_single_quoted_strings: true,
73 allow_hexadecimal_numbers: true,
74 allow_unary_plus_numbers: true,
75 }
76 }
77}
78
79pub struct ParseResult<'a> {
81 pub comments: Option<CommentMap<'a>>,
87 pub value: Option<Value<'a>>,
89 pub tokens: Option<Vec<TokenAndRange<'a>>>,
93}
94
95struct Context<'a> {
96 scanner: Scanner<'a>,
97 comments: Option<CommentMap<'a>>,
98 current_comments: Option<Vec<Comment<'a>>>,
99 last_token_end: usize,
100 range_stack: Vec<Range>,
101 tokens: Option<Vec<TokenAndRange<'a>>>,
102 collect_comments_as_tokens: bool,
103 allow_comments: bool,
104 allow_trailing_commas: bool,
105 allow_loose_object_property_names: bool,
106}
107
108impl<'a> Context<'a> {
109 pub fn scan(&mut self) -> Result<Option<Token<'a>>, ParseError> {
110 let previous_end = self.last_token_end;
111 let token = self.scan_handling_comments()?;
112 self.last_token_end = self.scanner.token_end();
113
114 if let Some(comments) = self.comments.as_mut()
116 && let Some(current_comments) = self.current_comments.take()
117 {
118 let current_comments = Rc::new(current_comments);
119 comments.insert(previous_end, current_comments.clone());
120 comments.insert(self.scanner.token_start(), current_comments);
121 }
122
123 if let Some(token) = &token
124 && self.tokens.is_some()
125 {
126 self.capture_token(token.clone());
127 }
128
129 Ok(token)
130 }
131
132 pub fn token(&self) -> Option<Token<'a>> {
133 self.scanner.token()
134 }
135
136 pub fn start_range(&mut self) {
137 self.range_stack.push(Range {
138 start: self.scanner.token_start(),
139 end: 0,
140 });
141 }
142
143 pub fn end_range(&mut self) -> Range {
144 let mut range = self
145 .range_stack
146 .pop()
147 .expect("Range was popped from the stack, but the stack was empty.");
148 range.end = self.scanner.token_end();
149 range
150 }
151
152 pub fn create_range_from_last_token(&self) -> Range {
153 Range {
154 start: self.scanner.token_start(),
155 end: self.scanner.token_end(),
156 }
157 }
158
159 pub fn create_error(&self, kind: ParseErrorKind) -> ParseError {
160 self.scanner.create_error_for_current_token(kind)
161 }
162
163 pub fn create_error_for_current_range(&mut self, kind: ParseErrorKind) -> ParseError {
164 let range = self.end_range();
165 self.create_error_for_range(range, kind)
166 }
167
168 pub fn create_error_for_range(&self, range: Range, kind: ParseErrorKind) -> ParseError {
169 self.scanner.create_error_for_range(range, kind)
170 }
171
172 fn scan_handling_comments(&mut self) -> Result<Option<Token<'a>>, ParseError> {
173 loop {
174 let token = self.scanner.scan()?;
175 match token {
176 Some(token @ Token::CommentLine(_) | token @ Token::CommentBlock(_)) if self.collect_comments_as_tokens => {
177 self.capture_token(token);
178 }
179 Some(Token::CommentLine(text)) => {
180 self.handle_comment(Comment::Line(CommentLine {
181 range: self.create_range_from_last_token(),
182 text,
183 }))?;
184 }
185 Some(Token::CommentBlock(text)) => {
186 self.handle_comment(Comment::Block(CommentBlock {
187 range: self.create_range_from_last_token(),
188 text,
189 }))?;
190 }
191 _ => return Ok(token),
192 }
193 }
194 }
195
196 fn capture_token(&mut self, token: Token<'a>) {
197 let range = self.create_range_from_last_token();
198 if let Some(tokens) = self.tokens.as_mut() {
199 tokens.push(TokenAndRange {
200 token: token.clone(),
201 range,
202 });
203 }
204 }
205
206 fn handle_comment(&mut self, comment: Comment<'a>) -> Result<(), ParseError> {
207 if !self.allow_comments {
208 return Err(self.create_error(ParseErrorKind::CommentsNotAllowed));
209 }
210
211 if self.comments.is_some() {
212 if let Some(comments) = self.current_comments.as_mut() {
213 comments.push(comment);
214 } else {
215 self.current_comments = Some(vec![comment]);
216 }
217 }
218
219 Ok(())
220 }
221}
222
223pub fn parse_to_ast<'a>(
240 text: &'a str,
241 collect_options: &CollectOptions,
242 parse_options: &ParseOptions,
243) -> Result<ParseResult<'a>, ParseError> {
244 let mut context = Context {
245 scanner: Scanner::new(
246 text,
247 &ScannerOptions {
248 allow_single_quoted_strings: parse_options.allow_single_quoted_strings,
249 allow_hexadecimal_numbers: parse_options.allow_hexadecimal_numbers,
250 allow_unary_plus_numbers: parse_options.allow_unary_plus_numbers,
251 },
252 ),
253 comments: match collect_options.comments {
254 CommentCollectionStrategy::Separate => Some(Default::default()),
255 CommentCollectionStrategy::Off | CommentCollectionStrategy::AsTokens => None,
256 },
257 current_comments: None,
258 last_token_end: 0,
259 range_stack: Vec::new(),
260 tokens: if collect_options.tokens { Some(Vec::new()) } else { None },
261 collect_comments_as_tokens: collect_options.comments == CommentCollectionStrategy::AsTokens,
262 allow_comments: parse_options.allow_comments,
263 allow_trailing_commas: parse_options.allow_trailing_commas,
264 allow_loose_object_property_names: parse_options.allow_loose_object_property_names,
265 };
266 context.scan()?;
267 let value = parse_value(&mut context)?;
268
269 if context.scan()?.is_some() {
270 return Err(context.create_error(ParseErrorKind::MultipleRootJsonValues));
271 }
272
273 debug_assert!(context.range_stack.is_empty());
274
275 Ok(ParseResult {
276 comments: context.comments,
277 tokens: context.tokens,
278 value,
279 })
280}
281
282fn parse_value<'a>(context: &mut Context<'a>) -> Result<Option<Value<'a>>, ParseError> {
283 match context.token() {
284 None => Ok(None),
285 Some(token) => match token {
286 Token::OpenBrace => Ok(Some(Value::Object(parse_object(context)?))),
287 Token::OpenBracket => Ok(Some(Value::Array(parse_array(context)?))),
288 Token::String(value) => Ok(Some(Value::StringLit(create_string_lit(context, value)))),
289 Token::Boolean(value) => Ok(Some(Value::BooleanLit(create_boolean_lit(context, value)))),
290 Token::Number(value) => Ok(Some(Value::NumberLit(create_number_lit(context, value)))),
291 Token::Null => Ok(Some(Value::NullKeyword(create_null_keyword(context)))),
292 Token::CloseBracket => Err(context.create_error(ParseErrorKind::UnexpectedCloseBracket)),
293 Token::CloseBrace => Err(context.create_error(ParseErrorKind::UnexpectedCloseBrace)),
294 Token::Comma => Err(context.create_error(ParseErrorKind::UnexpectedComma)),
295 Token::Colon => Err(context.create_error(ParseErrorKind::UnexpectedColon)),
296 Token::Word(_) => Err(context.create_error(ParseErrorKind::UnexpectedWord)),
297 Token::CommentLine(_) => unreachable!(),
298 Token::CommentBlock(_) => unreachable!(),
299 },
300 }
301}
302
303fn parse_object<'a>(context: &mut Context<'a>) -> Result<Object<'a>, ParseError> {
304 debug_assert!(context.token() == Some(Token::OpenBrace));
305 let mut properties = Vec::new();
306
307 context.start_range();
308 context.scan()?;
309
310 loop {
311 match context.token() {
312 Some(Token::CloseBrace) => break,
313 Some(Token::String(prop_name)) => {
314 properties.push(parse_object_property(context, PropName::String(prop_name))?);
315 }
316 Some(Token::Word(prop_name)) | Some(Token::Number(prop_name)) => {
317 properties.push(parse_object_property(context, PropName::Word(prop_name))?);
318 }
319 None => return Err(context.create_error_for_current_range(ParseErrorKind::UnterminatedObject)),
320 _ => return Err(context.create_error(ParseErrorKind::UnexpectedTokenInObject)),
321 }
322
323 if let Some(Token::Comma) = context.scan()? {
325 let comma_range = context.create_range_from_last_token();
326 if let Some(Token::CloseBrace) = context.scan()?
327 && !context.allow_trailing_commas
328 {
329 return Err(context.create_error_for_range(comma_range, ParseErrorKind::TrailingCommasNotAllowed));
330 }
331 }
332 }
333
334 Ok(Object {
335 range: context.end_range(),
336 properties,
337 })
338}
339
/// Raw object property name as produced by the scanner.
enum PropName<'a> {
    /// Name taken from a string token.
    String(Cow<'a, str>),
    /// Name taken from a word or number token.
    Word(&'a str),
}
344
345fn parse_object_property<'a>(context: &mut Context<'a>, prop_name: PropName<'a>) -> Result<ObjectProp<'a>, ParseError> {
346 context.start_range();
347
348 let name = match prop_name {
349 PropName::String(prop_name) => ObjectPropName::String(create_string_lit(context, prop_name)),
350 PropName::Word(prop_name) => {
351 if context.allow_loose_object_property_names {
352 ObjectPropName::Word(create_word(context, prop_name))
353 } else {
354 return Err(context.create_error(ParseErrorKind::ExpectedStringObjectProperty));
355 }
356 }
357 };
358
359 match context.scan()? {
360 Some(Token::Colon) => {}
361 _ => return Err(context.create_error(ParseErrorKind::ExpectedColonAfterObjectKey)),
362 }
363
364 context.scan()?;
365 let value = parse_value(context)?;
366
367 match value {
368 Some(value) => Ok(ObjectProp {
369 range: context.end_range(),
370 name,
371 value,
372 }),
373 None => Err(context.create_error(ParseErrorKind::ExpectedObjectValue)),
374 }
375}
376
377fn parse_array<'a>(context: &mut Context<'a>) -> Result<Array<'a>, ParseError> {
378 debug_assert!(context.token() == Some(Token::OpenBracket));
379 let mut elements = Vec::new();
380
381 context.start_range();
382 context.scan()?;
383
384 loop {
385 match context.token() {
386 Some(Token::CloseBracket) => break,
387 None => return Err(context.create_error_for_current_range(ParseErrorKind::UnterminatedArray)),
388 _ => match parse_value(context)? {
389 Some(value) => elements.push(value),
390 None => return Err(context.create_error_for_current_range(ParseErrorKind::UnterminatedArray)),
391 },
392 }
393
394 if let Some(Token::Comma) = context.scan()? {
396 let comma_range = context.create_range_from_last_token();
397 if let Some(Token::CloseBracket) = context.scan()?
398 && !context.allow_trailing_commas
399 {
400 return Err(context.create_error_for_range(comma_range, ParseErrorKind::TrailingCommasNotAllowed));
401 }
402 }
403 }
404
405 Ok(Array {
406 range: context.end_range(),
407 elements,
408 })
409}
410
411fn create_string_lit<'a>(context: &Context<'a>, value: Cow<'a, str>) -> StringLit<'a> {
414 StringLit {
415 range: context.create_range_from_last_token(),
416 value,
417 }
418}
419
420fn create_word<'a>(context: &Context<'a>, value: &'a str) -> WordLit<'a> {
421 WordLit {
422 range: context.create_range_from_last_token(),
423 value,
424 }
425}
426
427fn create_boolean_lit(context: &Context, value: bool) -> BooleanLit {
428 BooleanLit {
429 range: context.create_range_from_last_token(),
430 value,
431 }
432}
433
434fn create_number_lit<'a>(context: &Context<'a>, value: &'a str) -> NumberLit<'a> {
435 NumberLit {
436 range: context.create_range_from_last_token(),
437 value,
438 }
439}
440
441fn create_null_keyword(context: &Context) -> NullKeyword {
442 NullKeyword {
443 range: context.create_range_from_last_token(),
444 }
445}
446
#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;

    /// `["<emoji>", [` where the emoji is a zero-width-joiner sequence
    /// (U+1F9D1 U+200D U+1F9B0). The code points are written as escapes so the
    /// invisible joiner cannot be lost when the file is copied or reformatted;
    /// the expected columns below depend on it being present.
    const UNTERMINATED_ARRAY_AFTER_ZWJ_EMOJI: &str = "[\"\u{1F9D1}\u{200D}\u{1F9B0}\", [";

    /// Asserts that `result` is an error whose display text equals `message`.
    #[track_caller]
    fn assert_error_message(result: Result<ParseResult<'_>, ParseError>, message: &str) {
        match result {
            Ok(_) => panic!("Expected error, but did not find one."),
            Err(err) => assert_eq!(err.to_string(), message),
        }
    }

    /// Parses `text` with the default (permissive) options and expects the
    /// given error message.
    #[track_caller]
    fn assert_has_error(text: &str, message: &str) {
        let result = parse_to_ast(text, &Default::default(), &Default::default());
        assert_error_message(result, message);
    }

    /// Parses `text` with every extension disabled and expects the given
    /// error message.
    #[track_caller]
    fn assert_has_strict_error(text: &str, message: &str) {
        let result = parse_to_ast(
            text,
            &Default::default(),
            &ParseOptions {
                allow_comments: false,
                allow_loose_object_property_names: false,
                allow_trailing_commas: false,
                allow_single_quoted_strings: false,
                allow_hexadecimal_numbers: false,
                allow_unary_plus_numbers: false,
            },
        );
        assert_error_message(result, message);
    }

    #[test]
    fn it_should_error_when_has_multiple_values() {
        assert_has_error(
            "[][]",
            "Text cannot contain more than one JSON value on line 1 column 3",
        );
    }

    #[test]
    fn it_should_error_when_object_is_not_terminated() {
        assert_has_error("{", "Unterminated object on line 1 column 1");
    }

    #[test]
    fn it_should_error_when_object_has_unexpected_token() {
        assert_has_error("{ [] }", "Unexpected token in object on line 1 column 3");
    }

    #[test]
    fn it_should_error_when_object_has_two_non_string_tokens() {
        assert_has_error(
            "{ asdf asdf: 5 }",
            "Expected colon after the string or word in object property on line 1 column 8",
        );
    }

    #[test]
    fn it_should_error_when_array_is_not_terminated() {
        assert_has_error("[", "Unterminated array on line 1 column 1");
    }

    #[test]
    fn it_should_error_when_array_has_unexpected_token() {
        assert_has_error("[:]", "Unexpected colon on line 1 column 2");
    }

    #[test]
    fn it_should_error_when_comment_block_not_closed() {
        assert_has_error("/* test", "Unterminated comment block on line 1 column 1");
    }

    #[test]
    fn it_should_error_when_string_lit_not_closed() {
        assert_has_error("\" test", "Unterminated string literal on line 1 column 1");
    }

    #[test]
    fn strict_should_error_object_trailing_comma() {
        assert_has_strict_error(
            r#"{ "test": 5, }"#,
            "Trailing commas are not allowed on line 1 column 12",
        );
    }

    #[test]
    fn strict_should_error_array_trailing_comma() {
        assert_has_strict_error(r#"[ "test", ]"#, "Trailing commas are not allowed on line 1 column 9");
    }

    #[test]
    fn strict_should_error_comment_line() {
        assert_has_strict_error(r#"[ "test" ] // 1"#, "Comments are not allowed on line 1 column 12");
    }

    #[test]
    fn strict_should_error_comment_block() {
        assert_has_strict_error(r#"[ "test" /* 1 */]"#, "Comments are not allowed on line 1 column 10");
    }

    #[test]
    fn strict_should_error_word_property() {
        assert_has_strict_error(
            r#"{ word: 5 }"#,
            "Expected string for object property on line 1 column 3",
        );
    }

    #[test]
    fn strict_should_error_single_quoted_string() {
        assert_has_strict_error(
            r#"{ "key": 'value' }"#,
            "Single-quoted strings are not allowed on line 1 column 10",
        );
    }

    #[test]
    fn strict_should_error_hexadecimal_number() {
        assert_has_strict_error(
            r#"{ "key": 0xFF }"#,
            "Hexadecimal numbers are not allowed on line 1 column 10",
        );
    }

    #[test]
    fn strict_should_error_unary_plus_number() {
        assert_has_strict_error(
            r#"{ "key": +42 }"#,
            "Unary plus on numbers is not allowed on line 1 column 10",
        );
    }

    #[test]
    fn it_should_not_include_tokens_by_default() {
        let result = parse_to_ast("{}", &Default::default(), &Default::default()).unwrap();
        assert!(result.tokens.is_none());
    }

    #[test]
    fn it_should_include_tokens_when_specified() {
        let result = parse_to_ast(
            "{}",
            &CollectOptions {
                tokens: true,
                ..Default::default()
            },
            &Default::default(),
        )
        .unwrap();
        let tokens = result.tokens.unwrap();
        assert_eq!(tokens.len(), 2);
    }

    #[test]
    fn it_should_not_include_comments_by_default() {
        let result = parse_to_ast("{}", &Default::default(), &Default::default()).unwrap();
        assert!(result.comments.is_none());
    }

    #[test]
    fn it_should_include_comments_when_specified() {
        let result = parse_to_ast(
            "{} // 2",
            &CollectOptions {
                comments: CommentCollectionStrategy::Separate,
                ..Default::default()
            },
            &Default::default(),
        )
        .unwrap();
        let comments = result.comments.unwrap();
        // Two entries (previous token end and next token start) that share the
        // same single comment.
        assert_eq!(comments.len(), 2);
    }

    #[cfg(not(feature = "error_unicode_width"))]
    #[test]
    fn error_correct_line_column_unicode_width() {
        assert_has_strict_error(
            UNTERMINATED_ARRAY_AFTER_ZWJ_EMOJI,
            "Unterminated array on line 1 column 9",
        );
    }

    #[cfg(feature = "error_unicode_width")]
    #[test]
    fn error_correct_line_column_unicode_width() {
        assert_has_strict_error(
            UNTERMINATED_ARRAY_AFTER_ZWJ_EMOJI,
            "Unterminated array on line 1 column 10",
        );
    }

    #[test]
    fn it_should_parse_unquoted_keys_with_hex_and_trailing_comma() {
        let text = r#"{
  CP_CanFuncReqId: 0x7DF, // 2015
}"#;
        {
            let parse_result = parse_to_ast(text, &Default::default(), &Default::default()).unwrap();

            let value = parse_result.value.unwrap();
            let obj = value.as_object().unwrap();
            assert_eq!(obj.properties.len(), 1);
            assert_eq!(obj.properties[0].name.as_str(), "CP_CanFuncReqId");

            // The AST keeps the number's source text untouched.
            let number_value = obj.properties[0].value.as_number_lit().unwrap();
            assert_eq!(number_value.value, "0x7DF");
        }
        #[cfg(feature = "serde")]
        {
            let value = crate::parse_to_serde_value(text, &Default::default()).unwrap().unwrap();
            assert_eq!(
                value,
                serde_json::json!({
                    "CP_CanFuncReqId": 2015
                })
            );
        }
    }

    #[test]
    fn it_should_parse_unary_plus_numbers() {
        let result = parse_to_ast(r#"{ "test": +42 }"#, &Default::default(), &Default::default()).unwrap();

        let value = result.value.unwrap();
        let obj = value.as_object().unwrap();
        assert_eq!(obj.properties.len(), 1);
        assert_eq!(obj.properties[0].name.as_str(), "test");

        let number_value = obj.properties[0].value.as_number_lit().unwrap();
        assert_eq!(number_value.value, "+42");
    }
}