Skip to main content

jsonc_parser/
parser.rs

1use std::borrow::Cow;
2
3use crate::ParseOptions;
4use crate::common::Range;
5use crate::errors::*;
6use crate::scanner::Scanner;
7use crate::scanner::ScannerOptions;
8use crate::tokens::Token;
9
/// Name of an object property as produced by the parser.
///
/// `String` carries the contents of a string token, while `Word` carries
/// the raw text of a word or number token that was accepted as a
/// property name.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) enum ObjectKey<'a> {
  /// Key that came from a string token.
  String(Cow<'a, str>),
  /// Key that came from a word or number token.
  Word(&'a str),
}

impl<'a> ObjectKey<'a> {
  /// Consumes the key and returns its text as an owned `String`.
  ///
  /// A `Cow` that is already owned is moved out as-is; borrowed text is
  /// copied into a new `String`.
  pub fn into_string(self) -> String {
    match self {
      ObjectKey::String(text) => text.into_owned(),
      ObjectKey::Word(word) => word.to_owned(),
    }
  }
}
23
24/// Shared JSONC parser infrastructure used by both `parse_to_value` and
25/// the serde deserializer. Handles scanning, comment skipping, depth
26/// tracking, and comma/separator logic.
27pub(crate) struct JsoncParser<'a> {
28  pub scanner: Scanner<'a>,
29  #[allow(dead_code)] // used by the serde feature
30  pub text: &'a str,
31  allow_comments: bool,
32  allow_trailing_commas: bool,
33  allow_missing_commas: bool,
34  allow_loose_object_property_names: bool,
35  depth: usize,
36  pending_token: Option<Token<'a>>,
37}
38
39impl<'a> JsoncParser<'a> {
40  pub fn new(text: &'a str, options: &ParseOptions) -> Self {
41    Self {
42      scanner: Scanner::new(
43        text,
44        &ScannerOptions {
45          allow_single_quoted_strings: options.allow_single_quoted_strings,
46          allow_hexadecimal_numbers: options.allow_hexadecimal_numbers,
47          allow_unary_plus_numbers: options.allow_unary_plus_numbers,
48        },
49      ),
50      text,
51      allow_comments: options.allow_comments,
52      allow_trailing_commas: options.allow_trailing_commas,
53      allow_missing_commas: options.allow_missing_commas,
54      allow_loose_object_property_names: options.allow_loose_object_property_names,
55      depth: 0,
56      pending_token: None,
57    }
58  }
59
60  /// Scans the next non-comment token. Returns a pending token if one
61  /// was put back via `put_back`.
62  pub fn scan(&mut self) -> Result<Option<Token<'a>>, ParseError> {
63    if let Some(token) = self.pending_token.take() {
64      return Ok(Some(token));
65    }
66    loop {
67      match self.scanner.scan()? {
68        Some(Token::CommentLine(_) | Token::CommentBlock(_)) => {
69          if !self.allow_comments {
70            return Err(
71              self
72                .scanner
73                .create_error_for_current_token(ParseErrorKind::CommentsNotAllowed),
74            );
75          }
76          continue;
77        }
78        token => return Ok(token),
79      }
80    }
81  }
82
83  /// Puts a token back so the next `scan()` returns it.
84  #[cfg(feature = "serde")]
85  pub fn put_back(&mut self, token: Token<'a>) {
86    debug_assert!(self.pending_token.is_none(), "put_back called with pending token");
87    self.pending_token = Some(token);
88  }
89
90  /// Increments depth and checks the nesting limit.
91  pub fn enter_container(&mut self) -> Result<(), ParseError> {
92    self.depth += 1;
93    if self.depth > 512 {
94      self.depth -= 1;
95      Err(
96        self
97          .scanner
98          .create_error_for_current_token(ParseErrorKind::NestingDepthExceeded),
99      )
100    } else {
101      Ok(())
102    }
103  }
104
105  /// Decrements depth.
106  pub fn exit_container(&mut self) {
107    self.depth -= 1;
108  }
109
110  /// Returns an error appropriate for an unexpected token.
111  pub fn unexpected_token_error(&self, token: &Token) -> ParseError {
112    let kind = match token {
113      Token::CloseBracket => ParseErrorKind::UnexpectedCloseBracket,
114      Token::CloseBrace => ParseErrorKind::UnexpectedCloseBrace,
115      Token::Comma => ParseErrorKind::UnexpectedComma,
116      Token::Colon => ParseErrorKind::UnexpectedColon,
117      Token::Word(_) => ParseErrorKind::UnexpectedWord,
118      _ => ParseErrorKind::UnexpectedToken,
119    };
120    self.scanner.create_error_for_current_token(kind)
121  }
122
123  /// Scans the next object entry (key or close brace), handling commas
124  /// between entries. Pass `first = true` for the first entry.
125  pub fn scan_object_entry(&mut self, first: bool) -> Result<Option<ObjectKey<'a>>, ParseError> {
126    if first {
127      return self.scan_object_key();
128    }
129
130    let after_value_end = self.scanner.token_end();
131    let token = self.scan()?;
132    match token {
133      Some(Token::Comma) => {
134        let comma_range = Range::new(self.scanner.token_start(), self.scanner.token_end());
135        let key = self.scan_object_key()?;
136        if key.is_none() && !self.allow_trailing_commas {
137          return Err(
138            self
139              .scanner
140              .create_error_for_range(comma_range, ParseErrorKind::TrailingCommasNotAllowed),
141          );
142        }
143        Ok(key)
144      }
145      Some(Token::CloseBrace) => Ok(None),
146      Some(Token::String(s)) if self.allow_missing_commas => Ok(Some(ObjectKey::String(s))),
147      Some(Token::Word(s) | Token::Number(s)) if self.allow_missing_commas => {
148        if !self.allow_loose_object_property_names {
149          return Err(
150            self
151              .scanner
152              .create_error_for_current_token(ParseErrorKind::ExpectedStringObjectProperty),
153          );
154        }
155        Ok(Some(ObjectKey::Word(s)))
156      }
157      Some(Token::String(_) | Token::Word(_) | Token::Number(_)) => {
158        let range = Range::new(after_value_end, after_value_end);
159        Err(
160          self
161            .scanner
162            .create_error_for_range(range, ParseErrorKind::ExpectedComma),
163        )
164      }
165      None => Err(
166        self
167          .scanner
168          .create_error_for_current_token(ParseErrorKind::UnterminatedObject),
169      ),
170      _ => Err(
171        self
172          .scanner
173          .create_error_for_current_token(ParseErrorKind::UnexpectedTokenInObject),
174      ),
175    }
176  }
177
178  /// Scans an object property colon separator.
179  pub fn scan_object_colon(&mut self) -> Result<(), ParseError> {
180    match self.scan()? {
181      Some(Token::Colon) => Ok(()),
182      _ => Err(
183        self
184          .scanner
185          .create_error_for_current_token(ParseErrorKind::ExpectedColonAfterObjectKey),
186      ),
187    }
188  }
189
190  /// After an array element, scans for the comma/close-bracket and
191  /// returns the next token.
192  pub fn scan_array_comma(&mut self) -> Result<Option<Token<'a>>, ParseError> {
193    let token = self.scan()?;
194    if matches!(&token, Some(Token::Comma)) {
195      let comma_range = Range::new(self.scanner.token_start(), self.scanner.token_end());
196      let next = self.scan()?;
197      if matches!(&next, Some(Token::CloseBracket)) && !self.allow_trailing_commas {
198        return Err(
199          self
200            .scanner
201            .create_error_for_range(comma_range, ParseErrorKind::TrailingCommasNotAllowed),
202        );
203      }
204      Ok(next)
205    } else {
206      Ok(token)
207    }
208  }
209
210  fn scan_object_key(&mut self) -> Result<Option<ObjectKey<'a>>, ParseError> {
211    match self.scan()? {
212      Some(Token::CloseBrace) => Ok(None),
213      Some(Token::String(s)) => Ok(Some(ObjectKey::String(s))),
214      Some(Token::Word(s) | Token::Number(s)) => {
215        if !self.allow_loose_object_property_names {
216          return Err(
217            self
218              .scanner
219              .create_error_for_current_token(ParseErrorKind::ExpectedStringObjectProperty),
220          );
221        }
222        Ok(Some(ObjectKey::Word(s)))
223      }
224      None => Err(
225        self
226          .scanner
227          .create_error_for_current_token(ParseErrorKind::UnterminatedObject),
228      ),
229      _ => Err(
230        self
231          .scanner
232          .create_error_for_current_token(ParseErrorKind::UnexpectedTokenInObject),
233      ),
234    }
235  }
236}