1use decoded_char::DecodedChar;
2use locspan::{Meta, Span};
3use std::{fmt, io};
4
5mod array;
6mod boolean;
7mod null;
8mod number;
9mod object;
10mod string;
11mod value;
12
13use crate::CodeMap;
14
/// Parser options.
///
/// Each flag relaxes one check of the parser. [`Options::strict`] (also the
/// [`Default`]) clears every flag, [`Options::flexible`] sets every flag.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
pub struct Options {
    /// Leniency flag for surrogate pairs.
    ///
    /// NOTE(review): presumably lets a high surrogate escape that is not
    /// followed by a low surrogate be accepted instead of rejected; the code
    /// reading this flag is not in this file — confirm.
    pub accept_truncated_surrogate_pair: bool,

    /// Leniency flag for code points.
    ///
    /// NOTE(review): presumably lets escapes denoting invalid Unicode code
    /// points be accepted instead of rejected; the code reading this flag is
    /// not in this file — confirm.
    pub accept_invalid_codepoints: bool,
}

impl Options {
    /// Builds a set of options in which every flag has the value `lenient`.
    fn uniform(lenient: bool) -> Self {
        Self {
            accept_truncated_surrogate_pair: lenient,
            accept_invalid_codepoints: lenient,
        }
    }

    /// Strict mode: every leniency flag is `false`.
    pub fn strict() -> Self {
        Self::uniform(false)
    }

    /// Flexible mode: every leniency flag is `true`.
    pub fn flexible() -> Self {
        Self::uniform(true)
    }
}

impl Default for Options {
    /// The default options are the strict ones.
    fn default() -> Self {
        Options::strict()
    }
}
59
60pub trait Parse: Sized {
61 fn parse_slice(content: &[u8]) -> Result<(Self, CodeMap), Error> {
62 Self::parse_utf8(utf8_decode::Decoder::new(content.iter().copied()))
63 .map_err(Error::io_into_utf8)
64 }
65
66 fn parse_slice_with(content: &[u8], options: Options) -> Result<(Self, CodeMap), Error> {
67 Self::parse_utf8_with(utf8_decode::Decoder::new(content.iter().copied()), options)
68 .map_err(Error::io_into_utf8)
69 }
70
71 fn parse_str(content: &str) -> Result<(Self, CodeMap), Error> {
72 Self::parse_utf8(content.chars().map(Ok))
73 }
74
75 fn parse_str_with(content: &str, options: Options) -> Result<(Self, CodeMap), Error> {
76 Self::parse_utf8_with(content.chars().map(Ok), options)
77 }
78
79 fn parse_infallible_utf8<C>(chars: C) -> Result<(Self, CodeMap), Error>
80 where
81 C: Iterator<Item = char>,
82 {
83 Self::parse_infallible(chars.map(DecodedChar::from_utf8))
84 }
85
86 fn parse_utf8_infallible_with<C>(chars: C, options: Options) -> Result<(Self, CodeMap), Error>
87 where
88 C: Iterator<Item = char>,
89 {
90 Self::parse_infallible_with(chars.map(DecodedChar::from_utf8), options)
91 }
92
93 fn parse_utf8<C, E>(chars: C) -> Result<(Self, CodeMap), Error<E>>
94 where
95 C: Iterator<Item = Result<char, E>>,
96 {
97 Self::parse(chars.map(|c| c.map(DecodedChar::from_utf8)))
98 }
99
100 fn parse_utf8_with<C, E>(chars: C, options: Options) -> Result<(Self, CodeMap), Error<E>>
101 where
102 C: Iterator<Item = Result<char, E>>,
103 {
104 Self::parse_with(chars.map(|c| c.map(DecodedChar::from_utf8)), options)
105 }
106
107 fn parse_infallible<C>(chars: C) -> Result<(Self, CodeMap), Error>
108 where
109 C: Iterator<Item = DecodedChar>,
110 {
111 let mut parser = Parser::new(chars.map(Ok));
112 let value = Self::parse_in(&mut parser, Context::None)?.into_value();
113 Ok((value, parser.code_map))
114 }
115
116 fn parse_infallible_with<C>(chars: C, options: Options) -> Result<(Self, CodeMap), Error>
117 where
118 C: Iterator<Item = DecodedChar>,
119 {
120 let mut parser = Parser::new_with(chars.map(Ok), options);
121 let value = Self::parse_in(&mut parser, Context::None)?.into_value();
122 Ok((value, parser.code_map))
123 }
124
125 fn parse<C, E>(chars: C) -> Result<(Self, CodeMap), Error<E>>
126 where
127 C: Iterator<Item = Result<DecodedChar, E>>,
128 {
129 let mut parser = Parser::new(chars);
130 let value = Self::parse_in(&mut parser, Context::None)?.into_value();
131 Ok((value, parser.code_map))
132 }
133
134 fn parse_with<C, E>(chars: C, options: Options) -> Result<(Self, CodeMap), Error<E>>
135 where
136 C: Iterator<Item = Result<DecodedChar, E>>,
137 {
138 let mut parser = Parser::new_with(chars, options);
139 let value = Self::parse_in(&mut parser, Context::None)?.into_value();
140 Ok((value, parser.code_map))
141 }
142
143 fn parse_in<C, E>(
144 parser: &mut Parser<C, E>,
145 context: Context,
146 ) -> Result<Meta<Self, usize>, Error<E>>
147 where
148 C: Iterator<Item = Result<DecodedChar, E>>;
149}
150
/// Parser state wrapped around a fallible stream of decoded characters.
pub struct Parser<C: Iterator<Item = Result<DecodedChar, E>>, E> {
    /// Source of decoded characters.
    chars: C,

    /// Character already pulled out of `chars` by `peek_char` but not yet
    /// consumed by `next_char`.
    pending: Option<DecodedChar>,

    /// Sum of the `len()` of every character consumed so far
    /// (presumably a byte offset in the input — confirm with `DecodedChar`).
    position: usize,

    /// Parsing options.
    options: Options,

    /// Code map filled while parsing; the `Parse` entry points return it
    /// alongside the parsed value.
    code_map: CodeMap,
}
168
/// Checks whether `c` is one of the four whitespace characters recognized by
/// this parser: space, horizontal tab, carriage return or line feed.
#[inline(always)]
pub fn is_whitespace(c: char) -> bool {
    c == ' ' || c == '\t' || c == '\r' || c == '\n'
}
174
175impl<C: Iterator<Item = Result<DecodedChar, E>>, E> Parser<C, E> {
176 pub fn new(chars: C) -> Self {
177 Self {
178 chars,
179 pending: None,
180 position: 0,
181 options: Options::default(),
182 code_map: CodeMap::default(),
183 }
184 }
185
186 pub fn new_with(chars: C, options: Options) -> Self {
187 Self {
188 chars,
189 pending: None,
190 position: 0,
191 options,
192 code_map: CodeMap::default(),
193 }
194 }
195
196 fn begin_fragment(&mut self) -> usize {
197 self.code_map.reserve(self.position)
198 }
199
200 fn end_fragment(&mut self, i: usize) {
201 let entry_count = self.code_map.len();
202 let entry = self.code_map.get_mut(i).unwrap();
203 entry.span.set_end(self.position);
204 entry.volume = entry_count - i;
205 }
206
207 fn peek_char(&mut self) -> Result<Option<char>, Error<E>> {
208 match self.pending {
209 Some(c) => Ok(Some(c.chr())),
210 None => match self.chars.next() {
211 Some(Ok(c)) => {
212 self.pending = Some(c);
213 Ok(Some(c.chr()))
214 }
215 Some(Err(e)) => Err(Error::Stream(self.position, e)),
216 None => Ok(None),
217 },
218 }
219 }
220
221 fn next_char(&mut self) -> Result<(usize, Option<char>), Error<E>> {
222 let c = match self.pending.take() {
223 Some(c) => Some(c),
224 None => self
225 .chars
226 .next()
227 .transpose()
228 .map_err(|e| Error::Stream(self.position, e))?,
229 };
230
231 let p = self.position;
232 let c = c.map(|c| {
233 self.position += c.len();
234 c.chr()
235 });
236
237 Ok((p, c))
238 }
239
240 fn skip_whitespaces(&mut self) -> Result<(), Error<E>> {
241 while let Some(c) = self.peek_char()? {
242 if is_whitespace(c) {
243 self.next_char()?;
244 } else {
245 break;
246 }
247 }
248
249 Ok(())
250 }
251}
252
/// Parse error.
///
/// `E` is the error type of the underlying character stream; it defaults to
/// `Infallible` for inputs that cannot fail.
#[derive(Debug)]
pub enum Error<E = core::convert::Infallible> {
    /// Error raised by the character stream.
    ///
    /// The first parameter is the parser position when the error occurred.
    Stream(usize, E),

    /// Unexpected character, or unexpected end of the stream when the second
    /// parameter is `None`.
    ///
    /// The first parameter is the position of the character.
    Unexpected(usize, Option<char>),

    /// Invalid Unicode code point.
    ///
    /// The second parameter is the offending code point value (displayed in
    /// hexadecimal).
    InvalidUnicodeCodePoint(Span, u32),

    /// Missing low surrogate.
    ///
    /// NOTE(review): the `u16` is presumably the high surrogate that was
    /// read — confirm against the string parser.
    MissingLowSurrogate(Span, u16),

    /// Invalid low surrogate.
    ///
    /// NOTE(review): the `u16` is presumably the high surrogate and the
    /// `u32` the value found in place of the low surrogate — confirm against
    /// the string parser.
    InvalidLowSurrogate(Span, u16, u32),

    /// Invalid UTF-8 input; the parameter is the position of the error.
    InvalidUtf8(usize),
}
284
285impl<E> Error<E> {
286 #[inline(always)]
288 fn unexpected(position: usize, c: Option<char>) -> Self {
289 Self::Unexpected(position, c)
291 }
292
293 pub fn position(&self) -> usize {
294 match self {
295 Self::Stream(p, _) => *p,
296 Self::Unexpected(p, _) => *p,
297 Self::InvalidUnicodeCodePoint(span, _) => span.start(),
298 Self::MissingLowSurrogate(span, _) => span.start(),
299 Self::InvalidLowSurrogate(span, _, _) => span.start(),
300 Self::InvalidUtf8(p) => *p,
301 }
302 }
303
304 pub fn span(&self) -> Span {
305 match self {
306 Self::Stream(p, _) => Span::new(*p, *p),
307 Self::Unexpected(p, _) => Span::new(*p, *p),
308 Self::InvalidUnicodeCodePoint(span, _) => *span,
309 Self::MissingLowSurrogate(span, _) => *span,
310 Self::InvalidLowSurrogate(span, _, _) => *span,
311 Self::InvalidUtf8(p) => Span::new(*p, *p),
312 }
313 }
314}
315
316impl Error<io::Error> {
317 fn io_into_utf8(self) -> Error {
318 match self {
319 Self::Stream(p, _) => Error::InvalidUtf8(p),
320 Self::Unexpected(p, e) => Error::Unexpected(p, e),
321 Self::InvalidUnicodeCodePoint(s, e) => Error::InvalidUnicodeCodePoint(s, e),
322 Self::MissingLowSurrogate(s, e) => Error::MissingLowSurrogate(s, e),
323 Self::InvalidLowSurrogate(s, a, b) => Error::InvalidLowSurrogate(s, a, b),
324 Self::InvalidUtf8(p) => Error::InvalidUtf8(p),
325 }
326 }
327}
328
329impl<E: fmt::Display> fmt::Display for Error<E> {
330 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
331 match self {
332 Self::Stream(_, e) => e.fmt(f),
333 Self::Unexpected(_, Some(c)) => write!(f, "unexpected character `{}`", c),
334 Self::Unexpected(_, None) => write!(f, "unexpected end of file"),
335 Self::InvalidUnicodeCodePoint(_, c) => write!(f, "invalid Unicode code point {:x}", *c),
336 Self::MissingLowSurrogate(_, _) => write!(f, "missing low surrogate"),
337 Self::InvalidLowSurrogate(_, _, _) => write!(f, "invalid low surrogate"),
338 Self::InvalidUtf8(_) => write!(f, "invalid UTF-8"),
339 }
340 }
341}
342
343impl<E: 'static + std::error::Error> std::error::Error for Error<E> {
344 fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
345 match self {
346 Self::Stream(_, e) => Some(e),
347 _ => None,
348 }
349 }
350}
351
/// Place where a value is being parsed.
///
/// `Context::follows` uses it to decide which characters may come right
/// after the value.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub enum Context {
    /// No enclosing construct; this is what the provided `Parse` entry
    /// points pass to `parse_in`. Only whitespace may follow.
    None,
    /// Whitespace, `,` or `]` may follow.
    Array,
    /// Whitespace or `:` may follow.
    ObjectKey,
    /// Whitespace, `,` or `}` may follow.
    ObjectValue,
}
362
363impl Context {
364 pub fn follows(&self, c: char) -> bool {
366 match self {
367 Self::None => is_whitespace(c),
368 Self::Array => is_whitespace(c) || matches!(c, ',' | ']'),
369 Self::ObjectKey => is_whitespace(c) || matches!(c, ':'),
370 Self::ObjectValue => is_whitespace(c) || matches!(c, ',' | '}'),
371 }
372 }
373}