Skip to main content

ocpi_tariffs/json/
parser.rs

//! A JSON parser built to keep track of where each element came from in the input.
2use std::borrow::{Borrow, Cow};
3use std::fmt;
4use std::iter::Peekable;
5use std::num::TryFromIntError;
6use std::str::Bytes;
7use std::sync::atomic::AtomicUsize;
8use std::sync::Arc;
9
10use json_tools::{Buffer, BufferType};
11use tracing::{debug, trace};
12
13use crate::{warning, Caveat};
14
15use super::{
16    decode::{self, unescape_str},
17    Element, Field, ObjectKind, PathNode, PathNodeRef, Value, ValueKind,
18};
19use super::{ElemId, Path};
20
/// The token stream consumed by the [`Parser`]: a `json_tools::Lexer` over the bytes of the
/// source `&str`, wrapped in a [`Peekable`].
///
/// We peek at the next `Token` when asserting on trailing commas.
type Lexer<'buf> = Peekable<json_tools::Lexer<Bytes<'buf>>>;
23
24/// Parse the JSON into a tree of [`Element`]s.
25pub(crate) fn parse(json: &str) -> Result<Element<'_>, Error> {
26    let parser = Parser::new(json);
27
28    // When just parsing the JSON into an `Element` we only care about the final event
29    // when the JSON has been completely transformed into a root element.
30    for event in parser {
31        if let Event::Complete(element) = event? {
32            return Ok(element);
33        }
34    }
35
36    Err(ErrorKind::UnexpectedEOF
37        .into_partial_error_without_token()
38        .with_root_path())
39}
40
/// A parsing event emitted for each call to the `<Parser as Iterator>::next` function.
#[derive(Debug)]
pub(crate) enum Event<'buf> {
    /// An [`Element`] has been opened and its construction is in progress.
    Open {
        /// Whether the opened [`Element`] is an object or an array.
        kind: ObjectKind,
        /// The path node that was handed to the newly opened [`Element`].
        parent_path: PathNodeRef<'buf>,
    },

    /// An [`Element`] has been created and added to its parent [`Element`].
    ///
    /// If the kind is `Array` or `Object` that means that this element is closed: its
    /// construction is complete.
    Element {
        /// The kind of JSON value the [`Element`] is.
        kind: ValueKind,
        /// The path to the parent [`Element`].
        parent_path: PathNodeRef<'buf>,
    },

    /// The parse has completed creating the tree of [`Element`]s.
    Complete(Element<'buf>),
}
63
/// The context needed to parse a single chunk of JSON.
///
/// The `Parser` is an [`Iterator`] that yields one [`Event`] per step.
pub(crate) struct Parser<'buf> {
    /// Used to assign a unique [`ElemId`] to each [`Element`].
    ///
    /// The counter is incremented each time an Id is handed out.
    elem_count: AtomicUsize,

    /// True if the `Parser` is complete.
    ///
    /// Any further calls to [`Parser::next`] will return `None`.
    complete: bool,

    /// The source JSON we're parsing.
    json: &'buf str,

    /// The JSON lexer.
    lexer: Lexer<'buf>,

    /// The pool with pre-allocated `Path`s.
    path_pool: PathPool<'buf>,

    /// The stack to track nested objects.
    stack: Stack<'buf>,

    /// The previous token seen.
    ///
    /// It is recorded after a value has been added to a container and is attached to the error
    /// if the input ends unexpectedly.
    token: Option<Token>,
}
89
/// Define our own `TokenType` so `Clone` can be defined on it.
///
/// The variants mirror `json_tools::TokenType` one to one.
///
/// This can be removed when `json_tools::TokenType` impl's `Clone`.
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd)]
pub enum TokenType {
    /// `false`
    BooleanFalse,
    /// `true`
    BooleanTrue,

    /// `]`
    BracketClose,
    /// `[`
    BracketOpen,

    /// `:`
    Colon,

    /// `,`
    Comma,

    /// `}`
    CurlyClose,
    /// `{`
    CurlyOpen,

    /// The type of the token could not be identified.
    /// Should be removed if this lexer is ever to be feature complete.
    Invalid,

    /// `null`
    Null,

    /// A Number, like `1.1234` or `123` or `-0.0` or `-1` or `.0` or `.`
    Number,

    /// A JSON string, like `"foo"`
    String,
}
129
130impl TokenType {
131    fn as_str(self) -> &'static str {
132        match self {
133            TokenType::BooleanFalse => "false",
134            TokenType::BooleanTrue => "true",
135            TokenType::BracketClose => "]",
136            TokenType::BracketOpen => "[",
137            TokenType::Colon => ":",
138            TokenType::Comma => ",",
139            TokenType::CurlyClose => "}",
140            TokenType::CurlyOpen => "{",
141            TokenType::Invalid => "<invalid>",
142            TokenType::Null => "null",
143            TokenType::Number => "<number>",
144            TokenType::String => "<string>",
145        }
146    }
147}
148
149impl fmt::Display for TokenType {
150    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
151        f.write_str(self.as_str())
152    }
153}
154
155impl From<json_tools::TokenType> for TokenType {
156    fn from(value: json_tools::TokenType) -> Self {
157        match value {
158            json_tools::TokenType::BooleanFalse => TokenType::BooleanFalse,
159            json_tools::TokenType::BooleanTrue => TokenType::BooleanTrue,
160            json_tools::TokenType::BracketClose => TokenType::BracketClose,
161            json_tools::TokenType::BracketOpen => TokenType::BracketOpen,
162            json_tools::TokenType::CurlyClose => TokenType::CurlyClose,
163            json_tools::TokenType::CurlyOpen => TokenType::CurlyOpen,
164            json_tools::TokenType::Colon => TokenType::Colon,
165            json_tools::TokenType::Comma => TokenType::Comma,
166            json_tools::TokenType::Invalid => TokenType::Invalid,
167            json_tools::TokenType::Null => TokenType::Null,
168            json_tools::TokenType::Number => TokenType::Number,
169            json_tools::TokenType::String => TokenType::String,
170        }
171    }
172}
173
/// A lexical token, identifying its kind and span.
///
/// We define our own `Token` as the `json_tools::Token` defines a `Buffer` that can be heap allocated
/// or a `Span`. We only use the `Span` variant.
///
/// Our `Token` can also impl `Copy` and `Clone` as the size and semantics are acceptable.
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd)]
pub struct Token {
    /// The exact type of the token.
    pub kind: TokenType,

    /// The span allows to reference back into the source byte stream
    /// to obtain the string making up the token.
    pub span: Span,
}
190
191impl Token {
192    /// Return true is the token is a opening brace.
193    fn is_opening(&self) -> bool {
194        matches!(self.kind, TokenType::CurlyOpen | TokenType::BracketOpen)
195    }
196
197    /// Return true is the token is a closing brace.
198    fn is_closing(&self) -> bool {
199        matches!(self.kind, TokenType::CurlyClose | TokenType::BracketClose)
200    }
201
202    /// Return true is the token is a comma.
203    fn is_comma(&self) -> bool {
204        matches!(self.kind, TokenType::Comma)
205    }
206}
207
208impl fmt::Display for Token {
209    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
210        write!(
211            f,
212            "token: {}, ({},{})",
213            self.kind, self.span.start, self.span.end
214        )
215    }
216}
217
218impl TryFrom<json_tools::Token> for Token {
219    type Error = PartialError;
220
221    fn try_from(token: json_tools::Token) -> Result<Self, Self::Error> {
222        let json_tools::Token { kind, buf } = token;
223        let kind = kind.into();
224        let Buffer::Span(span) = &buf else {
225            return Err(InternalError::BufferType.into_partial_error(None));
226        };
227
228        let span = span
229            .try_into()
230            .map_err(|err| InternalError::from(err).into_partial_error(None))?;
231
232        Ok(Self { kind, span })
233    }
234}
235
236impl TryFrom<&json_tools::Token> for Token {
237    type Error = PartialError;
238
239    fn try_from(token: &json_tools::Token) -> Result<Self, Self::Error> {
240        let json_tools::Token { kind, buf } = token;
241        let kind = kind.clone().into();
242        let Buffer::Span(span) = &buf else {
243            return Err(InternalError::BufferType.into_partial_error(None));
244        };
245
246        let span = span
247            .try_into()
248            .map_err(|err| InternalError::from(err).into_partial_error(None))?;
249
250        Ok(Self { kind, span })
251    }
252}
253
254impl<'buf> Parser<'buf> {
255    pub fn new(json: &'buf str) -> Self {
256        let lexer = json_tools::Lexer::new(json.bytes(), BufferType::Span).peekable();
257
258        Self {
259            elem_count: AtomicUsize::new(0),
260            complete: false,
261            json,
262            lexer,
263            path_pool: PathPool::default(),
264            stack: Stack::new(),
265            token: None,
266        }
267    }
268
269    fn next_elem_id(&self) -> ElemId {
270        let id = self
271            .elem_count
272            .fetch_add(1, std::sync::atomic::Ordering::Relaxed);
273
274        ElemId(id)
275    }
276
277    fn expect_next(&mut self) -> Result<Token, PartialError> {
278        let Some(token) = self.lexer.next() else {
279            return Err(ErrorKind::UnexpectedEOF.into_partial_error(self.token.take()));
280        };
281
282        let token = token.try_into()?;
283
284        Ok(token)
285    }
286
287    /// Return an `Err` if the next token is not the expected.
288    fn expect_token(&mut self, token_type: TokenType) -> Result<(), PartialError> {
289        let Some(token) = self.lexer.next() else {
290            return Err(ErrorKind::UnexpectedEOF.into_partial_error(self.token.take()));
291        };
292
293        let token: Token = token.try_into()?;
294
295        if token.kind == token_type {
296            Ok(())
297        } else {
298            Err(unexpected_token(token))
299        }
300    }
301
302    fn next_event(&mut self) -> Result<Option<Event<'buf>>, Error> {
303        if self.complete {
304            return Ok(None);
305        }
306
307        let head = self.stack.pop_head();
308
309        match head {
310            None => {
311                let token = self.expect_next().with_root_path()?;
312
313                trace!(?token);
314                self.comma_checks(&token).with_root_path()?;
315
316                match token.kind {
317                    TokenType::CurlyOpen => {
318                        let parent_path = self.path_pool.root();
319                        self.stack.push_new_object(
320                            self.next_elem_id(),
321                            Arc::clone(&parent_path),
322                            &token,
323                        );
324                        Ok(Some(Event::Open {
325                            kind: ObjectKind::Object,
326                            parent_path,
327                        }))
328                    }
329                    TokenType::BracketOpen => {
330                        let parent_path = self.path_pool.root();
331                        self.stack.push_new_array(
332                            self.next_elem_id(),
333                            Arc::clone(&parent_path),
334                            &token,
335                        );
336                        Ok(Some(Event::Open {
337                            kind: ObjectKind::Array,
338                            parent_path,
339                        }))
340                    }
341                    TokenType::Number => {
342                        let value = Value::Number(token_str(self.json, &token).with_root_path()?);
343                        self.exit_with_value(token, value).with_root_path()
344                    }
345                    TokenType::Null => self.exit_with_value(token, Value::Null).with_root_path(),
346                    TokenType::String => {
347                        let value =
348                            Value::String(token_str_as_string(self.json, token).with_root_path()?);
349                        self.exit_with_value(token, value).with_root_path()
350                    }
351                    TokenType::BooleanTrue => {
352                        self.exit_with_value(token, Value::True).with_root_path()
353                    }
354                    TokenType::BooleanFalse => {
355                        self.exit_with_value(token, Value::False).with_root_path()
356                    }
357                    _ => Err(unexpected_token(token).with_root_path()),
358                }
359            }
360            Some(mut head) => {
361                let token = self.expect_next().with_head(&head)?;
362
363                trace!(?token, head = ?head.elem_type);
364                let token = if self.comma_checks(&token).with_head(&head)? {
365                    self.expect_next().with_head(&head)?
366                } else {
367                    token
368                };
369
370                let (value, token, path) = match head.elem_type {
371                    ObjectKind::Object => {
372                        let key = match token.kind {
373                            TokenType::String => {
374                                token_str_as_string(self.json, token).with_head(&head)?
375                            }
376                            TokenType::CurlyClose => {
377                                let event = self.close_element(head, &token)?;
378                                return Ok(event);
379                            }
380                            _ => return Err(unexpected_token(token).with_root_path()),
381                        };
382
383                        self.expect_token(TokenType::Colon).with_head(&head)?;
384                        let token = self.expect_next().with_head(&head)?;
385
386                        let value = match token.kind {
387                            TokenType::CurlyOpen => {
388                                let parent_path = head.parent_is_object(&mut self.path_pool, key);
389                                self.stack.push_head(head);
390                                self.stack.push_new_object(
391                                    self.next_elem_id(),
392                                    Arc::clone(&parent_path),
393                                    &token,
394                                );
395                                return Ok(Some(Event::Open {
396                                    kind: ObjectKind::Object,
397                                    parent_path,
398                                }));
399                            }
400                            TokenType::BracketOpen => {
401                                let parent_path = head.parent_is_object(&mut self.path_pool, key);
402                                self.stack.push_head(head);
403                                self.stack.push_new_array(
404                                    self.next_elem_id(),
405                                    Arc::clone(&parent_path),
406                                    &token,
407                                );
408                                return Ok(Some(Event::Open {
409                                    kind: ObjectKind::Array,
410                                    parent_path,
411                                }));
412                            }
413                            TokenType::CurlyClose => {
414                                let event = self.close_element(head, &token)?;
415                                return Ok(event);
416                            }
417                            TokenType::String => Value::String(
418                                token_str_as_string(self.json, token).with_head(&head)?,
419                            ),
420                            TokenType::Number => {
421                                Value::Number(token_str(self.json, &token).with_head(&head)?)
422                            }
423                            TokenType::Null => Value::Null,
424                            TokenType::BooleanTrue => Value::True,
425                            TokenType::BooleanFalse => Value::False,
426                            _ => return Err(unexpected_token(token).with_head(&head)),
427                        };
428
429                        (
430                            value,
431                            token,
432                            head.parent_is_object(&mut self.path_pool, key),
433                        )
434                    }
435                    ObjectKind::Array => {
436                        let value = match token.kind {
437                            TokenType::CurlyOpen => {
438                                let parent_path = head.parent_is_array(&mut self.path_pool);
439                                self.stack.push_head(head);
440                                self.stack.push_new_object(
441                                    self.next_elem_id(),
442                                    Arc::clone(&parent_path),
443                                    &token,
444                                );
445                                return Ok(Some(Event::Open {
446                                    kind: ObjectKind::Object,
447                                    parent_path,
448                                }));
449                            }
450                            TokenType::BracketOpen => {
451                                let parent_path = head.parent_is_array(&mut self.path_pool);
452                                self.stack.push_head(head);
453                                self.stack.push_new_array(
454                                    self.next_elem_id(),
455                                    Arc::clone(&parent_path),
456                                    &token,
457                                );
458                                return Ok(Some(Event::Open {
459                                    kind: ObjectKind::Array,
460                                    parent_path,
461                                }));
462                            }
463                            TokenType::BracketClose => {
464                                let event = self.close_element(head, &token)?;
465                                return Ok(event);
466                            }
467
468                            TokenType::String => Value::String(
469                                token_str_as_string(self.json, token).with_head(&head)?,
470                            ),
471                            TokenType::Number => {
472                                Value::Number(token_str(self.json, &token).with_head(&head)?)
473                            }
474                            TokenType::Null => Value::Null,
475                            TokenType::BooleanTrue => Value::True,
476                            TokenType::BooleanFalse => Value::False,
477                            _ => return Err(unexpected_token(token).with_head(&head)),
478                        };
479                        (value, token, head.parent_is_array(&mut self.path_pool))
480                    }
481                };
482
483                let event = Event::Element {
484                    kind: value.kind(),
485                    parent_path: Arc::clone(&path),
486                };
487                head.push_field(self.next_elem_id(), path, value, &token);
488
489                let peek_token = self.peek(&token).with_head(&head)?;
490
491                if !(peek_token.is_comma() || peek_token.is_closing()) {
492                    return Err(unexpected_token(peek_token).with_head(&head));
493                }
494
495                self.token.replace(token);
496                self.stack.push_head(head);
497
498                Ok(Some(event))
499            }
500        }
501    }
502
503    /// Close a [`PartialElement`] which creates an [`Element`] and returns an [`Event`]
504    fn close_element(
505        &mut self,
506        head: PartialElement<'buf>,
507        token: &Token,
508    ) -> Result<Option<Event<'buf>>, Error> {
509        let event = self.stack.head_into_element(head, token);
510
511        match event {
512            Pop::Element { kind, parent_path } => Ok(Some(Event::Element { kind, parent_path })),
513            Pop::Complete(element) => {
514                if let Some(token) = self.lexer.next() {
515                    let token = token.try_into().with_root_path()?;
516                    return Err(unexpected_token(token).with_root_path());
517                }
518
519                Ok(Some(Event::Complete(element)))
520            }
521        }
522    }
523
524    fn exit_with_value(
525        &mut self,
526        token: Token,
527        value: Value<'buf>,
528    ) -> Result<Option<Event<'buf>>, PartialError> {
529        self.complete = true;
530        let span = element_span(&token, 0);
531        let elem = Element::new(self.next_elem_id(), Arc::new(PathNode::Root), span, value);
532
533        if let Some(token) = self.lexer.next() {
534            let token = token.try_into()?;
535            return Err(unexpected_token(token));
536        }
537
538        Ok(Some(Event::Complete(elem)))
539    }
540
541    fn peek(&mut self, token: &Token) -> Result<Token, PartialError> {
542        let Some(peek_token) = self.lexer.peek() else {
543            return Err(ErrorKind::UnexpectedEOF.into_partial_error(Some(*token)));
544        };
545
546        let peek_token = peek_token.try_into()?;
547        Ok(peek_token)
548    }
549
550    /// Perform comma position checks
551    ///
552    /// Return `Err(unexpected)` if a trailing or rogue comma is found.
553    fn comma_checks(&mut self, token: &Token) -> Result<bool, PartialError> {
554        trace!(?token, "comma_checks");
555        let is_comma = token.is_comma();
556
557        if is_comma {
558            let peek_token = self.peek(token)?;
559
560            // A comma can only be followed by an opening brace or a value.
561            if peek_token.is_closing() {
562                return Err(unexpected_token(*token));
563            }
564
565            if peek_token.is_comma() {
566                return Err(unexpected_token(peek_token));
567            }
568        } else if token.is_opening() {
569            let peek_token = self.peek(token)?;
570
571            // An opening brace should not be followed by a comma.
572            if peek_token.is_comma() {
573                return Err(unexpected_token(peek_token));
574            }
575        }
576
577        Ok(is_comma)
578    }
579}
580
581/// Create an [`PartialError`] with [`ErrorKind::UnexpectedToken`].
582#[track_caller]
583fn unexpected_token(token: Token) -> PartialError {
584    ErrorKind::UnexpectedToken.into_partial_error(Some(token))
585}
586
587impl<'buf> Iterator for Parser<'buf> {
588    type Item = Result<Event<'buf>, Error>;
589
590    fn next(&mut self) -> Option<Self::Item> {
591        match self.next_event() {
592            Ok(event) => event.map(Ok),
593            Err(err) => {
594                self.complete = true;
595                Some(Err(err))
596            }
597        }
598    }
599}
600
/// A partial `Element` that we descend into to parse its child `Element`s.
#[derive(Debug)]
struct PartialElement<'buf> {
    /// The Id of the [`Element`] to be created.
    elem_id: ElemId,

    /// The type of [`Element`].
    elem_type: ObjectKind,

    /// The child [`Element`]s.
    ///
    /// This is filled as we parse the current JSON [`Element`].
    elements: Vec<Element<'buf>>,

    /// The path up to the [`Element`].
    path: PathNodeRef<'buf>,

    /// The index of the [`Element`]'s first byte.
    span_start: usize,
}
621
622impl<'buf> PartialElement<'buf> {
623    fn parent_is_object(
624        &self,
625        path_pool: &mut PathPool<'buf>,
626        key: RawStr<'buf>,
627    ) -> PathNodeRef<'buf> {
628        path_pool.object(Arc::clone(&self.path), key)
629    }
630
631    fn parent_is_array(&self, path_pool: &mut PathPool<'buf>) -> PathNodeRef<'buf> {
632        path_pool.array(Arc::clone(&self.path), self.elements.len())
633    }
634
635    fn push_field(
636        &mut self,
637        elem_id: ElemId,
638        path: PathNodeRef<'buf>,
639        value: Value<'buf>,
640        token: &Token,
641    ) {
642        let span = element_span(token, token.span.start);
643        let elem = Element::new(elem_id, path, span, value);
644        self.elements.push(elem);
645    }
646
647    /// Resolve the `PartialElement` to an `Element`.
648    fn into_element(self, token: &Token) -> Element<'buf> {
649        let span = element_span(token, self.span_start);
650
651        let PartialElement {
652            elem_type,
653            span_start: _,
654            elements,
655            path,
656            elem_id,
657        } = self;
658
659        let value = match elem_type {
660            ObjectKind::Object => {
661                let fields = elements.into_iter().map(Field).collect();
662                Value::Object(fields)
663            }
664            ObjectKind::Array => Value::Array(elements),
665        };
666
667        Element::new(elem_id, path, span, value)
668    }
669}
670
/// A pool of pre-allocated path nodes.
///
/// `Path`s are added and never removed.
struct PathPool<'buf> {
    /// The index of the most recently assigned slot in `items`.
    ///
    /// Slot `0` is never reassigned and is handed out as the root path.
    index: usize,

    /// The slots: assigned paths up to and including `index`, default values after it.
    items: Vec<PathNodeRef<'buf>>,
}
676
677impl Default for PathPool<'_> {
678    fn default() -> Self {
679        Self::with_capacity(1000)
680    }
681}
682
683impl<'buf> PathPool<'buf> {
684    fn with_capacity(capacity: usize) -> Self {
685        let capacity = capacity.max(1);
686        let mut items = Vec::with_capacity(capacity);
687        items.resize_with(capacity, Default::default);
688
689        Self { index: 0, items }
690    }
691
692    #[expect(
693        clippy::indexing_slicing,
694        reason = "The root Path is added in the constructor and the capacity is always at least 1"
695    )]
696    fn root(&self) -> PathNodeRef<'buf> {
697        Arc::clone(&self.items[0])
698    }
699
700    /// Add a new `Path::Array` with the given index.
701    fn array(&mut self, parent: PathNodeRef<'buf>, index: usize) -> PathNodeRef<'buf> {
702        self.push(PathNode::Array { parent, index })
703    }
704
705    /// Add a new `Path::Object` with the given index.
706    fn object(&mut self, parent: PathNodeRef<'buf>, key: RawStr<'buf>) -> PathNodeRef<'buf> {
707        self.push(PathNode::Object { parent, key })
708    }
709
710    #[expect(clippy::indexing_slicing, reason = "Paths are only added")]
711    fn push(&mut self, new_path: PathNode<'buf>) -> PathNodeRef<'buf> {
712        const GROWTH_FACTOR: usize = 2;
713
714        let Self { index, items } = self;
715        let next_index = *index + 1;
716
717        if next_index >= items.len() {
718            items.reserve(items.len() * GROWTH_FACTOR);
719            items.resize_with(items.capacity(), Default::default);
720        }
721
722        let path = &mut items[next_index];
723        debug_assert_eq!(Arc::strong_count(path), 1, "Paths are only added");
724        let path = Arc::get_mut(path).expect("Paths are only added");
725        *path = new_path;
726
727        let path = Arc::clone(&items[next_index]);
728        *index = next_index;
729        path
730    }
731}
732
/// The `Span` defines the range of bytes that delimits a JSON `Element`.
#[derive(Copy, Clone, Debug, Default, Eq, PartialEq, Ord, PartialOrd)]
pub struct Span {
    /// Index of the first byte
    pub start: usize,

    /// Index one past the last byte
    pub end: usize,
}
742
743impl TryFrom<&json_tools::Span> for Span {
744    type Error = TryFromIntError;
745
746    fn try_from(span: &json_tools::Span) -> Result<Self, Self::Error> {
747        let json_tools::Span { first, end } = span;
748        let start = usize::try_from(*first)?;
749        let end = usize::try_from(*end)?;
750        Ok(Span { start, end })
751    }
752}
753
/// The stack of containers that are currently open, innermost last.
struct Stack<'buf>(Vec<PartialElement<'buf>>);
755
/// The outcome of converting the head of the [`Stack`] into an [`Element`].
enum Pop<'buf> {
    /// An [`Element`] has been created and added to its parent [`Element`].
    Element {
        /// The kind of JSON value the created [`Element`] is.
        kind: ValueKind,
        /// The path node of the created [`Element`].
        parent_path: PathNodeRef<'buf>,
    },

    /// The parse has completed creating the tree of [`Element`]s.
    Complete(Element<'buf>),
}
766
767impl<'buf> Stack<'buf> {
768    fn new() -> Self {
769        Self(vec![])
770    }
771
772    /// The head `PartialElement` is popped off the stack temporarily to avoid lifetime issues if the
773    /// stack `Vec` contains it.
774    fn pop_head(&mut self) -> Option<PartialElement<'buf>> {
775        self.0.pop()
776    }
777
778    /// The head `PartialElement` is popped off the stack temporarily to avoid lifetime issues if the
779    /// stack `Vec` contains it.
780    fn push_head(&mut self, head: PartialElement<'buf>) {
781        self.0.push(head);
782    }
783
784    /// Convert the head `PartialElement` into an `Element` using the parent to form the path.
785    fn head_into_element(&mut self, head: PartialElement<'buf>, token: &Token) -> Pop<'buf> {
786        let elem = head.into_element(token);
787
788        if let Some(parent) = self.0.last_mut() {
789            let event = Pop::Element {
790                kind: elem.value.kind(),
791                parent_path: elem.path_node(),
792            };
793            parent.elements.push(elem);
794            event
795        } else {
796            Pop::Complete(elem)
797        }
798    }
799
800    fn push_new_object(&mut self, elem_id: ElemId, parent_path: PathNodeRef<'buf>, token: &Token) {
801        self.push_new_elem(elem_id, parent_path, token, ObjectKind::Object);
802    }
803
804    fn push_new_array(&mut self, elem_id: ElemId, parent_path: PathNodeRef<'buf>, token: &Token) {
805        self.push_new_elem(elem_id, parent_path, token, ObjectKind::Array);
806    }
807
808    fn push_new_elem(
809        &mut self,
810        elem_id: ElemId,
811        parent_path: PathNodeRef<'buf>,
812        token: &Token,
813        elem_type: ObjectKind,
814    ) {
815        let partial = PartialElement {
816            elements: vec![],
817            elem_type,
818            path: parent_path,
819            span_start: token.span.start,
820            elem_id,
821        };
822        self.0.push(partial);
823    }
824}
825
/// A parsing Error that keeps track of the token being parsed when the Error occurred and
/// the slice of JSON surrounding the Error location.
///
/// The details are boxed, which keeps the `Error` itself the size of a pointer.
pub struct Error(Box<ErrorImpl>);
829
830impl warning::Kind for Error {
831    fn id(&self) -> Cow<'static, str> {
832        match self.0.kind {
833            ErrorKind::Internal(_) => "internal".into(),
834            ErrorKind::UnexpectedEOF => "unexpected_eof".into(),
835            ErrorKind::UnexpectedToken => "unexpected_token".to_string().into(),
836        }
837    }
838}
839
840impl fmt::Debug for Error {
841    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
842        fmt::Debug::fmt(&self.0, f)
843    }
844}
845
846impl From<ErrorImpl> for Error {
847    fn from(err: ErrorImpl) -> Self {
848        Self(err.into())
849    }
850}
851
/// The details of an [`Error`].
struct ErrorImpl {
    /// The kind of error that occurred.
    kind: ErrorKind,

    /// The location the [`Error`] happened in the source code.
    loc: &'static std::panic::Location<'static>,

    /// The path to the [`Element`] the error occurred in.
    path: Path,

    /// The span of the JSON string the error occurred in.
    span: Span,

    /// The token being parsed at the time of the [`Error`].
    token: Option<Token>,
}
868
869impl fmt::Debug for ErrorImpl {
870    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
871        f.debug_struct("Error")
872            .field("kind", &self.kind)
873            .field("loc", &self.loc)
874            .field("path", &self.path)
875            .field("span", &self.span)
876            .field("token", &self.token)
877            .finish()
878    }
879}
880
881impl Error {
882    /// The kind of error that occurred.
883    pub fn kind(&self) -> &ErrorKind {
884        &self.0.kind
885    }
886
887    /// The path to the element the error occurred in.
888    pub fn path(&self) -> &Path {
889        &self.0.path
890    }
891
892    /// The span of the [`Element`] the error occurred in.
893    pub fn span(&self) -> Span {
894        self.0.span
895    }
896
897    pub fn token(&self) -> Option<&Token> {
898        self.0.token.as_ref()
899    }
900
901    /// Break the Error into it's constituent parts.
902    pub fn into_parts(self) -> (ErrorKind, Path, Span) {
903        let ErrorImpl {
904            kind,
905            loc: _,
906            path,
907            span,
908            token: _,
909        } = *self.0;
910        (kind, path, span)
911    }
912
913    /// Convert the Error into a more comprehensive report using the source JSON to provide
914    /// human readable context.
915    pub fn into_report(self, json: &str) -> ErrorReport<'_> {
916        ErrorReport::from_error(self, json)
917    }
918}
919
/// A more comprehensive report on the [`Error`] using the source JSON `&str` to provide
/// human readable context.
///
/// The report borrows its context slices from the source JSON, hence the `'buf` lifetime.
#[derive(Debug)]
pub struct ErrorReport<'buf> {
    /// The [`Error`] that occurred.
    error: Error,

    /// The slice of JSON as defined by the `Error::span`.
    json_context: &'buf str,

    /// The slice of JSON as defined by the `Error::span` and expanded out to the
    /// start and end of the line.
    expanded_json_context: &'buf str,

    /// The line and col indices of the start and end of the span.
    span_bounds: SpanBounds,
}
937
938impl<'buf> ErrorReport<'buf> {
939    /// Create the `ErrorReport` from the `Error` and source `&str`.
940    fn from_error(error: Error, json: &'buf str) -> Self {
941        let span = error.span();
942        debug!(?error, ?span, json, "from_error");
943        let json_context = &json.get(span.start..span.end).unwrap_or(json);
944
945        let start = {
946            let s = &json.get(0..span.start).unwrap_or_default();
947            line_col(s)
948        };
949        let end = {
950            let relative_end = line_col(json_context);
951            let line = start.line + relative_end.line;
952
953            if start.line == line {
954                LineCol {
955                    line,
956                    col: start.col + relative_end.col,
957                }
958            } else {
959                LineCol {
960                    line,
961                    col: relative_end.col,
962                }
963            }
964        };
965        let (prev, next) = find_expanded_newlines(json, span.start);
966        let expanded_json_context = &json.get(prev..next).unwrap_or(json_context);
967
968        let span_bounds = SpanBounds { start, end };
969
970        Self {
971            error,
972            json_context,
973            expanded_json_context,
974            span_bounds,
975        }
976    }
977
978    /// Return the slice of JSON as defined by the `Error::span`.
979    pub fn json_context(&self) -> &'buf str {
980        self.json_context
981    }
982
983    /// Return the slice of JSON as defined by the `Error::span` and expanded out to the
984    /// start and end of the line.
985    pub fn expand_json_context(&self) -> &'buf str {
986        self.expanded_json_context
987    }
988
989    /// Return the line and col number of each end of the span
990    pub fn span_bounds(&self) -> &SpanBounds {
991        &self.span_bounds
992    }
993
994    /// Discard the `Report` and take ownership of the `Error`.
995    pub fn into_error(self) -> Error {
996        self.error
997    }
998}
999
/// Return the byte range `(start, end)` of the line that contains `byte_index`.
///
/// * `start` is the index just after the closest `\n` before `byte_index`, or `0` if there
///   is no preceding newline.
/// * `end` is the index of the first `\n` at or after `byte_index`, or `json.len()` if there
///   is no following newline.
///
/// A `byte_index` beyond the end of `json` is clamped to `json.len()`. The search is done on
/// bytes, so a `byte_index` that is not on a char boundary is also handled. As `\n` is a single
/// byte char, both of the returned indices are always on a char boundary.
fn find_expanded_newlines(json: &str, byte_index: usize) -> (usize, usize) {
    let bytes = json.as_bytes();
    let index = byte_index.min(bytes.len());
    let (pre, post) = bytes.split_at(index);

    // The line starts one byte after the preceding newline.
    let prev = pre
        .iter()
        .rposition(|b| *b == b'\n')
        .map_or(0, |idx| idx + 1);

    // The position found is relative to `post`, so it is offset by the length of `pre`.
    let next = post
        .iter()
        .position(|b| *b == b'\n')
        .map_or(bytes.len(), |idx| idx + index);

    (prev, next)
}
1015
/// The line and col indices of the start and end of the span.
///
/// Both ends are expressed as a zero based [`LineCol`].
#[derive(Clone, Debug)]
pub struct SpanBounds {
    /// The start of the `Span` expressed as line and column index.
    pub start: LineCol,

    /// The end of the `Span` expressed as line and column index.
    pub end: LineCol,
}
1025
/// A file location expressed as line and column.
///
/// Both indices are zero based. The `Display` impl formats the location as `line:col`.
#[derive(Clone, Debug)]
pub struct LineCol {
    /// The line index is 0 based.
    pub line: u32,

    /// The col index is 0 based.
    pub col: u32,
}
1035
1036impl From<(u32, u32)> for LineCol {
1037    fn from(value: (u32, u32)) -> Self {
1038        Self {
1039            line: value.0,
1040            col: value.1,
1041        }
1042    }
1043}
1044
1045impl From<LineCol> for (u32, u32) {
1046    fn from(value: LineCol) -> Self {
1047        (value.line, value.col)
1048    }
1049}
1050
1051impl PartialEq<(u32, u32)> for LineCol {
1052    fn eq(&self, other: &(u32, u32)) -> bool {
1053        self.line == other.0 && self.col == other.1
1054    }
1055}
1056
1057impl fmt::Display for LineCol {
1058    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1059        write!(f, "{}:{}", self.line, self.col)
1060    }
1061}
1062
1063/// Return the line and column indices of the end of the slice.
1064///
1065/// The line and column indices are zero based.
1066pub fn line_col(s: &str) -> LineCol {
1067    let mut chars = s.chars().rev();
1068    let mut line = 0;
1069    let mut col = 0;
1070
1071    // The col only needs to be calculated on the final line so we iterate from the last char
1072    // back to the start of the line and then only continue to count the lines after that.
1073    //
1074    // This is less work than continuously counting chars from the front of the slice.
1075    for c in chars.by_ref() {
1076        // If the `&str` is multiline, we count the line and stop accumulating the col count too.
1077        if c == '\n' {
1078            line += 1;
1079            break;
1080        }
1081        col += 1;
1082    }
1083
1084    // The col is now known, continue to the start of the str counting newlines as we go.
1085    for c in chars {
1086        if c == '\n' {
1087            line += 1;
1088        }
1089    }
1090
1091    LineCol { line, col }
1092}
1093
/// An error that has yet to be resolved with a [`Span`].
///
/// A `PartialError` is turned into an [`Error`] by calling `with_head` or `with_root_path`.
#[derive(Debug)]
pub struct PartialError {
    /// The kind of error that occurred.
    kind: ErrorKind,

    /// The location the [`PartialError`] happened in the source code.
    loc: &'static std::panic::Location<'static>,

    /// The token being parsed at the time of the [`PartialError`].
    token: Option<Token>,
}
1106
/// Convert a [`PartialError`] into an [`Error`] by providing a [`PartialElement`].
///
/// This is implemented for `Result<T, PartialError>` so that the conversion can be chained
/// onto a fallible call.
trait PartialIntoError<T> {
    /// Convert a [`PartialError`] into an [`Error`] with a path based on the given [`PartialElement`].
    fn with_head(self, head: &PartialElement<'_>) -> Result<T, Error>;

    /// Converts a [`PartialError`] into an [`Error`] with a root path.
    ///
    /// This can be used if the path is unknown or the [`Error`] occurred at the root.
    fn with_root_path(self) -> Result<T, Error>;
}
1117
1118impl<T> PartialIntoError<T> for Result<T, PartialError> {
1119    fn with_head(self, head: &PartialElement<'_>) -> Result<T, Error> {
1120        match self {
1121            Ok(v) => Ok(v),
1122            Err(err) => Err(err.with_head(head)),
1123        }
1124    }
1125
1126    fn with_root_path(self) -> Result<T, Error> {
1127        match self {
1128            Ok(v) => Ok(v),
1129            Err(err) => Err(err.with_root_path()),
1130        }
1131    }
1132}
1133
1134impl PartialError {
1135    /// Convert a [`PartialError`] into an [`Error`] with a path based on the given [`PartialElement`].
1136    fn with_head(self, parent: &PartialElement<'_>) -> Error {
1137        let Self { loc, kind, token } = self;
1138        let span_end = token.map(|t| t.span.end).unwrap_or_default();
1139
1140        let (path, span) = if let Some(elem) = parent.elements.last() {
1141            (
1142                Path::from_node(Arc::clone(&elem.path_node)),
1143                Span {
1144                    start: elem.span.start,
1145                    end: span_end,
1146                },
1147            )
1148        } else {
1149            (
1150                Path::from_node(Arc::clone(&parent.path)),
1151                Span {
1152                    start: parent.span_start,
1153                    end: span_end,
1154                },
1155            )
1156        };
1157
1158        ErrorImpl {
1159            kind,
1160            loc,
1161            path,
1162            span,
1163            token,
1164        }
1165        .into()
1166    }
1167
1168    /// Converts a `PartialError` into an `Error` with a root path.
1169    ///
1170    /// This can be used If the path is unknown or the `Error` occurred at the root.
1171    pub fn with_root_path(self) -> Error {
1172        let Self { loc, kind, token } = self;
1173        let (span_start, span_end) = match (&kind, token) {
1174            (ErrorKind::UnexpectedToken, Some(t)) => (t.span.start, t.span.end),
1175            (_, Some(t)) => (0, t.span.end),
1176            (_, None) => (0, 0),
1177        };
1178        ErrorImpl {
1179            loc,
1180            kind,
1181            path: Path::root(),
1182            span: Span {
1183                start: span_start,
1184                end: span_end,
1185            },
1186            token,
1187        }
1188        .into()
1189    }
1190}
1191
/// The kind of Errors that can occur while parsing JSON.
#[derive(Debug)]
pub enum ErrorKind {
    /// An internal programming error.
    ///
    /// The boxed error describes what went wrong.
    Internal(Box<dyn std::error::Error + Send + Sync + 'static>),

    /// The `Lexer` had no more tokens when more were expected.
    UnexpectedEOF,

    /// An unexpected token was emitted by the `Lexer`.
    UnexpectedToken,
}
1204
1205impl ErrorKind {
1206    #[track_caller]
1207    fn into_partial_error(self, token: Option<Token>) -> PartialError {
1208        PartialError {
1209            kind: self,
1210            loc: std::panic::Location::caller(),
1211            token,
1212        }
1213    }
1214
1215    #[track_caller]
1216    pub fn into_partial_error_without_token(self) -> PartialError {
1217        PartialError {
1218            kind: self,
1219            loc: std::panic::Location::caller(),
1220            token: None,
1221        }
1222    }
1223}
1224
// No methods are overridden, so the default `std::error::Error` methods are used.
impl std::error::Error for Error {}
1226
1227impl fmt::Display for Error {
1228    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1229        let ErrorImpl {
1230            kind,
1231            loc,
1232            path,
1233            span: _,
1234            token,
1235        } = &*self.0;
1236
1237        write!(
1238            f,
1239            "Error: code location: {loc}; while parsing element at `{path}`"
1240        )?;
1241
1242        if let Some(token) = token {
1243            write!(f, " token: `{}`", token.kind)?;
1244        }
1245
1246        match kind {
1247            ErrorKind::Internal(err) => write!(f, "Internal: {err}"),
1248            ErrorKind::UnexpectedEOF => f.write_str("Unexpected EOF"),
1249            ErrorKind::UnexpectedToken => write!(f, "unexpected token"),
1250        }
1251    }
1252}
1253
/// A programming Error resulting from faulty logic.
///
/// This should not be exposed on the public API.
#[derive(Debug)]
enum InternalError {
    /// Slicing into the JSON buf failed.
    ///
    /// This should not happen during parsing, as the parsing and `Span` calculations are all
    /// contained within the same callstack of functions.
    ///
    /// This can only happen if there's a mistake in the `Span` offset/range calculations.
    BufferSlice(Span),

    /// The type of `Buffer` is invalid.
    ///
    /// The `json_tools::Lexer::next` is called in a few places and the `json_tools::Token` it
    /// emits is converted into a local `Token` with only a `Span` based buffer to avoid checking
    /// the buffer type each time it's used.
    ///
    /// The lexer is configured to only use a `Span` based buffer so the only way this Error can
    /// occur is if the code is changed so that the lexer uses a `String` based buffer.
    BufferType,

    /// Converting a `json_tools::Span` index into a `usize` failed.
    ///
    /// The `json_tools::Span` uses `u64` for the `start` and `end` indices which would involve
    /// conversion to `usize` each time they are used. To avoid this the `json_tools::Span` is
    /// converted to the locally defined `Span` that uses `usize` based fields.
    ///
    /// This conversion can fail if the binary is built for architectures other than `64` bit pointer width.
    FromInt(TryFromIntError),

    /// A String was parsed without surrounding double quotes.
    ///
    /// This is only possible if the `json_tools` crate changes the implementation details of
    /// how they parse JSON strings.
    StringWithoutQuotes,

    /// A `RawStr` was made using a token that is not a `String`.
    ///
    /// `RawStr`s are only creatable from inside the crate so the only way this can occur is
    /// through a programming error.
    RawStringFromInvalidToken,
}
1296
1297impl InternalError {
1298    #[track_caller]
1299    fn into_partial_error(self, token: Option<Token>) -> PartialError {
1300        ErrorKind::Internal(Box::new(self)).into_partial_error(token)
1301    }
1302}
1303
// Implementing `std::error::Error` allows an `InternalError` to be boxed into `ErrorKind::Internal`.
impl std::error::Error for InternalError {}
1305
1306/// The `json_tools::Span` uses `u64` for the `start` and `end` indices which would involve
1307/// conversion to `usize` each time they are used. To avoid this the `json_tools::Span` is
1308/// converted to the locally defined `Span` that uses `usize` based fields.
1309///
1310/// This conversion can fail if the binary is built for architectures other than `64` bit pointer width.
1311impl From<TryFromIntError> for InternalError {
1312    fn from(err: TryFromIntError) -> Self {
1313        InternalError::FromInt(err)
1314    }
1315}
1316
1317impl From<InternalError> for Error {
1318    #[track_caller]
1319    fn from(err: InternalError) -> Self {
1320        ErrorImpl {
1321            kind: ErrorKind::Internal(Box::new(err)),
1322            loc: std::panic::Location::caller(),
1323            path: Path::root(),
1324            span: Span { start: 0, end: 0 },
1325            token: None,
1326        }
1327        .into()
1328    }
1329}
1330
1331impl fmt::Display for InternalError {
1332    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1333        match self {
1334            InternalError::BufferSlice(span) => {
1335                write!(f, "Slice into buffer failed; span: {span:?}")
1336            }
1337            InternalError::BufferType => write!(f, "The tokens buffer is not a `Span`"),
1338            InternalError::FromInt(err) => write!(f, "{err}"),
1339            InternalError::StringWithoutQuotes => {
1340                write!(f, "A String was parsed without surrounding double quotes.")
1341            }
1342
1343            InternalError::RawStringFromInvalidToken => {
1344                write!(
1345                    f,
1346                    "A `RawString` was created using a `Token` that's not a `String`"
1347                )
1348            }
1349        }
1350    }
1351}
1352
/// Convert a `Result` carrying an `InternalError` into one carrying a [`PartialError`].
trait InternalErrorIntoPartial<T> {
    /// Convert the `Err` variant into a [`PartialError`].
    ///
    /// The closure `f` supplies the token to attach to the [`PartialError`].
    #[track_caller]
    fn into_partial_error<F>(self, f: F) -> Result<T, PartialError>
    where
        F: FnOnce() -> Token;
}
1359
1360impl<T> InternalErrorIntoPartial<T> for Result<T, InternalError> {
1361    fn into_partial_error<F>(self, f: F) -> Result<T, PartialError>
1362    where
1363        F: FnOnce() -> Token,
1364    {
1365        match self {
1366            Ok(v) => Ok(v),
1367            Err(err) => {
1368                let token = f();
1369                Err(err.into_partial_error(Some(token)))
1370            }
1371        }
1372    }
1373}
1374
1375/// Create the `Span` of an `Element` given the start and the closing token.
1376fn element_span(token_end: &Token, start: usize) -> Span {
1377    Span {
1378        start,
1379        end: token_end.span.end,
1380    }
1381}
1382
1383/// Return the content of the `Token` as a `&str`.
1384///
1385/// This in only useful for `Token`'s that contain variable data, such as `String`, `Number` etc.
1386#[track_caller]
1387fn token_str<'buf>(json: &'buf str, token: &Token) -> Result<&'buf str, PartialError> {
1388    let start = token.span.start;
1389    let end = token.span.end;
1390    let s = &json
1391        .get(start..end)
1392        .ok_or(InternalError::BufferSlice(Span { start, end }))
1393        .into_partial_error(|| *token)?;
1394    Ok(s)
1395}
1396
/// A `&str` with the surrounding quotes removed that hasn't been analyzed for escape codes.
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Default)]
pub struct RawStr<'buf> {
    /// The raw `&str` with surrounding quotes removed; escape codes are left as they are.
    source: &'buf str,

    /// The `Span` of the `String` token that produced the source `&str`.
    span: Span,
}
1406
1407/// Impl `Borrow` so `RawStr` plays well with hashed collections.
1408impl Borrow<str> for RawStr<'_> {
1409    fn borrow(&self) -> &str {
1410        self.source
1411    }
1412}
1413
1414/// Impl `Borrow` so `RawStr` plays well with hashed collections.
1415impl Borrow<str> for &RawStr<'_> {
1416    fn borrow(&self) -> &str {
1417        self.source
1418    }
1419}
1420
1421impl<'buf> RawStr<'buf> {
1422    pub(super) fn from_str(source: &'buf str, span: Span) -> Self {
1423        Self { source, span }
1424    }
1425
1426    /// Create new `RawStr` from a string with surrounding quotes.
1427    #[track_caller]
1428    pub(super) fn from_quoted_str(
1429        s: &'buf str,
1430        token: Token,
1431    ) -> Result<RawStr<'buf>, PartialError> {
1432        const QUOTE: char = '"';
1433
1434        if token.kind != TokenType::String {
1435            return Err(InternalError::RawStringFromInvalidToken.into_partial_error(Some(token)));
1436        }
1437
1438        // remove double quotes
1439        let (_, s) = s
1440            .split_once(QUOTE)
1441            .ok_or(InternalError::StringWithoutQuotes)
1442            .into_partial_error(|| token)?;
1443
1444        let (source, _) = s
1445            .rsplit_once(QUOTE)
1446            .ok_or(InternalError::StringWithoutQuotes)
1447            .into_partial_error(|| token)?;
1448
1449        Ok(Self {
1450            source,
1451            span: token.span,
1452        })
1453    }
1454
1455    /// Return the raw unescaped `&str`.
1456    pub(crate) fn as_raw(&self) -> &'buf str {
1457        self.source
1458    }
1459
1460    /// Return the `&str` with all escapes decoded.
1461    pub(crate) fn decode_escapes(
1462        &self,
1463        elem: &Element<'buf>,
1464    ) -> Caveat<Cow<'_, str>, decode::WarningKind> {
1465        unescape_str(self.source, elem)
1466    }
1467
1468    /// Return a `&str` marked as either having escapes or not.
1469    pub(crate) fn has_escapes(
1470        &self,
1471        elem: &Element<'buf>,
1472    ) -> Caveat<decode::PendingStr<'_>, decode::WarningKind> {
1473        decode::analyze(self.source, elem)
1474    }
1475
1476    /// Return the [`Span`] of the [`Token`] that generated this string.
1477    pub fn span(&self) -> Span {
1478        self.span
1479    }
1480}
1481
1482impl fmt::Display for RawStr<'_> {
1483    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1484        fmt::Display::fmt(self.source, f)
1485    }
1486}
1487
1488#[track_caller]
1489fn token_str_as_string(json: &str, token: Token) -> Result<RawStr<'_>, PartialError> {
1490    let s = token_str(json, &token)?;
1491    let raw = RawStr::from_quoted_str(s, token)?;
1492    Ok(raw)
1493}
1494
/// Tests for the failure paths of `RawStr::from_quoted_str`.
#[cfg(test)]
mod test_raw_str {
    use assert_matches::assert_matches;

    use crate::test;

    use super::{ErrorKind, InternalError, RawStr, Span, Token, TokenType};

    /// Only a `String` token may be used to create a `RawStr`.
    #[test]
    fn should_fail_to_create_raw_str_from_non_string_token() {
        test::setup();

        let err = RawStr::from_quoted_str("fail", TokenType::Number.into()).unwrap_err();
        let err = assert_matches!(err.kind, ErrorKind::Internal(err) => err);
        // The boxed error should be the `InternalError` raised by `from_quoted_str`.
        let err = err.downcast_ref::<InternalError>().unwrap();
        assert_matches!(err, InternalError::RawStringFromInvalidToken);
    }

    /// The `&str` given must contain the surrounding double quotes.
    #[test]
    fn should_fail_to_create_raw_str_from_string_without_quotes() {
        test::setup();

        let err = RawStr::from_quoted_str("fail", TokenType::String.into()).unwrap_err();
        let err = assert_matches!(err.kind, ErrorKind::Internal(err) => err);
        // The boxed error should be the `InternalError` raised by `from_quoted_str`.
        let err = err.downcast_ref::<InternalError>().unwrap();
        assert_matches!(err, InternalError::StringWithoutQuotes);
    }

    /// Test-only helper to create a `Token` of the given kind with a default `Span`.
    impl From<TokenType> for Token {
        fn from(kind: TokenType) -> Self {
            Self {
                kind,
                span: Span::default(),
            }
        }
    }
}
1532
/// Tests for `line_col`.
#[cfg(test)]
mod test_line_col {
    use super::{line_col, LineCol};

    /// Assert that `line_col` reports the expected zero based line and col for the `&str`.
    #[track_caller]
    fn assert_line_col(json: &str, expected_line: u32, expected_col: u32) {
        let LineCol { line, col } = line_col(json);
        assert_eq!(line, expected_line);
        assert_eq!(col, expected_col);
    }

    #[test]
    fn should_line_col_empty_str() {
        assert_line_col("", 0, 0);
    }

    #[test]
    fn should_line_col_one_line_one_char_str() {
        assert_line_col("1", 0, 1);
    }

    #[test]
    fn should_line_col_one_line_many_chars_str() {
        assert_line_col("1234", 0, 4);
    }

    #[test]
    fn should_line_col_two_line_one_col_str() {
        assert_line_col("1234\n1", 1, 1);
    }
}
1569
1570#[cfg(test)]
1571mod test_parser {
1572    use assert_matches::assert_matches;
1573
1574    use crate::{
1575        json::{PathNode, ValueKind},
1576        test,
1577    };
1578
1579    use super::{Event, ObjectKind, Parser};
1580
    /// Step the `Parser` by hand over an object with one field and check the first two events.
    #[test]
    fn should_emit_events_for_object_with_single_field() {
        const JSON: &str = r#"{
    "field_a": 404
}"#;

        test::setup();

        let mut parser = Parser::new(JSON);
        // The first event should open the root object.
        let event = parser.next().unwrap().unwrap();

        let path = assert_matches!(
            event,
            Event::Open {
                kind: ObjectKind::Object,
                parent_path
            }
             => parent_path
        );
        assert_matches!(*path, PathNode::Root);

        // The second event should be the number held by `field_a`.
        let event = parser.next().unwrap().unwrap();

        let path = assert_matches!(
            event,
            Event::Element {
                kind: ValueKind::Number,
                parent_path
            }
             => parent_path
        );

        assert_eq!(*path, "$.field_a");
    }
1615
    /// Step the `Parser` by hand over an object with two fields and check the first three events.
    #[test]
    fn should_emit_events_for_object_with_two_fields() {
        const JSON: &str = r#"{
    "field_a": 404,
    "field_b": "name"
}"#;

        test::setup();

        let mut parser = Parser::new(JSON);
        // The first event should open the root object.
        let event = parser.next().unwrap().unwrap();

        let path = assert_matches!(
            event,
            Event::Open {
                kind: ObjectKind::Object,
                parent_path
            }
             => parent_path
        );
        assert_matches!(*path, PathNode::Root);

        // The second event should be the number held by `field_a`.
        let event = parser.next().unwrap().unwrap();

        let path = assert_matches!(
            event,
            Event::Element {
                kind: ValueKind::Number,
                parent_path
            }
             => parent_path
        );

        assert_eq!(*path, "$.field_a");

        // The third event should be the string held by `field_b`.
        let event = parser.next().unwrap().unwrap();

        let path = assert_matches!(
            event,
            Event::Element {
                kind: ValueKind::String,
                parent_path
            }
             => parent_path
        );

        assert_eq!(*path, "$.field_b");
    }
1664
    /// Step the `Parser` by hand over an object that contains another object and check the
    /// first four events.
    #[test]
    fn should_emit_events_for_object_with_nested_fields() {
        const JSON: &str = r#"{
    "field_a": 404,
    "field_b": {
        "field_c": "name"
    }
}"#;

        test::setup();

        let mut parser = Parser::new(JSON);
        // The first event should open the root object.
        let event = parser.next().unwrap().unwrap();

        let path = assert_matches!(
            event,
            Event::Open {
                kind: ObjectKind::Object,
                parent_path
            }
             => parent_path
        );
        assert_matches!(*path, PathNode::Root);

        // The second event should be the number held by `field_a`.
        let event = parser.next().unwrap().unwrap();

        let path = assert_matches!(
            event,
            Event::Element {
                kind: ValueKind::Number,
                parent_path
            }
             => parent_path
        );

        assert_eq!(*path, "$.field_a");

        // The third event should open the object held by `field_b`.
        let event = parser.next().unwrap().unwrap();

        let path = assert_matches!(
            event,
            Event::Open {
                kind: ObjectKind::Object,
                parent_path
            }
             => parent_path
        );
        assert_eq!(*path, "$.field_b");

        // The fourth event should be the string held by the nested `field_c`.
        let event = parser.next().unwrap().unwrap();

        let path = assert_matches!(
            event,
            Event::Element {
                kind: ValueKind::String,
                parent_path
            }
             => parent_path
        );

        assert_eq!(*path, "$.field_b.field_c");
    }
1727
    /// Step the `Parser` by hand over an array with one element and check the first two events.
    #[test]
    fn should_emit_events_for_array_with_single_field() {
        const JSON: &str = r#"["field_a"]"#;

        test::setup();

        let mut parser = Parser::new(JSON);
        // The first event should open the root array.
        let event = parser.next().unwrap().unwrap();

        let path = assert_matches!(
            event,
            Event::Open {
                kind: ObjectKind::Array,
                parent_path
            }
             => parent_path
        );
        assert_matches!(*path, PathNode::Root);

        // The second event should be the string at index `0` of the array.
        let event = parser.next().unwrap().unwrap();

        let path = assert_matches!(
            event,
            Event::Element {
                kind: ValueKind::String,
                parent_path
            }
             => parent_path
        );

        assert_eq!(*path, "$.0");
    }
1760
1761    #[test]
1762    fn should_emit_events_for_array_with_two_fields() {
1763        const JSON: &str = r#"{
1764    "field_a": 404,
1765    "field_b": "name"
1766}"#;
1767
1768        test::setup();
1769
1770        let mut parser = Parser::new(JSON);
1771        let event = parser.next().unwrap().unwrap();
1772
1773        let path = assert_matches!(
1774            event,
1775            Event::Open {
1776                kind: ObjectKind::Object,
1777                parent_path
1778            }
1779             => parent_path
1780        );
1781        assert_matches!(*path, PathNode::Root);
1782
1783        let event = parser.next().unwrap().unwrap();
1784
1785        let path = assert_matches!(
1786            event,
1787            Event::Element {
1788                kind: ValueKind::Number,
1789                parent_path
1790            }
1791             => parent_path
1792        );
1793
1794        assert_eq!(*path, "$.field_a");
1795
1796        let event = parser.next().unwrap().unwrap();
1797
1798        let path = assert_matches!(
1799            event,
1800            Event::Element {
1801                kind: ValueKind::String,
1802                parent_path
1803            }
1804             => parent_path
1805        );
1806
1807        assert_eq!(*path, "$.field_b");
1808    }
1809
1810    #[test]
1811    fn should_emit_events_for_array_with_nested_fields() {
1812        const JSON: &str = r#"{
1813    "field_a": 404,
1814    "field_b": {
1815        "field_c": "name"
1816    }
1817}"#;
1818
1819        test::setup();
1820
1821        let mut parser = Parser::new(JSON);
1822        let event = parser.next().unwrap().unwrap();
1823
1824        let path = assert_matches!(
1825            event,
1826            Event::Open {
1827                kind: ObjectKind::Object,
1828                parent_path
1829            }
1830             => parent_path
1831        );
1832        assert_matches!(*path, PathNode::Root);
1833
1834        let event = parser.next().unwrap().unwrap();
1835
1836        let path = assert_matches!(
1837            event,
1838            Event::Element {
1839                kind: ValueKind::Number,
1840                parent_path
1841            }
1842             => parent_path
1843        );
1844
1845        assert_eq!(*path, "$.field_a");
1846
1847        let event = parser.next().unwrap().unwrap();
1848
1849        let path = assert_matches!(
1850            event,
1851            Event::Open {
1852                kind: ObjectKind::Object,
1853                parent_path
1854            }
1855             => parent_path
1856        );
1857        assert_eq!(*path, "$.field_b");
1858
1859        let event = parser.next().unwrap().unwrap();
1860
1861        let path = assert_matches!(
1862            event,
1863            Event::Element {
1864                kind: ValueKind::String,
1865                parent_path
1866            }
1867             => parent_path
1868        );
1869
1870        assert_eq!(*path, "$.field_b.field_c");
1871    }
1872}
1873
/// Helpers shared by the parser tests, plus a check of the auto traits of the error types.
#[cfg(test)]
pub mod test {
    #![allow(clippy::string_slice, reason = "tests are allowed to panic")]

    use super::{Error, ErrorKind, Span};

    /// Return the slice of `json` covered by the `span`.
    ///
    /// This indexes directly into `json`, so it panics if the `span` can't be used to slice it.
    pub fn spanned_json(span: Span, json: &str) -> &str {
        &json[span.start..span.end]
    }

    /// This test only compiles if `Error` and `ErrorKind` are both `Send` and `Sync`.
    #[test]
    const fn error_should_be_send_and_sync() {
        const fn f<T: Send + Sync>() {}

        f::<Error>();
        f::<ErrorKind>();
    }
}
1892
/// Happy-path structure tests: nested objects and arrays must parse into a tree whose
/// elements carry the expected path and cover the expected slice of the source JSON.
#[cfg(test)]
mod test_parser_basic_happy_structure {
    use assert_matches::assert_matches;

    use crate::{json::Value, test};

    use super::{parse, test::spanned_json, Element, PathNode};

    /// Check a string leaf given its `(path, span, value)` parts.
    ///
    /// The checks run in a fixed order: the source text covered by the span, then the
    /// path, and finally the raw contents of the string value.
    macro_rules! assert_string_leaf {
        (
            $json:expr,
            ($path:expr, $span:expr, $value:expr),
            text: $text:expr,
            at: $at:expr,
            raw: $raw:expr $(,)?
        ) => {{
            assert_eq!(spanned_json($span, $json), $text);
            assert_eq!(*$path, $at);
            let string = assert_matches!($value, Value::String(string) => string);
            assert_eq!(string.as_raw(), $raw);
        }};
    }

    /// An object nested inside an object: `$.field_b` holds two string fields.
    #[test]
    fn should_parse_nested_object() {
        test::setup();

        let json = r#"{ "field_a": "one", "field_b": { "field_ba": "two", "field_bb": "three" } }"#;
        let Element {
            id: _,
            path_node: root_path,
            span: root_span,
            value: root_value,
        } = parse(json).unwrap();

        // The root element spans the whole input.
        assert_eq!(*root_path, PathNode::Root);
        assert_eq!(spanned_json(root_span, json), json);

        let members = assert_matches!(root_value, Value::Object(members) => members);
        let [first, second] = members.try_into().unwrap();

        // `field_a`: this leaf is checked path-first, unlike the other leaves.
        {
            let (_id, path, span, value) = first.into_parts();

            assert_eq!(*path, "$.field_a");
            assert_eq!(spanned_json(span, json), r#""one""#);
            let string = assert_matches!(value, Value::String(string) => string);
            assert_eq!(string.as_raw(), "one");
        }

        // `field_b`: the nested object.
        let (_id, path, span, value) = second.into_parts();
        assert_eq!(*path, "$.field_b");
        assert_eq!(
            spanned_json(span, json),
            r#"{ "field_ba": "two", "field_bb": "three" }"#
        );

        let nested = assert_matches!(value, Value::Object(nested) => nested);
        let [nested_first, nested_second] = nested.try_into().unwrap();

        {
            let (_id, path, span, value) = nested_first.into_parts();

            assert_string_leaf!(
                json,
                (path, span, value),
                text: r#""two""#,
                at: "$.field_b.field_ba",
                raw: "two",
            );
        }

        {
            let (_id, path, span, value) = nested_second.into_parts();

            assert_string_leaf!(
                json,
                (path, span, value),
                text: r#""three""#,
                at: "$.field_b.field_bb",
                raw: "three",
            );
        }
    }

    /// An array nested inside an object: the items of `$.field_b` are addressed by index.
    #[test]
    fn should_parse_object_with_nested_array() {
        test::setup();

        let json = r#"{ "field_a": "one", "field_b": [ "two", "three" ] }"#;
        let Element {
            id: _,
            path_node: root_path,
            span: root_span,
            value: root_value,
        } = parse(json).unwrap();

        assert_eq!(*root_path, PathNode::Root);
        assert_eq!(spanned_json(root_span, json), json);

        let members = assert_matches!(root_value, Value::Object(members) => members);
        let [first, second] = members.try_into().unwrap();

        {
            let (_id, path, span, value) = first.into_parts();

            assert_string_leaf!(
                json,
                (path, span, value),
                text: r#""one""#,
                at: "$.field_a",
                raw: "one",
            );
        }

        // `field_b`: the nested array.
        let (_id, path, span, value) = second.into_parts();
        assert_eq!(*path, "$.field_b");
        assert_eq!(spanned_json(span, json), r#"[ "two", "three" ]"#);

        let items = assert_matches!(value, Value::Array(items) => items);
        let [item_zero, item_one] = items.try_into().unwrap();

        {
            let (_id, path, span, value) = item_zero.into_parts();

            assert_string_leaf!(
                json,
                (path, span, value),
                text: r#""two""#,
                at: "$.field_b.0",
                raw: "two",
            );
        }

        {
            let (_id, path, span, value) = item_one.into_parts();

            assert_string_leaf!(
                json,
                (path, span, value),
                text: r#""three""#,
                at: "$.field_b.1",
                raw: "three",
            );
        }
    }

    /// An array nested inside an array: paths are made of indices only.
    #[test]
    fn should_parse_nested_array() {
        test::setup();

        let json = r#"[ "one", ["two", "three"] ]"#;
        let Element {
            id: _,
            path_node: root_path,
            span: root_span,
            value: root_value,
        } = parse(json).unwrap();

        assert_eq!(*root_path, PathNode::Root);
        assert_eq!(spanned_json(root_span, json), json);

        let items = assert_matches!(root_value, Value::Array(items) => items);
        let [first, second] = items.try_into().unwrap();

        {
            let Element {
                id: _,
                path_node: path,
                span,
                value,
            } = first;

            assert_string_leaf!(
                json,
                (path, span, value),
                text: r#""one""#,
                at: "$.0",
                raw: "one",
            );
        }

        // `$.1`: the nested array.
        let Element {
            id: _,
            path_node: path,
            span,
            value,
        } = second;
        assert_eq!(*path, "$.1");
        assert_eq!(spanned_json(span, json), r#"["two", "three"]"#);

        let inner_items = assert_matches!(value, Value::Array(inner_items) => inner_items);
        let [inner_zero, inner_one] = inner_items.try_into().unwrap();

        {
            let Element {
                id: _,
                path_node: path,
                span,
                value,
            } = inner_zero;

            assert_string_leaf!(
                json,
                (path, span, value),
                text: r#""two""#,
                at: "$.1.0",
                raw: "two",
            );
        }

        {
            let Element {
                id: _,
                path_node: path,
                span,
                value,
            } = inner_one;

            assert_string_leaf!(
                json,
                (path, span, value),
                text: r#""three""#,
                at: "$.1.1",
                raw: "three",
            );
        }
    }

    /// An object nested inside an array: field names follow the array index in the path.
    #[test]
    fn should_parse_array_with_nested_object() {
        test::setup();

        let json = r#"[ "one", {"field_a": "two", "field_b": "three"} ]"#;
        let Element {
            id: _,
            path_node: root_path,
            span: root_span,
            value: root_value,
        } = parse(json).unwrap();

        assert_eq!(*root_path, PathNode::Root);
        assert_eq!(spanned_json(root_span, json), json);

        let items = assert_matches!(root_value, Value::Array(items) => items);
        let [first, second] = items.try_into().unwrap();

        {
            let Element {
                id: _,
                path_node: path,
                span,
                value,
            } = first;

            assert_string_leaf!(
                json,
                (path, span, value),
                text: r#""one""#,
                at: "$.0",
                raw: "one",
            );
        }

        // `$.1`: the nested object.
        let Element {
            id: _,
            path_node: path,
            span,
            value,
        } = second;
        assert_eq!(*path, "$.1");
        assert_eq!(
            spanned_json(span, json),
            r#"{"field_a": "two", "field_b": "three"}"#
        );

        let members = assert_matches!(value, Value::Object(members) => members);
        let [member_a, member_b] = members.try_into().unwrap();

        {
            let (_id, path, span, value) = member_a.into_parts();

            assert_string_leaf!(
                json,
                (path, span, value),
                text: r#""two""#,
                at: "$.1.field_a",
                raw: "two",
            );
        }

        {
            let (_id, path, span, value) = member_b.into_parts();

            assert_string_leaf!(
                json,
                (path, span, value),
                text: r#""three""#,
                at: "$.1.field_b",
                raw: "three",
            );
        }
    }
}
2161
/// Tests for what a parse failure reports: the error kind, the offending token, the path
/// of the element being parsed, and the JSON context and line/column bounds of the report.
#[cfg(test)]
mod test_parser_error_reporting {
    #![allow(
        clippy::string_slice,
        clippy::as_conversions,
        reason = "panicking is tests is allowed"
    )]

    use assert_matches::assert_matches;

    use crate::test;

    use super::{parse, ErrorKind, SpanBounds, TokenType};

    /// A comma directly before the closing brace is rejected at the comma.
    #[test]
    fn should_report_trailing_comma() {
        test::setup();

        const JSON: &str = r#"{
   "field_a": "one",
   "field_b": "two",
}"#;

        let failure = parse(JSON).unwrap_err();
        assert_matches!(failure.kind(), ErrorKind::UnexpectedToken);

        let token = failure.token().unwrap();
        assert_matches!(
            token.kind,
            TokenType::Comma,
            "We are parsing a comma when we realize that it should not be there"
        );
        assert_eq!(*failure.path(), "$.field_b");

        let rendered = failure.into_report(JSON);
        assert_eq!(rendered.json_context(), r#""two","#);

        let SpanBounds { start, end } = rendered.span_bounds();
        assert_eq!(*start, (2, 14));
        assert_eq!(*end, (2, 20));
        assert_eq!(rendered.expand_json_context(), r#"   "field_b": "two","#);
    }

    /// Input that stops in the middle of the first key yields an invalid token at the root.
    #[test]
    fn should_report_invalid_json() {
        test::setup();

        const JSON: &str = r#"{
"field_"#;

        let failure = parse(JSON).unwrap_err();
        assert_matches!(failure.kind(), ErrorKind::UnexpectedToken);

        let token = failure.token().unwrap();
        assert_matches!(
            token.kind,
            TokenType::Invalid,
            "We are parsing a string not ended be a double quote"
        );
        assert_eq!(*failure.path(), "$");

        let rendered = failure.into_report(JSON);
        assert_eq!(rendered.json_context(), r#""field_"#);

        let SpanBounds { start, end } = rendered.span_bounds();
        assert_eq!(*start, (1, 0));
        assert_eq!(*end, (1, 7));
        assert_eq!(rendered.expand_json_context(), r#""field_"#);
    }

    /// Same as above, but the unterminated key comes after one complete field.
    #[test]
    fn should_report_invalid_json_in_some_place() {
        test::setup();

        const JSON: &str = r#"{
"field_a": "Barney",
"field_"#;

        let failure = parse(JSON).unwrap_err();
        assert_matches!(failure.kind(), ErrorKind::UnexpectedToken);

        let token = failure.token().unwrap();
        assert_matches!(
            token.kind,
            TokenType::Invalid,
            "We are parsing a string not ended be a double quote"
        );
        assert_eq!(*failure.path(), "$");

        let rendered = failure.into_report(JSON);
        assert_eq!(rendered.json_context(), r#""field_"#);

        let SpanBounds { start, end } = rendered.span_bounds();
        assert_eq!(*start, (2, 0));
        assert_eq!(*end, (2, 7));
        assert_eq!(rendered.expand_json_context(), r#""field_"#);
    }

    /// A key followed by a comma instead of a value is rejected at that comma.
    #[test]
    fn should_report_invalid_json_in_some_place_in_the_middle() {
        test::setup();

        const JSON: &str = r#"{
"field_a": "Barney",
"field_b",
"field_c": "Fred" }
"#;

        let failure = parse(JSON).unwrap_err();
        assert_matches!(failure.kind(), ErrorKind::UnexpectedToken);

        let token = failure.token().unwrap();
        assert_matches!(
            token.kind,
            TokenType::Comma,
            "We are parsing a key value pair but the key is followed by comma."
        );
        assert_eq!(*failure.path(), "$.field_a");

        let rendered = failure.into_report(JSON);
        assert_eq!(
            rendered.json_context(),
            r#""Barney",
"field_b","#
        );

        let SpanBounds { start, end } = rendered.span_bounds();
        assert_eq!(*start, (1, 11));
        assert_eq!(*end, (2, 10));
        assert_eq!(rendered.expand_json_context(), r#""field_a": "Barney","#);
    }

    /// Two fields without a separating comma are rejected at the second key.
    #[test]
    fn should_report_missing_comma() {
        test::setup();

        const JSON: &str = r#"{
   "field_a": "one"
   "field_b": "two"
}"#;

        let failure = parse(JSON).unwrap_err();
        assert_matches!(failure.kind(), ErrorKind::UnexpectedToken);

        let token = failure.token().unwrap();
        assert_matches!(
            token.kind,
            TokenType::String,
            "We are parsing a String when we realize that there should be a comma"
        );
        assert_eq!(*failure.path(), "$.field_a");

        let rendered = failure.into_report(JSON);
        assert_eq!(
            rendered.json_context(),
            r#""one"
   "field_b""#
        );

        let SpanBounds { start, end } = rendered.span_bounds();
        assert_eq!(*start, (1, 14));
        assert_eq!(*end, (2, 12));
        assert_eq!(rendered.expand_json_context(), r#"   "field_a": "one""#);
    }
}
2322
/// Pins the in-memory size of the parser's core types so a change in size is noticed.
#[cfg(test)]
mod test_type_sizes {
    use std::mem::size_of;

    use super::{
        Element, Error, ErrorImpl, PartialElement, Path, PathNode, PathNodeRef, RawStr, Span,
        Token, TokenType, Value,
    };

    /// The expected byte counts only apply to targets with 64-bit pointers.
    #[test]
    #[cfg(target_pointer_width = "64")]
    fn should_match_sizes() {
        /// Assert that the given type occupies exactly the given number of bytes.
        macro_rules! assert_size {
            ($ty:ty, $bytes:expr) => {
                assert_eq!(size_of::<$ty>(), $bytes);
            };
        }

        assert_size!(Element<'_>, 72);
        assert_size!(Error, 8);
        assert_size!(ErrorImpl, 96);
        assert_size!(PartialElement<'_>, 56);
        assert_size!(Path, 24);
        assert_size!(PathNode<'_>, 48);
        assert_size!(PathNodeRef<'_>, 8);
        assert_size!(RawStr<'_>, 32);
        assert_size!(Span, 16);
        assert_size!(Token, 24);
        assert_size!(TokenType, 1);
        assert_size!(Value<'_>, 40);
    }
}