ocpi_tariffs/json/
parser.rs

//! A JSON parser built to keep track of where each element came from in the input.
2use std::borrow::{Borrow, Cow};
3use std::fmt;
4use std::iter::Peekable;
5use std::num::TryFromIntError;
6use std::str::Bytes;
7use std::sync::atomic::AtomicUsize;
8use std::sync::Arc;
9
10use json_tools::{Buffer, BufferType};
11use tracing::{debug, trace};
12
13use crate::Caveat;
14
15use super::{
16    decode::{self, unescape_str},
17    Element, Field, ObjectKind, PathNode, PathNodeRef, Value, ValueKind,
18};
19use super::{ElemId, Path};
20
/// The token stream consumed by the [`Parser`]: the `json_tools` lexer over the bytes of the
/// source `&str`.
///
/// The lexer is wrapped in a [`Peekable`] because we peek at the next `Token` when asserting on
/// trailing commas.
type Lexer<'buf> = Peekable<json_tools::Lexer<Bytes<'buf>>>;
23
24/// Parse the JSON into a tree of [`Element`]s.
25pub(crate) fn parse(json: &str) -> Result<Element<'_>, Error> {
26    let parser = Parser::new(json);
27
28    // When just parsing the JSON into an `Element` we only care about the final event
29    // when the JSON has been completely transformed into a root element.
30    for event in parser {
31        if let Event::Complete(element) = event? {
32            return Ok(element);
33        }
34    }
35
36    Err(ErrorKind::UnexpectedEOF
37        .into_partial_error_without_token()
38        .with_root_path())
39}
40
/// A parsing event emitted for each call to the `<Parser as Iterator>::next` function.
#[derive(Debug)]
pub(crate) enum Event<'buf> {
    /// An [`Element`] has been opened and its construction is in progress.
    Open {
        /// Whether the opened [`Element`] is an object or an array.
        kind: ObjectKind,
        /// The path node recorded for the opened [`Element`]; the parser uses it as the parent
        /// when building the paths of the children parsed next.
        parent_path: PathNodeRef<'buf>,
    },

    /// An [`Element`] has been created and added to its parent [`Element`].
    ///
    /// If the kind is `Array` or `Object` that means that this element is closed: its construction is complete.
    Element {
        /// The kind of JSON value the [`Element`] is.
        kind: ValueKind,
        /// The path to the parent [`Element`].
        parent_path: PathNodeRef<'buf>,
    },

    /// The parse has completed creating the tree of [`Element`]s.
    ///
    /// Carries the root [`Element`] of the tree.
    Complete(Element<'buf>),
}
63
/// The context needed to parse a single chunk of JSON.
///
/// The `Parser` is driven through its `Iterator` impl, which yields one [`Event`] per step.
pub(crate) struct Parser<'buf> {
    /// Used to assign a unique [`ElemId`] to each [`Element`].
    ///
    /// Stored as an atomic so the counter can be bumped through a shared `&self` reference.
    elem_count: AtomicUsize,

    /// True if the `Parser` is complete.
    ///
    /// Any further calls to [`Parser::next`] will return `None`.
    complete: bool,

    /// The source JSON we're parsing.
    json: &'buf str,

    /// The JSON lexer.
    lexer: Lexer<'buf>,

    /// The pool with pre-allocated `Path`s.
    path_pool: PathPool<'buf>,

    /// The stack to track nested objects.
    stack: Stack<'buf>,

    /// The previous token seen.
    ///
    /// It is attached to the error reported when the input ends unexpectedly.
    token: Option<Token>,
}
89
/// Define our own `TokenType` so Clone can be defined on it.
///
/// This can be removed when `json_tools::TokenType` impl's `Clone`.
///
/// The variants mirror those of `json_tools::TokenType` one to one.
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd)]
pub enum TokenType {
    /// `false`
    BooleanFalse,
    /// `true`
    BooleanTrue,

    /// `]`
    BracketClose,
    /// `[`
    BracketOpen,

    /// `:`
    Colon,

    /// `,`
    Comma,

    /// `}`
    CurlyClose,
    /// `{`
    CurlyOpen,

    /// The type of the token could not be identified.
    /// Should be removed if this lexer is ever to be feature complete
    Invalid,

    /// `null`
    Null,

    /// A Number, like `1.1234` or `123` or `-0.0` or `-1` or `.0` or `.`
    Number,

    /// A JSON string, like `"foo"`
    String,
}
129
130impl TokenType {
131    fn as_str(self) -> &'static str {
132        match self {
133            TokenType::BooleanFalse => "false",
134            TokenType::BooleanTrue => "true",
135            TokenType::BracketClose => "]",
136            TokenType::BracketOpen => "[",
137            TokenType::Colon => ":",
138            TokenType::Comma => ",",
139            TokenType::CurlyClose => "}",
140            TokenType::CurlyOpen => "{",
141            TokenType::Invalid => "<invalid>",
142            TokenType::Null => "null",
143            TokenType::Number => "<number>",
144            TokenType::String => "<string>",
145        }
146    }
147}
148
149impl fmt::Display for TokenType {
150    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
151        f.write_str(self.as_str())
152    }
153}
154
155impl From<json_tools::TokenType> for TokenType {
156    fn from(value: json_tools::TokenType) -> Self {
157        match value {
158            json_tools::TokenType::BooleanFalse => TokenType::BooleanFalse,
159            json_tools::TokenType::BooleanTrue => TokenType::BooleanTrue,
160            json_tools::TokenType::BracketClose => TokenType::BracketClose,
161            json_tools::TokenType::BracketOpen => TokenType::BracketOpen,
162            json_tools::TokenType::CurlyClose => TokenType::CurlyClose,
163            json_tools::TokenType::CurlyOpen => TokenType::CurlyOpen,
164            json_tools::TokenType::Colon => TokenType::Colon,
165            json_tools::TokenType::Comma => TokenType::Comma,
166            json_tools::TokenType::Invalid => TokenType::Invalid,
167            json_tools::TokenType::Null => TokenType::Null,
168            json_tools::TokenType::Number => TokenType::Number,
169            json_tools::TokenType::String => TokenType::String,
170        }
171    }
172}
173
/// A lexical token, identifying its kind and span.
///
/// We define our own `Token` as the `json_tools::Token` defines a `Buffer` that can be heap allocated
/// or a `Span`. We only use the `Span` variant.
///
/// Our `Token` can also impl `Copy` and `Clone` as the size and semantics are acceptable.
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd)]
pub struct Token {
    /// The exact type of the token
    pub kind: TokenType,

    /// The span allows to reference back into the source byte stream
    /// to obtain the string making up the token.
    ///
    /// NOTE(review): the original comment broke off mid-sentence after "Please note that for
    /// control characters, booleans and null (i.e" — restore the intended remark or drop it.
    pub span: Span,
}
190
191impl Token {
192    /// Return true is the token is a opening brace.
193    fn is_opening(&self) -> bool {
194        matches!(self.kind, TokenType::CurlyOpen | TokenType::BracketOpen)
195    }
196
197    /// Return true is the token is a closing brace.
198    fn is_closing(&self) -> bool {
199        matches!(self.kind, TokenType::CurlyClose | TokenType::BracketClose)
200    }
201
202    /// Return true is the token is a comma.
203    fn is_comma(&self) -> bool {
204        matches!(self.kind, TokenType::Comma)
205    }
206}
207
208impl fmt::Display for Token {
209    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
210        write!(
211            f,
212            "token: {}, ({},{})",
213            self.kind, self.span.start, self.span.end
214        )
215    }
216}
217
218impl TryFrom<json_tools::Token> for Token {
219    type Error = PartialError;
220
221    fn try_from(token: json_tools::Token) -> Result<Self, Self::Error> {
222        let json_tools::Token { kind, buf } = token;
223        let kind = kind.into();
224        let Buffer::Span(span) = &buf else {
225            return Err(InternalError::BufferType.into_partial_error(None));
226        };
227
228        let span = span
229            .try_into()
230            .map_err(|err| InternalError::from(err).into_partial_error(None))?;
231
232        Ok(Self { kind, span })
233    }
234}
235
236impl TryFrom<&json_tools::Token> for Token {
237    type Error = PartialError;
238
239    fn try_from(token: &json_tools::Token) -> Result<Self, Self::Error> {
240        let json_tools::Token { kind, buf } = token;
241        let kind = kind.clone().into();
242        let Buffer::Span(span) = &buf else {
243            return Err(InternalError::BufferType.into_partial_error(None));
244        };
245
246        let span = span
247            .try_into()
248            .map_err(|err| InternalError::from(err).into_partial_error(None))?;
249
250        Ok(Self { kind, span })
251    }
252}
253
254impl<'buf> Parser<'buf> {
255    pub fn new(json: &'buf str) -> Self {
256        let lexer = json_tools::Lexer::new(json.bytes(), BufferType::Span).peekable();
257
258        Self {
259            elem_count: AtomicUsize::new(0),
260            complete: false,
261            json,
262            lexer,
263            path_pool: PathPool::default(),
264            stack: Stack::new(),
265            token: None,
266        }
267    }
268
269    fn next_elem_id(&self) -> ElemId {
270        let id = self
271            .elem_count
272            .fetch_add(1, std::sync::atomic::Ordering::Relaxed);
273
274        ElemId(id)
275    }
276
277    fn expect_next(&mut self) -> Result<Token, PartialError> {
278        let Some(token) = self.lexer.next() else {
279            return Err(ErrorKind::UnexpectedEOF.into_partial_error(self.token.take()));
280        };
281
282        let token = token.try_into()?;
283
284        Ok(token)
285    }
286
287    /// Return an `Err` if the next token is not the expected.
288    fn expect_token(&mut self, token_type: TokenType) -> Result<(), PartialError> {
289        let Some(token) = self.lexer.next() else {
290            return Err(ErrorKind::UnexpectedEOF.into_partial_error(self.token.take()));
291        };
292
293        let token: Token = token.try_into()?;
294
295        if token.kind == token_type {
296            Ok(())
297        } else {
298            Err(unexpected_token(token))
299        }
300    }
301
302    fn next_event(&mut self) -> Result<Option<Event<'buf>>, Error> {
303        if self.complete {
304            return Ok(None);
305        }
306
307        let head = self.stack.pop_head();
308
309        match head {
310            None => {
311                let token = self.expect_next().with_root_path()?;
312
313                trace!(?token);
314                self.comma_checks(&token).with_root_path()?;
315
316                match token.kind {
317                    TokenType::CurlyOpen => {
318                        let parent_path = self.path_pool.root();
319                        self.stack.push_new_object(
320                            self.next_elem_id(),
321                            Arc::clone(&parent_path),
322                            &token,
323                        );
324                        Ok(Some(Event::Open {
325                            kind: ObjectKind::Object,
326                            parent_path,
327                        }))
328                    }
329                    TokenType::BracketOpen => {
330                        let parent_path = self.path_pool.root();
331                        self.stack.push_new_array(
332                            self.next_elem_id(),
333                            Arc::clone(&parent_path),
334                            &token,
335                        );
336                        Ok(Some(Event::Open {
337                            kind: ObjectKind::Array,
338                            parent_path,
339                        }))
340                    }
341                    TokenType::Number => {
342                        let value = Value::Number(token_str(self.json, &token).with_root_path()?);
343                        self.exit_with_value(token, value).with_root_path()
344                    }
345                    TokenType::Null => self.exit_with_value(token, Value::Null).with_root_path(),
346                    TokenType::String => {
347                        let value =
348                            Value::String(token_str_as_string(self.json, token).with_root_path()?);
349                        self.exit_with_value(token, value).with_root_path()
350                    }
351                    TokenType::BooleanTrue => {
352                        self.exit_with_value(token, Value::True).with_root_path()
353                    }
354                    TokenType::BooleanFalse => {
355                        self.exit_with_value(token, Value::False).with_root_path()
356                    }
357                    _ => Err(unexpected_token(token).with_root_path()),
358                }
359            }
360            Some(mut head) => {
361                let token = self.expect_next().with_head(&head)?;
362
363                trace!(?token, head = ?head.elem_type);
364                let token = if self.comma_checks(&token).with_head(&head)? {
365                    self.expect_next().with_head(&head)?
366                } else {
367                    token
368                };
369
370                let (value, token, path) = match head.elem_type {
371                    ObjectKind::Object => {
372                        let key = match token.kind {
373                            TokenType::String => {
374                                token_str_as_string(self.json, token).with_head(&head)?
375                            }
376                            TokenType::CurlyClose => {
377                                let event = self.close_element(head, &token)?;
378                                return Ok(event);
379                            }
380                            _ => return Err(unexpected_token(token).with_root_path()),
381                        };
382
383                        self.expect_token(TokenType::Colon).with_head(&head)?;
384                        let token = self.expect_next().with_head(&head)?;
385
386                        let value = match token.kind {
387                            TokenType::CurlyOpen => {
388                                let parent_path = head.parent_is_object(&mut self.path_pool, key);
389                                self.stack.push_head(head);
390                                self.stack.push_new_object(
391                                    self.next_elem_id(),
392                                    Arc::clone(&parent_path),
393                                    &token,
394                                );
395                                return Ok(Some(Event::Open {
396                                    kind: ObjectKind::Object,
397                                    parent_path,
398                                }));
399                            }
400                            TokenType::BracketOpen => {
401                                let parent_path = head.parent_is_object(&mut self.path_pool, key);
402                                self.stack.push_head(head);
403                                self.stack.push_new_array(
404                                    self.next_elem_id(),
405                                    Arc::clone(&parent_path),
406                                    &token,
407                                );
408                                return Ok(Some(Event::Open {
409                                    kind: ObjectKind::Array,
410                                    parent_path,
411                                }));
412                            }
413                            TokenType::CurlyClose => {
414                                let event = self.close_element(head, &token)?;
415                                return Ok(event);
416                            }
417                            TokenType::String => Value::String(
418                                token_str_as_string(self.json, token).with_head(&head)?,
419                            ),
420                            TokenType::Number => {
421                                Value::Number(token_str(self.json, &token).with_head(&head)?)
422                            }
423                            TokenType::Null => Value::Null,
424                            TokenType::BooleanTrue => Value::True,
425                            TokenType::BooleanFalse => Value::False,
426                            _ => return Err(unexpected_token(token).with_head(&head)),
427                        };
428
429                        (
430                            value,
431                            token,
432                            head.parent_is_object(&mut self.path_pool, key),
433                        )
434                    }
435                    ObjectKind::Array => {
436                        let value = match token.kind {
437                            TokenType::CurlyOpen => {
438                                let parent_path = head.parent_is_array(&mut self.path_pool);
439                                self.stack.push_head(head);
440                                self.stack.push_new_object(
441                                    self.next_elem_id(),
442                                    Arc::clone(&parent_path),
443                                    &token,
444                                );
445                                return Ok(Some(Event::Open {
446                                    kind: ObjectKind::Object,
447                                    parent_path,
448                                }));
449                            }
450                            TokenType::BracketOpen => {
451                                let parent_path = head.parent_is_array(&mut self.path_pool);
452                                self.stack.push_head(head);
453                                self.stack.push_new_array(
454                                    self.next_elem_id(),
455                                    Arc::clone(&parent_path),
456                                    &token,
457                                );
458                                return Ok(Some(Event::Open {
459                                    kind: ObjectKind::Array,
460                                    parent_path,
461                                }));
462                            }
463                            TokenType::BracketClose => {
464                                let event = self.close_element(head, &token)?;
465                                return Ok(event);
466                            }
467
468                            TokenType::String => Value::String(
469                                token_str_as_string(self.json, token).with_head(&head)?,
470                            ),
471                            TokenType::Number => {
472                                Value::Number(token_str(self.json, &token).with_head(&head)?)
473                            }
474                            TokenType::Null => Value::Null,
475                            TokenType::BooleanTrue => Value::True,
476                            TokenType::BooleanFalse => Value::False,
477                            _ => return Err(unexpected_token(token).with_head(&head)),
478                        };
479                        (value, token, head.parent_is_array(&mut self.path_pool))
480                    }
481                };
482
483                let event = Event::Element {
484                    kind: value.kind(),
485                    parent_path: Arc::clone(&path),
486                };
487                head.push_field(self.next_elem_id(), path, value, &token);
488
489                let peek_token = self.peek(&token).with_head(&head)?;
490
491                if !(peek_token.is_comma() || peek_token.is_closing()) {
492                    return Err(unexpected_token(peek_token).with_head(&head));
493                }
494
495                self.token.replace(token);
496                self.stack.push_head(head);
497
498                Ok(Some(event))
499            }
500        }
501    }
502
503    /// Close a [`PartialElement`] which creates an [`Element`] and returns an [`Event`]
504    fn close_element(
505        &mut self,
506        head: PartialElement<'buf>,
507        token: &Token,
508    ) -> Result<Option<Event<'buf>>, Error> {
509        let event = self.stack.head_into_element(head, token);
510
511        match event {
512            Pop::Element { kind, parent_path } => Ok(Some(Event::Element { kind, parent_path })),
513            Pop::Complete(element) => {
514                if let Some(token) = self.lexer.next() {
515                    let token = token.try_into().with_root_path()?;
516                    return Err(unexpected_token(token).with_root_path());
517                }
518
519                Ok(Some(Event::Complete(element)))
520            }
521        }
522    }
523
524    fn exit_with_value(
525        &mut self,
526        token: Token,
527        value: Value<'buf>,
528    ) -> Result<Option<Event<'buf>>, PartialError> {
529        self.complete = true;
530        let span = element_span(&token, 0);
531        let elem = Element::new(self.next_elem_id(), Arc::new(PathNode::Root), span, value);
532
533        if let Some(token) = self.lexer.next() {
534            let token = token.try_into()?;
535            return Err(unexpected_token(token));
536        }
537
538        Ok(Some(Event::Complete(elem)))
539    }
540
541    fn peek(&mut self, token: &Token) -> Result<Token, PartialError> {
542        let Some(peek_token) = self.lexer.peek() else {
543            return Err(ErrorKind::UnexpectedEOF.into_partial_error(Some(*token)));
544        };
545
546        let peek_token = peek_token.try_into()?;
547        Ok(peek_token)
548    }
549
550    /// Perform comma position checks
551    ///
552    /// Return `Err(unexpected)` if a trailing or rogue comma is found.
553    fn comma_checks(&mut self, token: &Token) -> Result<bool, PartialError> {
554        trace!(?token, "comma_checks");
555        let is_comma = token.is_comma();
556
557        if is_comma {
558            let peek_token = self.peek(token)?;
559
560            // A comma can only be followed by an opening brace or a value.
561            if peek_token.is_closing() {
562                return Err(unexpected_token(*token));
563            }
564
565            if peek_token.is_comma() {
566                return Err(unexpected_token(peek_token));
567            }
568        } else if token.is_opening() {
569            let peek_token = self.peek(token)?;
570
571            // An opening brace should not be followed by a comma.
572            if peek_token.is_comma() {
573                return Err(unexpected_token(peek_token));
574            }
575        }
576
577        Ok(is_comma)
578    }
579}
580
/// Create a [`PartialError`] with [`ErrorKind::UnexpectedToken`] that carries the offending
/// `token`.
#[track_caller]
fn unexpected_token(token: Token) -> PartialError {
    ErrorKind::UnexpectedToken.into_partial_error(Some(token))
}
586
587impl<'buf> Iterator for Parser<'buf> {
588    type Item = Result<Event<'buf>, Error>;
589
590    fn next(&mut self) -> Option<Self::Item> {
591        match self.next_event() {
592            Ok(event) => event.map(Ok),
593            Err(err) => {
594                self.complete = true;
595                Some(Err(err))
596            }
597        }
598    }
599}
600
/// A partial `Element` that we descend into to parse its child `Element`s.
#[derive(Debug)]
struct PartialElement<'buf> {
    /// The Id of the [`Element`] to be created.
    elem_id: ElemId,

    /// The type of [`Element`].
    elem_type: ObjectKind,

    /// The child [`Element`]s.
    ///
    /// This is filled as we parse the current JSON [`Element`].
    elements: Vec<Element<'buf>>,

    /// The path up to the [`Element`].
    path: PathNodeRef<'buf>,

    /// The index of the [`Element`]'s first byte.
    span_start: usize,
}
621
622impl<'buf> PartialElement<'buf> {
623    fn parent_is_object(
624        &self,
625        path_pool: &mut PathPool<'buf>,
626        key: RawStr<'buf>,
627    ) -> PathNodeRef<'buf> {
628        path_pool.object(Arc::clone(&self.path), key)
629    }
630
631    fn parent_is_array(&self, path_pool: &mut PathPool<'buf>) -> PathNodeRef<'buf> {
632        path_pool.array(Arc::clone(&self.path), self.elements.len())
633    }
634
635    fn push_field(
636        &mut self,
637        elem_id: ElemId,
638        path: PathNodeRef<'buf>,
639        value: Value<'buf>,
640        token: &Token,
641    ) {
642        let span = element_span(token, token.span.start);
643        let elem = Element::new(elem_id, path, span, value);
644        self.elements.push(elem);
645    }
646
647    /// Resolve the `PartialElement` to an `Element`.
648    fn into_element(self, token: &Token) -> Element<'buf> {
649        let span = element_span(token, self.span_start);
650
651        let PartialElement {
652            elem_type,
653            span_start: _,
654            elements,
655            path,
656            elem_id,
657        } = self;
658
659        let value = match elem_type {
660            ObjectKind::Object => {
661                let fields = elements.into_iter().map(Field).collect();
662                Value::Object(fields)
663            }
664            ObjectKind::Array => Value::Array(elements),
665        };
666
667        Element::new(elem_id, path, span, value)
668    }
669}
670
/// A pool of pre-allocated path nodes.
///
/// `Path`s are added and never removed.
struct PathPool<'buf> {
    /// The index of the most recently filled slot in `items`; `0` is the slot handed out as the
    /// root.
    index: usize,
    /// The pre-allocated slots, filled front to back as paths are added.
    items: Vec<PathNodeRef<'buf>>,
}
676
677impl Default for PathPool<'_> {
678    fn default() -> Self {
679        Self::with_capacity(1000)
680    }
681}
682
683impl<'buf> PathPool<'buf> {
684    fn with_capacity(capacity: usize) -> Self {
685        let capacity = capacity.max(1);
686        let mut items = Vec::with_capacity(capacity);
687        items.resize_with(capacity, Default::default);
688
689        Self { index: 0, items }
690    }
691
692    #[expect(
693        clippy::indexing_slicing,
694        reason = "The root Path is added in the constructor and the capacity is always at least 1"
695    )]
696    fn root(&self) -> PathNodeRef<'buf> {
697        Arc::clone(&self.items[0])
698    }
699
700    /// Add a new `Path::Array` with the given index.
701    fn array(&mut self, parent: PathNodeRef<'buf>, index: usize) -> PathNodeRef<'buf> {
702        self.push(PathNode::Array { parent, index })
703    }
704
705    /// Add a new `Path::Object` with the given index.
706    fn object(&mut self, parent: PathNodeRef<'buf>, key: RawStr<'buf>) -> PathNodeRef<'buf> {
707        self.push(PathNode::Object { parent, key })
708    }
709
710    #[expect(clippy::indexing_slicing, reason = "Paths are only added")]
711    fn push(&mut self, new_path: PathNode<'buf>) -> PathNodeRef<'buf> {
712        const GROWTH_FACTOR: usize = 2;
713
714        let Self { index, items } = self;
715        let next_index = *index + 1;
716
717        if next_index >= items.len() {
718            items.reserve(items.len() * GROWTH_FACTOR);
719            items.resize_with(items.capacity(), Default::default);
720        }
721
722        let path = &mut items[next_index];
723        debug_assert_eq!(Arc::strong_count(path), 1, "Paths are only added");
724        let path = Arc::get_mut(path).expect("Paths are only added");
725        *path = new_path;
726
727        let path = Arc::clone(&items[next_index]);
728        *index = next_index;
729        path
730    }
731}
732
/// The `Span` defines the range of bytes that delimits a JSON `Element`.
#[derive(Copy, Clone, Debug, Default, Eq, PartialEq, Ord, PartialOrd)]
pub struct Span {
    /// Index of the first byte
    pub start: usize,

    /// Index one past the last byte
    pub end: usize,
}
742
743impl TryFrom<&json_tools::Span> for Span {
744    type Error = TryFromIntError;
745
746    fn try_from(span: &json_tools::Span) -> Result<Self, Self::Error> {
747        let json_tools::Span { first, end } = span;
748        let start = usize::try_from(*first)?;
749        let end = usize::try_from(*end)?;
750        Ok(Span { start, end })
751    }
752}
753
/// The stack of containers that are currently open; the last entry is the head, the container
/// being filled.
struct Stack<'buf>(Vec<PartialElement<'buf>>);
755
/// The outcome of converting the head `PartialElement` into an [`Element`].
enum Pop<'buf> {
    /// An [`Element`] has been created and added to its parent [`Element`].
    Element {
        /// The kind of JSON value the created [`Element`] is.
        kind: ValueKind,
        /// The path node obtained from the created [`Element`].
        parent_path: PathNodeRef<'buf>,
    },

    /// The parse has completed creating the tree of [`Element`]s.
    Complete(Element<'buf>),
}
766
767impl<'buf> Stack<'buf> {
768    fn new() -> Self {
769        Self(vec![])
770    }
771
772    /// The head `PartialElement` is popped off the stack temporarily to avoid lifetime issues if the
773    /// stack `Vec` contains it.
774    fn pop_head(&mut self) -> Option<PartialElement<'buf>> {
775        self.0.pop()
776    }
777
778    /// The head `PartialElement` is popped off the stack temporarily to avoid lifetime issues if the
779    /// stack `Vec` contains it.
780    fn push_head(&mut self, head: PartialElement<'buf>) {
781        self.0.push(head);
782    }
783
784    /// Convert the head `PartialElement` into an `Element` using the parent to form the path.
785    fn head_into_element(&mut self, head: PartialElement<'buf>, token: &Token) -> Pop<'buf> {
786        let elem = head.into_element(token);
787
788        if let Some(parent) = self.0.last_mut() {
789            let event = Pop::Element {
790                kind: elem.value.kind(),
791                parent_path: elem.path_node(),
792            };
793            parent.elements.push(elem);
794            event
795        } else {
796            Pop::Complete(elem)
797        }
798    }
799
800    fn push_new_object(&mut self, elem_id: ElemId, parent_path: PathNodeRef<'buf>, token: &Token) {
801        self.push_new_elem(elem_id, parent_path, token, ObjectKind::Object);
802    }
803
804    fn push_new_array(&mut self, elem_id: ElemId, parent_path: PathNodeRef<'buf>, token: &Token) {
805        self.push_new_elem(elem_id, parent_path, token, ObjectKind::Array);
806    }
807
808    fn push_new_elem(
809        &mut self,
810        elem_id: ElemId,
811        parent_path: PathNodeRef<'buf>,
812        token: &Token,
813        elem_type: ObjectKind,
814    ) {
815        let partial = PartialElement {
816            elements: vec![],
817            elem_type,
818            path: parent_path,
819            span_start: token.span.start,
820            elem_id,
821        };
822        self.0.push(partial);
823    }
824}
825
/// A parsing Error that keeps track of the token being parsed when the Error occurred and
/// the slice of JSON surrounding the Error location.
///
/// The details are stored behind a `Box`.
pub struct Error(Box<ErrorImpl>);
829
830impl fmt::Debug for Error {
831    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
832        fmt::Debug::fmt(&self.0, f)
833    }
834}
835
836impl From<ErrorImpl> for Error {
837    fn from(err: ErrorImpl) -> Self {
838        Self(err.into())
839    }
840}
841
/// The details of an [`Error`].
struct ErrorImpl {
    /// The kind of error that occurred.
    kind: ErrorKind,

    /// The location the [`Error`] happened in the source code.
    loc: &'static std::panic::Location<'static>,

    /// The path to the [`Element`] the error occurred in.
    path: Path,

    /// The span of the JSON string the error occurred in.
    span: Span,

    /// The token being parsed at the time of the [`Error`].
    token: Option<Token>,
}
858
859impl fmt::Debug for ErrorImpl {
860    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
861        f.debug_struct("Error")
862            .field("kind", &self.kind)
863            .field("loc", &self.loc)
864            .field("path", &self.path)
865            .field("span", &self.span)
866            .field("token", &self.token)
867            .finish()
868    }
869}
870
871impl Error {
872    /// The kind of error that occurred.
873    pub fn kind(&self) -> &ErrorKind {
874        &self.0.kind
875    }
876
877    /// The path to the element the error occurred in.
878    pub fn path(&self) -> &Path {
879        &self.0.path
880    }
881
882    /// The span of the [`Element`] the error occurred in.
883    pub fn span(&self) -> Span {
884        self.0.span
885    }
886
887    pub fn token(&self) -> Option<&Token> {
888        self.0.token.as_ref()
889    }
890
891    /// Break the Error into it's constituent parts.
892    pub fn into_parts(self) -> (ErrorKind, Path, Span) {
893        let ErrorImpl {
894            kind,
895            loc: _,
896            path,
897            span,
898            token: _,
899        } = *self.0;
900        (kind, path, span)
901    }
902
903    /// Convert the Error into a more comprehensive report using the source JSON to provide
904    /// human readable context.
905    pub fn into_report(self, json: &str) -> ErrorReport<'_> {
906        ErrorReport::from_error(self, json)
907    }
908}
909
/// A more comprehensive report on the [`Error`] using the source JSON `&str` to provide
/// human readable context.
///
/// The string slices borrow from the JSON passed to [`Error::into_report`].
#[derive(Debug)]
pub struct ErrorReport<'buf> {
    /// The [`Error`] that occurred.
    error: Error,

    /// The slice of JSON as defined by the `Error::span`.
    json_context: &'buf str,

    /// The slice of JSON as defined by the `Error::span` and expanded out to the
    /// start and end of the line.
    expanded_json_context: &'buf str,

    /// The line and col indices of the start and end of the span.
    span_bounds: SpanBounds,
}
927
928impl<'buf> ErrorReport<'buf> {
929    /// Create the `ErrorReport` from the `Error` and source `&str`.
930    fn from_error(error: Error, json: &'buf str) -> Self {
931        let span = error.span();
932        debug!(?error, ?span, json, "from_error");
933        let json_context = &json.get(span.start..span.end).unwrap_or(json);
934
935        let start = {
936            let s = &json.get(0..span.start).unwrap_or_default();
937            line_col(s)
938        };
939        let end = {
940            let relative_end = line_col(json_context);
941            let line = start.line + relative_end.line;
942
943            if start.line == line {
944                LineCol {
945                    line,
946                    col: start.col + relative_end.col,
947                }
948            } else {
949                LineCol {
950                    line,
951                    col: relative_end.col,
952                }
953            }
954        };
955        let (prev, next) = find_expanded_newlines(json, span.start);
956        let expanded_json_context = &json.get(prev..next).unwrap_or(json_context);
957
958        let span_bounds = SpanBounds { start, end };
959
960        Self {
961            error,
962            json_context,
963            expanded_json_context,
964            span_bounds,
965        }
966    }
967
968    /// Return the slice of JSON as defined by the `Error::span`.
969    pub fn json_context(&self) -> &'buf str {
970        self.json_context
971    }
972
973    /// Return the slice of JSON as defined by the `Error::span` and expanded out to the
974    /// start and end of the line.
975    pub fn expand_json_context(&self) -> &'buf str {
976        self.expanded_json_context
977    }
978
979    /// Return the line and col number of each end of the span
980    pub fn span_bounds(&self) -> &SpanBounds {
981        &self.span_bounds
982    }
983
984    /// Discard the `Report` and take ownership of the `Error`.
985    pub fn into_error(self) -> Error {
986        self.error
987    }
988}
989
/// Return the byte range of the line that contains `byte_index`.
///
/// The first index is the start of the line: just after the closest preceding `\n`, or `0`
/// when there is none. The second index is the end of the line: the position of the next
/// `\n` at or after `byte_index`, or `json.len()` when there is none.
///
/// A `byte_index` beyond the end of `json` is clamped to `json.len()`. Both returned
/// indices are always valid `char` boundaries, because they are either `0`, `json.len()`,
/// or adjacent to an ASCII `\n`.
fn find_expanded_newlines(json: &str, byte_index: usize) -> (usize, usize) {
    // Search the raw bytes: `\n` is ASCII and never occurs inside a multi-byte sequence,
    // so this also works when `byte_index` is not on a `char` boundary.
    let bytes = json.as_bytes();
    let index = byte_index.min(bytes.len());
    let (pre, post) = bytes.split_at(index);

    let prev = pre
        .iter()
        .rposition(|b| *b == b'\n')
        .map_or(0, |idx| idx + 1);

    let next = post
        .iter()
        .position(|b| *b == b'\n')
        .map_or(bytes.len(), |idx| index + idx);

    (prev, next)
}
1005
/// The line and col indices of the start and end of a `Span`.
#[derive(Clone, Debug)]
pub struct SpanBounds {
    /// The start of the `Span` expressed as line and column index.
    pub start: LineCol,

    /// The end of the `Span` expressed as line and column index.
    pub end: LineCol,
}
1015
/// A file location expressed as line and column.
///
/// Both indices are zero based.
#[derive(Clone, Debug)]
pub struct LineCol {
    /// The line index is 0 based.
    pub line: u32,

    /// The col index is 0 based.
    pub col: u32,
}
1025
1026impl From<(u32, u32)> for LineCol {
1027    fn from(value: (u32, u32)) -> Self {
1028        Self {
1029            line: value.0,
1030            col: value.1,
1031        }
1032    }
1033}
1034
1035impl From<LineCol> for (u32, u32) {
1036    fn from(value: LineCol) -> Self {
1037        (value.line, value.col)
1038    }
1039}
1040
1041impl PartialEq<(u32, u32)> for LineCol {
1042    fn eq(&self, other: &(u32, u32)) -> bool {
1043        self.line == other.0 && self.col == other.1
1044    }
1045}
1046
1047impl fmt::Display for LineCol {
1048    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1049        write!(f, "{}:{}", self.line, self.col)
1050    }
1051}
1052
1053/// Return the line and column indices of the end of the slice.
1054///
1055/// The line and column indices are zero based.
1056pub fn line_col(s: &str) -> LineCol {
1057    let mut chars = s.chars().rev();
1058    let mut line = 0;
1059    let mut col = 0;
1060
1061    // The col only needs to be calculated on the final line so we iterate from the last char
1062    // back to the start of the line and then only continue to count the lines after that.
1063    //
1064    // This is less work than continuously counting chars from the front of the slice.
1065    for c in chars.by_ref() {
1066        // If the `&str` is multiline, we count the line and stop accumulating the col count too.
1067        if c == '\n' {
1068            line += 1;
1069            break;
1070        }
1071        col += 1;
1072    }
1073
1074    // The col is now known, continue to the start of the str counting newlines as we go.
1075    for c in chars {
1076        if c == '\n' {
1077            line += 1;
1078        }
1079    }
1080
1081    LineCol { line, col }
1082}
1083
/// An error that has yet to be resolved with a [`Span`].
#[derive(Debug)]
pub struct PartialError {
    /// The kind of error that occurred.
    kind: ErrorKind,

    /// The location in the Rust source code where the [`PartialError`] was created.
    loc: &'static std::panic::Location<'static>,

    /// The token being parsed at the time of the [`PartialError`], if there was one.
    token: Option<Token>,
}
1096
/// Convert a [`PartialError`] into an [`Error`] by providing a [`PartialElement`].
///
/// Implemented for `Result<T, PartialError>` so the conversion can be chained onto a
/// fallible call.
trait PartialIntoError<T> {
    /// Convert a [`PartialError`] into an [`Error`] with a path based on the given [`PartialElement`].
    fn with_head(self, head: &PartialElement<'_>) -> Result<T, Error>;

    /// Converts a [`PartialError`] into an [`Error`] with a root path.
    ///
    /// This can be used if the path is unknown or the [`Error`] occurred at the root.
    fn with_root_path(self) -> Result<T, Error>;
}
1107
1108impl<T> PartialIntoError<T> for Result<T, PartialError> {
1109    fn with_head(self, head: &PartialElement<'_>) -> Result<T, Error> {
1110        match self {
1111            Ok(v) => Ok(v),
1112            Err(err) => Err(err.with_head(head)),
1113        }
1114    }
1115
1116    fn with_root_path(self) -> Result<T, Error> {
1117        match self {
1118            Ok(v) => Ok(v),
1119            Err(err) => Err(err.with_root_path()),
1120        }
1121    }
1122}
1123
1124impl PartialError {
1125    /// Convert a [`PartialError`] into an [`Error`] with a path based on the given [`PartialElement`].
1126    fn with_head(self, parent: &PartialElement<'_>) -> Error {
1127        let Self { loc, kind, token } = self;
1128        let span_end = token.map(|t| t.span.end).unwrap_or_default();
1129
1130        let (path, span) = if let Some(elem) = parent.elements.last() {
1131            (
1132                Path::from_node(Arc::clone(&elem.path_node)),
1133                Span {
1134                    start: elem.span.start,
1135                    end: span_end,
1136                },
1137            )
1138        } else {
1139            (
1140                Path::from_node(Arc::clone(&parent.path)),
1141                Span {
1142                    start: parent.span_start,
1143                    end: span_end,
1144                },
1145            )
1146        };
1147
1148        ErrorImpl {
1149            kind,
1150            loc,
1151            path,
1152            span,
1153            token,
1154        }
1155        .into()
1156    }
1157
1158    /// Converts a `PartialError` into an `Error` with a root path.
1159    ///
1160    /// This can be used If the path is unknown or the `Error` occurred at the root.
1161    pub fn with_root_path(self) -> Error {
1162        let Self { loc, kind, token } = self;
1163        let (span_start, span_end) = match (&kind, token) {
1164            (ErrorKind::UnexpectedToken, Some(t)) => (t.span.start, t.span.end),
1165            (_, Some(t)) => (0, t.span.end),
1166            (_, None) => (0, 0),
1167        };
1168        ErrorImpl {
1169            loc,
1170            kind,
1171            path: Path::root(),
1172            span: Span {
1173                start: span_start,
1174                end: span_end,
1175            },
1176            token,
1177        }
1178        .into()
1179    }
1180}
1181
/// The kinds of error that can occur while parsing JSON.
#[derive(Debug)]
pub enum ErrorKind {
    /// An internal programming error.
    ///
    /// The payload is a boxed, type-erased error.
    Internal(Box<dyn std::error::Error + Send + Sync + 'static>),

    /// The `Lexer` had no more tokens when more were expected.
    UnexpectedEOF,

    /// An unexpected token was emitted by the `Lexer`.
    UnexpectedToken,
}
1194
1195impl ErrorKind {
1196    #[track_caller]
1197    fn into_partial_error(self, token: Option<Token>) -> PartialError {
1198        PartialError {
1199            kind: self,
1200            loc: std::panic::Location::caller(),
1201            token,
1202        }
1203    }
1204
1205    #[track_caller]
1206    pub fn into_partial_error_without_token(self) -> PartialError {
1207        PartialError {
1208            kind: self,
1209            loc: std::panic::Location::caller(),
1210            token: None,
1211        }
1212    }
1213}
1214
// No trait methods are overridden; the `Debug` and `Display` impls supply the formatting.
impl std::error::Error for Error {}
1216
1217impl fmt::Display for Error {
1218    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1219        let ErrorImpl {
1220            kind,
1221            loc,
1222            path,
1223            span: _,
1224            token,
1225        } = &*self.0;
1226
1227        write!(
1228            f,
1229            "Error: code location: {loc}; while parsing element at `{path}`"
1230        )?;
1231
1232        if let Some(token) = token {
1233            write!(f, " token: `{}`", token.kind)?;
1234        }
1235
1236        match kind {
1237            ErrorKind::Internal(err) => write!(f, "Internal: {err}"),
1238            ErrorKind::UnexpectedEOF => f.write_str("Unexpected EOF"),
1239            ErrorKind::UnexpectedToken => write!(f, "unexpected token"),
1240        }
1241    }
1242}
1243
/// A programming error resulting from faulty logic.
///
/// This should not be exposed on the public API.
#[derive(Debug)]
enum InternalError {
    /// Slicing into the JSON buf failed.
    ///
    /// This should not happen during parsing, as the parsing and `Span` calculations are all
    /// contained within the same callstack of functions.
    ///
    /// This can only happen if there's a mistake in the `Span` offset/range calculations.
    BufferSlice(Span),

    /// The type of `Buffer` is invalid.
    ///
    /// The `json_tools::Lexer::next` is called in a few places and the `json_tools::Token` it
    /// emits is converted into a local `Token` with only a `Span` based buffer to avoid checking
    /// the buffer type each time it's used.
    ///
    /// The lexer is configured to only use a `Span` based buffer so the only way this error can
    /// occur is if the code is changed so that the lexer uses a `String` based buffer.
    BufferType,

    /// The `json_tools::Span` uses `u64` for the `start` and `end` indices which would involve
    /// conversion to `usize` each time they are used. To avoid this the `json_tools::Span` is
    /// converted to the locally defined `Span` that uses `usize` based fields.
    ///
    /// This conversion can fail if the binary is built for architectures other than `64` bit pointer width.
    FromInt(TryFromIntError),

    /// A String was parsed without surrounding double quotes.
    ///
    /// This is only possible if the `json_tools` crate changes the implementation details of
    /// how they parse JSON strings.
    StringWithoutQuotes,

    /// A `RawStr` was made using a token that is not a `String`.
    ///
    /// `RawStr`s are only creatable from inside the crate so the only way this can occur is
    /// through a programming error.
    RawStringFromInvalidToken,
}
1286
1287impl InternalError {
1288    #[track_caller]
1289    fn into_partial_error(self, token: Option<Token>) -> PartialError {
1290        ErrorKind::Internal(Box::new(self)).into_partial_error(token)
1291    }
1292}
1293
// Required so an `InternalError` can be boxed into `ErrorKind::Internal`; no trait methods
// are overridden.
impl std::error::Error for InternalError {}
1295
1296/// The `json_tools::Span` uses `u64` for the `start` and `end` indices which would involve
1297/// conversion to `usize` each time they are used. To avoid this the `json_tools::Span` is
1298/// converted to the locally defined `Span` that uses `usize` based fields.
1299///
1300/// This conversion can fail if the binary is built for architectures other than `64` bit pointer width.
1301impl From<TryFromIntError> for InternalError {
1302    fn from(err: TryFromIntError) -> Self {
1303        InternalError::FromInt(err)
1304    }
1305}
1306
1307impl From<InternalError> for Error {
1308    #[track_caller]
1309    fn from(err: InternalError) -> Self {
1310        ErrorImpl {
1311            kind: ErrorKind::Internal(Box::new(err)),
1312            loc: std::panic::Location::caller(),
1313            path: Path::root(),
1314            span: Span { start: 0, end: 0 },
1315            token: None,
1316        }
1317        .into()
1318    }
1319}
1320
1321impl fmt::Display for InternalError {
1322    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1323        match self {
1324            InternalError::BufferSlice(span) => {
1325                write!(f, "Slice into buffer failed; span: {span:?}")
1326            }
1327            InternalError::BufferType => write!(f, "The tokens buffer is not a `Span`"),
1328            InternalError::FromInt(err) => write!(f, "{err}"),
1329            InternalError::StringWithoutQuotes => {
1330                write!(f, "A String was parsed without surrounding double quotes.")
1331            }
1332
1333            InternalError::RawStringFromInvalidToken => {
1334                write!(
1335                    f,
1336                    "A `RawString` was created using a `Token` that's not a `String`"
1337                )
1338            }
1339        }
1340    }
1341}
1342
/// Convert a `Result` holding an [`InternalError`] into one holding a [`PartialError`].
trait InternalErrorIntoPartial<T> {
    /// Convert the error, if there is one, into a [`PartialError`].
    ///
    /// `f` supplies the [`Token`] to attach to the error.
    #[track_caller]
    fn into_partial_error<F>(self, f: F) -> Result<T, PartialError>
    where
        F: FnOnce() -> Token;
}
1349
1350impl<T> InternalErrorIntoPartial<T> for Result<T, InternalError> {
1351    fn into_partial_error<F>(self, f: F) -> Result<T, PartialError>
1352    where
1353        F: FnOnce() -> Token,
1354    {
1355        match self {
1356            Ok(v) => Ok(v),
1357            Err(err) => {
1358                let token = f();
1359                Err(err.into_partial_error(Some(token)))
1360            }
1361        }
1362    }
1363}
1364
1365/// Create the `Span` of an `Element` given the start and the closing token.
1366fn element_span(token_end: &Token, start: usize) -> Span {
1367    Span {
1368        start,
1369        end: token_end.span.end,
1370    }
1371}
1372
1373/// Return the content of the `Token` as a `&str`.
1374///
1375/// This in only useful for `Token`'s that contain variable data, such as `String`, `Number` etc.
1376#[track_caller]
1377fn token_str<'buf>(json: &'buf str, token: &Token) -> Result<&'buf str, PartialError> {
1378    let start = token.span.start;
1379    let end = token.span.end;
1380    let s = &json
1381        .get(start..end)
1382        .ok_or(InternalError::BufferSlice(Span { start, end }))
1383        .into_partial_error(|| *token)?;
1384    Ok(s)
1385}
1386
/// A `&str` with its surrounding quotes removed that has not been analyzed for escape codes.
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Default)]
pub struct RawStr<'buf> {
    /// An unescaped `&str` with surrounding quotes removed.
    source: &'buf str,

    /// The span of the `String` token that produced the source `&str`.
    span: Span,
}
1396
1397/// Impl `Borrow` so `RawStr` plays well with hashed collections.
1398impl Borrow<str> for RawStr<'_> {
1399    fn borrow(&self) -> &str {
1400        self.source
1401    }
1402}
1403
1404/// Impl `Borrow` so `RawStr` plays well with hashed collections.
1405impl Borrow<str> for &RawStr<'_> {
1406    fn borrow(&self) -> &str {
1407        self.source
1408    }
1409}
1410
1411impl<'buf> RawStr<'buf> {
1412    pub(super) fn from_str(source: &'buf str, span: Span) -> Self {
1413        Self { source, span }
1414    }
1415
1416    /// Create new `RawStr` from a string with surrounding quotes.
1417    #[track_caller]
1418    pub(super) fn from_quoted_str(
1419        s: &'buf str,
1420        token: Token,
1421    ) -> Result<RawStr<'buf>, PartialError> {
1422        const QUOTE: char = '"';
1423
1424        if token.kind != TokenType::String {
1425            return Err(InternalError::RawStringFromInvalidToken.into_partial_error(Some(token)));
1426        }
1427
1428        // remove double quotes
1429        let (_, s) = s
1430            .split_once(QUOTE)
1431            .ok_or(InternalError::StringWithoutQuotes)
1432            .into_partial_error(|| token)?;
1433
1434        let (source, _) = s
1435            .rsplit_once(QUOTE)
1436            .ok_or(InternalError::StringWithoutQuotes)
1437            .into_partial_error(|| token)?;
1438
1439        Ok(Self {
1440            source,
1441            span: token.span,
1442        })
1443    }
1444
1445    /// Return the raw unescaped `&str`.
1446    pub(crate) fn as_raw(&self) -> &'buf str {
1447        self.source
1448    }
1449
1450    /// Return the `&str` with all escapes decoded.
1451    pub(crate) fn decode_escapes(
1452        &self,
1453        elem: &Element<'buf>,
1454    ) -> Caveat<Cow<'_, str>, decode::WarningKind> {
1455        unescape_str(self.source, elem)
1456    }
1457
1458    /// Return a `&str` marked as either having escapes or not.
1459    pub(crate) fn has_escapes(
1460        &self,
1461        elem: &Element<'buf>,
1462    ) -> Caveat<decode::PendingStr<'_>, decode::WarningKind> {
1463        decode::analyze(self.source, elem)
1464    }
1465
1466    /// Return the [`Span`] of the [`Token`] that generated this string.
1467    pub fn span(&self) -> Span {
1468        self.span
1469    }
1470}
1471
1472impl fmt::Display for RawStr<'_> {
1473    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1474        fmt::Display::fmt(self.source, f)
1475    }
1476}
1477
1478#[track_caller]
1479fn token_str_as_string(json: &str, token: Token) -> Result<RawStr<'_>, PartialError> {
1480    let s = token_str(json, &token)?;
1481    let raw = RawStr::from_quoted_str(s, token)?;
1482    Ok(raw)
1483}
1484
#[cfg(test)]
mod test_raw_str {
    use assert_matches::assert_matches;

    use crate::test;

    use super::{ErrorKind, InternalError, RawStr, Span, Token, TokenType};

    /// Test only shorthand: a `Token` of the given kind with a default `Span`.
    impl From<TokenType> for Token {
        fn from(kind: TokenType) -> Self {
            let span = Span::default();
            Self { kind, span }
        }
    }

    #[test]
    fn should_fail_to_create_raw_str_from_non_string_token() {
        test::setup();

        let token = Token::from(TokenType::Number);
        let partial = RawStr::from_quoted_str("fail", token).unwrap_err();
        let boxed = assert_matches!(partial.kind, ErrorKind::Internal(boxed) => boxed);
        let internal = boxed.downcast_ref::<InternalError>().unwrap();
        assert_matches!(internal, InternalError::RawStringFromInvalidToken);
    }

    #[test]
    fn should_fail_to_create_raw_str_from_string_without_quotes() {
        test::setup();

        let token = Token::from(TokenType::String);
        let partial = RawStr::from_quoted_str("fail", token).unwrap_err();
        let boxed = assert_matches!(partial.kind, ErrorKind::Internal(boxed) => boxed);
        let internal = boxed.downcast_ref::<InternalError>().unwrap();
        assert_matches!(internal, InternalError::StringWithoutQuotes);
    }
}
1522
#[cfg(test)]
mod test_line_col {
    use super::{line_col, LineCol};

    /// Assert that the end of `json` is at the given zero based line and column.
    fn assert_line_col(json: &str, expected_line: u32, expected_col: u32) {
        let LineCol { line, col } = line_col(json);
        assert_eq!(line, expected_line);
        assert_eq!(col, expected_col);
    }

    #[test]
    fn should_line_col_empty_str() {
        assert_line_col("", 0, 0);
    }

    #[test]
    fn should_line_col_one_line_one_char_str() {
        assert_line_col("1", 0, 1);
    }

    #[test]
    fn should_line_col_one_line_many_chars_str() {
        assert_line_col("1234", 0, 4);
    }

    #[test]
    fn should_line_col_two_line_one_col_str() {
        assert_line_col("1234\n1", 1, 1);
    }
}
1559
#[cfg(test)]
mod test_parser {
    //! Tests of the sequence of [`Event`]s the [`Parser`] emits.
    //!
    //! Each test pulls events one at a time and asserts the kind of the event and the path
    //! it carries.

    use assert_matches::assert_matches;

    use crate::{
        json::{PathNode, ValueKind},
        test,
    };

    use super::{Event, ObjectKind, Parser};

    #[test]
    fn should_emit_events_for_object_with_single_field() {
        const JSON: &str = r#"{
    "field_a": 404
}"#;

        test::setup();

        let mut parser = Parser::new(JSON);

        let path = assert_matches!(
            parser.next().unwrap().unwrap(),
            Event::Open { kind: ObjectKind::Object, parent_path } => parent_path
        );
        assert_matches!(*path, PathNode::Root);

        let path = assert_matches!(
            parser.next().unwrap().unwrap(),
            Event::Element { kind: ValueKind::Number, parent_path } => parent_path
        );
        assert_eq!(*path, "$.field_a");
    }

    #[test]
    fn should_emit_events_for_object_with_two_fields() {
        const JSON: &str = r#"{
    "field_a": 404,
    "field_b": "name"
}"#;

        test::setup();

        let mut parser = Parser::new(JSON);

        let path = assert_matches!(
            parser.next().unwrap().unwrap(),
            Event::Open { kind: ObjectKind::Object, parent_path } => parent_path
        );
        assert_matches!(*path, PathNode::Root);

        let path = assert_matches!(
            parser.next().unwrap().unwrap(),
            Event::Element { kind: ValueKind::Number, parent_path } => parent_path
        );
        assert_eq!(*path, "$.field_a");

        let path = assert_matches!(
            parser.next().unwrap().unwrap(),
            Event::Element { kind: ValueKind::String, parent_path } => parent_path
        );
        assert_eq!(*path, "$.field_b");
    }

    #[test]
    fn should_emit_events_for_object_with_nested_fields() {
        const JSON: &str = r#"{
    "field_a": 404,
    "field_b": {
        "field_c": "name"
    }
}"#;

        test::setup();

        let mut parser = Parser::new(JSON);

        let path = assert_matches!(
            parser.next().unwrap().unwrap(),
            Event::Open { kind: ObjectKind::Object, parent_path } => parent_path
        );
        assert_matches!(*path, PathNode::Root);

        let path = assert_matches!(
            parser.next().unwrap().unwrap(),
            Event::Element { kind: ValueKind::Number, parent_path } => parent_path
        );
        assert_eq!(*path, "$.field_a");

        let path = assert_matches!(
            parser.next().unwrap().unwrap(),
            Event::Open { kind: ObjectKind::Object, parent_path } => parent_path
        );
        assert_eq!(*path, "$.field_b");

        let path = assert_matches!(
            parser.next().unwrap().unwrap(),
            Event::Element { kind: ValueKind::String, parent_path } => parent_path
        );
        assert_eq!(*path, "$.field_b.field_c");
    }

    #[test]
    fn should_emit_events_for_array_with_single_field() {
        const JSON: &str = r#"["field_a"]"#;

        test::setup();

        let mut parser = Parser::new(JSON);

        let path = assert_matches!(
            parser.next().unwrap().unwrap(),
            Event::Open { kind: ObjectKind::Array, parent_path } => parent_path
        );
        assert_matches!(*path, PathNode::Root);

        let path = assert_matches!(
            parser.next().unwrap().unwrap(),
            Event::Element { kind: ValueKind::String, parent_path } => parent_path
        );
        assert_eq!(*path, "$.0");
    }

    // This test previously parsed an object (a copy of the object test above), so arrays
    // with more than one element were never exercised.
    //
    // NOTE(review): the `$.1` path is extrapolated from the `$.0` form asserted in the
    // single element array test; confirm against the parser.
    #[test]
    fn should_emit_events_for_array_with_two_fields() {
        const JSON: &str = r#"[
    404,
    "name"
]"#;

        test::setup();

        let mut parser = Parser::new(JSON);

        let path = assert_matches!(
            parser.next().unwrap().unwrap(),
            Event::Open { kind: ObjectKind::Array, parent_path } => parent_path
        );
        assert_matches!(*path, PathNode::Root);

        let path = assert_matches!(
            parser.next().unwrap().unwrap(),
            Event::Element { kind: ValueKind::Number, parent_path } => parent_path
        );
        assert_eq!(*path, "$.0");

        let path = assert_matches!(
            parser.next().unwrap().unwrap(),
            Event::Element { kind: ValueKind::String, parent_path } => parent_path
        );
        assert_eq!(*path, "$.1");
    }

    // This test previously parsed nested objects (a copy of the object test above), so
    // nested arrays were never exercised.
    //
    // NOTE(review): the `$.1` and `$.1.0` paths are extrapolated from the `$.0` form
    // asserted in the single element array test and from the nested object test, where the
    // `Open` event carries the path of the element being opened; confirm against the parser.
    #[test]
    fn should_emit_events_for_array_with_nested_fields() {
        const JSON: &str = r#"[
    404,
    [
        "name"
    ]
]"#;

        test::setup();

        let mut parser = Parser::new(JSON);

        let path = assert_matches!(
            parser.next().unwrap().unwrap(),
            Event::Open { kind: ObjectKind::Array, parent_path } => parent_path
        );
        assert_matches!(*path, PathNode::Root);

        let path = assert_matches!(
            parser.next().unwrap().unwrap(),
            Event::Element { kind: ValueKind::Number, parent_path } => parent_path
        );
        assert_eq!(*path, "$.0");

        let path = assert_matches!(
            parser.next().unwrap().unwrap(),
            Event::Open { kind: ObjectKind::Array, parent_path } => parent_path
        );
        assert_eq!(*path, "$.1");

        let path = assert_matches!(
            parser.next().unwrap().unwrap(),
            Event::Element { kind: ValueKind::String, parent_path } => parent_path
        );
        assert_eq!(*path, "$.1.0");
    }
}
1863
#[cfg(test)]
pub mod test {
    #![allow(clippy::string_slice, reason = "tests are allowed to panic")]

    use super::{Error, ErrorKind, Span};

    /// Return the part of `json` covered by `span`.
    ///
    /// Panics if the span cannot be sliced out of `json`.
    pub fn spanned_json(span: Span, json: &str) -> &str {
        let Span { start, end } = span;
        &json[start..end]
    }

    /// A compile time check: the body only type checks if both types are `Send + Sync`.
    #[test]
    const fn error_should_be_send_and_sync() {
        const fn assert_send_sync<T: Send + Sync>() {}

        assert_send_sync::<Error>();
        assert_send_sync::<ErrorKind>();
    }
}
1882
1883#[cfg(test)]
1884mod test_parser_basic_happy_structure {
1885    use assert_matches::assert_matches;
1886
1887    use crate::{json::Value, test};
1888
1889    use super::{parse, test::spanned_json, Element, PathNode};
1890
1891    #[test]
1892    fn should_parse_nested_object() {
1893        test::setup();
1894
1895        let json = r#"{ "field_a": "one", "field_b": { "field_ba": "two", "field_bb": "three" } }"#;
1896        let elem = parse(json).unwrap();
1897        let Element {
1898            path_node: path,
1899            value,
1900            span,
1901            id: _,
1902        } = elem;
1903
1904        assert_eq!(*path, PathNode::Root);
1905        assert_eq!(spanned_json(span, json), json);
1906
1907        let fields = assert_matches!(value, Value::Object(elems) => elems);
1908        let [field_a, field_b] = fields.try_into().unwrap();
1909
1910        {
1911            let (_id, path, span, value) = field_a.into_parts();
1912
1913            assert_eq!(*path, "$.field_a");
1914            assert_eq!(spanned_json(span, json), r#""one""#);
1915            let s = assert_matches!(value, Value::String(s) => s);
1916            assert_eq!(s.as_raw(), "one");
1917        }
1918
1919        {
1920            let (_id, path, span, value) = field_b.into_parts();
1921            assert_eq!(*path, "$.field_b");
1922            assert_eq!(
1923                spanned_json(span, json),
1924                r#"{ "field_ba": "two", "field_bb": "three" }"#
1925            );
1926
1927            let fields = assert_matches!(value, Value::Object(fields) => fields);
1928            let [field_b_a, field_b_b] = fields.try_into().unwrap();
1929
1930            {
1931                let (_id, path, span, value) = field_b_a.into_parts();
1932
1933                assert_eq!(spanned_json(span, json), r#""two""#);
1934                assert_eq!(*path, "$.field_b.field_ba");
1935                let s = assert_matches!(value, Value::String(s) => s);
1936                assert_eq!(s.as_raw(), "two");
1937            }
1938
1939            {
1940                let (_id, path, span, value) = field_b_b.into_parts();
1941
1942                assert_eq!(spanned_json(span, json), r#""three""#);
1943                assert_eq!(*path, "$.field_b.field_bb");
1944                let s = assert_matches!(value, Value::String(s) => s);
1945                assert_eq!(s.as_raw(), "three");
1946            }
1947        }
1948    }
1949
1950    #[test]
1951    fn should_parse_object_with_nested_array() {
1952        test::setup();
1953
1954        let json = r#"{ "field_a": "one", "field_b": [ "two", "three" ] }"#;
1955        let elem = parse(json).unwrap();
1956        let Element {
1957            path_node: path,
1958            value,
1959            span,
1960            id: _,
1961        } = elem;
1962
1963        assert_eq!(*path, PathNode::Root);
1964        assert_eq!(spanned_json(span, json), json);
1965
1966        let fields = assert_matches!(value, Value::Object(fields) => fields);
1967        let [field_a, field_b] = fields.try_into().unwrap();
1968
1969        {
1970            let (_id, path, span, value) = field_a.into_parts();
1971
1972            assert_eq!(spanned_json(span, json), r#""one""#);
1973            assert_eq!(*path, "$.field_a");
1974            let s = assert_matches!(value, Value::String(s) => s);
1975            assert_eq!(s.as_raw(), "one");
1976        }
1977
1978        {
1979            let (_id, path, span, value) = field_b.into_parts();
1980            assert_eq!(*path, "$.field_b");
1981            assert_eq!(spanned_json(span, json), r#"[ "two", "three" ]"#);
1982
1983            let elems = assert_matches!(value, Value::Array(elems) => elems);
1984            let [elem_b_a, elem_b_b] = elems.try_into().unwrap();
1985
1986            {
1987                let (_id, path, span, value) = elem_b_a.into_parts();
1988
1989                assert_eq!(spanned_json(span, json), r#""two""#);
1990                assert_eq!(*path, "$.field_b.0");
1991                let s = assert_matches!(value, Value::String(s) => s);
1992                assert_eq!(s.as_raw(), "two");
1993            }
1994
1995            {
1996                let (_id, path, span, value) = elem_b_b.into_parts();
1997
1998                assert_eq!(spanned_json(span, json), r#""three""#);
1999                assert_eq!(*path, "$.field_b.1");
2000                let s = assert_matches!(value, Value::String(s) => s);
2001                assert_eq!(s.as_raw(), "three");
2002            }
2003        }
2004    }
2005
2006    #[test]
2007    fn should_parse_nested_array() {
2008        test::setup();
2009
2010        let json = r#"[ "one", ["two", "three"] ]"#;
2011        let elem = parse(json).unwrap();
2012        let Element {
2013            path_node: path,
2014            value,
2015            span,
2016            id: _,
2017        } = elem;
2018
2019        assert_eq!(*path, PathNode::Root);
2020        assert_eq!(spanned_json(span, json), json);
2021
2022        let elems = assert_matches!(value, Value::Array(elems) => elems);
2023        let [elem_a, elem_b] = elems.try_into().unwrap();
2024
2025        {
2026            let Element {
2027                path_node: path,
2028                value,
2029                span,
2030                id: _,
2031            } = elem_a;
2032
2033            assert_eq!(spanned_json(span, json), r#""one""#);
2034            assert_eq!(*path, "$.0");
2035            let s = assert_matches!(value, Value::String(s) => s);
2036            assert_eq!(s.as_raw(), "one");
2037        }
2038
2039        {
2040            let Element {
2041                path_node: path,
2042                value,
2043                span,
2044                id: _,
2045            } = elem_b;
2046            assert_eq!(*path, "$.1");
2047            assert_eq!(spanned_json(span, json), r#"["two", "three"]"#);
2048
2049            let elems = assert_matches!(value, Value::Array(elems) => elems);
2050            let [elem_b_a, elem_b_b] = elems.try_into().unwrap();
2051
2052            {
2053                let Element {
2054                    path_node: path,
2055                    value,
2056                    span,
2057                    id: _,
2058                } = elem_b_a;
2059
2060                assert_eq!(spanned_json(span, json), r#""two""#);
2061                assert_eq!(*path, "$.1.0");
2062                let s = assert_matches!(value, Value::String(s) => s);
2063                assert_eq!(s.as_raw(), "two");
2064            }
2065
2066            {
2067                let Element {
2068                    path_node: path,
2069                    value,
2070                    span,
2071                    id: _,
2072                } = elem_b_b;
2073
2074                assert_eq!(spanned_json(span, json), r#""three""#);
2075                assert_eq!(*path, "$.1.1");
2076                let s = assert_matches!(value, Value::String(s) => s);
2077                assert_eq!(s.as_raw(), "three");
2078            }
2079        }
2080    }
2081
2082    #[test]
2083    fn should_parse_array_with_nested_object() {
2084        test::setup();
2085
2086        let json = r#"[ "one", {"field_a": "two", "field_b": "three"} ]"#;
2087        let elem = parse(json).unwrap();
2088        let Element {
2089            path_node: path,
2090            value,
2091            span,
2092            id: _,
2093        } = elem;
2094
2095        assert_eq!(*path, PathNode::Root);
2096        assert_eq!(spanned_json(span, json), json);
2097
2098        let elems = assert_matches!(value, Value::Array(elems) => elems);
2099        let [elem_a, elem_b] = elems.try_into().unwrap();
2100
2101        {
2102            let Element {
2103                path_node: path,
2104                value,
2105                span,
2106                id: _,
2107            } = elem_a;
2108
2109            assert_eq!(spanned_json(span, json), r#""one""#);
2110            assert_eq!(*path, "$.0");
2111            let s = assert_matches!(value, Value::String(s) => s);
2112            assert_eq!(s.as_raw(), "one");
2113        }
2114
2115        {
2116            let Element {
2117                path_node: path,
2118                value,
2119                span,
2120                id: _,
2121            } = elem_b;
2122            assert_eq!(*path, "$.1");
2123            assert_eq!(
2124                spanned_json(span, json),
2125                r#"{"field_a": "two", "field_b": "three"}"#
2126            );
2127
2128            let fields = assert_matches!(value, Value::Object(fields) => fields);
2129            let [field_b_a, field_b_b] = fields.try_into().unwrap();
2130
2131            {
2132                let (_id, path, span, value) = field_b_a.into_parts();
2133
2134                assert_eq!(spanned_json(span, json), r#""two""#);
2135                assert_eq!(*path, "$.1.field_a");
2136                let s = assert_matches!(value, Value::String(s) => s);
2137                assert_eq!(s.as_raw(), "two");
2138            }
2139
2140            {
2141                let (_id, path, span, value) = field_b_b.into_parts();
2142
2143                assert_eq!(spanned_json(span, json), r#""three""#);
2144                assert_eq!(*path, "$.1.field_b");
2145                let s = assert_matches!(value, Value::String(s) => s);
2146                assert_eq!(s.as_raw(), "three");
2147            }
2148        }
2149    }
2150}
2151
2152#[cfg(test)]
2153mod test_parser_error_reporting {
2154    #![allow(
2155        clippy::string_slice,
2156        clippy::as_conversions,
2157        reason = "panicking is tests is allowed"
2158    )]
2159
2160    use assert_matches::assert_matches;
2161
2162    use crate::test;
2163
2164    use super::{parse, ErrorKind, SpanBounds, TokenType};
2165
2166    #[test]
2167    fn should_report_trailing_comma() {
2168        const JSON: &str = r#"{
2169   "field_a": "one",
2170   "field_b": "two",
2171}"#;
2172
2173        test::setup();
2174
2175        let err = parse(JSON).unwrap_err();
2176
2177        assert_matches!(err.kind(), ErrorKind::UnexpectedToken);
2178        assert_matches!(
2179            err.token().unwrap().kind,
2180            TokenType::Comma,
2181            "We are parsing a comma when we realize that it should not be there"
2182        );
2183        assert_eq!(*err.path(), "$.field_b");
2184
2185        let report = err.into_report(JSON);
2186
2187        assert_eq!(report.json_context(), r#""two","#);
2188        let SpanBounds { start, end } = report.span_bounds();
2189        assert_eq!(*start, (2, 14));
2190        assert_eq!(*end, (2, 20));
2191        assert_eq!(report.expand_json_context(), r#"   "field_b": "two","#);
2192    }
2193
2194    #[test]
2195    fn should_report_invalid_json() {
2196        const JSON: &str = r#"{
2197"field_"#;
2198
2199        test::setup();
2200
2201        let err = parse(JSON).unwrap_err();
2202
2203        assert_matches!(err.kind(), ErrorKind::UnexpectedToken);
2204        assert_matches!(
2205            err.token().unwrap().kind,
2206            TokenType::Invalid,
2207            "We are parsing a string not ended be a double quote"
2208        );
2209        assert_eq!(*err.path(), "$");
2210
2211        let report = err.into_report(JSON);
2212
2213        assert_eq!(report.json_context(), r#""field_"#);
2214        let SpanBounds { start, end } = report.span_bounds();
2215        assert_eq!(*start, (1, 0));
2216        assert_eq!(*end, (1, 7));
2217        assert_eq!(report.expand_json_context(), r#""field_"#);
2218    }
2219
2220    #[test]
2221    fn should_report_invalid_json_in_some_place() {
2222        const JSON: &str = r#"{
2223"field_a": "Barney",
2224"field_"#;
2225
2226        test::setup();
2227
2228        let err = parse(JSON).unwrap_err();
2229
2230        assert_matches!(err.kind(), ErrorKind::UnexpectedToken);
2231        assert_matches!(
2232            err.token().unwrap().kind,
2233            TokenType::Invalid,
2234            "We are parsing a string not ended be a double quote"
2235        );
2236        assert_eq!(*err.path(), "$");
2237
2238        let report = err.into_report(JSON);
2239
2240        assert_eq!(report.json_context(), r#""field_"#);
2241        let SpanBounds { start, end } = report.span_bounds();
2242        assert_eq!(*start, (2, 0));
2243        assert_eq!(*end, (2, 7));
2244        assert_eq!(report.expand_json_context(), r#""field_"#);
2245    }
2246
2247    #[test]
2248    fn should_report_invalid_json_in_some_place_in_the_middle() {
2249        const JSON: &str = r#"{
2250"field_a": "Barney",
2251"field_b",
2252"field_c": "Fred" }
2253"#;
2254
2255        test::setup();
2256
2257        let err = parse(JSON).unwrap_err();
2258
2259        assert_matches!(err.kind(), ErrorKind::UnexpectedToken);
2260        assert_matches!(
2261            err.token().unwrap().kind,
2262            TokenType::Comma,
2263            "We are parsing a key value pair but the key is followed by comma."
2264        );
2265        assert_eq!(*err.path(), "$.field_a");
2266
2267        let report = err.into_report(JSON);
2268
2269        assert_eq!(
2270            report.json_context(),
2271            r#""Barney",
2272"field_b","#
2273        );
2274        let SpanBounds { start, end } = report.span_bounds();
2275        assert_eq!(*start, (1, 11));
2276        assert_eq!(*end, (2, 10));
2277        assert_eq!(report.expand_json_context(), r#""field_a": "Barney","#);
2278    }
2279
2280    #[test]
2281    fn should_report_missing_comma() {
2282        const JSON: &str = r#"{
2283   "field_a": "one"
2284   "field_b": "two"
2285}"#;
2286
2287        test::setup();
2288
2289        let err = parse(JSON).unwrap_err();
2290
2291        assert_matches!(err.kind(), ErrorKind::UnexpectedToken);
2292        assert_matches!(
2293            err.token().unwrap().kind,
2294            TokenType::String,
2295            "We are parsing a String when we realize that there should be a comma"
2296        );
2297        assert_eq!(*err.path(), "$.field_a");
2298
2299        let report = err.into_report(JSON);
2300
2301        assert_eq!(
2302            report.json_context(),
2303            r#""one"
2304   "field_b""#
2305        );
2306        let SpanBounds { start, end } = report.span_bounds();
2307        assert_eq!(*start, (1, 14));
2308        assert_eq!(*end, (2, 12));
2309        assert_eq!(report.expand_json_context(), r#"   "field_a": "one""#);
2310    }
2311}
2312
#[cfg(test)]
mod test_type_sizes {
    use std::mem::size_of;

    use super::{
        Element, Error, ErrorImpl, PartialElement, Path, PathNode, PathNodeRef, RawStr, Span,
        Token, TokenType, Value,
    };

    /// Pin the size in bytes of the parser's types on 64-bit targets, so that a
    /// change in any of these sizes shows up as a failing test.
    #[test]
    #[cfg(target_pointer_width = "64")]
    fn should_match_sizes() {
        // Compare `size_of` the given type against the expected byte count.
        macro_rules! assert_size {
            ($ty:ty, $bytes:literal) => {
                assert_eq!(size_of::<$ty>(), $bytes)
            };
        }

        assert_size!(Element<'_>, 72);
        assert_size!(Error, 8);
        assert_size!(ErrorImpl, 96);
        assert_size!(PartialElement<'_>, 56);
        assert_size!(Path, 24);
        assert_size!(PathNode<'_>, 48);
        assert_size!(PathNodeRef<'_>, 8);
        assert_size!(RawStr<'_>, 32);
        assert_size!(Span, 16);
        assert_size!(Token, 24);
        assert_size!(TokenType, 1);
        assert_size!(Value<'_>, 40);
    }
}