ocpi_tariffs/json/
parser.rs

1use std::borrow::{Borrow, Cow};
2use std::iter::Peekable;
3use std::num::TryFromIntError;
4use std::str::Bytes;
5use std::sync::atomic::AtomicUsize;
6use std::{fmt, rc::Rc};
7
8use json_tools::{Buffer, BufferType};
9use tracing::{debug, trace};
10
11use crate::Caveat;
12
13use super::{
14    decode::{self, unescape_str},
15    Element, Field, ObjectKind, PathNode, PathNodeRef, Value, ValueKind,
16};
17use super::{ElemId, Path};
18
/// The `json_tools` lexer over the source bytes, wrapped in a [`Peekable`].
///
/// We peek at the next `Token` when asserting on trailing commas.
type Lexer<'buf> = Peekable<json_tools::Lexer<Bytes<'buf>>>;
21
22/// Parse the JSON into a tree of [`Element`]s.
23pub fn parse(json: &str) -> Result<Element<'_>, Error> {
24    let parser = Parser::new(json);
25
26    // When just parsing the JSON into an `Element` we only care about the final event
27    // when the JSON has been completely transformed into a root element.
28    for event in parser {
29        if let Event::Complete(element) = event? {
30            return Ok(element);
31        }
32    }
33
34    Err(ErrorKind::UnexpectedEOF
35        .into_partial_error_without_token()
36        .with_root_path())
37}
38
/// A parsing event emitted for each call to the `<Parser as Iterator>::next` function.
#[derive(Debug)]
pub enum Event<'buf> {
    /// An [`Element`] has been opened and its construction is in progress.
    Open {
        /// Whether the opened [`Element`] is an object or an array.
        kind: ObjectKind,
        /// The path node that is also stored on the newly pushed partial element.
        parent_path: PathNodeRef<'buf>,
    },

    /// An [`Element`] has been created and added to its parent [`Element`].
    ///
    /// If the kind is `Array` or `Object` that means that this element is closed: its construction is complete.
    Element {
        /// The kind of JSON value the [`Element`] is.
        kind: ValueKind,
        /// The path to the parent [`Element`].
        ///
        /// NOTE(review): the parser passes the same path node to the created element
        /// itself — confirm whether "parent" is the intended naming.
        parent_path: PathNodeRef<'buf>,
    },

    /// The parse has completed creating the tree of [`Element`]s.
    Complete(Element<'buf>),
}
61
/// The context needed to parse a single chunk of JSON.
///
/// The `Parser` is an [`Iterator`] that yields one [`Event`] per step.
pub struct Parser<'buf> {
    /// Used to assign a unique [`ElemId`] to each [`Element`].
    elem_count: AtomicUsize,

    /// True if the `Parser` is complete.
    ///
    /// Any further calls to [`Parser::next`] will return `None`.
    complete: bool,

    /// The source JSON we're parsing.
    json: &'buf str,

    /// The JSON lexer.
    lexer: Lexer<'buf>,

    /// The pool with pre-allocated `Path`s.
    path_pool: PathPool<'buf>,

    /// The stack to track nested objects.
    stack: Stack<'buf>,

    /// The previous token seen.
    ///
    /// It is stored after a scalar value has been added to its parent and is taken as
    /// context when an unexpected end of input is reported.
    token: Option<Token>,
}
87
/// Define our own `TokenType` so `Clone` can be defined on it.
///
/// This can be removed when `json_tools::TokenType` impl's `Clone`.
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd)]
pub enum TokenType {
    /// `false`
    BooleanFalse,
    /// `true`
    BooleanTrue,

    /// `]`
    BracketClose,
    /// `[`
    BracketOpen,

    /// `:`
    Colon,

    /// `,`
    Comma,

    /// `}`
    CurlyClose,
    /// `{`
    CurlyOpen,

    /// The type of the token could not be identified.
    /// Should be removed if this lexer is ever to be feature complete.
    Invalid,

    /// `null`
    Null,

    /// A number, like `1.1234` or `123` or `-0.0` or `-1` or `.0` or `.`
    Number,

    /// A JSON string, like `"foo"`
    String,
}
127
128impl TokenType {
129    fn as_str(self) -> &'static str {
130        match self {
131            TokenType::BooleanFalse => "false",
132            TokenType::BooleanTrue => "true",
133            TokenType::BracketClose => "]",
134            TokenType::BracketOpen => "[",
135            TokenType::Colon => ":",
136            TokenType::Comma => ",",
137            TokenType::CurlyClose => "}",
138            TokenType::CurlyOpen => "{",
139            TokenType::Invalid => "<invalid>",
140            TokenType::Null => "null",
141            TokenType::Number => "<number>",
142            TokenType::String => "<string>",
143        }
144    }
145}
146
147impl fmt::Display for TokenType {
148    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
149        f.write_str(self.as_str())
150    }
151}
152
153impl From<json_tools::TokenType> for TokenType {
154    fn from(value: json_tools::TokenType) -> Self {
155        match value {
156            json_tools::TokenType::BooleanFalse => TokenType::BooleanFalse,
157            json_tools::TokenType::BooleanTrue => TokenType::BooleanTrue,
158            json_tools::TokenType::BracketClose => TokenType::BracketClose,
159            json_tools::TokenType::BracketOpen => TokenType::BracketOpen,
160            json_tools::TokenType::CurlyClose => TokenType::CurlyClose,
161            json_tools::TokenType::CurlyOpen => TokenType::CurlyOpen,
162            json_tools::TokenType::Colon => TokenType::Colon,
163            json_tools::TokenType::Comma => TokenType::Comma,
164            json_tools::TokenType::Invalid => TokenType::Invalid,
165            json_tools::TokenType::Null => TokenType::Null,
166            json_tools::TokenType::Number => TokenType::Number,
167            json_tools::TokenType::String => TokenType::String,
168        }
169    }
170}
171
/// A lexical token, identifying its kind and span.
///
/// We define our own `Token` as the `json_tools::Token` defines a `Buffer` that can be heap allocated
/// or a `Span`. We only use the `Span` variant.
///
/// Our `Token` can also impl `Copy` and `Clone` as the size and semantics are acceptable.
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd)]
pub struct Token {
    /// The exact type of the token.
    pub kind: TokenType,

    /// The span allows to reference back into the source byte stream
    /// to obtain the string making up the token.
    ///
    /// For punctuation, booleans and `null` the text is already determined by `kind`.
    pub span: Span,
}
188
189impl Token {
190    /// Return true is the token is a opening brace.
191    fn is_opening(&self) -> bool {
192        matches!(self.kind, TokenType::CurlyOpen | TokenType::BracketOpen)
193    }
194
195    /// Return true is the token is a closing brace.
196    fn is_closing(&self) -> bool {
197        matches!(self.kind, TokenType::CurlyClose | TokenType::BracketClose)
198    }
199
200    /// Return true is the token is a comma.
201    fn is_comma(&self) -> bool {
202        matches!(self.kind, TokenType::Comma)
203    }
204}
205
206impl fmt::Display for Token {
207    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
208        write!(
209            f,
210            "token: {}, ({},{})",
211            self.kind, self.span.start, self.span.end
212        )
213    }
214}
215
216impl TryFrom<json_tools::Token> for Token {
217    type Error = PartialError;
218
219    fn try_from(token: json_tools::Token) -> Result<Self, Self::Error> {
220        let json_tools::Token { kind, buf } = token;
221        let kind = kind.into();
222        let Buffer::Span(span) = &buf else {
223            return Err(InternalError::BufferType.into_partial_error(None));
224        };
225
226        let span = span
227            .try_into()
228            .map_err(|err| InternalError::from(err).into_partial_error(None))?;
229
230        Ok(Self { kind, span })
231    }
232}
233
234impl TryFrom<&json_tools::Token> for Token {
235    type Error = PartialError;
236
237    fn try_from(token: &json_tools::Token) -> Result<Self, Self::Error> {
238        let json_tools::Token { kind, buf } = token;
239        let kind = kind.clone().into();
240        let Buffer::Span(span) = &buf else {
241            return Err(InternalError::BufferType.into_partial_error(None));
242        };
243
244        let span = span
245            .try_into()
246            .map_err(|err| InternalError::from(err).into_partial_error(None))?;
247
248        Ok(Self { kind, span })
249    }
250}
251
252impl<'buf> Parser<'buf> {
253    pub fn new(json: &'buf str) -> Self {
254        let lexer = json_tools::Lexer::new(json.bytes(), BufferType::Span).peekable();
255
256        Self {
257            elem_count: AtomicUsize::new(0),
258            complete: false,
259            json,
260            lexer,
261            path_pool: PathPool::default(),
262            stack: Stack::new(),
263            token: None,
264        }
265    }
266
267    fn next_elem_id(&self) -> ElemId {
268        let id = self
269            .elem_count
270            .fetch_add(1, std::sync::atomic::Ordering::Relaxed);
271
272        ElemId(id)
273    }
274
275    fn expect_next(&mut self) -> Result<Token, PartialError> {
276        let Some(token) = self.lexer.next() else {
277            return Err(ErrorKind::UnexpectedEOF.into_partial_error(self.token.take()));
278        };
279
280        let token = token.try_into()?;
281
282        Ok(token)
283    }
284
285    /// Return an `Err` if the next token is not the expected.
286    fn expect_token(&mut self, token_type: TokenType) -> Result<(), PartialError> {
287        let Some(token) = self.lexer.next() else {
288            return Err(ErrorKind::UnexpectedEOF.into_partial_error(self.token.take()));
289        };
290
291        let token: Token = token.try_into()?;
292
293        if token.kind == token_type {
294            Ok(())
295        } else {
296            Err(unexpected_token(token))
297        }
298    }
299
300    fn next_event(&mut self) -> Result<Option<Event<'buf>>, Error> {
301        if self.complete {
302            return Ok(None);
303        }
304
305        let head = self.stack.pop_head();
306
307        match head {
308            None => {
309                let token = self.expect_next().with_root_path()?;
310
311                trace!(?token);
312                self.comma_checks(&token).with_root_path()?;
313
314                match token.kind {
315                    TokenType::CurlyOpen => {
316                        let parent_path = self.path_pool.root();
317                        self.stack.push_new_object(
318                            self.next_elem_id(),
319                            Rc::clone(&parent_path),
320                            &token,
321                        );
322                        Ok(Some(Event::Open {
323                            kind: ObjectKind::Object,
324                            parent_path,
325                        }))
326                    }
327                    TokenType::BracketOpen => {
328                        let parent_path = self.path_pool.root();
329                        self.stack.push_new_array(
330                            self.next_elem_id(),
331                            Rc::clone(&parent_path),
332                            &token,
333                        );
334                        Ok(Some(Event::Open {
335                            kind: ObjectKind::Array,
336                            parent_path,
337                        }))
338                    }
339                    TokenType::Number => {
340                        let value = Value::Number(token_str(self.json, &token).with_root_path()?);
341                        self.exit_with_value(token, value).with_root_path()
342                    }
343                    TokenType::Null => self.exit_with_value(token, Value::Null).with_root_path(),
344                    TokenType::String => {
345                        let value =
346                            Value::String(token_str_as_string(self.json, token).with_root_path()?);
347                        self.exit_with_value(token, value).with_root_path()
348                    }
349                    TokenType::BooleanTrue => {
350                        self.exit_with_value(token, Value::True).with_root_path()
351                    }
352                    TokenType::BooleanFalse => {
353                        self.exit_with_value(token, Value::False).with_root_path()
354                    }
355                    _ => Err(unexpected_token(token).with_root_path()),
356                }
357            }
358            Some(mut head) => {
359                let token = self.expect_next().with_head(&head)?;
360
361                trace!(?token, head = ?head.elem_type);
362                let token = if self.comma_checks(&token).with_head(&head)? {
363                    self.expect_next().with_head(&head)?
364                } else {
365                    token
366                };
367
368                let (value, token, path) = match head.elem_type {
369                    ObjectKind::Object => {
370                        let key = match token.kind {
371                            TokenType::String => {
372                                token_str_as_string(self.json, token).with_head(&head)?
373                            }
374                            TokenType::CurlyClose => {
375                                let event = self.close_element(head, &token)?;
376                                return Ok(event);
377                            }
378                            _ => return Err(unexpected_token(token).with_root_path()),
379                        };
380
381                        self.expect_token(TokenType::Colon).with_head(&head)?;
382                        let token = self.expect_next().with_head(&head)?;
383
384                        let value = match token.kind {
385                            TokenType::CurlyOpen => {
386                                let parent_path = head.parent_is_object(&mut self.path_pool, key);
387                                self.stack.push_head(head);
388                                self.stack.push_new_object(
389                                    self.next_elem_id(),
390                                    Rc::clone(&parent_path),
391                                    &token,
392                                );
393                                return Ok(Some(Event::Open {
394                                    kind: ObjectKind::Object,
395                                    parent_path,
396                                }));
397                            }
398                            TokenType::BracketOpen => {
399                                let parent_path = head.parent_is_object(&mut self.path_pool, key);
400                                self.stack.push_head(head);
401                                self.stack.push_new_array(
402                                    self.next_elem_id(),
403                                    Rc::clone(&parent_path),
404                                    &token,
405                                );
406                                return Ok(Some(Event::Open {
407                                    kind: ObjectKind::Array,
408                                    parent_path,
409                                }));
410                            }
411                            TokenType::CurlyClose => {
412                                let event = self.close_element(head, &token)?;
413                                return Ok(event);
414                            }
415                            TokenType::String => Value::String(
416                                token_str_as_string(self.json, token).with_head(&head)?,
417                            ),
418                            TokenType::Number => {
419                                Value::Number(token_str(self.json, &token).with_head(&head)?)
420                            }
421                            TokenType::Null => Value::Null,
422                            TokenType::BooleanTrue => Value::True,
423                            TokenType::BooleanFalse => Value::False,
424                            _ => return Err(unexpected_token(token).with_head(&head)),
425                        };
426
427                        (
428                            value,
429                            token,
430                            head.parent_is_object(&mut self.path_pool, key),
431                        )
432                    }
433                    ObjectKind::Array => {
434                        let value = match token.kind {
435                            TokenType::CurlyOpen => {
436                                let parent_path = head.parent_is_array(&mut self.path_pool);
437                                self.stack.push_head(head);
438                                self.stack.push_new_object(
439                                    self.next_elem_id(),
440                                    Rc::clone(&parent_path),
441                                    &token,
442                                );
443                                return Ok(Some(Event::Open {
444                                    kind: ObjectKind::Object,
445                                    parent_path,
446                                }));
447                            }
448                            TokenType::BracketOpen => {
449                                let parent_path = head.parent_is_array(&mut self.path_pool);
450                                self.stack.push_head(head);
451                                self.stack.push_new_array(
452                                    self.next_elem_id(),
453                                    Rc::clone(&parent_path),
454                                    &token,
455                                );
456                                return Ok(Some(Event::Open {
457                                    kind: ObjectKind::Array,
458                                    parent_path,
459                                }));
460                            }
461                            TokenType::BracketClose => {
462                                let event = self.close_element(head, &token)?;
463                                return Ok(event);
464                            }
465
466                            TokenType::String => Value::String(
467                                token_str_as_string(self.json, token).with_head(&head)?,
468                            ),
469                            TokenType::Number => {
470                                Value::Number(token_str(self.json, &token).with_head(&head)?)
471                            }
472                            TokenType::Null => Value::Null,
473                            TokenType::BooleanTrue => Value::True,
474                            TokenType::BooleanFalse => Value::False,
475                            _ => return Err(unexpected_token(token).with_head(&head)),
476                        };
477                        (value, token, head.parent_is_array(&mut self.path_pool))
478                    }
479                };
480
481                let event = Event::Element {
482                    kind: value.kind(),
483                    parent_path: Rc::clone(&path),
484                };
485                head.push_field(self.next_elem_id(), path, value, &token);
486
487                let peek_token = self.peek(&token).with_head(&head)?;
488
489                if !(peek_token.is_comma() || peek_token.is_closing()) {
490                    return Err(unexpected_token(peek_token).with_head(&head));
491                }
492
493                self.token.replace(token);
494                self.stack.push_head(head);
495
496                Ok(Some(event))
497            }
498        }
499    }
500
501    /// Close a [`PartialElement`] which creates an [`Element`] and returns an [`Event`]
502    fn close_element(
503        &mut self,
504        head: PartialElement<'buf>,
505        token: &Token,
506    ) -> Result<Option<Event<'buf>>, Error> {
507        let event = self.stack.head_into_element(head, token);
508
509        match event {
510            Pop::Element { kind, parent_path } => Ok(Some(Event::Element { kind, parent_path })),
511            Pop::Complete(element) => {
512                if let Some(token) = self.lexer.next() {
513                    let token = token.try_into().with_root_path()?;
514                    return Err(unexpected_token(token).with_root_path());
515                }
516
517                Ok(Some(Event::Complete(element)))
518            }
519        }
520    }
521
522    fn exit_with_value(
523        &mut self,
524        token: Token,
525        value: Value<'buf>,
526    ) -> Result<Option<Event<'buf>>, PartialError> {
527        self.complete = true;
528        let span = element_span(&token, 0);
529        let elem = Element::new(self.next_elem_id(), Rc::new(PathNode::Root), span, value);
530
531        if let Some(token) = self.lexer.next() {
532            let token = token.try_into()?;
533            return Err(unexpected_token(token));
534        }
535
536        Ok(Some(Event::Complete(elem)))
537    }
538
539    fn peek(&mut self, token: &Token) -> Result<Token, PartialError> {
540        let Some(peek_token) = self.lexer.peek() else {
541            return Err(ErrorKind::UnexpectedEOF.into_partial_error(Some(*token)));
542        };
543
544        let peek_token = peek_token.try_into()?;
545        Ok(peek_token)
546    }
547
548    /// Perform comma position checks
549    ///
550    /// Return `Err(unexpected)` if a trailing or rogue comma is found.
551    fn comma_checks(&mut self, token: &Token) -> Result<bool, PartialError> {
552        trace!(?token, "comma_checks");
553        let is_comma = token.is_comma();
554
555        if is_comma {
556            let peek_token = self.peek(token)?;
557
558            // A comma can only be followed by an opening brace or a value.
559            if peek_token.is_closing() {
560                return Err(unexpected_token(*token));
561            }
562
563            if peek_token.is_comma() {
564                return Err(unexpected_token(peek_token));
565            }
566        } else if token.is_opening() {
567            let peek_token = self.peek(token)?;
568
569            // An opening brace should not be followed by a comma.
570            if peek_token.is_comma() {
571                return Err(unexpected_token(peek_token));
572            }
573        }
574
575        Ok(is_comma)
576    }
577}
578
579/// Create an [`PartialError`] with [`ErrorKind::UnexpectedToken`].
580#[track_caller]
581fn unexpected_token(token: Token) -> PartialError {
582    ErrorKind::UnexpectedToken.into_partial_error(Some(token))
583}
584
585impl<'buf> Iterator for Parser<'buf> {
586    type Item = Result<Event<'buf>, Error>;
587
588    fn next(&mut self) -> Option<Self::Item> {
589        match self.next_event() {
590            Ok(event) => event.map(Ok),
591            Err(err) => {
592                self.complete = true;
593                Some(Err(err))
594            }
595        }
596    }
597}
598
/// A partial `Element` that we descend into to parse its child `Element`s.
#[derive(Debug)]
struct PartialElement<'buf> {
    /// The Id of the [`Element`] to be created.
    elem_id: ElemId,

    /// The type of [`Element`].
    elem_type: ObjectKind,

    /// The child [`Element`]s.
    ///
    /// This is filled as we parse the current JSON [`Element`].
    elements: Vec<Element<'buf>>,

    /// The path up to the [`Element`].
    path: PathNodeRef<'buf>,

    /// The index of the [`Element`]'s first byte.
    span_start: usize,
}
619
620impl<'buf> PartialElement<'buf> {
621    fn parent_is_object(
622        &self,
623        path_pool: &mut PathPool<'buf>,
624        key: RawStr<'buf>,
625    ) -> PathNodeRef<'buf> {
626        path_pool.object(Rc::clone(&self.path), key)
627    }
628
629    fn parent_is_array(&self, path_pool: &mut PathPool<'buf>) -> PathNodeRef<'buf> {
630        path_pool.array(Rc::clone(&self.path), self.elements.len())
631    }
632
633    fn push_field(
634        &mut self,
635        elem_id: ElemId,
636        path: PathNodeRef<'buf>,
637        value: Value<'buf>,
638        token: &Token,
639    ) {
640        let span = element_span(token, token.span.start);
641        let elem = Element::new(elem_id, path, span, value);
642        self.elements.push(elem);
643    }
644
645    /// Resolve the `PartialElement` to an `Element`.
646    fn into_element(self, token: &Token) -> Element<'buf> {
647        let span = element_span(token, self.span_start);
648
649        let PartialElement {
650            elem_type,
651            span_start: _,
652            elements,
653            path,
654            elem_id,
655        } = self;
656
657        let value = match elem_type {
658            ObjectKind::Object => {
659                let fields = elements.into_iter().map(Field).collect();
660                Value::Object(fields)
661            }
662            ObjectKind::Array => Value::Array(elements),
663        };
664
665        Element::new(elem_id, path, span, value)
666    }
667}
668
/// A pool of pre-allocated path nodes.
///
/// `Path`s are added and never removed.
struct PathPool<'buf> {
    /// The index of the most recently assigned slot; slot `0` is the root path.
    index: usize,
    /// The pre-allocated path nodes.
    items: Vec<Rc<PathNode<'buf>>>,
}
674
675impl Default for PathPool<'_> {
676    fn default() -> Self {
677        Self::with_capacity(1000)
678    }
679}
680
681impl<'buf> PathPool<'buf> {
682    fn with_capacity(capacity: usize) -> Self {
683        let capacity = capacity.max(1);
684        let mut items = Vec::with_capacity(capacity);
685        items.resize_with(capacity, Default::default);
686
687        Self { index: 0, items }
688    }
689
690    #[expect(
691        clippy::indexing_slicing,
692        reason = "The root Path is added in the constructor and the capacity is always at least 1"
693    )]
694    fn root(&self) -> PathNodeRef<'buf> {
695        Rc::clone(&self.items[0])
696    }
697
698    /// Add a new `Path::Array` with the given index.
699    fn array(&mut self, parent: PathNodeRef<'buf>, index: usize) -> PathNodeRef<'buf> {
700        self.push(PathNode::Array { parent, index })
701    }
702
703    /// Add a new `Path::Object` with the given index.
704    fn object(&mut self, parent: PathNodeRef<'buf>, key: RawStr<'buf>) -> PathNodeRef<'buf> {
705        self.push(PathNode::Object { parent, key })
706    }
707
708    #[expect(clippy::indexing_slicing, reason = "Paths are only added")]
709    fn push(&mut self, new_path: PathNode<'buf>) -> PathNodeRef<'buf> {
710        const GROWTH_FACTOR: usize = 2;
711
712        let Self { index, items } = self;
713        let next_index = *index + 1;
714
715        if next_index >= items.len() {
716            items.reserve(items.len() * GROWTH_FACTOR);
717            items.resize_with(items.capacity(), Default::default);
718        }
719
720        let path = &mut items[next_index];
721        debug_assert_eq!(Rc::strong_count(path), 1, "Paths are only added");
722        let path = Rc::get_mut(path).expect("Paths are only added");
723        *path = new_path;
724
725        let path = Rc::clone(&items[next_index]);
726        *index = next_index;
727        path
728    }
729}
730
/// The `Span` defines the range of bytes that delimits a JSON `Element`.
#[derive(Copy, Clone, Debug, Default, Eq, PartialEq, Ord, PartialOrd)]
pub struct Span {
    /// Index of the first byte.
    pub start: usize,

    /// Index one past the last byte.
    pub end: usize,
}
740
741impl TryFrom<&json_tools::Span> for Span {
742    type Error = TryFromIntError;
743
744    fn try_from(span: &json_tools::Span) -> Result<Self, Self::Error> {
745        let json_tools::Span { first, end } = span;
746        let start = usize::try_from(*first)?;
747        let end = usize::try_from(*end)?;
748        Ok(Span { start, end })
749    }
750}
751
/// The stack of [`PartialElement`]s that are currently open; the last item is the head.
struct Stack<'buf>(Vec<PartialElement<'buf>>);
753
/// The result of converting the head [`PartialElement`] into an [`Element`].
enum Pop<'buf> {
    /// An [`Element`] has been created and added to its parent [`Element`].
    Element {
        /// The kind of value the created [`Element`] holds.
        kind: ValueKind,
        /// The path node of the created [`Element`].
        parent_path: PathNodeRef<'buf>,
    },

    /// The parse has completed creating the tree of [`Element`]s.
    Complete(Element<'buf>),
}
764
765impl<'buf> Stack<'buf> {
766    fn new() -> Self {
767        Self(vec![])
768    }
769
770    /// The head `PartialElement` is popped off the stack temporarily to avoid lifetime issues if the
771    /// stack `Vec` contains it.
772    fn pop_head(&mut self) -> Option<PartialElement<'buf>> {
773        self.0.pop()
774    }
775
776    /// The head `PartialElement` is popped off the stack temporarily to avoid lifetime issues if the
777    /// stack `Vec` contains it.
778    fn push_head(&mut self, head: PartialElement<'buf>) {
779        self.0.push(head);
780    }
781
782    /// Convert the head `PartialElement` into an `Element` using the parent to form the path.
783    fn head_into_element(&mut self, head: PartialElement<'buf>, token: &Token) -> Pop<'buf> {
784        let elem = head.into_element(token);
785
786        if let Some(parent) = self.0.last_mut() {
787            let event = Pop::Element {
788                kind: elem.value.kind(),
789                parent_path: elem.path_node(),
790            };
791            parent.elements.push(elem);
792            event
793        } else {
794            Pop::Complete(elem)
795        }
796    }
797
798    fn push_new_object(&mut self, elem_id: ElemId, parent_path: PathNodeRef<'buf>, token: &Token) {
799        self.push_new_elem(elem_id, parent_path, token, ObjectKind::Object);
800    }
801
802    fn push_new_array(&mut self, elem_id: ElemId, parent_path: PathNodeRef<'buf>, token: &Token) {
803        self.push_new_elem(elem_id, parent_path, token, ObjectKind::Array);
804    }
805
806    fn push_new_elem(
807        &mut self,
808        elem_id: ElemId,
809        parent_path: PathNodeRef<'buf>,
810        token: &Token,
811        elem_type: ObjectKind,
812    ) {
813        let partial = PartialElement {
814            elements: vec![],
815            elem_type,
816            path: parent_path,
817            span_start: token.span.start,
818            elem_id,
819        };
820        self.0.push(partial);
821    }
822}
823
/// A parsing Error that keeps track of the token being parsed when the Error occurred and
/// the span of JSON surrounding the Error location.
///
/// The details are boxed.
pub struct Error(Box<ErrorImpl>);
827
828impl fmt::Debug for Error {
829    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
830        fmt::Debug::fmt(&self.0, f)
831    }
832}
833
834impl From<ErrorImpl> for Error {
835    fn from(err: ErrorImpl) -> Self {
836        Self(err.into())
837    }
838}
839
/// The details of an [`Error`].
struct ErrorImpl {
    /// The kind of error that occurred.
    kind: ErrorKind,

    /// The location the [`Error`] happened in the source code.
    loc: &'static std::panic::Location<'static>,

    /// The path to the [`Element`] the error occurred in.
    path: Path,

    /// The span of the JSON string the error occurred in.
    span: Span,

    /// The token being parsed at the time of the [`Error`].
    token: Option<Token>,
}
856
857impl fmt::Debug for ErrorImpl {
858    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
859        f.debug_struct("Error")
860            .field("kind", &self.kind)
861            .field("loc", &self.loc)
862            .field("path", &self.path)
863            .field("span", &self.span)
864            .field("token", &self.token)
865            .finish()
866    }
867}
868
869impl Error {
870    /// The kind of error that occurred.
871    pub fn kind(&self) -> &ErrorKind {
872        &self.0.kind
873    }
874
875    /// The path to the element the error occurred in.
876    pub fn path(&self) -> &Path {
877        &self.0.path
878    }
879
880    /// The span of the [`Element`] the error occurred in.
881    pub fn span(&self) -> Span {
882        self.0.span
883    }
884
885    pub fn token(&self) -> Option<&Token> {
886        self.0.token.as_ref()
887    }
888
889    /// Break the Error into it's constituent parts.
890    pub fn into_parts(self) -> (ErrorKind, Path, Span) {
891        let ErrorImpl {
892            kind,
893            loc: _,
894            path,
895            span,
896            token: _,
897        } = *self.0;
898        (kind, path, span)
899    }
900
901    /// Convert the Error into a more comprehensive report using the source JSON to provide
902    /// human readable context.
903    pub fn into_report(self, json: &str) -> ErrorReport<'_> {
904        ErrorReport::from_error(self, json)
905    }
906}
907
/// A more comprehensive report on the [`Error`] using the source JSON `&str` to provide
/// human readable context.
///
/// Created with [`Error::into_report`].
#[derive(Debug)]
pub struct ErrorReport<'buf> {
    /// The [`Error`] that occurred.
    error: Error,

    /// The slice of JSON as defined by the `Error::span`.
    ///
    /// Falls back to the entire JSON if the span can not be used to slice the source.
    json_context: &'buf str,

    /// The line of JSON that contains the start of the `Error::span`.
    ///
    /// Falls back to `json_context` if the line bounds can not be used to slice the source.
    expanded_json_context: &'buf str,

    /// The line and col indices of the start and end of the span.
    span_bounds: SpanBounds,
}
925
926impl<'buf> ErrorReport<'buf> {
927    /// Create the `ErrorReport` from the `Error` and source `&str`.
928    fn from_error(error: Error, json: &'buf str) -> Self {
929        let span = error.span();
930        debug!(?error, ?span, json, "from_error");
931        let json_context = &json.get(span.start..span.end).unwrap_or(json);
932
933        let start = {
934            let s = &json.get(0..span.start).unwrap_or_default();
935            line_col(s)
936        };
937        let end = {
938            let relative_end = line_col(json_context);
939            let line = start.line + relative_end.line;
940
941            if start.line == line {
942                LineCol {
943                    line,
944                    col: start.col + relative_end.col,
945                }
946            } else {
947                LineCol {
948                    line,
949                    col: relative_end.col,
950                }
951            }
952        };
953        let (prev, next) = find_expanded_newlines(json, span.start);
954        let expanded_json_context = &json.get(prev..next).unwrap_or(json_context);
955
956        let span_bounds = SpanBounds { start, end };
957
958        Self {
959            error,
960            json_context,
961            expanded_json_context,
962            span_bounds,
963        }
964    }
965
966    /// Return the slice of JSON as defined by the `Error::span`.
967    pub fn json_context(&self) -> &'buf str {
968        self.json_context
969    }
970
971    /// Return the slice of JSON as defined by the `Error::span` and expanded out to the
972    /// start and end of the line.
973    pub fn expand_json_context(&self) -> &'buf str {
974        self.expanded_json_context
975    }
976
977    /// Return the line and col number of each end of the span
978    pub fn span_bounds(&self) -> &SpanBounds {
979        &self.span_bounds
980    }
981
982    /// Discard the `Report` and take ownership of the `Error`.
983    pub fn into_error(self) -> Error {
984        self.error
985    }
986}
987
/// Return the byte range `(start, end)` of the line that contains `byte_index`.
///
/// * `start` is the index just after the closest `\n` before `byte_index`, or `0` if there is
///   no preceding newline.
/// * `end` is the index of the first `\n` at or after `byte_index`, or `json.len()` if there is
///   no following newline.
///
/// A `byte_index` beyond the end of `json` is clamped to `json.len()`. As `\n` is a single byte
/// in UTF-8, both returned indices always fall on char boundaries.
fn find_expanded_newlines(json: &str, byte_index: usize) -> (usize, usize) {
    let bytes = json.as_bytes();
    let index = byte_index.min(bytes.len());
    let (before, after) = bytes.split_at(index);

    // No newline before the index means the line starts at the beginning of the JSON.
    let start = before
        .iter()
        .rposition(|b| *b == b'\n')
        .map_or(0, |newline| newline + 1);

    // No newline after the index means the line runs to the end of the JSON.
    let end = after
        .iter()
        .position(|b| *b == b'\n')
        .map_or(bytes.len(), |newline| index + newline);

    (start, end)
}
1003
/// The line and col indices of the start and end of a `Span`.
///
/// Both positions are zero based, see [`LineCol`].
#[derive(Clone, Debug)]
pub struct SpanBounds {
    /// The start of the `Span` expressed as line and column index.
    pub start: LineCol,

    /// The end of the `Span` expressed as line and column index.
    pub end: LineCol,
}
1013
/// A file location expressed as line and column.
///
/// Can be converted to and from, and compared with, a `(line, col)` tuple.
/// The `Display` impl writes `line:col`.
#[derive(Clone, Debug)]
pub struct LineCol {
    /// The line index is 0 based.
    pub line: u32,

    /// The col index is 0 based.
    pub col: u32,
}
1023
1024impl From<(u32, u32)> for LineCol {
1025    fn from(value: (u32, u32)) -> Self {
1026        Self {
1027            line: value.0,
1028            col: value.1,
1029        }
1030    }
1031}
1032
1033impl From<LineCol> for (u32, u32) {
1034    fn from(value: LineCol) -> Self {
1035        (value.line, value.col)
1036    }
1037}
1038
1039impl PartialEq<(u32, u32)> for LineCol {
1040    fn eq(&self, other: &(u32, u32)) -> bool {
1041        self.line == other.0 && self.col == other.1
1042    }
1043}
1044
1045impl fmt::Display for LineCol {
1046    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1047        write!(f, "{}:{}", self.line, self.col)
1048    }
1049}
1050
1051/// Return the line and column indices of the end of the slice.
1052///
1053/// The line and column indices are zero based.
1054pub fn line_col(s: &str) -> LineCol {
1055    let mut chars = s.chars().rev();
1056    let mut line = 0;
1057    let mut col = 0;
1058
1059    // The col only needs to be calculated on the final line so we iterate from the last char
1060    // back to the start of the line and then only continue to count the lines after that.
1061    //
1062    // This is less work than continuously counting chars from the front of the slice.
1063    for c in chars.by_ref() {
1064        // If the `&str` is multiline, we count the line and stop accumulating the col count too.
1065        if c == '\n' {
1066            line += 1;
1067            break;
1068        }
1069        col += 1;
1070    }
1071
1072    // The col is now known, continue to the start of the str counting newlines as we go.
1073    for c in chars {
1074        if c == '\n' {
1075            line += 1;
1076        }
1077    }
1078
1079    LineCol { line, col }
1080}
1081
/// An error that has yet to be resolved with a [`Span`] and a path.
///
/// Use `with_head` or `with_root_path` to convert it into an [`Error`].
#[derive(Debug)]
pub struct PartialError {
    /// The kind of error that occurred.
    kind: ErrorKind,

    /// The location in the Rust source code where the [`PartialError`] was created.
    loc: &'static std::panic::Location<'static>,

    /// The token being parsed at the time of the [`PartialError`], if there was one.
    token: Option<Token>,
}
1094
/// Convert a value carrying a [`PartialError`] into one carrying an [`Error`].
///
/// Implemented in this file for `Result<T, PartialError>` so the conversion can be chained
/// onto fallible calls.
trait PartialIntoError<T> {
    /// Convert a [`PartialError`] into an [`Error`] with a path based on the given [`PartialElement`].
    fn with_head(self, head: &PartialElement<'_>) -> Result<T, Error>;

    /// Converts a [`PartialError`] into an [`Error`] with a root path.
    ///
    /// This can be used if the path is unknown or the [`Error`] occurred at the root.
    fn with_root_path(self) -> Result<T, Error>;
}
1105
1106impl<T> PartialIntoError<T> for Result<T, PartialError> {
1107    fn with_head(self, head: &PartialElement<'_>) -> Result<T, Error> {
1108        match self {
1109            Ok(v) => Ok(v),
1110            Err(err) => Err(err.with_head(head)),
1111        }
1112    }
1113
1114    fn with_root_path(self) -> Result<T, Error> {
1115        match self {
1116            Ok(v) => Ok(v),
1117            Err(err) => Err(err.with_root_path()),
1118        }
1119    }
1120}
1121
1122impl PartialError {
1123    /// Convert a [`PartialError`] into an [`Error`] with a path based on the given [`PartialElement`].
1124    fn with_head(self, parent: &PartialElement<'_>) -> Error {
1125        let Self { loc, kind, token } = self;
1126        let span_end = token.map(|t| t.span.end).unwrap_or_default();
1127
1128        let (path, span) = if let Some(elem) = parent.elements.last() {
1129            (
1130                Path::from_node(Rc::clone(&elem.path_node)),
1131                Span {
1132                    start: elem.span.start,
1133                    end: span_end,
1134                },
1135            )
1136        } else {
1137            (
1138                Path::from_node(Rc::clone(&parent.path)),
1139                Span {
1140                    start: parent.span_start,
1141                    end: span_end,
1142                },
1143            )
1144        };
1145
1146        ErrorImpl {
1147            kind,
1148            loc,
1149            path,
1150            span,
1151            token,
1152        }
1153        .into()
1154    }
1155
1156    /// Converts a `PartialError` into an `Error` with a root path.
1157    ///
1158    /// This can be used If the path is unknown or the `Error` occurred at the root.
1159    pub fn with_root_path(self) -> Error {
1160        let Self { loc, kind, token } = self;
1161        let (span_start, span_end) = match (&kind, token) {
1162            (ErrorKind::UnexpectedToken, Some(t)) => (t.span.start, t.span.end),
1163            (_, Some(t)) => (0, t.span.end),
1164            (_, None) => (0, 0),
1165        };
1166        ErrorImpl {
1167            loc,
1168            kind,
1169            path: Path::root(),
1170            span: Span {
1171                start: span_start,
1172                end: span_end,
1173            },
1174            token,
1175        }
1176        .into()
1177    }
1178}
1179
/// The kind of Errors that can occur while parsing JSON.
#[derive(Debug)]
pub enum ErrorKind {
    /// An internal programming error.
    ///
    /// The boxed error is written out by the `Display` impl of [`Error`].
    Internal(Box<dyn std::error::Error + Send + Sync + 'static>),

    /// The `Lexer` had no more tokens when more were expected.
    UnexpectedEOF,

    /// An unexpected token was emitted by the `Lexer`.
    UnexpectedToken,
}
1192
1193impl ErrorKind {
1194    #[track_caller]
1195    fn into_partial_error(self, token: Option<Token>) -> PartialError {
1196        PartialError {
1197            kind: self,
1198            loc: std::panic::Location::caller(),
1199            token,
1200        }
1201    }
1202
1203    #[track_caller]
1204    pub fn into_partial_error_without_token(self) -> PartialError {
1205        PartialError {
1206            kind: self,
1207            loc: std::panic::Location::caller(),
1208            token: None,
1209        }
1210    }
1211}
1212
// Only the trait's default methods are used; no `source` is reported.
impl std::error::Error for Error {}
1214
1215impl fmt::Display for Error {
1216    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1217        let ErrorImpl {
1218            kind,
1219            loc,
1220            path,
1221            span: _,
1222            token,
1223        } = &*self.0;
1224
1225        write!(
1226            f,
1227            "Error: code location: {loc}; while parsing element at `{path}`"
1228        )?;
1229
1230        if let Some(token) = token {
1231            write!(f, " token: `{}`", token.kind)?;
1232        }
1233
1234        match kind {
1235            ErrorKind::Internal(err) => write!(f, "Internal: {err}"),
1236            ErrorKind::UnexpectedEOF => f.write_str("Unexpected EOF"),
1237            ErrorKind::UnexpectedToken => write!(f, "unexpected token"),
1238        }
1239    }
1240}
1241
/// A programming Error resulting from faulty logic.
///
/// This should not be exposed on the public API.
#[derive(Debug)]
enum InternalError {
    /// Slicing into the JSON buf failed.
    ///
    /// This should not happen during parsing, as the parsing and `Span` calculations are all
    /// contained within the same callstack of functions.
    ///
    /// This can only happen if there's a mistake in the `Span` offset/range calculations.
    BufferSlice(Span),

    /// The type of `Buffer` is invalid.
    ///
    /// The `json_tools::Lexer::next` is called in a few places and the `json_tools::Token` it
    /// emits is converted into a local `Token` with only a `Span` based buffer to avoid checking
    /// the buffer type each time it's used.
    ///
    /// The lexer is configured to only use a `Span` based buffer so the only way this Error can
    /// occur is if the code is changed so that the lexer uses a `String` based buffer.
    BufferType,

    /// Converting a `json_tools::Span` index from `u64` to `usize` failed.
    ///
    /// The `json_tools::Span` uses `u64` for the `start` and `end` indices which would involve
    /// conversion to `usize` each time they are used. To avoid this the `json_tools::Span` is
    /// converted to the locally defined `Span` that uses `usize` based fields.
    ///
    /// This conversion can fail if the binary is built for architectures other than `64` bit pointer width.
    FromInt(TryFromIntError),

    /// A String was parsed without surrounding double quotes.
    ///
    /// This is only possible if the `json_tools` crate changes the implementation details of
    /// how they parse JSON strings.
    StringWithoutQuotes,

    /// A `RawStr` was made using a token that is not a `String`.
    ///
    /// `RawStr`s are only creatable from inside the crate so the only way this can occur is
    /// through a programming error.
    RawStringFromInvalidToken,
}
1284
1285impl InternalError {
1286    #[track_caller]
1287    fn into_partial_error(self, token: Option<Token>) -> PartialError {
1288        ErrorKind::Internal(Box::new(self)).into_partial_error(token)
1289    }
1290}
1291
// Needed so an `InternalError` can be boxed into `ErrorKind::Internal`.
impl std::error::Error for InternalError {}
1293
1294/// The `json_tools::Span` uses `u64` for the `start` and `end` indices which would involve
1295/// conversion to `usize` each time they are used. To avoid this the `json_tools::Span` is
1296/// converted to the locally defined `Span` that uses `usize` based fields.
1297///
1298/// This conversion can fail if the binary is built for architectures other than `64` bit pointer width.
1299impl From<TryFromIntError> for InternalError {
1300    fn from(err: TryFromIntError) -> Self {
1301        InternalError::FromInt(err)
1302    }
1303}
1304
1305impl From<InternalError> for Error {
1306    #[track_caller]
1307    fn from(err: InternalError) -> Self {
1308        ErrorImpl {
1309            kind: ErrorKind::Internal(Box::new(err)),
1310            loc: std::panic::Location::caller(),
1311            path: Path::root(),
1312            span: Span { start: 0, end: 0 },
1313            token: None,
1314        }
1315        .into()
1316    }
1317}
1318
1319impl fmt::Display for InternalError {
1320    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1321        match self {
1322            InternalError::BufferSlice(span) => {
1323                write!(f, "Slice into buffer failed; span: {span:?}")
1324            }
1325            InternalError::BufferType => write!(f, "The tokens buffer is not a `Span`"),
1326            InternalError::FromInt(err) => write!(f, "{err}"),
1327            InternalError::StringWithoutQuotes => {
1328                write!(f, "A String was parsed without surrounding double quotes.")
1329            }
1330
1331            InternalError::RawStringFromInvalidToken => {
1332                write!(
1333                    f,
1334                    "A `RawString` was created using a `Token` that's not a `String`"
1335                )
1336            }
1337        }
1338    }
1339}
1340
/// Convert a value carrying an [`InternalError`] into one carrying a [`PartialError`].
///
/// Implemented in this file for `Result<T, InternalError>` so the conversion can be chained
/// onto fallible calls.
trait InternalErrorIntoPartial<T> {
    /// Convert the [`InternalError`] into a [`PartialError`].
    ///
    /// The closure `f` supplies the [`Token`] that was being parsed; the implementation in this
    /// file only calls it if there is an error to convert.
    #[track_caller]
    fn into_partial_error<F>(self, f: F) -> Result<T, PartialError>
    where
        F: FnOnce() -> Token;
}
1347
1348impl<T> InternalErrorIntoPartial<T> for Result<T, InternalError> {
1349    fn into_partial_error<F>(self, f: F) -> Result<T, PartialError>
1350    where
1351        F: FnOnce() -> Token,
1352    {
1353        match self {
1354            Ok(v) => Ok(v),
1355            Err(err) => {
1356                let token = f();
1357                Err(err.into_partial_error(Some(token)))
1358            }
1359        }
1360    }
1361}
1362
1363/// Create the `Span` of an `Element` given the start and the closing token.
1364fn element_span(token_end: &Token, start: usize) -> Span {
1365    Span {
1366        start,
1367        end: token_end.span.end,
1368    }
1369}
1370
1371/// Return the content of the `Token` as a `&str`.
1372///
1373/// This in only useful for `Token`'s that contain variable data, such as `String`, `Number` etc.
1374#[track_caller]
1375fn token_str<'buf>(json: &'buf str, token: &Token) -> Result<&'buf str, PartialError> {
1376    let start = token.span.start;
1377    let end = token.span.end;
1378    let s = &json
1379        .get(start..end)
1380        .ok_or(InternalError::BufferSlice(Span { start, end }))
1381        .into_partial_error(|| *token)?;
1382    Ok(s)
1383}
1384
/// A `&str` with the surrounding quotes removed that hasn't been analyzed for escape codes.
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Default)]
pub struct RawStr<'buf> {
    /// The `&str` with surrounding quotes removed; escape codes have not been decoded.
    source: &'buf str,

    /// The `Span` of the `String` token that produced the source `&str`.
    span: Span,
}
1394
1395/// Impl `Borrow` so `RawStr` plays well with hashed collections.
1396impl Borrow<str> for RawStr<'_> {
1397    fn borrow(&self) -> &str {
1398        self.source
1399    }
1400}
1401
1402/// Impl `Borrow` so `RawStr` plays well with hashed collections.
1403impl Borrow<str> for &RawStr<'_> {
1404    fn borrow(&self) -> &str {
1405        self.source
1406    }
1407}
1408
1409impl<'buf> RawStr<'buf> {
1410    pub(super) fn from_str(source: &'buf str, span: Span) -> Self {
1411        Self { source, span }
1412    }
1413
1414    /// Create new `RawStr` from a string with surrounding quotes.
1415    #[track_caller]
1416    pub(super) fn from_quoted_str(
1417        s: &'buf str,
1418        token: Token,
1419    ) -> Result<RawStr<'buf>, PartialError> {
1420        const QUOTE: char = '"';
1421
1422        if token.kind != TokenType::String {
1423            return Err(InternalError::RawStringFromInvalidToken.into_partial_error(Some(token)));
1424        }
1425
1426        // remove double quotes
1427        let (_, s) = s
1428            .split_once(QUOTE)
1429            .ok_or(InternalError::StringWithoutQuotes)
1430            .into_partial_error(|| token)?;
1431
1432        let (source, _) = s
1433            .rsplit_once(QUOTE)
1434            .ok_or(InternalError::StringWithoutQuotes)
1435            .into_partial_error(|| token)?;
1436
1437        Ok(Self {
1438            source,
1439            span: token.span,
1440        })
1441    }
1442
1443    /// Return the raw unescaped `&str`.
1444    pub(crate) fn as_raw(&self) -> &'buf str {
1445        self.source
1446    }
1447
1448    /// Return the `&str` with all escapes decoded.
1449    pub(crate) fn decode_escapes(
1450        &self,
1451        elem: &Element<'buf>,
1452    ) -> Caveat<Cow<'_, str>, decode::WarningKind> {
1453        unescape_str(self.source, elem)
1454    }
1455
1456    /// Return a `&str` marked as either having escapes or not.
1457    pub(crate) fn has_escapes(
1458        &self,
1459        elem: &Element<'buf>,
1460    ) -> Caveat<decode::PendingStr<'_>, decode::WarningKind> {
1461        decode::analyze(self.source, elem)
1462    }
1463
1464    /// Return the [`Span`] of the [`Token`] that generated this string.
1465    pub fn span(&self) -> Span {
1466        self.span
1467    }
1468}
1469
1470impl fmt::Display for RawStr<'_> {
1471    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1472        fmt::Display::fmt(self.source, f)
1473    }
1474}
1475
1476#[track_caller]
1477fn token_str_as_string(json: &str, token: Token) -> Result<RawStr<'_>, PartialError> {
1478    let s = token_str(json, &token)?;
1479    let raw = RawStr::from_quoted_str(s, token)?;
1480    Ok(raw)
1481}
1482
#[cfg(test)]
mod test_raw_str {
    use assert_matches::assert_matches;

    use crate::test;

    use super::{ErrorKind, InternalError, RawStr, Span, Token, TokenType};

    // The tests only care about the kind of the token, so an empty `Span` is used.
    impl From<TokenType> for Token {
        fn from(kind: TokenType) -> Self {
            let span = Span::default();
            Self { kind, span }
        }
    }

    #[test]
    fn should_fail_to_create_raw_str_from_non_string_token() {
        test::setup();

        let token = Token::from(TokenType::Number);
        let partial = RawStr::from_quoted_str("fail", token).unwrap_err();
        let source = assert_matches!(partial.kind, ErrorKind::Internal(source) => source);
        assert_matches!(
            source.downcast_ref::<InternalError>(),
            Some(InternalError::RawStringFromInvalidToken)
        );
    }

    #[test]
    fn should_fail_to_create_raw_str_from_string_without_quotes() {
        test::setup();

        let token = Token::from(TokenType::String);
        let partial = RawStr::from_quoted_str("fail", token).unwrap_err();
        let source = assert_matches!(partial.kind, ErrorKind::Internal(source) => source);
        assert_matches!(
            source.downcast_ref::<InternalError>(),
            Some(InternalError::StringWithoutQuotes)
        );
    }
}
1520
#[cfg(test)]
mod test_line_col {
    use super::line_col;

    // Each expectation is written as a `(line, col)` tuple.

    #[test]
    fn should_line_col_empty_str() {
        assert_eq!(line_col(""), (0, 0));
    }

    #[test]
    fn should_line_col_one_line_one_char_str() {
        assert_eq!(line_col("1"), (0, 1));
    }

    #[test]
    fn should_line_col_one_line_many_chars_str() {
        assert_eq!(line_col("1234"), (0, 4));
    }

    #[test]
    fn should_line_col_two_line_one_col_str() {
        assert_eq!(line_col("1234\n1"), (1, 1));
    }
}
1557
1558#[cfg(test)]
1559mod test_parser {
1560    use assert_matches::assert_matches;
1561
1562    use crate::{
1563        json::{PathNode, ValueKind},
1564        test,
1565    };
1566
1567    use super::{Event, ObjectKind, Parser};
1568
1569    #[test]
1570    fn should_emit_events_for_object_with_single_field() {
1571        const JSON: &str = r#"{
1572    "field_a": 404
1573}"#;
1574
1575        test::setup();
1576
1577        let mut parser = Parser::new(JSON);
1578        let event = parser.next().unwrap().unwrap();
1579
1580        let path = assert_matches!(
1581            event,
1582            Event::Open {
1583                kind: ObjectKind::Object,
1584                parent_path
1585            }
1586             => parent_path
1587        );
1588        assert_matches!(*path, PathNode::Root);
1589
1590        let event = parser.next().unwrap().unwrap();
1591
1592        let path = assert_matches!(
1593            event,
1594            Event::Element {
1595                kind: ValueKind::Number,
1596                parent_path
1597            }
1598             => parent_path
1599        );
1600
1601        assert_eq!(*path, "$.field_a");
1602    }
1603
1604    #[test]
1605    fn should_emit_events_for_object_with_two_fields() {
1606        const JSON: &str = r#"{
1607    "field_a": 404,
1608    "field_b": "name"
1609}"#;
1610
1611        test::setup();
1612
1613        let mut parser = Parser::new(JSON);
1614        let event = parser.next().unwrap().unwrap();
1615
1616        let path = assert_matches!(
1617            event,
1618            Event::Open {
1619                kind: ObjectKind::Object,
1620                parent_path
1621            }
1622             => parent_path
1623        );
1624        assert_matches!(*path, PathNode::Root);
1625
1626        let event = parser.next().unwrap().unwrap();
1627
1628        let path = assert_matches!(
1629            event,
1630            Event::Element {
1631                kind: ValueKind::Number,
1632                parent_path
1633            }
1634             => parent_path
1635        );
1636
1637        assert_eq!(*path, "$.field_a");
1638
1639        let event = parser.next().unwrap().unwrap();
1640
1641        let path = assert_matches!(
1642            event,
1643            Event::Element {
1644                kind: ValueKind::String,
1645                parent_path
1646            }
1647             => parent_path
1648        );
1649
1650        assert_eq!(*path, "$.field_b");
1651    }
1652
1653    #[test]
1654    fn should_emit_events_for_object_with_nested_fields() {
1655        const JSON: &str = r#"{
1656    "field_a": 404,
1657    "field_b": {
1658        "field_c": "name"
1659    }
1660}"#;
1661
1662        test::setup();
1663
1664        let mut parser = Parser::new(JSON);
1665        let event = parser.next().unwrap().unwrap();
1666
1667        let path = assert_matches!(
1668            event,
1669            Event::Open {
1670                kind: ObjectKind::Object,
1671                parent_path
1672            }
1673             => parent_path
1674        );
1675        assert_matches!(*path, PathNode::Root);
1676
1677        let event = parser.next().unwrap().unwrap();
1678
1679        let path = assert_matches!(
1680            event,
1681            Event::Element {
1682                kind: ValueKind::Number,
1683                parent_path
1684            }
1685             => parent_path
1686        );
1687
1688        assert_eq!(*path, "$.field_a");
1689
1690        let event = parser.next().unwrap().unwrap();
1691
1692        let path = assert_matches!(
1693            event,
1694            Event::Open {
1695                kind: ObjectKind::Object,
1696                parent_path
1697            }
1698             => parent_path
1699        );
1700        assert_eq!(*path, "$.field_b");
1701
1702        let event = parser.next().unwrap().unwrap();
1703
1704        let path = assert_matches!(
1705            event,
1706            Event::Element {
1707                kind: ValueKind::String,
1708                parent_path
1709            }
1710             => parent_path
1711        );
1712
1713        assert_eq!(*path, "$.field_b.field_c");
1714    }
1715
1716    #[test]
1717    fn should_emit_events_for_array_with_single_field() {
1718        const JSON: &str = r#"["field_a"]"#;
1719
1720        test::setup();
1721
1722        let mut parser = Parser::new(JSON);
1723        let event = parser.next().unwrap().unwrap();
1724
1725        let path = assert_matches!(
1726            event,
1727            Event::Open {
1728                kind: ObjectKind::Array,
1729                parent_path
1730            }
1731             => parent_path
1732        );
1733        assert_matches!(*path, PathNode::Root);
1734
1735        let event = parser.next().unwrap().unwrap();
1736
1737        let path = assert_matches!(
1738            event,
1739            Event::Element {
1740                kind: ValueKind::String,
1741                parent_path
1742            }
1743             => parent_path
1744        );
1745
1746        assert_eq!(*path, "$.0");
1747    }
1748
1749    #[test]
1750    fn should_emit_events_for_array_with_two_fields() {
1751        const JSON: &str = r#"{
1752    "field_a": 404,
1753    "field_b": "name"
1754}"#;
1755
1756        test::setup();
1757
1758        let mut parser = Parser::new(JSON);
1759        let event = parser.next().unwrap().unwrap();
1760
1761        let path = assert_matches!(
1762            event,
1763            Event::Open {
1764                kind: ObjectKind::Object,
1765                parent_path
1766            }
1767             => parent_path
1768        );
1769        assert_matches!(*path, PathNode::Root);
1770
1771        let event = parser.next().unwrap().unwrap();
1772
1773        let path = assert_matches!(
1774            event,
1775            Event::Element {
1776                kind: ValueKind::Number,
1777                parent_path
1778            }
1779             => parent_path
1780        );
1781
1782        assert_eq!(*path, "$.field_a");
1783
1784        let event = parser.next().unwrap().unwrap();
1785
1786        let path = assert_matches!(
1787            event,
1788            Event::Element {
1789                kind: ValueKind::String,
1790                parent_path
1791            }
1792             => parent_path
1793        );
1794
1795        assert_eq!(*path, "$.field_b");
1796    }
1797
1798    #[test]
1799    fn should_emit_events_for_array_with_nested_fields() {
1800        const JSON: &str = r#"{
1801    "field_a": 404,
1802    "field_b": {
1803        "field_c": "name"
1804    }
1805}"#;
1806
1807        test::setup();
1808
1809        let mut parser = Parser::new(JSON);
1810        let event = parser.next().unwrap().unwrap();
1811
1812        let path = assert_matches!(
1813            event,
1814            Event::Open {
1815                kind: ObjectKind::Object,
1816                parent_path
1817            }
1818             => parent_path
1819        );
1820        assert_matches!(*path, PathNode::Root);
1821
1822        let event = parser.next().unwrap().unwrap();
1823
1824        let path = assert_matches!(
1825            event,
1826            Event::Element {
1827                kind: ValueKind::Number,
1828                parent_path
1829            }
1830             => parent_path
1831        );
1832
1833        assert_eq!(*path, "$.field_a");
1834
1835        let event = parser.next().unwrap().unwrap();
1836
1837        let path = assert_matches!(
1838            event,
1839            Event::Open {
1840                kind: ObjectKind::Object,
1841                parent_path
1842            }
1843             => parent_path
1844        );
1845        assert_eq!(*path, "$.field_b");
1846
1847        let event = parser.next().unwrap().unwrap();
1848
1849        let path = assert_matches!(
1850            event,
1851            Event::Element {
1852                kind: ValueKind::String,
1853                parent_path
1854            }
1855             => parent_path
1856        );
1857
1858        assert_eq!(*path, "$.field_b.field_c");
1859    }
1860}
1861
#[cfg(test)]
pub mod test {
    #![allow(clippy::string_slice, reason = "tests are allowed to panic")]

    use super::{Error, ErrorKind, Span};

    /// Return the slice of `json` covered by `span`.
    ///
    /// Panics if the `span` can not be used to slice the `json`.
    pub fn spanned_json(span: Span, json: &str) -> &str {
        let Span { start, end } = span;
        &json[start..end]
    }

    /// This is a compile time check: the test fails to build if the bounds are not met.
    #[test]
    const fn error_should_be_send_and_sync() {
        const fn assert_send_sync<T: Send + Sync>() {}

        assert_send_sync::<Error>();
        assert_send_sync::<ErrorKind>();
    }
}
1880
1881#[cfg(test)]
1882mod test_parser_basic_happy_structure {
1883    use assert_matches::assert_matches;
1884
1885    use crate::{json::Value, test};
1886
1887    use super::{parse, test::spanned_json, Element, PathNode};
1888
1889    #[test]
1890    fn should_parse_nested_object() {
1891        test::setup();
1892
1893        let json = r#"{ "field_a": "one", "field_b": { "field_ba": "two", "field_bb": "three" } }"#;
1894        let elem = parse(json).unwrap();
1895        let Element {
1896            path_node: path,
1897            value,
1898            span,
1899            id: _,
1900        } = elem;
1901
1902        assert_eq!(*path, PathNode::Root);
1903        assert_eq!(spanned_json(span, json), json);
1904
1905        let fields = assert_matches!(value, Value::Object(elems) => elems);
1906        let [field_a, field_b] = fields.try_into().unwrap();
1907
1908        {
1909            let (_id, path, span, value) = field_a.into_parts();
1910
1911            assert_eq!(*path, "$.field_a");
1912            assert_eq!(spanned_json(span, json), r#""one""#);
1913            let s = assert_matches!(value, Value::String(s) => s);
1914            assert_eq!(s.as_raw(), "one");
1915        }
1916
1917        {
1918            let (_id, path, span, value) = field_b.into_parts();
1919            assert_eq!(*path, "$.field_b");
1920            assert_eq!(
1921                spanned_json(span, json),
1922                r#"{ "field_ba": "two", "field_bb": "three" }"#
1923            );
1924
1925            let fields = assert_matches!(value, Value::Object(fields) => fields);
1926            let [field_b_a, field_b_b] = fields.try_into().unwrap();
1927
1928            {
1929                let (_id, path, span, value) = field_b_a.into_parts();
1930
1931                assert_eq!(spanned_json(span, json), r#""two""#);
1932                assert_eq!(*path, "$.field_b.field_ba");
1933                let s = assert_matches!(value, Value::String(s) => s);
1934                assert_eq!(s.as_raw(), "two");
1935            }
1936
1937            {
1938                let (_id, path, span, value) = field_b_b.into_parts();
1939
1940                assert_eq!(spanned_json(span, json), r#""three""#);
1941                assert_eq!(*path, "$.field_b.field_bb");
1942                let s = assert_matches!(value, Value::String(s) => s);
1943                assert_eq!(s.as_raw(), "three");
1944            }
1945        }
1946    }
1947
1948    #[test]
1949    fn should_parse_object_with_nested_array() {
1950        test::setup();
1951
1952        let json = r#"{ "field_a": "one", "field_b": [ "two", "three" ] }"#;
1953        let elem = parse(json).unwrap();
1954        let Element {
1955            path_node: path,
1956            value,
1957            span,
1958            id: _,
1959        } = elem;
1960
1961        assert_eq!(*path, PathNode::Root);
1962        assert_eq!(spanned_json(span, json), json);
1963
1964        let fields = assert_matches!(value, Value::Object(fields) => fields);
1965        let [field_a, field_b] = fields.try_into().unwrap();
1966
1967        {
1968            let (_id, path, span, value) = field_a.into_parts();
1969
1970            assert_eq!(spanned_json(span, json), r#""one""#);
1971            assert_eq!(*path, "$.field_a");
1972            let s = assert_matches!(value, Value::String(s) => s);
1973            assert_eq!(s.as_raw(), "one");
1974        }
1975
1976        {
1977            let (_id, path, span, value) = field_b.into_parts();
1978            assert_eq!(*path, "$.field_b");
1979            assert_eq!(spanned_json(span, json), r#"[ "two", "three" ]"#);
1980
1981            let elems = assert_matches!(value, Value::Array(elems) => elems);
1982            let [elem_b_a, elem_b_b] = elems.try_into().unwrap();
1983
1984            {
1985                let (_id, path, span, value) = elem_b_a.into_parts();
1986
1987                assert_eq!(spanned_json(span, json), r#""two""#);
1988                assert_eq!(*path, "$.field_b.0");
1989                let s = assert_matches!(value, Value::String(s) => s);
1990                assert_eq!(s.as_raw(), "two");
1991            }
1992
1993            {
1994                let (_id, path, span, value) = elem_b_b.into_parts();
1995
1996                assert_eq!(spanned_json(span, json), r#""three""#);
1997                assert_eq!(*path, "$.field_b.1");
1998                let s = assert_matches!(value, Value::String(s) => s);
1999                assert_eq!(s.as_raw(), "three");
2000            }
2001        }
2002    }
2003
2004    #[test]
2005    fn should_parse_nested_array() {
2006        test::setup();
2007
2008        let json = r#"[ "one", ["two", "three"] ]"#;
2009        let elem = parse(json).unwrap();
2010        let Element {
2011            path_node: path,
2012            value,
2013            span,
2014            id: _,
2015        } = elem;
2016
2017        assert_eq!(*path, PathNode::Root);
2018        assert_eq!(spanned_json(span, json), json);
2019
2020        let elems = assert_matches!(value, Value::Array(elems) => elems);
2021        let [elem_a, elem_b] = elems.try_into().unwrap();
2022
2023        {
2024            let Element {
2025                path_node: path,
2026                value,
2027                span,
2028                id: _,
2029            } = elem_a;
2030
2031            assert_eq!(spanned_json(span, json), r#""one""#);
2032            assert_eq!(*path, "$.0");
2033            let s = assert_matches!(value, Value::String(s) => s);
2034            assert_eq!(s.as_raw(), "one");
2035        }
2036
2037        {
2038            let Element {
2039                path_node: path,
2040                value,
2041                span,
2042                id: _,
2043            } = elem_b;
2044            assert_eq!(*path, "$.1");
2045            assert_eq!(spanned_json(span, json), r#"["two", "three"]"#);
2046
2047            let elems = assert_matches!(value, Value::Array(elems) => elems);
2048            let [elem_b_a, elem_b_b] = elems.try_into().unwrap();
2049
2050            {
2051                let Element {
2052                    path_node: path,
2053                    value,
2054                    span,
2055                    id: _,
2056                } = elem_b_a;
2057
2058                assert_eq!(spanned_json(span, json), r#""two""#);
2059                assert_eq!(*path, "$.1.0");
2060                let s = assert_matches!(value, Value::String(s) => s);
2061                assert_eq!(s.as_raw(), "two");
2062            }
2063
2064            {
2065                let Element {
2066                    path_node: path,
2067                    value,
2068                    span,
2069                    id: _,
2070                } = elem_b_b;
2071
2072                assert_eq!(spanned_json(span, json), r#""three""#);
2073                assert_eq!(*path, "$.1.1");
2074                let s = assert_matches!(value, Value::String(s) => s);
2075                assert_eq!(s.as_raw(), "three");
2076            }
2077        }
2078    }
2079
2080    #[test]
2081    fn should_parse_array_with_nested_object() {
2082        test::setup();
2083
2084        let json = r#"[ "one", {"field_a": "two", "field_b": "three"} ]"#;
2085        let elem = parse(json).unwrap();
2086        let Element {
2087            path_node: path,
2088            value,
2089            span,
2090            id: _,
2091        } = elem;
2092
2093        assert_eq!(*path, PathNode::Root);
2094        assert_eq!(spanned_json(span, json), json);
2095
2096        let elems = assert_matches!(value, Value::Array(elems) => elems);
2097        let [elem_a, elem_b] = elems.try_into().unwrap();
2098
2099        {
2100            let Element {
2101                path_node: path,
2102                value,
2103                span,
2104                id: _,
2105            } = elem_a;
2106
2107            assert_eq!(spanned_json(span, json), r#""one""#);
2108            assert_eq!(*path, "$.0");
2109            let s = assert_matches!(value, Value::String(s) => s);
2110            assert_eq!(s.as_raw(), "one");
2111        }
2112
2113        {
2114            let Element {
2115                path_node: path,
2116                value,
2117                span,
2118                id: _,
2119            } = elem_b;
2120            assert_eq!(*path, "$.1");
2121            assert_eq!(
2122                spanned_json(span, json),
2123                r#"{"field_a": "two", "field_b": "three"}"#
2124            );
2125
2126            let fields = assert_matches!(value, Value::Object(fields) => fields);
2127            let [field_b_a, field_b_b] = fields.try_into().unwrap();
2128
2129            {
2130                let (_id, path, span, value) = field_b_a.into_parts();
2131
2132                assert_eq!(spanned_json(span, json), r#""two""#);
2133                assert_eq!(*path, "$.1.field_a");
2134                let s = assert_matches!(value, Value::String(s) => s);
2135                assert_eq!(s.as_raw(), "two");
2136            }
2137
2138            {
2139                let (_id, path, span, value) = field_b_b.into_parts();
2140
2141                assert_eq!(spanned_json(span, json), r#""three""#);
2142                assert_eq!(*path, "$.1.field_b");
2143                let s = assert_matches!(value, Value::String(s) => s);
2144                assert_eq!(s.as_raw(), "three");
2145            }
2146        }
2147    }
2148}
2149
2150#[cfg(test)]
2151mod test_parser_error_reporting {
2152    #![allow(
2153        clippy::string_slice,
2154        clippy::as_conversions,
2155        reason = "panicking is tests is allowed"
2156    )]
2157
2158    use assert_matches::assert_matches;
2159
2160    use crate::test;
2161
2162    use super::{parse, ErrorKind, SpanBounds, TokenType};
2163
2164    #[test]
2165    fn should_report_trailing_comma() {
2166        const JSON: &str = r#"{
2167   "field_a": "one",
2168   "field_b": "two",
2169}"#;
2170
2171        test::setup();
2172
2173        let err = parse(JSON).unwrap_err();
2174
2175        assert_matches!(err.kind(), ErrorKind::UnexpectedToken);
2176        assert_matches!(
2177            err.token().unwrap().kind,
2178            TokenType::Comma,
2179            "We are parsing a comma when we realize that it should not be there"
2180        );
2181        assert_eq!(*err.path(), "$.field_b");
2182
2183        let report = err.into_report(JSON);
2184
2185        assert_eq!(report.json_context(), r#""two","#);
2186        let SpanBounds { start, end } = report.span_bounds();
2187        assert_eq!(*start, (2, 14));
2188        assert_eq!(*end, (2, 20));
2189        assert_eq!(report.expand_json_context(), r#"   "field_b": "two","#);
2190    }
2191
2192    #[test]
2193    fn should_report_invalid_json() {
2194        const JSON: &str = r#"{
2195"field_"#;
2196
2197        test::setup();
2198
2199        let err = parse(JSON).unwrap_err();
2200
2201        assert_matches!(err.kind(), ErrorKind::UnexpectedToken);
2202        assert_matches!(
2203            err.token().unwrap().kind,
2204            TokenType::Invalid,
2205            "We are parsing a string not ended be a double quote"
2206        );
2207        assert_eq!(*err.path(), "$");
2208
2209        let report = err.into_report(JSON);
2210
2211        assert_eq!(report.json_context(), r#""field_"#);
2212        let SpanBounds { start, end } = report.span_bounds();
2213        assert_eq!(*start, (1, 0));
2214        assert_eq!(*end, (1, 7));
2215        assert_eq!(report.expand_json_context(), r#""field_"#);
2216    }
2217
2218    #[test]
2219    fn should_report_invalid_json_in_some_place() {
2220        const JSON: &str = r#"{
2221"field_a": "Barney",
2222"field_"#;
2223
2224        test::setup();
2225
2226        let err = parse(JSON).unwrap_err();
2227
2228        assert_matches!(err.kind(), ErrorKind::UnexpectedToken);
2229        assert_matches!(
2230            err.token().unwrap().kind,
2231            TokenType::Invalid,
2232            "We are parsing a string not ended be a double quote"
2233        );
2234        assert_eq!(*err.path(), "$");
2235
2236        let report = err.into_report(JSON);
2237
2238        assert_eq!(report.json_context(), r#""field_"#);
2239        let SpanBounds { start, end } = report.span_bounds();
2240        assert_eq!(*start, (2, 0));
2241        assert_eq!(*end, (2, 7));
2242        assert_eq!(report.expand_json_context(), r#""field_"#);
2243    }
2244
2245    #[test]
2246    fn should_report_invalid_json_in_some_place_in_the_middle() {
2247        const JSON: &str = r#"{
2248"field_a": "Barney",
2249"field_b",
2250"field_c": "Fred" }
2251"#;
2252
2253        test::setup();
2254
2255        let err = parse(JSON).unwrap_err();
2256
2257        assert_matches!(err.kind(), ErrorKind::UnexpectedToken);
2258        assert_matches!(
2259            err.token().unwrap().kind,
2260            TokenType::Comma,
2261            "We are parsing a key value pair but the key is followed by comma."
2262        );
2263        assert_eq!(*err.path(), "$.field_a");
2264
2265        let report = err.into_report(JSON);
2266
2267        assert_eq!(
2268            report.json_context(),
2269            r#""Barney",
2270"field_b","#
2271        );
2272        let SpanBounds { start, end } = report.span_bounds();
2273        assert_eq!(*start, (1, 11));
2274        assert_eq!(*end, (2, 10));
2275        assert_eq!(report.expand_json_context(), r#""field_a": "Barney","#);
2276    }
2277
2278    #[test]
2279    fn should_report_missing_comma() {
2280        const JSON: &str = r#"{
2281   "field_a": "one"
2282   "field_b": "two"
2283}"#;
2284
2285        test::setup();
2286
2287        let err = parse(JSON).unwrap_err();
2288
2289        assert_matches!(err.kind(), ErrorKind::UnexpectedToken);
2290        assert_matches!(
2291            err.token().unwrap().kind,
2292            TokenType::String,
2293            "We are parsing a String when we realize that there should be a comma"
2294        );
2295        assert_eq!(*err.path(), "$.field_a");
2296
2297        let report = err.into_report(JSON);
2298
2299        assert_eq!(
2300            report.json_context(),
2301            r#""one"
2302   "field_b""#
2303        );
2304        let SpanBounds { start, end } = report.span_bounds();
2305        assert_eq!(*start, (1, 14));
2306        assert_eq!(*end, (2, 12));
2307        assert_eq!(report.expand_json_context(), r#"   "field_a": "one""#);
2308    }
2309}
2310
#[cfg(test)]
mod test_type_sizes {
    use std::mem::size_of;

    use super::{
        Element, Error, ErrorImpl, PartialElement, Path, PathNode, PathNodeRef, RawStr, Span,
        Token, TokenType, Value,
    };

    /// Pins the in-memory size (in bytes) of the parser's core types on 64-bit targets,
    /// so that any change in size makes this test fail.
    #[test]
    #[cfg(target_pointer_width = "64")]
    fn should_match_sizes() {
        /// Asserts that `size_of` the given type equals the expected number of bytes.
        macro_rules! assert_size {
            ($ty:ty, $bytes:expr) => {
                assert_eq!(size_of::<$ty>(), $bytes)
            };
        }

        assert_size!(Element<'_>, 72);
        assert_size!(Error, 8);
        assert_size!(ErrorImpl, 96);
        assert_size!(PartialElement<'_>, 56);
        assert_size!(Path, 24);
        assert_size!(PathNode<'_>, 48);
        assert_size!(PathNodeRef<'_>, 8);
        assert_size!(RawStr<'_>, 32);
        assert_size!(Span, 16);
        assert_size!(Token, 24);
        assert_size!(TokenType, 1);
        assert_size!(Value<'_>, 40);
    }
}