ocpi_tariffs/json/
parser.rs

//! A JSON parser built to keep track of where each element came from in the input.
2use std::borrow::{Borrow, Cow};
3use std::iter::Peekable;
4use std::num::TryFromIntError;
5use std::str::Bytes;
6use std::sync::atomic::AtomicUsize;
7use std::{fmt, rc::Rc};
8
9use json_tools::{Buffer, BufferType};
10use tracing::{debug, trace};
11
12use crate::Caveat;
13
14use super::{
15    decode::{self, unescape_str},
16    Element, Field, ObjectKind, PathNode, PathNodeRef, Value, ValueKind,
17};
18use super::{ElemId, Path};
19
/// The `json_tools` lexer over the bytes of the source JSON, wrapped in [`Peekable`].
///
/// We peek at the next `Token` when asserting on trailing commas.
type Lexer<'buf> = Peekable<json_tools::Lexer<Bytes<'buf>>>;
22
23/// Parse the JSON into a tree of [`Element`]s.
24pub(crate) fn parse(json: &str) -> Result<Element<'_>, Error> {
25    let parser = Parser::new(json);
26
27    // When just parsing the JSON into an `Element` we only care about the final event
28    // when the JSON has been completely transformed into a root element.
29    for event in parser {
30        if let Event::Complete(element) = event? {
31            return Ok(element);
32        }
33    }
34
35    Err(ErrorKind::UnexpectedEOF
36        .into_partial_error_without_token()
37        .with_root_path())
38}
39
/// A parsing event emitted for each call to the `<Parser as Iterator>::next` function.
#[derive(Debug)]
pub(crate) enum Event<'buf> {
    /// An [`Element`] has been opened and its construction is in progress.
    Open {
        /// Whether the opened [`Element`] is an object or an array.
        kind: ObjectKind,
        /// The path node handed to the newly opened [`Element`]; the root node for the root element.
        parent_path: PathNodeRef<'buf>,
    },

    /// An [`Element`] has been created and added to its parent [`Element`].
    ///
    /// If the kind is `Array` or `Object` that means that this element is closed: its construction is complete.
    Element {
        /// The kind of JSON value the [`Element`] is.
        kind: ValueKind,
        /// The path to the parent [`Element`].
        parent_path: PathNodeRef<'buf>,
    },

    /// The parse has completed creating the tree of [`Element`]s.
    Complete(Element<'buf>),
}
62
/// The context needed to parse a single chunk of JSON.
///
/// The `Parser` is driven through its `Iterator` impl, which yields one [`Event`] per step.
pub(crate) struct Parser<'buf> {
    /// Used to assign a unique [`ElemId`] to each [`Element`].
    elem_count: AtomicUsize,

    /// True if the `Parser` is complete.
    ///
    /// Any further calls to [`Parser::next`] will return `None`.
    complete: bool,

    /// The source JSON we're parsing.
    json: &'buf str,

    /// The JSON lexer.
    lexer: Lexer<'buf>,

    /// The pool with pre-allocated `Path`s.
    path_pool: PathPool<'buf>,

    /// The stack to track nested objects.
    stack: Stack<'buf>,

    /// The previous token seen.
    ///
    /// It is recorded after a scalar value has been added to an open element and is
    /// taken to give context to an `UnexpectedEOF` error.
    token: Option<Token>,
}
88
/// Our own mirror of `json_tools::TokenType`, defined so that `Clone` (and `Copy`)
/// can be derived for it.
///
/// This can be removed when `json_tools::TokenType` impl's `Clone`.
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd)]
pub enum TokenType {
    /// `false`
    BooleanFalse,
    /// `true`
    BooleanTrue,

    /// `]`
    BracketClose,
    /// `[`
    BracketOpen,

    /// `:`
    Colon,

    /// `,`
    Comma,

    /// `}`
    CurlyClose,
    /// `{`
    CurlyOpen,

    /// The type of the token could not be identified.
    /// Should be removed if this lexer is ever to be feature complete
    Invalid,

    /// `null`
    Null,

    /// A Number, like `1.1234` or `123` or `-0.0` or `-1` or `.0` or `.`
    Number,

    /// A json string , like `"foo"`
    String,
}

impl TokenType {
    /// The text used to display the token type.
    ///
    /// Punctuation and keywords are shown literally; the remaining kinds use a
    /// `<placeholder>` as their text varies with the input.
    fn as_str(self) -> &'static str {
        match self {
            // Structural punctuation.
            Self::BracketOpen => "[",
            Self::BracketClose => "]",
            Self::CurlyOpen => "{",
            Self::CurlyClose => "}",
            Self::Colon => ":",
            Self::Comma => ",",
            // Keywords.
            Self::BooleanTrue => "true",
            Self::BooleanFalse => "false",
            Self::Null => "null",
            // Kinds without a fixed spelling.
            Self::Number => "<number>",
            Self::String => "<string>",
            Self::Invalid => "<invalid>",
        }
    }
}

impl fmt::Display for TokenType {
    /// Write the text from [`TokenType::as_str`] verbatim.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let text = self.as_str();
        f.write_str(text)
    }
}
153
154impl From<json_tools::TokenType> for TokenType {
155    fn from(value: json_tools::TokenType) -> Self {
156        match value {
157            json_tools::TokenType::BooleanFalse => TokenType::BooleanFalse,
158            json_tools::TokenType::BooleanTrue => TokenType::BooleanTrue,
159            json_tools::TokenType::BracketClose => TokenType::BracketClose,
160            json_tools::TokenType::BracketOpen => TokenType::BracketOpen,
161            json_tools::TokenType::CurlyClose => TokenType::CurlyClose,
162            json_tools::TokenType::CurlyOpen => TokenType::CurlyOpen,
163            json_tools::TokenType::Colon => TokenType::Colon,
164            json_tools::TokenType::Comma => TokenType::Comma,
165            json_tools::TokenType::Invalid => TokenType::Invalid,
166            json_tools::TokenType::Null => TokenType::Null,
167            json_tools::TokenType::Number => TokenType::Number,
168            json_tools::TokenType::String => TokenType::String,
169        }
170    }
171}
172
/// A lexical token, identifying its kind and span.
///
/// We define our own `Token` as the `json_tools::Token` defines a `Buffer` that can be heap allocated
/// or a `Span`. We only use the `Span` variant.
///
/// Our `Token` can also impl `Copy` and `Clone` as the size and semantics are acceptable.
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd)]
pub struct Token {
    /// The exact type of the token
    pub kind: TokenType,

    /// The span allows referencing back into the source byte stream
    /// to obtain the string making up the token.
    pub span: Span,
}
189
190impl Token {
191    /// Return true is the token is a opening brace.
192    fn is_opening(&self) -> bool {
193        matches!(self.kind, TokenType::CurlyOpen | TokenType::BracketOpen)
194    }
195
196    /// Return true is the token is a closing brace.
197    fn is_closing(&self) -> bool {
198        matches!(self.kind, TokenType::CurlyClose | TokenType::BracketClose)
199    }
200
201    /// Return true is the token is a comma.
202    fn is_comma(&self) -> bool {
203        matches!(self.kind, TokenType::Comma)
204    }
205}
206
207impl fmt::Display for Token {
208    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
209        write!(
210            f,
211            "token: {}, ({},{})",
212            self.kind, self.span.start, self.span.end
213        )
214    }
215}
216
217impl TryFrom<json_tools::Token> for Token {
218    type Error = PartialError;
219
220    fn try_from(token: json_tools::Token) -> Result<Self, Self::Error> {
221        let json_tools::Token { kind, buf } = token;
222        let kind = kind.into();
223        let Buffer::Span(span) = &buf else {
224            return Err(InternalError::BufferType.into_partial_error(None));
225        };
226
227        let span = span
228            .try_into()
229            .map_err(|err| InternalError::from(err).into_partial_error(None))?;
230
231        Ok(Self { kind, span })
232    }
233}
234
235impl TryFrom<&json_tools::Token> for Token {
236    type Error = PartialError;
237
238    fn try_from(token: &json_tools::Token) -> Result<Self, Self::Error> {
239        let json_tools::Token { kind, buf } = token;
240        let kind = kind.clone().into();
241        let Buffer::Span(span) = &buf else {
242            return Err(InternalError::BufferType.into_partial_error(None));
243        };
244
245        let span = span
246            .try_into()
247            .map_err(|err| InternalError::from(err).into_partial_error(None))?;
248
249        Ok(Self { kind, span })
250    }
251}
252
253impl<'buf> Parser<'buf> {
254    pub fn new(json: &'buf str) -> Self {
255        let lexer = json_tools::Lexer::new(json.bytes(), BufferType::Span).peekable();
256
257        Self {
258            elem_count: AtomicUsize::new(0),
259            complete: false,
260            json,
261            lexer,
262            path_pool: PathPool::default(),
263            stack: Stack::new(),
264            token: None,
265        }
266    }
267
268    fn next_elem_id(&self) -> ElemId {
269        let id = self
270            .elem_count
271            .fetch_add(1, std::sync::atomic::Ordering::Relaxed);
272
273        ElemId(id)
274    }
275
276    fn expect_next(&mut self) -> Result<Token, PartialError> {
277        let Some(token) = self.lexer.next() else {
278            return Err(ErrorKind::UnexpectedEOF.into_partial_error(self.token.take()));
279        };
280
281        let token = token.try_into()?;
282
283        Ok(token)
284    }
285
286    /// Return an `Err` if the next token is not the expected.
287    fn expect_token(&mut self, token_type: TokenType) -> Result<(), PartialError> {
288        let Some(token) = self.lexer.next() else {
289            return Err(ErrorKind::UnexpectedEOF.into_partial_error(self.token.take()));
290        };
291
292        let token: Token = token.try_into()?;
293
294        if token.kind == token_type {
295            Ok(())
296        } else {
297            Err(unexpected_token(token))
298        }
299    }
300
301    fn next_event(&mut self) -> Result<Option<Event<'buf>>, Error> {
302        if self.complete {
303            return Ok(None);
304        }
305
306        let head = self.stack.pop_head();
307
308        match head {
309            None => {
310                let token = self.expect_next().with_root_path()?;
311
312                trace!(?token);
313                self.comma_checks(&token).with_root_path()?;
314
315                match token.kind {
316                    TokenType::CurlyOpen => {
317                        let parent_path = self.path_pool.root();
318                        self.stack.push_new_object(
319                            self.next_elem_id(),
320                            Rc::clone(&parent_path),
321                            &token,
322                        );
323                        Ok(Some(Event::Open {
324                            kind: ObjectKind::Object,
325                            parent_path,
326                        }))
327                    }
328                    TokenType::BracketOpen => {
329                        let parent_path = self.path_pool.root();
330                        self.stack.push_new_array(
331                            self.next_elem_id(),
332                            Rc::clone(&parent_path),
333                            &token,
334                        );
335                        Ok(Some(Event::Open {
336                            kind: ObjectKind::Array,
337                            parent_path,
338                        }))
339                    }
340                    TokenType::Number => {
341                        let value = Value::Number(token_str(self.json, &token).with_root_path()?);
342                        self.exit_with_value(token, value).with_root_path()
343                    }
344                    TokenType::Null => self.exit_with_value(token, Value::Null).with_root_path(),
345                    TokenType::String => {
346                        let value =
347                            Value::String(token_str_as_string(self.json, token).with_root_path()?);
348                        self.exit_with_value(token, value).with_root_path()
349                    }
350                    TokenType::BooleanTrue => {
351                        self.exit_with_value(token, Value::True).with_root_path()
352                    }
353                    TokenType::BooleanFalse => {
354                        self.exit_with_value(token, Value::False).with_root_path()
355                    }
356                    _ => Err(unexpected_token(token).with_root_path()),
357                }
358            }
359            Some(mut head) => {
360                let token = self.expect_next().with_head(&head)?;
361
362                trace!(?token, head = ?head.elem_type);
363                let token = if self.comma_checks(&token).with_head(&head)? {
364                    self.expect_next().with_head(&head)?
365                } else {
366                    token
367                };
368
369                let (value, token, path) = match head.elem_type {
370                    ObjectKind::Object => {
371                        let key = match token.kind {
372                            TokenType::String => {
373                                token_str_as_string(self.json, token).with_head(&head)?
374                            }
375                            TokenType::CurlyClose => {
376                                let event = self.close_element(head, &token)?;
377                                return Ok(event);
378                            }
379                            _ => return Err(unexpected_token(token).with_root_path()),
380                        };
381
382                        self.expect_token(TokenType::Colon).with_head(&head)?;
383                        let token = self.expect_next().with_head(&head)?;
384
385                        let value = match token.kind {
386                            TokenType::CurlyOpen => {
387                                let parent_path = head.parent_is_object(&mut self.path_pool, key);
388                                self.stack.push_head(head);
389                                self.stack.push_new_object(
390                                    self.next_elem_id(),
391                                    Rc::clone(&parent_path),
392                                    &token,
393                                );
394                                return Ok(Some(Event::Open {
395                                    kind: ObjectKind::Object,
396                                    parent_path,
397                                }));
398                            }
399                            TokenType::BracketOpen => {
400                                let parent_path = head.parent_is_object(&mut self.path_pool, key);
401                                self.stack.push_head(head);
402                                self.stack.push_new_array(
403                                    self.next_elem_id(),
404                                    Rc::clone(&parent_path),
405                                    &token,
406                                );
407                                return Ok(Some(Event::Open {
408                                    kind: ObjectKind::Array,
409                                    parent_path,
410                                }));
411                            }
412                            TokenType::CurlyClose => {
413                                let event = self.close_element(head, &token)?;
414                                return Ok(event);
415                            }
416                            TokenType::String => Value::String(
417                                token_str_as_string(self.json, token).with_head(&head)?,
418                            ),
419                            TokenType::Number => {
420                                Value::Number(token_str(self.json, &token).with_head(&head)?)
421                            }
422                            TokenType::Null => Value::Null,
423                            TokenType::BooleanTrue => Value::True,
424                            TokenType::BooleanFalse => Value::False,
425                            _ => return Err(unexpected_token(token).with_head(&head)),
426                        };
427
428                        (
429                            value,
430                            token,
431                            head.parent_is_object(&mut self.path_pool, key),
432                        )
433                    }
434                    ObjectKind::Array => {
435                        let value = match token.kind {
436                            TokenType::CurlyOpen => {
437                                let parent_path = head.parent_is_array(&mut self.path_pool);
438                                self.stack.push_head(head);
439                                self.stack.push_new_object(
440                                    self.next_elem_id(),
441                                    Rc::clone(&parent_path),
442                                    &token,
443                                );
444                                return Ok(Some(Event::Open {
445                                    kind: ObjectKind::Object,
446                                    parent_path,
447                                }));
448                            }
449                            TokenType::BracketOpen => {
450                                let parent_path = head.parent_is_array(&mut self.path_pool);
451                                self.stack.push_head(head);
452                                self.stack.push_new_array(
453                                    self.next_elem_id(),
454                                    Rc::clone(&parent_path),
455                                    &token,
456                                );
457                                return Ok(Some(Event::Open {
458                                    kind: ObjectKind::Array,
459                                    parent_path,
460                                }));
461                            }
462                            TokenType::BracketClose => {
463                                let event = self.close_element(head, &token)?;
464                                return Ok(event);
465                            }
466
467                            TokenType::String => Value::String(
468                                token_str_as_string(self.json, token).with_head(&head)?,
469                            ),
470                            TokenType::Number => {
471                                Value::Number(token_str(self.json, &token).with_head(&head)?)
472                            }
473                            TokenType::Null => Value::Null,
474                            TokenType::BooleanTrue => Value::True,
475                            TokenType::BooleanFalse => Value::False,
476                            _ => return Err(unexpected_token(token).with_head(&head)),
477                        };
478                        (value, token, head.parent_is_array(&mut self.path_pool))
479                    }
480                };
481
482                let event = Event::Element {
483                    kind: value.kind(),
484                    parent_path: Rc::clone(&path),
485                };
486                head.push_field(self.next_elem_id(), path, value, &token);
487
488                let peek_token = self.peek(&token).with_head(&head)?;
489
490                if !(peek_token.is_comma() || peek_token.is_closing()) {
491                    return Err(unexpected_token(peek_token).with_head(&head));
492                }
493
494                self.token.replace(token);
495                self.stack.push_head(head);
496
497                Ok(Some(event))
498            }
499        }
500    }
501
502    /// Close a [`PartialElement`] which creates an [`Element`] and returns an [`Event`]
503    fn close_element(
504        &mut self,
505        head: PartialElement<'buf>,
506        token: &Token,
507    ) -> Result<Option<Event<'buf>>, Error> {
508        let event = self.stack.head_into_element(head, token);
509
510        match event {
511            Pop::Element { kind, parent_path } => Ok(Some(Event::Element { kind, parent_path })),
512            Pop::Complete(element) => {
513                if let Some(token) = self.lexer.next() {
514                    let token = token.try_into().with_root_path()?;
515                    return Err(unexpected_token(token).with_root_path());
516                }
517
518                Ok(Some(Event::Complete(element)))
519            }
520        }
521    }
522
523    fn exit_with_value(
524        &mut self,
525        token: Token,
526        value: Value<'buf>,
527    ) -> Result<Option<Event<'buf>>, PartialError> {
528        self.complete = true;
529        let span = element_span(&token, 0);
530        let elem = Element::new(self.next_elem_id(), Rc::new(PathNode::Root), span, value);
531
532        if let Some(token) = self.lexer.next() {
533            let token = token.try_into()?;
534            return Err(unexpected_token(token));
535        }
536
537        Ok(Some(Event::Complete(elem)))
538    }
539
540    fn peek(&mut self, token: &Token) -> Result<Token, PartialError> {
541        let Some(peek_token) = self.lexer.peek() else {
542            return Err(ErrorKind::UnexpectedEOF.into_partial_error(Some(*token)));
543        };
544
545        let peek_token = peek_token.try_into()?;
546        Ok(peek_token)
547    }
548
549    /// Perform comma position checks
550    ///
551    /// Return `Err(unexpected)` if a trailing or rogue comma is found.
552    fn comma_checks(&mut self, token: &Token) -> Result<bool, PartialError> {
553        trace!(?token, "comma_checks");
554        let is_comma = token.is_comma();
555
556        if is_comma {
557            let peek_token = self.peek(token)?;
558
559            // A comma can only be followed by an opening brace or a value.
560            if peek_token.is_closing() {
561                return Err(unexpected_token(*token));
562            }
563
564            if peek_token.is_comma() {
565                return Err(unexpected_token(peek_token));
566            }
567        } else if token.is_opening() {
568            let peek_token = self.peek(token)?;
569
570            // An opening brace should not be followed by a comma.
571            if peek_token.is_comma() {
572                return Err(unexpected_token(peek_token));
573            }
574        }
575
576        Ok(is_comma)
577    }
578}
579
580/// Create an [`PartialError`] with [`ErrorKind::UnexpectedToken`].
581#[track_caller]
582fn unexpected_token(token: Token) -> PartialError {
583    ErrorKind::UnexpectedToken.into_partial_error(Some(token))
584}
585
586impl<'buf> Iterator for Parser<'buf> {
587    type Item = Result<Event<'buf>, Error>;
588
589    fn next(&mut self) -> Option<Self::Item> {
590        match self.next_event() {
591            Ok(event) => event.map(Ok),
592            Err(err) => {
593                self.complete = true;
594                Some(Err(err))
595            }
596        }
597    }
598}
599
/// A partial `Element` that we descend into to parse its child `Element`s.
#[derive(Debug)]
struct PartialElement<'buf> {
    /// The Id of the [`Element`] to be created.
    elem_id: ElemId,

    /// The type of [`Element`].
    elem_type: ObjectKind,

    /// The child [`Element`]s.
    ///
    /// This is filled as we parse the current JSON [`Element`].
    elements: Vec<Element<'buf>>,

    /// The path up to the [`Element`].
    path: PathNodeRef<'buf>,

    /// The index of the [`Element`]'s first byte.
    span_start: usize,
}
620
621impl<'buf> PartialElement<'buf> {
622    fn parent_is_object(
623        &self,
624        path_pool: &mut PathPool<'buf>,
625        key: RawStr<'buf>,
626    ) -> PathNodeRef<'buf> {
627        path_pool.object(Rc::clone(&self.path), key)
628    }
629
630    fn parent_is_array(&self, path_pool: &mut PathPool<'buf>) -> PathNodeRef<'buf> {
631        path_pool.array(Rc::clone(&self.path), self.elements.len())
632    }
633
634    fn push_field(
635        &mut self,
636        elem_id: ElemId,
637        path: PathNodeRef<'buf>,
638        value: Value<'buf>,
639        token: &Token,
640    ) {
641        let span = element_span(token, token.span.start);
642        let elem = Element::new(elem_id, path, span, value);
643        self.elements.push(elem);
644    }
645
646    /// Resolve the `PartialElement` to an `Element`.
647    fn into_element(self, token: &Token) -> Element<'buf> {
648        let span = element_span(token, self.span_start);
649
650        let PartialElement {
651            elem_type,
652            span_start: _,
653            elements,
654            path,
655            elem_id,
656        } = self;
657
658        let value = match elem_type {
659            ObjectKind::Object => {
660                let fields = elements.into_iter().map(Field).collect();
661                Value::Object(fields)
662            }
663            ObjectKind::Array => Value::Array(elements),
664        };
665
666        Element::new(elem_id, path, span, value)
667    }
668}
669
/// A pool of pre-allocated path nodes.
///
/// `Path`s are added and never removed.
struct PathPool<'buf> {
    /// The index of the last slot of `items` in use; slot `0` is handed out as the root.
    index: usize,
    /// The pre-allocated nodes; slots beyond `index` hold default values until assigned.
    items: Vec<Rc<PathNode<'buf>>>,
}
675
676impl Default for PathPool<'_> {
677    fn default() -> Self {
678        Self::with_capacity(1000)
679    }
680}
681
682impl<'buf> PathPool<'buf> {
683    fn with_capacity(capacity: usize) -> Self {
684        let capacity = capacity.max(1);
685        let mut items = Vec::with_capacity(capacity);
686        items.resize_with(capacity, Default::default);
687
688        Self { index: 0, items }
689    }
690
691    #[expect(
692        clippy::indexing_slicing,
693        reason = "The root Path is added in the constructor and the capacity is always at least 1"
694    )]
695    fn root(&self) -> PathNodeRef<'buf> {
696        Rc::clone(&self.items[0])
697    }
698
699    /// Add a new `Path::Array` with the given index.
700    fn array(&mut self, parent: PathNodeRef<'buf>, index: usize) -> PathNodeRef<'buf> {
701        self.push(PathNode::Array { parent, index })
702    }
703
704    /// Add a new `Path::Object` with the given index.
705    fn object(&mut self, parent: PathNodeRef<'buf>, key: RawStr<'buf>) -> PathNodeRef<'buf> {
706        self.push(PathNode::Object { parent, key })
707    }
708
709    #[expect(clippy::indexing_slicing, reason = "Paths are only added")]
710    fn push(&mut self, new_path: PathNode<'buf>) -> PathNodeRef<'buf> {
711        const GROWTH_FACTOR: usize = 2;
712
713        let Self { index, items } = self;
714        let next_index = *index + 1;
715
716        if next_index >= items.len() {
717            items.reserve(items.len() * GROWTH_FACTOR);
718            items.resize_with(items.capacity(), Default::default);
719        }
720
721        let path = &mut items[next_index];
722        debug_assert_eq!(Rc::strong_count(path), 1, "Paths are only added");
723        let path = Rc::get_mut(path).expect("Paths are only added");
724        *path = new_path;
725
726        let path = Rc::clone(&items[next_index]);
727        *index = next_index;
728        path
729    }
730}
731
/// The `Span` defines the range of bytes that delimits a JSON `Element`.
#[derive(Copy, Clone, Debug, Default, Eq, PartialEq, Ord, PartialOrd)]
pub struct Span {
    /// Index of the first byte
    pub start: usize,

    /// Index one past the last byte
    pub end: usize,
}
741
742impl TryFrom<&json_tools::Span> for Span {
743    type Error = TryFromIntError;
744
745    fn try_from(span: &json_tools::Span) -> Result<Self, Self::Error> {
746        let json_tools::Span { first, end } = span;
747        let start = usize::try_from(*first)?;
748        let end = usize::try_from(*end)?;
749        Ok(Span { start, end })
750    }
751}
752
/// The stack of [`PartialElement`]s that are currently open; the last item is the head.
struct Stack<'buf>(Vec<PartialElement<'buf>>);
754
/// The outcome of converting the head of the [`Stack`] into an [`Element`].
enum Pop<'buf> {
    /// An [`Element`] has been created and added to its parent [`Element`].
    Element {
        /// The kind of JSON value the created [`Element`] is.
        kind: ValueKind,
        /// The path node of the created [`Element`].
        parent_path: PathNodeRef<'buf>,
    },

    /// The parse has completed creating the tree of [`Element`]s.
    Complete(Element<'buf>),
}
765
766impl<'buf> Stack<'buf> {
767    fn new() -> Self {
768        Self(vec![])
769    }
770
771    /// The head `PartialElement` is popped off the stack temporarily to avoid lifetime issues if the
772    /// stack `Vec` contains it.
773    fn pop_head(&mut self) -> Option<PartialElement<'buf>> {
774        self.0.pop()
775    }
776
777    /// The head `PartialElement` is popped off the stack temporarily to avoid lifetime issues if the
778    /// stack `Vec` contains it.
779    fn push_head(&mut self, head: PartialElement<'buf>) {
780        self.0.push(head);
781    }
782
783    /// Convert the head `PartialElement` into an `Element` using the parent to form the path.
784    fn head_into_element(&mut self, head: PartialElement<'buf>, token: &Token) -> Pop<'buf> {
785        let elem = head.into_element(token);
786
787        if let Some(parent) = self.0.last_mut() {
788            let event = Pop::Element {
789                kind: elem.value.kind(),
790                parent_path: elem.path_node(),
791            };
792            parent.elements.push(elem);
793            event
794        } else {
795            Pop::Complete(elem)
796        }
797    }
798
799    fn push_new_object(&mut self, elem_id: ElemId, parent_path: PathNodeRef<'buf>, token: &Token) {
800        self.push_new_elem(elem_id, parent_path, token, ObjectKind::Object);
801    }
802
803    fn push_new_array(&mut self, elem_id: ElemId, parent_path: PathNodeRef<'buf>, token: &Token) {
804        self.push_new_elem(elem_id, parent_path, token, ObjectKind::Array);
805    }
806
807    fn push_new_elem(
808        &mut self,
809        elem_id: ElemId,
810        parent_path: PathNodeRef<'buf>,
811        token: &Token,
812        elem_type: ObjectKind,
813    ) {
814        let partial = PartialElement {
815            elements: vec![],
816            elem_type,
817            path: parent_path,
818            span_start: token.span.start,
819            elem_id,
820        };
821        self.0.push(partial);
822    }
823}
824
825/// A parsing Error that keeps track of the token being parsed when the Error occurred and
826/// the slice of JSON surrounding the Error location.
827pub struct Error(Box<ErrorImpl>);
828
829impl fmt::Debug for Error {
830    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
831        fmt::Debug::fmt(&self.0, f)
832    }
833}
834
835impl From<ErrorImpl> for Error {
836    fn from(err: ErrorImpl) -> Self {
837        Self(err.into())
838    }
839}
840
841struct ErrorImpl {
842    /// The kind of error that occurred.
843    kind: ErrorKind,
844
845    /// The location the [`Error`] happened in the source code.
846    loc: &'static std::panic::Location<'static>,
847
848    /// The path to the [`Element`] the error occurred in.
849    path: Path,
850
851    /// The span of the JSON string the error occurred in.
852    span: Span,
853
854    /// The token being parsed at the time of the [`Error`].
855    token: Option<Token>,
856}
857
858impl fmt::Debug for ErrorImpl {
859    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
860        f.debug_struct("Error")
861            .field("kind", &self.kind)
862            .field("loc", &self.loc)
863            .field("path", &self.path)
864            .field("span", &self.span)
865            .field("token", &self.token)
866            .finish()
867    }
868}
869
870impl Error {
871    /// The kind of error that occurred.
872    pub fn kind(&self) -> &ErrorKind {
873        &self.0.kind
874    }
875
876    /// The path to the element the error occurred in.
877    pub fn path(&self) -> &Path {
878        &self.0.path
879    }
880
881    /// The span of the [`Element`] the error occurred in.
882    pub fn span(&self) -> Span {
883        self.0.span
884    }
885
886    pub fn token(&self) -> Option<&Token> {
887        self.0.token.as_ref()
888    }
889
890    /// Break the Error into it's constituent parts.
891    pub fn into_parts(self) -> (ErrorKind, Path, Span) {
892        let ErrorImpl {
893            kind,
894            loc: _,
895            path,
896            span,
897            token: _,
898        } = *self.0;
899        (kind, path, span)
900    }
901
902    /// Convert the Error into a more comprehensive report using the source JSON to provide
903    /// human readable context.
904    pub fn into_report(self, json: &str) -> ErrorReport<'_> {
905        ErrorReport::from_error(self, json)
906    }
907}
908
/// A more comprehensive report on the [`Error`] using the source JSON `&str` to provide
/// human readable context.
///
/// The string slices borrow from the JSON `&str` the report was created with.
#[derive(Debug)]
pub struct ErrorReport<'buf> {
    /// The [`Error`] that occurred.
    error: Error,

    /// The slice of JSON as defined by the `Error::span`.
    json_context: &'buf str,

    /// The slice of JSON as defined by the `Error::span` and expanded out to the
    /// start and end of the line.
    expanded_json_context: &'buf str,

    /// The line and col indices of the start and end of the span.
    span_bounds: SpanBounds,
}
926
927impl<'buf> ErrorReport<'buf> {
928    /// Create the `ErrorReport` from the `Error` and source `&str`.
929    fn from_error(error: Error, json: &'buf str) -> Self {
930        let span = error.span();
931        debug!(?error, ?span, json, "from_error");
932        let json_context = &json.get(span.start..span.end).unwrap_or(json);
933
934        let start = {
935            let s = &json.get(0..span.start).unwrap_or_default();
936            line_col(s)
937        };
938        let end = {
939            let relative_end = line_col(json_context);
940            let line = start.line + relative_end.line;
941
942            if start.line == line {
943                LineCol {
944                    line,
945                    col: start.col + relative_end.col,
946                }
947            } else {
948                LineCol {
949                    line,
950                    col: relative_end.col,
951                }
952            }
953        };
954        let (prev, next) = find_expanded_newlines(json, span.start);
955        let expanded_json_context = &json.get(prev..next).unwrap_or(json_context);
956
957        let span_bounds = SpanBounds { start, end };
958
959        Self {
960            error,
961            json_context,
962            expanded_json_context,
963            span_bounds,
964        }
965    }
966
967    /// Return the slice of JSON as defined by the `Error::span`.
968    pub fn json_context(&self) -> &'buf str {
969        self.json_context
970    }
971
972    /// Return the slice of JSON as defined by the `Error::span` and expanded out to the
973    /// start and end of the line.
974    pub fn expand_json_context(&self) -> &'buf str {
975        self.expanded_json_context
976    }
977
978    /// Return the line and col number of each end of the span
979    pub fn span_bounds(&self) -> &SpanBounds {
980        &self.span_bounds
981    }
982
983    /// Discard the `Report` and take ownership of the `Error`.
984    pub fn into_error(self) -> Error {
985        self.error
986    }
987}
988
/// Find the byte offsets of the line that contains `byte_index`.
///
/// Returns `(start, end)` where:
///
/// - `start` is the offset just after the closest `\n` before `byte_index`, or `0` when
///   `byte_index` is on the first line.
/// - `end` is the offset of the closest `\n` at or after `byte_index`, or `json.len()` when
///   `byte_index` is on the last line.
///
/// `json[start..end]` is therefore the line without its terminating newline.
///
/// If `byte_index` is out of bounds or not on a char boundary, the bounds of the whole
/// `json` are returned.
fn find_expanded_newlines(json: &str, byte_index: usize) -> (usize, usize) {
    let (Some(pre), Some(post)) = (json.get(..byte_index), json.get(byte_index..)) else {
        return (0, json.len());
    };

    // `\n` is a single byte, so one past it is always a valid char boundary.
    // No newline before the index means the line starts at the beginning of the JSON.
    let prev = pre.rfind('\n').map_or(0, |idx| idx + 1);

    // No newline after the index means the line runs to the end of the JSON.
    let next = post.find('\n').map_or(json.len(), |idx| byte_index + idx);

    (prev, next)
}
1004
/// The line and col indices of the start and end of a span.
///
/// An [`ErrorReport`] exposes these through `ErrorReport::span_bounds`.
#[derive(Clone, Debug)]
pub struct SpanBounds {
    /// The start of the `Span` expressed as line and column index.
    pub start: LineCol,

    /// The end of the `Span` expressed as line and column index.
    pub end: LineCol,
}
1014
/// A file location expressed as line and column.
///
/// Both indices are zero based. Values produced by [`line_col`] count the column in
/// `char`s, not bytes.
#[derive(Clone, Debug)]
pub struct LineCol {
    /// The line index is 0 based.
    pub line: u32,

    /// The col index is 0 based.
    pub col: u32,
}
1024
1025impl From<(u32, u32)> for LineCol {
1026    fn from(value: (u32, u32)) -> Self {
1027        Self {
1028            line: value.0,
1029            col: value.1,
1030        }
1031    }
1032}
1033
1034impl From<LineCol> for (u32, u32) {
1035    fn from(value: LineCol) -> Self {
1036        (value.line, value.col)
1037    }
1038}
1039
1040impl PartialEq<(u32, u32)> for LineCol {
1041    fn eq(&self, other: &(u32, u32)) -> bool {
1042        self.line == other.0 && self.col == other.1
1043    }
1044}
1045
1046impl fmt::Display for LineCol {
1047    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1048        write!(f, "{}:{}", self.line, self.col)
1049    }
1050}
1051
1052/// Return the line and column indices of the end of the slice.
1053///
1054/// The line and column indices are zero based.
1055pub fn line_col(s: &str) -> LineCol {
1056    let mut chars = s.chars().rev();
1057    let mut line = 0;
1058    let mut col = 0;
1059
1060    // The col only needs to be calculated on the final line so we iterate from the last char
1061    // back to the start of the line and then only continue to count the lines after that.
1062    //
1063    // This is less work than continuously counting chars from the front of the slice.
1064    for c in chars.by_ref() {
1065        // If the `&str` is multiline, we count the line and stop accumulating the col count too.
1066        if c == '\n' {
1067            line += 1;
1068            break;
1069        }
1070        col += 1;
1071    }
1072
1073    // The col is now known, continue to the start of the str counting newlines as we go.
1074    for c in chars {
1075        if c == '\n' {
1076            line += 1;
1077        }
1078    }
1079
1080    LineCol { line, col }
1081}
1082
/// An error that has yet to be resolved with a [`Span`].
///
/// A `PartialError` is turned into an [`Error`] with `with_head` or `with_root_path`.
#[derive(Debug)]
pub struct PartialError {
    /// The kind of error that occurred.
    kind: ErrorKind,

    /// The location the [`PartialError`] happened in the source code.
    loc: &'static std::panic::Location<'static>,

    /// The token being parsed at the time of the [`PartialError`].
    token: Option<Token>,
}
1095
/// Convert a [`PartialError`] into an [`Error`] by providing a [`PartialElement`].
///
/// Implemented for `Result<T, PartialError>` so the conversion can be chained onto a
/// fallible call.
trait PartialIntoError<T> {
    /// Convert a [`PartialError`] into an [`Error`] with a path based on the given [`PartialElement`].
    fn with_head(self, head: &PartialElement<'_>) -> Result<T, Error>;

    /// Converts a [`PartialError`] into an [`Error`] with a root path.
    ///
    /// This can be used if the path is unknown or the [`Error`] occurred at the root.
    fn with_root_path(self) -> Result<T, Error>;
}
1106
1107impl<T> PartialIntoError<T> for Result<T, PartialError> {
1108    fn with_head(self, head: &PartialElement<'_>) -> Result<T, Error> {
1109        match self {
1110            Ok(v) => Ok(v),
1111            Err(err) => Err(err.with_head(head)),
1112        }
1113    }
1114
1115    fn with_root_path(self) -> Result<T, Error> {
1116        match self {
1117            Ok(v) => Ok(v),
1118            Err(err) => Err(err.with_root_path()),
1119        }
1120    }
1121}
1122
1123impl PartialError {
1124    /// Convert a [`PartialError`] into an [`Error`] with a path based on the given [`PartialElement`].
1125    fn with_head(self, parent: &PartialElement<'_>) -> Error {
1126        let Self { loc, kind, token } = self;
1127        let span_end = token.map(|t| t.span.end).unwrap_or_default();
1128
1129        let (path, span) = if let Some(elem) = parent.elements.last() {
1130            (
1131                Path::from_node(Rc::clone(&elem.path_node)),
1132                Span {
1133                    start: elem.span.start,
1134                    end: span_end,
1135                },
1136            )
1137        } else {
1138            (
1139                Path::from_node(Rc::clone(&parent.path)),
1140                Span {
1141                    start: parent.span_start,
1142                    end: span_end,
1143                },
1144            )
1145        };
1146
1147        ErrorImpl {
1148            kind,
1149            loc,
1150            path,
1151            span,
1152            token,
1153        }
1154        .into()
1155    }
1156
1157    /// Converts a `PartialError` into an `Error` with a root path.
1158    ///
1159    /// This can be used If the path is unknown or the `Error` occurred at the root.
1160    pub fn with_root_path(self) -> Error {
1161        let Self { loc, kind, token } = self;
1162        let (span_start, span_end) = match (&kind, token) {
1163            (ErrorKind::UnexpectedToken, Some(t)) => (t.span.start, t.span.end),
1164            (_, Some(t)) => (0, t.span.end),
1165            (_, None) => (0, 0),
1166        };
1167        ErrorImpl {
1168            loc,
1169            kind,
1170            path: Path::root(),
1171            span: Span {
1172                start: span_start,
1173                end: span_end,
1174            },
1175            token,
1176        }
1177        .into()
1178    }
1179}
1180
/// The kinds of error that can occur while parsing JSON.
#[derive(Debug)]
pub enum ErrorKind {
    /// An internal programming error.
    ///
    /// The boxed error is `Send + Sync` so the `ErrorKind` is too.
    Internal(Box<dyn std::error::Error + Send + Sync + 'static>),

    /// The `Lexer` had no more tokens when more were expected.
    UnexpectedEOF,

    /// An unexpected token was emitted by the `Lexer`.
    UnexpectedToken,
}
1193
1194impl ErrorKind {
1195    #[track_caller]
1196    fn into_partial_error(self, token: Option<Token>) -> PartialError {
1197        PartialError {
1198            kind: self,
1199            loc: std::panic::Location::caller(),
1200            token,
1201        }
1202    }
1203
1204    #[track_caller]
1205    pub fn into_partial_error_without_token(self) -> PartialError {
1206        PartialError {
1207            kind: self,
1208            loc: std::panic::Location::caller(),
1209            token: None,
1210        }
1211    }
1212}
1213
/// Uses the default trait methods only, so no underlying `source` error is reported.
impl std::error::Error for Error {}
1215
1216impl fmt::Display for Error {
1217    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1218        let ErrorImpl {
1219            kind,
1220            loc,
1221            path,
1222            span: _,
1223            token,
1224        } = &*self.0;
1225
1226        write!(
1227            f,
1228            "Error: code location: {loc}; while parsing element at `{path}`"
1229        )?;
1230
1231        if let Some(token) = token {
1232            write!(f, " token: `{}`", token.kind)?;
1233        }
1234
1235        match kind {
1236            ErrorKind::Internal(err) => write!(f, "Internal: {err}"),
1237            ErrorKind::UnexpectedEOF => f.write_str("Unexpected EOF"),
1238            ErrorKind::UnexpectedToken => write!(f, "unexpected token"),
1239        }
1240    }
1241}
1242
/// A programming Error resulting from faulty logic.
///
/// This should not be exposed on the public API.
#[derive(Debug)]
enum InternalError {
    /// Slicing into the JSON buf failed.
    ///
    /// This should not happen during parsing, as the parsing and `Span` calculations are all
    /// contained within the same callstack of functions.
    ///
    /// This can only happen if there's a mistake in the `Span` offset/range calculations.
    BufferSlice(Span),

    /// The type of `Buffer` is invalid.
    ///
    /// The `json_tools::Lexer::next` is called in a few places and the `json_tools::Token` it
    /// emits is converted into a local `Token` with only a `Span` based buffer to avoid checking
    /// the buffer type each time it's used.
    ///
    /// The lexer is configured to only use a `Span` based buffer so the only way this Error can
    /// occur is if the code is changed so that the lexer uses a `String` based buffer.
    BufferType,

    /// The `json_tools::Span` uses `u64` for the `start` and `end` indices which would involve
    /// conversion to `usize` each time they are used. To avoid this the `json_tools::Span` is
    /// converted to the locally defined `Span` that uses `usize` based fields.
    ///
    /// This conversion can fail if the binary is built for architectures other than `64` bit pointer width.
    FromInt(TryFromIntError),

    /// A String was parsed without surrounding double quotes.
    ///
    /// This is only possible if the `json_tools` crate changes the implementation details of
    /// how they parse JSON strings.
    StringWithoutQuotes,

    /// A `RawStr` was made using a token that is not a `String`.
    ///
    /// `RawStr`s are only creatable from inside the crate so the only way this can occur is
    /// through a programming error.
    RawStringFromInvalidToken,
}
1285
1286impl InternalError {
1287    #[track_caller]
1288    fn into_partial_error(self, token: Option<Token>) -> PartialError {
1289        ErrorKind::Internal(Box::new(self)).into_partial_error(token)
1290    }
1291}
1292
/// Implemented so an `InternalError` can be boxed into an `ErrorKind::Internal`.
impl std::error::Error for InternalError {}
1294
1295/// The `json_tools::Span` uses `u64` for the `start` and `end` indices which would involve
1296/// conversion to `usize` each time they are used. To avoid this the `json_tools::Span` is
1297/// converted to the locally defined `Span` that uses `usize` based fields.
1298///
1299/// This conversion can fail if the binary is built for architectures other than `64` bit pointer width.
1300impl From<TryFromIntError> for InternalError {
1301    fn from(err: TryFromIntError) -> Self {
1302        InternalError::FromInt(err)
1303    }
1304}
1305
1306impl From<InternalError> for Error {
1307    #[track_caller]
1308    fn from(err: InternalError) -> Self {
1309        ErrorImpl {
1310            kind: ErrorKind::Internal(Box::new(err)),
1311            loc: std::panic::Location::caller(),
1312            path: Path::root(),
1313            span: Span { start: 0, end: 0 },
1314            token: None,
1315        }
1316        .into()
1317    }
1318}
1319
1320impl fmt::Display for InternalError {
1321    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1322        match self {
1323            InternalError::BufferSlice(span) => {
1324                write!(f, "Slice into buffer failed; span: {span:?}")
1325            }
1326            InternalError::BufferType => write!(f, "The tokens buffer is not a `Span`"),
1327            InternalError::FromInt(err) => write!(f, "{err}"),
1328            InternalError::StringWithoutQuotes => {
1329                write!(f, "A String was parsed without surrounding double quotes.")
1330            }
1331
1332            InternalError::RawStringFromInvalidToken => {
1333                write!(
1334                    f,
1335                    "A `RawString` was created using a `Token` that's not a `String`"
1336                )
1337            }
1338        }
1339    }
1340}
1341
/// Convert a `Result` that fails with an [`InternalError`] into one that fails with a
/// [`PartialError`].
trait InternalErrorIntoPartial<T> {
    /// Convert the error into a [`PartialError`], using `f` to supply the [`Token`] that
    /// is attached to it.
    ///
    /// The implementation for `Result<T, InternalError>` only calls `f` on the `Err` path.
    #[track_caller]
    fn into_partial_error<F>(self, f: F) -> Result<T, PartialError>
    where
        F: FnOnce() -> Token;
}
1348
1349impl<T> InternalErrorIntoPartial<T> for Result<T, InternalError> {
1350    fn into_partial_error<F>(self, f: F) -> Result<T, PartialError>
1351    where
1352        F: FnOnce() -> Token,
1353    {
1354        match self {
1355            Ok(v) => Ok(v),
1356            Err(err) => {
1357                let token = f();
1358                Err(err.into_partial_error(Some(token)))
1359            }
1360        }
1361    }
1362}
1363
1364/// Create the `Span` of an `Element` given the start and the closing token.
1365fn element_span(token_end: &Token, start: usize) -> Span {
1366    Span {
1367        start,
1368        end: token_end.span.end,
1369    }
1370}
1371
1372/// Return the content of the `Token` as a `&str`.
1373///
1374/// This in only useful for `Token`'s that contain variable data, such as `String`, `Number` etc.
1375#[track_caller]
1376fn token_str<'buf>(json: &'buf str, token: &Token) -> Result<&'buf str, PartialError> {
1377    let start = token.span.start;
1378    let end = token.span.end;
1379    let s = &json
1380        .get(start..end)
1381        .ok_or(InternalError::BufferSlice(Span { start, end }))
1382        .into_partial_error(|| *token)?;
1383    Ok(s)
1384}
1385
/// A `&str` with the surrounding quotes removed that has not yet been analyzed for
/// escape codes.
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Default)]
pub struct RawStr<'buf> {
    /// The raw `&str` with surrounding quotes removed; escapes have not been decoded.
    source: &'buf str,

    /// The `Span` of the `String` token that produced the source `&str`.
    span: Span,
}
1395
1396/// Impl `Borrow` so `RawStr` plays well with hashed collections.
1397impl Borrow<str> for RawStr<'_> {
1398    fn borrow(&self) -> &str {
1399        self.source
1400    }
1401}
1402
1403/// Impl `Borrow` so `RawStr` plays well with hashed collections.
1404impl Borrow<str> for &RawStr<'_> {
1405    fn borrow(&self) -> &str {
1406        self.source
1407    }
1408}
1409
1410impl<'buf> RawStr<'buf> {
1411    pub(super) fn from_str(source: &'buf str, span: Span) -> Self {
1412        Self { source, span }
1413    }
1414
1415    /// Create new `RawStr` from a string with surrounding quotes.
1416    #[track_caller]
1417    pub(super) fn from_quoted_str(
1418        s: &'buf str,
1419        token: Token,
1420    ) -> Result<RawStr<'buf>, PartialError> {
1421        const QUOTE: char = '"';
1422
1423        if token.kind != TokenType::String {
1424            return Err(InternalError::RawStringFromInvalidToken.into_partial_error(Some(token)));
1425        }
1426
1427        // remove double quotes
1428        let (_, s) = s
1429            .split_once(QUOTE)
1430            .ok_or(InternalError::StringWithoutQuotes)
1431            .into_partial_error(|| token)?;
1432
1433        let (source, _) = s
1434            .rsplit_once(QUOTE)
1435            .ok_or(InternalError::StringWithoutQuotes)
1436            .into_partial_error(|| token)?;
1437
1438        Ok(Self {
1439            source,
1440            span: token.span,
1441        })
1442    }
1443
1444    /// Return the raw unescaped `&str`.
1445    pub(crate) fn as_raw(&self) -> &'buf str {
1446        self.source
1447    }
1448
1449    /// Return the `&str` with all escapes decoded.
1450    pub(crate) fn decode_escapes(
1451        &self,
1452        elem: &Element<'buf>,
1453    ) -> Caveat<Cow<'_, str>, decode::WarningKind> {
1454        unescape_str(self.source, elem)
1455    }
1456
1457    /// Return a `&str` marked as either having escapes or not.
1458    pub(crate) fn has_escapes(
1459        &self,
1460        elem: &Element<'buf>,
1461    ) -> Caveat<decode::PendingStr<'_>, decode::WarningKind> {
1462        decode::analyze(self.source, elem)
1463    }
1464
1465    /// Return the [`Span`] of the [`Token`] that generated this string.
1466    pub fn span(&self) -> Span {
1467        self.span
1468    }
1469}
1470
1471impl fmt::Display for RawStr<'_> {
1472    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1473        fmt::Display::fmt(self.source, f)
1474    }
1475}
1476
1477#[track_caller]
1478fn token_str_as_string(json: &str, token: Token) -> Result<RawStr<'_>, PartialError> {
1479    let s = token_str(json, &token)?;
1480    let raw = RawStr::from_quoted_str(s, token)?;
1481    Ok(raw)
1482}
1483
#[cfg(test)]
mod test_raw_str {
    use assert_matches::assert_matches;

    use crate::test;

    use super::{ErrorKind, InternalError, RawStr, Span, Token, TokenType};

    /// Create a `Token` of the given type with a default `Span`, for use in tests only.
    impl From<TokenType> for Token {
        fn from(kind: TokenType) -> Self {
            let span = Span::default();
            Self { kind, span }
        }
    }

    #[test]
    fn should_fail_to_create_raw_str_from_non_string_token() {
        test::setup();

        let token = Token::from(TokenType::Number);
        let partial = RawStr::from_quoted_str("fail", token).unwrap_err();

        let boxed = assert_matches!(partial.kind, ErrorKind::Internal(inner) => inner);
        let internal = boxed.downcast_ref::<InternalError>().unwrap();
        assert_matches!(internal, InternalError::RawStringFromInvalidToken);
    }

    #[test]
    fn should_fail_to_create_raw_str_from_string_without_quotes() {
        test::setup();

        let token = Token::from(TokenType::String);
        let partial = RawStr::from_quoted_str("fail", token).unwrap_err();

        let boxed = assert_matches!(partial.kind, ErrorKind::Internal(inner) => inner);
        let internal = boxed.downcast_ref::<InternalError>().unwrap();
        assert_matches!(internal, InternalError::StringWithoutQuotes);
    }
}
1521
#[cfg(test)]
mod test_line_col {
    use super::{line_col, LineCol};

    /// Assert that `line_col` resolves the end of `input` to the expected `(line, col)`.
    fn assert_line_col(input: &str, expected: (u32, u32)) {
        let LineCol { line, col } = line_col(input);
        assert_eq!((line, col), expected);
    }

    #[test]
    fn should_line_col_empty_str() {
        assert_line_col("", (0, 0));
    }

    #[test]
    fn should_line_col_one_line_one_char_str() {
        assert_line_col("1", (0, 1));
    }

    #[test]
    fn should_line_col_one_line_many_chars_str() {
        assert_line_col("1234", (0, 4));
    }

    #[test]
    fn should_line_col_two_line_one_col_str() {
        assert_line_col("1234\n1", (1, 1));
    }
}
1558
#[cfg(test)]
mod test_parser {
    use assert_matches::assert_matches;

    use crate::{
        json::{PathNode, ValueKind},
        test,
    };

    use super::{Event, ObjectKind, Parser};

    #[test]
    fn should_emit_events_for_object_with_single_field() {
        const JSON: &str = r#"{
    "field_a": 404
}"#;

        test::setup();

        // Every event is expected to be `Ok`, so unwrap them as they are pulled.
        let mut events = Parser::new(JSON).map(Result::unwrap);

        let root = assert_matches!(
            events.next().unwrap(),
            Event::Open {
                kind: ObjectKind::Object,
                parent_path
            } => parent_path
        );
        assert_matches!(*root, PathNode::Root);

        let field_a = assert_matches!(
            events.next().unwrap(),
            Event::Element {
                kind: ValueKind::Number,
                parent_path
            } => parent_path
        );
        assert_eq!(*field_a, "$.field_a");
    }

    #[test]
    fn should_emit_events_for_object_with_two_fields() {
        const JSON: &str = r#"{
    "field_a": 404,
    "field_b": "name"
}"#;

        test::setup();

        // Every event is expected to be `Ok`, so unwrap them as they are pulled.
        let mut events = Parser::new(JSON).map(Result::unwrap);

        let root = assert_matches!(
            events.next().unwrap(),
            Event::Open {
                kind: ObjectKind::Object,
                parent_path
            } => parent_path
        );
        assert_matches!(*root, PathNode::Root);

        let field_a = assert_matches!(
            events.next().unwrap(),
            Event::Element {
                kind: ValueKind::Number,
                parent_path
            } => parent_path
        );
        assert_eq!(*field_a, "$.field_a");

        let field_b = assert_matches!(
            events.next().unwrap(),
            Event::Element {
                kind: ValueKind::String,
                parent_path
            } => parent_path
        );
        assert_eq!(*field_b, "$.field_b");
    }

    #[test]
    fn should_emit_events_for_object_with_nested_fields() {
        const JSON: &str = r#"{
    "field_a": 404,
    "field_b": {
        "field_c": "name"
    }
}"#;

        test::setup();

        // Every event is expected to be `Ok`, so unwrap them as they are pulled.
        let mut events = Parser::new(JSON).map(Result::unwrap);

        let root = assert_matches!(
            events.next().unwrap(),
            Event::Open {
                kind: ObjectKind::Object,
                parent_path
            } => parent_path
        );
        assert_matches!(*root, PathNode::Root);

        let field_a = assert_matches!(
            events.next().unwrap(),
            Event::Element {
                kind: ValueKind::Number,
                parent_path
            } => parent_path
        );
        assert_eq!(*field_a, "$.field_a");

        let field_b = assert_matches!(
            events.next().unwrap(),
            Event::Open {
                kind: ObjectKind::Object,
                parent_path
            } => parent_path
        );
        assert_eq!(*field_b, "$.field_b");

        let field_c = assert_matches!(
            events.next().unwrap(),
            Event::Element {
                kind: ValueKind::String,
                parent_path
            } => parent_path
        );
        assert_eq!(*field_c, "$.field_b.field_c");
    }

    #[test]
    fn should_emit_events_for_array_with_single_field() {
        const JSON: &str = r#"["field_a"]"#;

        test::setup();

        // Every event is expected to be `Ok`, so unwrap them as they are pulled.
        let mut events = Parser::new(JSON).map(Result::unwrap);

        let root = assert_matches!(
            events.next().unwrap(),
            Event::Open {
                kind: ObjectKind::Array,
                parent_path
            } => parent_path
        );
        assert_matches!(*root, PathNode::Root);

        let first = assert_matches!(
            events.next().unwrap(),
            Event::Element {
                kind: ValueKind::String,
                parent_path
            } => parent_path
        );
        assert_eq!(*first, "$.0");
    }

    // NOTE(review): this test is named for arrays but the fixture is a JSON object, and the
    // assertions are the same as `should_emit_events_for_object_with_two_fields`. It looks
    // like the fixture was meant to be an array of two elements - confirm and update.
    #[test]
    fn should_emit_events_for_array_with_two_fields() {
        const JSON: &str = r#"{
    "field_a": 404,
    "field_b": "name"
}"#;

        test::setup();

        // Every event is expected to be `Ok`, so unwrap them as they are pulled.
        let mut events = Parser::new(JSON).map(Result::unwrap);

        let root = assert_matches!(
            events.next().unwrap(),
            Event::Open {
                kind: ObjectKind::Object,
                parent_path
            } => parent_path
        );
        assert_matches!(*root, PathNode::Root);

        let field_a = assert_matches!(
            events.next().unwrap(),
            Event::Element {
                kind: ValueKind::Number,
                parent_path
            } => parent_path
        );
        assert_eq!(*field_a, "$.field_a");

        let field_b = assert_matches!(
            events.next().unwrap(),
            Event::Element {
                kind: ValueKind::String,
                parent_path
            } => parent_path
        );
        assert_eq!(*field_b, "$.field_b");
    }

    // NOTE(review): this test is named for arrays but the fixture is a JSON object, and the
    // assertions are the same as `should_emit_events_for_object_with_nested_fields`. It looks
    // like the fixture was meant to contain a nested array - confirm and update.
    #[test]
    fn should_emit_events_for_array_with_nested_fields() {
        const JSON: &str = r#"{
    "field_a": 404,
    "field_b": {
        "field_c": "name"
    }
}"#;

        test::setup();

        // Every event is expected to be `Ok`, so unwrap them as they are pulled.
        let mut events = Parser::new(JSON).map(Result::unwrap);

        let root = assert_matches!(
            events.next().unwrap(),
            Event::Open {
                kind: ObjectKind::Object,
                parent_path
            } => parent_path
        );
        assert_matches!(*root, PathNode::Root);

        let field_a = assert_matches!(
            events.next().unwrap(),
            Event::Element {
                kind: ValueKind::Number,
                parent_path
            } => parent_path
        );
        assert_eq!(*field_a, "$.field_a");

        let field_b = assert_matches!(
            events.next().unwrap(),
            Event::Open {
                kind: ObjectKind::Object,
                parent_path
            } => parent_path
        );
        assert_eq!(*field_b, "$.field_b");

        let field_c = assert_matches!(
            events.next().unwrap(),
            Event::Element {
                kind: ValueKind::String,
                parent_path
            } => parent_path
        );
        assert_eq!(*field_c, "$.field_b.field_c");
    }
}
1862
#[cfg(test)]
pub mod test {
    #![allow(clippy::string_slice, reason = "tests are allowed to panic")]

    use super::{Error, ErrorKind, Span};

    /// Return the slice of `json` covered by `span`.
    ///
    /// Panics if the span can't be used to slice `json`.
    pub fn spanned_json(span: Span, json: &str) -> &str {
        let Span { start, end } = span;
        &json[start..end]
    }

    /// Compile-time check: this only builds if both types are `Send + Sync`.
    #[test]
    const fn error_should_be_send_and_sync() {
        const fn assert_send_sync<T: Send + Sync>() {}

        assert_send_sync::<Error>();
        assert_send_sync::<ErrorKind>();
    }
}
1881
#[cfg(test)]
mod test_parser_basic_happy_structure {
    use assert_matches::assert_matches;

    use crate::{json::Value, test};

    use super::{parse, test::spanned_json, Element, PathNode};

    /// Asserts that the given `path`, `span` and `value` describe a JSON string leaf.
    ///
    /// Checks the source text covered by the span, the element's path and the raw
    /// (still escaped) string content, in that order.
    macro_rules! assert_string_leaf {
        (
            $json:expr, $path:expr, $span:expr, $value:expr =>
            path: $expected_path:expr,
            source: $expected_source:expr,
            raw: $expected_raw:expr $(,)?
        ) => {{
            assert_eq!(spanned_json($span, $json), $expected_source);
            assert_eq!(*$path, $expected_path);
            let string = assert_matches!($value, Value::String(s) => s);
            assert_eq!(string.as_raw(), $expected_raw);
        }};
    }

    /// Parses `json` and returns the root element's value.
    ///
    /// Also asserts that the element sits at the root path and that its span
    /// covers the complete input.
    fn parse_root(json: &str) -> Value<'_> {
        let Element {
            path_node,
            value,
            span,
            id: _,
        } = parse(json).unwrap();

        assert_eq!(*path_node, PathNode::Root);
        assert_eq!(spanned_json(span, json), json);

        value
    }

    /// An object nested in an object: `$.field_b` holds two string fields.
    #[test]
    fn should_parse_nested_object() {
        test::setup();

        let json = r#"{ "field_a": "one", "field_b": { "field_ba": "two", "field_bb": "three" } }"#;
        let root = parse_root(json);

        let fields = assert_matches!(root, Value::Object(elems) => elems);
        let [field_a, field_b] = fields.try_into().unwrap();

        let (_id, path, span, value) = field_a.into_parts();
        assert_string_leaf!(
            json, path, span, value =>
            path: "$.field_a",
            source: r#""one""#,
            raw: "one",
        );

        let (_id, path, span, value) = field_b.into_parts();
        assert_eq!(*path, "$.field_b");
        assert_eq!(
            spanned_json(span, json),
            r#"{ "field_ba": "two", "field_bb": "three" }"#
        );

        let nested = assert_matches!(value, Value::Object(fields) => fields);
        let [field_b_a, field_b_b] = nested.try_into().unwrap();

        let (_id, path, span, value) = field_b_a.into_parts();
        assert_string_leaf!(
            json, path, span, value =>
            path: "$.field_b.field_ba",
            source: r#""two""#,
            raw: "two",
        );

        let (_id, path, span, value) = field_b_b.into_parts();
        assert_string_leaf!(
            json, path, span, value =>
            path: "$.field_b.field_bb",
            source: r#""three""#,
            raw: "three",
        );
    }

    /// An array nested in an object: `$.field_b` holds two string elements.
    #[test]
    fn should_parse_object_with_nested_array() {
        test::setup();

        let json = r#"{ "field_a": "one", "field_b": [ "two", "three" ] }"#;
        let root = parse_root(json);

        let fields = assert_matches!(root, Value::Object(fields) => fields);
        let [field_a, field_b] = fields.try_into().unwrap();

        let (_id, path, span, value) = field_a.into_parts();
        assert_string_leaf!(
            json, path, span, value =>
            path: "$.field_a",
            source: r#""one""#,
            raw: "one",
        );

        let (_id, path, span, value) = field_b.into_parts();
        assert_eq!(*path, "$.field_b");
        assert_eq!(spanned_json(span, json), r#"[ "two", "three" ]"#);

        let nested = assert_matches!(value, Value::Array(elems) => elems);
        let [elem_b_a, elem_b_b] = nested.try_into().unwrap();

        let (_id, path, span, value) = elem_b_a.into_parts();
        assert_string_leaf!(
            json, path, span, value =>
            path: "$.field_b.0",
            source: r#""two""#,
            raw: "two",
        );

        let (_id, path, span, value) = elem_b_b.into_parts();
        assert_string_leaf!(
            json, path, span, value =>
            path: "$.field_b.1",
            source: r#""three""#,
            raw: "three",
        );
    }

    /// An array nested in an array: `$.1` holds two string elements.
    #[test]
    fn should_parse_nested_array() {
        test::setup();

        let json = r#"[ "one", ["two", "three"] ]"#;
        let root = parse_root(json);

        let elems = assert_matches!(root, Value::Array(elems) => elems);
        let [elem_a, elem_b] = elems.try_into().unwrap();

        let Element {
            path_node: path,
            value,
            span,
            id: _,
        } = elem_a;
        assert_string_leaf!(
            json, path, span, value =>
            path: "$.0",
            source: r#""one""#,
            raw: "one",
        );

        let Element {
            path_node: path,
            value,
            span,
            id: _,
        } = elem_b;
        assert_eq!(*path, "$.1");
        assert_eq!(spanned_json(span, json), r#"["two", "three"]"#);

        let nested = assert_matches!(value, Value::Array(elems) => elems);
        let [elem_b_a, elem_b_b] = nested.try_into().unwrap();

        let Element {
            path_node: path,
            value,
            span,
            id: _,
        } = elem_b_a;
        assert_string_leaf!(
            json, path, span, value =>
            path: "$.1.0",
            source: r#""two""#,
            raw: "two",
        );

        let Element {
            path_node: path,
            value,
            span,
            id: _,
        } = elem_b_b;
        assert_string_leaf!(
            json, path, span, value =>
            path: "$.1.1",
            source: r#""three""#,
            raw: "three",
        );
    }

    /// An object nested in an array: `$.1` holds two string fields.
    #[test]
    fn should_parse_array_with_nested_object() {
        test::setup();

        let json = r#"[ "one", {"field_a": "two", "field_b": "three"} ]"#;
        let root = parse_root(json);

        let elems = assert_matches!(root, Value::Array(elems) => elems);
        let [elem_a, elem_b] = elems.try_into().unwrap();

        let Element {
            path_node: path,
            value,
            span,
            id: _,
        } = elem_a;
        assert_string_leaf!(
            json, path, span, value =>
            path: "$.0",
            source: r#""one""#,
            raw: "one",
        );

        let Element {
            path_node: path,
            value,
            span,
            id: _,
        } = elem_b;
        assert_eq!(*path, "$.1");
        assert_eq!(
            spanned_json(span, json),
            r#"{"field_a": "two", "field_b": "three"}"#
        );

        let nested = assert_matches!(value, Value::Object(fields) => fields);
        let [field_b_a, field_b_b] = nested.try_into().unwrap();

        let (_id, path, span, value) = field_b_a.into_parts();
        assert_string_leaf!(
            json, path, span, value =>
            path: "$.1.field_a",
            source: r#""two""#,
            raw: "two",
        );

        let (_id, path, span, value) = field_b_b.into_parts();
        assert_string_leaf!(
            json, path, span, value =>
            path: "$.1.field_b",
            source: r#""three""#,
            raw: "three",
        );
    }
}
2150
#[cfg(test)]
mod test_parser_error_reporting {
    #![allow(
        clippy::string_slice,
        clippy::as_conversions,
        reason = "panicking in tests is allowed"
    )]

    use assert_matches::assert_matches;

    use crate::test;

    use super::{parse, ErrorKind, SpanBounds, TokenType};

    /// A comma directly before the closing `}` is rejected, and the error is
    /// reported on the comma token at the path of the last complete field.
    #[test]
    fn should_report_trailing_comma() {
        const JSON: &str = r#"{
   "field_a": "one",
   "field_b": "two",
}"#;

        test::setup();

        let err = parse(JSON).unwrap_err();

        assert_matches!(err.kind(), ErrorKind::UnexpectedToken);
        assert_matches!(
            err.token().unwrap().kind,
            TokenType::Comma,
            "We are parsing a comma when we realize that it should not be there"
        );
        assert_eq!(*err.path(), "$.field_b");

        let report = err.into_report(JSON);

        // The context covers the last value up to and including the offending comma.
        assert_eq!(report.json_context(), r#""two","#);
        let SpanBounds { start, end } = report.span_bounds();
        assert_eq!(*start, (2, 14));
        assert_eq!(*end, (2, 20));
        assert_eq!(report.expand_json_context(), r#"   "field_b": "two","#);
    }

    /// Input that ends inside the first key is reported as an invalid token
    /// at the root path.
    #[test]
    fn should_report_invalid_json() {
        const JSON: &str = r#"{
"field_"#;

        test::setup();

        let err = parse(JSON).unwrap_err();

        assert_matches!(err.kind(), ErrorKind::UnexpectedToken);
        assert_matches!(
            err.token().unwrap().kind,
            TokenType::Invalid,
            "We are parsing a string not ended by a double quote"
        );
        assert_eq!(*err.path(), "$");

        let report = err.into_report(JSON);

        assert_eq!(report.json_context(), r#""field_"#);
        let SpanBounds { start, end } = report.span_bounds();
        assert_eq!(*start, (1, 0));
        assert_eq!(*end, (1, 7));
        assert_eq!(report.expand_json_context(), r#""field_"#);
    }

    /// Input that ends inside a later key, after one complete field, is still
    /// reported as an invalid token at the root path.
    #[test]
    fn should_report_invalid_json_in_some_place() {
        const JSON: &str = r#"{
"field_a": "Barney",
"field_"#;

        test::setup();

        let err = parse(JSON).unwrap_err();

        assert_matches!(err.kind(), ErrorKind::UnexpectedToken);
        assert_matches!(
            err.token().unwrap().kind,
            TokenType::Invalid,
            "We are parsing a string not ended by a double quote"
        );
        assert_eq!(*err.path(), "$");

        let report = err.into_report(JSON);

        assert_eq!(report.json_context(), r#""field_"#);
        let SpanBounds { start, end } = report.span_bounds();
        assert_eq!(*start, (2, 0));
        assert_eq!(*end, (2, 7));
        assert_eq!(report.expand_json_context(), r#""field_"#);
    }

    /// A key that is followed by a comma instead of `: value` is reported on
    /// the comma token, at the path of the preceding complete field.
    #[test]
    fn should_report_invalid_json_in_some_place_in_the_middle() {
        const JSON: &str = r#"{
"field_a": "Barney",
"field_b",
"field_c": "Fred" }
"#;

        test::setup();

        let err = parse(JSON).unwrap_err();

        assert_matches!(err.kind(), ErrorKind::UnexpectedToken);
        assert_matches!(
            err.token().unwrap().kind,
            TokenType::Comma,
            "We are parsing a key value pair but the key is followed by a comma."
        );
        assert_eq!(*err.path(), "$.field_a");

        let report = err.into_report(JSON);

        // The context runs from the previous value to the offending comma and
        // therefore spans two lines.
        assert_eq!(
            report.json_context(),
            r#""Barney",
"field_b","#
        );
        let SpanBounds { start, end } = report.span_bounds();
        assert_eq!(*start, (1, 11));
        assert_eq!(*end, (2, 10));
        assert_eq!(report.expand_json_context(), r#""field_a": "Barney","#);
    }

    /// Two fields without a separating comma are reported on the second key's
    /// string token, at the path of the first field.
    #[test]
    fn should_report_missing_comma() {
        const JSON: &str = r#"{
   "field_a": "one"
   "field_b": "two"
}"#;

        test::setup();

        let err = parse(JSON).unwrap_err();

        assert_matches!(err.kind(), ErrorKind::UnexpectedToken);
        assert_matches!(
            err.token().unwrap().kind,
            TokenType::String,
            "We are parsing a String when we realize that there should be a comma"
        );
        assert_eq!(*err.path(), "$.field_a");

        let report = err.into_report(JSON);

        // The context runs from the previous value to the unexpected string
        // and therefore spans two lines.
        assert_eq!(
            report.json_context(),
            r#""one"
   "field_b""#
        );
        let SpanBounds { start, end } = report.span_bounds();
        assert_eq!(*start, (1, 14));
        assert_eq!(*end, (2, 12));
        assert_eq!(report.expand_json_context(), r#"   "field_a": "one""#);
    }
}
2311
// The whole module is gated on 64-bit targets: the expected sizes below only
// hold for that pointer width, and gating just the test function would leave
// every import in this module unused on other targets.
#[cfg(all(test, target_pointer_width = "64"))]
mod test_type_sizes {
    use std::mem::size_of;

    use super::{
        Element, Error, ErrorImpl, PartialElement, Path, PathNode, PathNodeRef, RawStr, Span,
        Token, TokenType, Value,
    };

    /// Pins the size in bytes of the parser's types on 64-bit targets, so that
    /// a change in the layout of any of them shows up as a failing test.
    #[test]
    fn should_match_sizes() {
        assert_eq!(size_of::<Element<'_>>(), 72);
        assert_eq!(size_of::<Error>(), 8);
        assert_eq!(size_of::<ErrorImpl>(), 96);
        assert_eq!(size_of::<PartialElement<'_>>(), 56);
        assert_eq!(size_of::<Path>(), 24);
        assert_eq!(size_of::<PathNode<'_>>(), 48);
        assert_eq!(size_of::<PathNodeRef<'_>>(), 8);
        assert_eq!(size_of::<RawStr<'_>>(), 32);
        assert_eq!(size_of::<Span>(), 16);
        assert_eq!(size_of::<Token>(), 24);
        assert_eq!(size_of::<TokenType>(), 1);
        assert_eq!(size_of::<Value<'_>>(), 40);
    }
}