Skip to main content

ocpi_tariffs/json/
parser.rs

//! A JSON parser built to keep track of where each element came from in the input.
2#[cfg(test)]
3pub(crate) mod test;
4
5#[cfg(test)]
6mod test_basic_happy_structure;
7
8#[cfg(test)]
9mod test_error_reporting;
10
11#[cfg(test)]
12mod test_type_sizes;
13
14#[cfg(test)]
15mod test_line_col;
16
17#[cfg(test)]
18mod test_raw_str;
19
20#[cfg(test)]
21mod test_parser;
22
23use std::borrow::{Borrow, Cow};
24use std::fmt;
25use std::iter::Peekable;
26use std::num::TryFromIntError;
27use std::str::Bytes;
28use std::sync::atomic::AtomicUsize;
29use std::sync::Arc;
30
31use json_tools::{Buffer, BufferType};
32use tracing::{debug, trace};
33
34use crate::{warning, Caveat};
35
36use super::{
37    decode::{self, unescape_str},
38    Element, Field, ObjectKind, PathNode, PathNodeRef, Value, ValueKind,
39};
40use super::{ElemId, Path};
41
/// The `json_tools` lexer running over the bytes of the source JSON.
///
/// It is wrapped in [`Peekable`] because we peek at the next `Token` when asserting on
/// trailing commas.
type Lexer<'buf> = Peekable<json_tools::Lexer<Bytes<'buf>>>;
44
45/// Parse the JSON into a tree of [`Element`]s.
46pub(crate) fn parse(json: &str) -> Result<Element<'_>, Error> {
47    let parser = Parser::new(json);
48
49    // When just parsing the JSON into an `Element` we only care about the final event
50    // when the JSON has been completely transformed into a root element.
51    for event in parser {
52        if let Event::Complete(element) = event? {
53            return Ok(element);
54        }
55    }
56
57    Err(ErrorKind::UnexpectedEOF
58        .into_partial_error_without_token()
59        .with_root_path())
60}
61
/// A parsing event emitted for each call to the `<Parser as Iterator>::next` function.
#[derive(Debug)]
pub(crate) enum Event<'buf> {
    /// An [`Element`] has been opened and its construction is in progress.
    Open {
        /// Whether the opened [`Element`] is an object or an array.
        kind: ObjectKind,
        /// The path stored for the opened [`Element`].
        ///
        /// NOTE(review): for nested elements the parser builds this from the parent's path plus
        /// the key or index of the new element; confirm the `parent_path` name is intended.
        parent_path: PathNodeRef<'buf>,
    },

    /// An [`Element`] has been created and added to its parent [`Element`].
    ///
    /// If the kind is `Array` or `Object` that means that this element is closed: its construction is complete.
    Element {
        /// The kind of JSON value the [`Element`] is.
        kind: ValueKind,
        /// The path to the parent [`Element`].
        parent_path: PathNodeRef<'buf>,
    },

    /// The parse has completed creating the tree of [`Element`]s.
    Complete(Element<'buf>),
}
84
/// The context needed to parse a single chunk of JSON.
///
/// The `Parser` is an [`Iterator`] that yields one [`Event`] per parsing step.
pub(crate) struct Parser<'buf> {
    /// Used to assign a unique [`ElemId`] to each [`Element`].
    elem_count: AtomicUsize,

    /// True if the `Parser` is complete.
    ///
    /// Any further calls to [`Parser::next`] will return `None`.
    complete: bool,

    /// The source JSON we're parsing.
    json: &'buf str,

    /// The JSON lexer.
    lexer: Lexer<'buf>,

    /// The pool with pre-allocated `Path`s.
    path_pool: PathPool<'buf>,

    /// The stack to track nested objects.
    stack: Stack<'buf>,

    /// The previous token seen.
    ///
    /// It is attached to the error reported when the input ends unexpectedly.
    token: Option<Token>,
}
110
/// Define our own `TokenType` so Clone can be defined on it.
///
/// This can be removed when `json_tools::TokenType` impls `Clone`.
///
/// The derived `Ord` and `PartialOrd` follow the declaration order of the variants, so
/// reordering the variants changes how values compare.
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd)]
pub enum TokenType {
    /// `false`
    BooleanFalse,
    /// `true`
    BooleanTrue,

    /// `]`
    BracketClose,
    /// `[`
    BracketOpen,

    /// `:`
    Colon,

    /// `,`
    Comma,

    /// `}`
    CurlyClose,
    /// `{`
    CurlyOpen,

    /// The type of the token could not be identified.
    /// Should be removed if this lexer is ever to be feature complete
    Invalid,

    /// `null`
    Null,

    /// A Number, like `1.1234` or `123` or `-0.0` or `-1` or `.0` or `.`
    Number,

    /// A JSON string, like `"foo"`
    String,
}
150
151impl TokenType {
152    fn as_str(self) -> &'static str {
153        match self {
154            TokenType::BooleanFalse => "false",
155            TokenType::BooleanTrue => "true",
156            TokenType::BracketClose => "]",
157            TokenType::BracketOpen => "[",
158            TokenType::Colon => ":",
159            TokenType::Comma => ",",
160            TokenType::CurlyClose => "}",
161            TokenType::CurlyOpen => "{",
162            TokenType::Invalid => "<invalid>",
163            TokenType::Null => "null",
164            TokenType::Number => "<number>",
165            TokenType::String => "<string>",
166        }
167    }
168}
169
170impl fmt::Display for TokenType {
171    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
172        f.write_str(self.as_str())
173    }
174}
175
176impl From<json_tools::TokenType> for TokenType {
177    fn from(value: json_tools::TokenType) -> Self {
178        match value {
179            json_tools::TokenType::BooleanFalse => TokenType::BooleanFalse,
180            json_tools::TokenType::BooleanTrue => TokenType::BooleanTrue,
181            json_tools::TokenType::BracketClose => TokenType::BracketClose,
182            json_tools::TokenType::BracketOpen => TokenType::BracketOpen,
183            json_tools::TokenType::CurlyClose => TokenType::CurlyClose,
184            json_tools::TokenType::CurlyOpen => TokenType::CurlyOpen,
185            json_tools::TokenType::Colon => TokenType::Colon,
186            json_tools::TokenType::Comma => TokenType::Comma,
187            json_tools::TokenType::Invalid => TokenType::Invalid,
188            json_tools::TokenType::Null => TokenType::Null,
189            json_tools::TokenType::Number => TokenType::Number,
190            json_tools::TokenType::String => TokenType::String,
191        }
192    }
193}
194
/// A lexical token, identifying its kind and span.
///
/// We define our own `Token` as the `json_tools::Token` defines a `Buffer` that can be heap allocated
/// or a `Span`. We only use the `Span` variant.
///
/// Our `Token` can also impl `Copy` and `Clone` as the size and semantics are acceptable.
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd)]
pub struct Token {
    /// The exact type of the token
    pub kind: TokenType,

    /// The span allows the caller to reference back into the source byte stream
    /// to obtain the string making up the token.
    pub span: Span,
}
210
211impl Token {
212    /// Return true is the token is a opening brace.
213    fn is_opening(&self) -> bool {
214        matches!(self.kind, TokenType::CurlyOpen | TokenType::BracketOpen)
215    }
216
217    /// Return true is the token is a closing brace.
218    fn is_closing(&self) -> bool {
219        matches!(self.kind, TokenType::CurlyClose | TokenType::BracketClose)
220    }
221
222    /// Return true is the token is a comma.
223    fn is_comma(&self) -> bool {
224        matches!(self.kind, TokenType::Comma)
225    }
226}
227
228impl fmt::Display for Token {
229    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
230        write!(
231            f,
232            "token: {}, ({},{})",
233            self.kind, self.span.start, self.span.end
234        )
235    }
236}
237
238impl TryFrom<json_tools::Token> for Token {
239    type Error = PartialError;
240
241    fn try_from(token: json_tools::Token) -> Result<Self, Self::Error> {
242        let json_tools::Token { kind, buf } = token;
243        let kind = kind.into();
244        let Buffer::Span(span) = &buf else {
245            return Err(InternalError::BufferType.into_partial_error(None));
246        };
247
248        let span = span
249            .try_into()
250            .map_err(|err| InternalError::from(err).into_partial_error(None))?;
251
252        Ok(Self { kind, span })
253    }
254}
255
256impl TryFrom<&json_tools::Token> for Token {
257    type Error = PartialError;
258
259    fn try_from(token: &json_tools::Token) -> Result<Self, Self::Error> {
260        let json_tools::Token { kind, buf } = token;
261        let kind = kind.clone().into();
262        let Buffer::Span(span) = &buf else {
263            return Err(InternalError::BufferType.into_partial_error(None));
264        };
265
266        let span = span
267            .try_into()
268            .map_err(|err| InternalError::from(err).into_partial_error(None))?;
269
270        Ok(Self { kind, span })
271    }
272}
273
274impl<'buf> Parser<'buf> {
275    pub fn new(json: &'buf str) -> Self {
276        let lexer = json_tools::Lexer::new(json.bytes(), BufferType::Span).peekable();
277
278        Self {
279            elem_count: AtomicUsize::new(0),
280            complete: false,
281            json,
282            lexer,
283            path_pool: PathPool::default(),
284            stack: Stack::new(),
285            token: None,
286        }
287    }
288
289    fn next_elem_id(&self) -> ElemId {
290        let id = self
291            .elem_count
292            .fetch_add(1, std::sync::atomic::Ordering::Relaxed);
293
294        ElemId(id)
295    }
296
297    fn expect_next(&mut self) -> Result<Token, PartialError> {
298        let Some(token) = self.lexer.next() else {
299            return Err(ErrorKind::UnexpectedEOF.into_partial_error(self.token.take()));
300        };
301
302        let token = token.try_into()?;
303
304        Ok(token)
305    }
306
307    /// Return an `Err` if the next token is not the expected.
308    fn expect_token(&mut self, token_type: TokenType) -> Result<(), PartialError> {
309        let Some(token) = self.lexer.next() else {
310            return Err(ErrorKind::UnexpectedEOF.into_partial_error(self.token.take()));
311        };
312
313        let token: Token = token.try_into()?;
314
315        if token.kind == token_type {
316            Ok(())
317        } else {
318            Err(unexpected_token(token))
319        }
320    }
321
322    fn next_event(&mut self) -> Result<Option<Event<'buf>>, Error> {
323        if self.complete {
324            return Ok(None);
325        }
326
327        let head = self.stack.pop_head();
328
329        match head {
330            None => {
331                let token = self.expect_next().with_root_path()?;
332
333                trace!(?token);
334                self.comma_checks(&token).with_root_path()?;
335
336                match token.kind {
337                    TokenType::CurlyOpen => {
338                        let parent_path = self.path_pool.root();
339                        self.stack.push_new_object(
340                            self.next_elem_id(),
341                            Arc::clone(&parent_path),
342                            &token,
343                        );
344                        Ok(Some(Event::Open {
345                            kind: ObjectKind::Object,
346                            parent_path,
347                        }))
348                    }
349                    TokenType::BracketOpen => {
350                        let parent_path = self.path_pool.root();
351                        self.stack.push_new_array(
352                            self.next_elem_id(),
353                            Arc::clone(&parent_path),
354                            &token,
355                        );
356                        Ok(Some(Event::Open {
357                            kind: ObjectKind::Array,
358                            parent_path,
359                        }))
360                    }
361                    TokenType::Number => {
362                        let value = Value::Number(token_str(self.json, &token).with_root_path()?);
363                        self.exit_with_value(token, value).with_root_path()
364                    }
365                    TokenType::Null => self.exit_with_value(token, Value::Null).with_root_path(),
366                    TokenType::String => {
367                        let value =
368                            Value::String(token_str_as_string(self.json, token).with_root_path()?);
369                        self.exit_with_value(token, value).with_root_path()
370                    }
371                    TokenType::BooleanTrue => {
372                        self.exit_with_value(token, Value::True).with_root_path()
373                    }
374                    TokenType::BooleanFalse => {
375                        self.exit_with_value(token, Value::False).with_root_path()
376                    }
377                    _ => Err(unexpected_token(token).with_root_path()),
378                }
379            }
380            Some(mut head) => {
381                let token = self.expect_next().with_head(&head)?;
382
383                trace!(?token, head = ?head.elem_type);
384                let token = if self.comma_checks(&token).with_head(&head)? {
385                    self.expect_next().with_head(&head)?
386                } else {
387                    token
388                };
389
390                let (value, token, path) = match head.elem_type {
391                    ObjectKind::Object => {
392                        let key = match token.kind {
393                            TokenType::String => {
394                                token_str_as_string(self.json, token).with_head(&head)?
395                            }
396                            TokenType::CurlyClose => {
397                                let event = self.close_element(head, &token)?;
398                                return Ok(event);
399                            }
400                            _ => return Err(unexpected_token(token).with_root_path()),
401                        };
402
403                        self.expect_token(TokenType::Colon).with_head(&head)?;
404                        let token = self.expect_next().with_head(&head)?;
405
406                        let value = match token.kind {
407                            TokenType::CurlyOpen => {
408                                let Some(parent_path) =
409                                    head.parent_is_object(&mut self.path_pool, key)
410                                else {
411                                    return Ok(None);
412                                };
413                                self.stack.push_head(head);
414                                self.stack.push_new_object(
415                                    self.next_elem_id(),
416                                    Arc::clone(&parent_path),
417                                    &token,
418                                );
419                                return Ok(Some(Event::Open {
420                                    kind: ObjectKind::Object,
421                                    parent_path,
422                                }));
423                            }
424                            TokenType::BracketOpen => {
425                                let Some(parent_path) =
426                                    head.parent_is_object(&mut self.path_pool, key)
427                                else {
428                                    return Ok(None);
429                                };
430                                self.stack.push_head(head);
431                                self.stack.push_new_array(
432                                    self.next_elem_id(),
433                                    Arc::clone(&parent_path),
434                                    &token,
435                                );
436                                return Ok(Some(Event::Open {
437                                    kind: ObjectKind::Array,
438                                    parent_path,
439                                }));
440                            }
441                            TokenType::CurlyClose => {
442                                let event = self.close_element(head, &token)?;
443                                return Ok(event);
444                            }
445                            TokenType::String => Value::String(
446                                token_str_as_string(self.json, token).with_head(&head)?,
447                            ),
448                            TokenType::Number => {
449                                Value::Number(token_str(self.json, &token).with_head(&head)?)
450                            }
451                            TokenType::Null => Value::Null,
452                            TokenType::BooleanTrue => Value::True,
453                            TokenType::BooleanFalse => Value::False,
454                            _ => return Err(unexpected_token(token).with_head(&head)),
455                        };
456
457                        let Some(path) = head.parent_is_object(&mut self.path_pool, key) else {
458                            return Ok(None);
459                        };
460                        (value, token, path)
461                    }
462                    ObjectKind::Array => {
463                        let value = match token.kind {
464                            TokenType::CurlyOpen => {
465                                let Some(parent_path) = head.parent_is_array(&mut self.path_pool)
466                                else {
467                                    return Ok(None);
468                                };
469                                self.stack.push_head(head);
470                                self.stack.push_new_object(
471                                    self.next_elem_id(),
472                                    Arc::clone(&parent_path),
473                                    &token,
474                                );
475                                return Ok(Some(Event::Open {
476                                    kind: ObjectKind::Object,
477                                    parent_path,
478                                }));
479                            }
480                            TokenType::BracketOpen => {
481                                let Some(parent_path) = head.parent_is_array(&mut self.path_pool)
482                                else {
483                                    return Ok(None);
484                                };
485                                self.stack.push_head(head);
486                                self.stack.push_new_array(
487                                    self.next_elem_id(),
488                                    Arc::clone(&parent_path),
489                                    &token,
490                                );
491                                return Ok(Some(Event::Open {
492                                    kind: ObjectKind::Array,
493                                    parent_path,
494                                }));
495                            }
496                            TokenType::BracketClose => {
497                                let event = self.close_element(head, &token)?;
498                                return Ok(event);
499                            }
500
501                            TokenType::String => Value::String(
502                                token_str_as_string(self.json, token).with_head(&head)?,
503                            ),
504                            TokenType::Number => {
505                                Value::Number(token_str(self.json, &token).with_head(&head)?)
506                            }
507                            TokenType::Null => Value::Null,
508                            TokenType::BooleanTrue => Value::True,
509                            TokenType::BooleanFalse => Value::False,
510                            _ => return Err(unexpected_token(token).with_head(&head)),
511                        };
512                        let Some(path) = head.parent_is_array(&mut self.path_pool) else {
513                            return Ok(None);
514                        };
515                        (value, token, path)
516                    }
517                };
518
519                let event = Event::Element {
520                    kind: value.kind(),
521                    parent_path: Arc::clone(&path),
522                };
523                head.push_field(self.next_elem_id(), path, value, &token);
524
525                let peek_token = self.peek(&token).with_head(&head)?;
526
527                if !(peek_token.is_comma() || peek_token.is_closing()) {
528                    return Err(unexpected_token(peek_token).with_head(&head));
529                }
530
531                self.token.replace(token);
532                self.stack.push_head(head);
533
534                Ok(Some(event))
535            }
536        }
537    }
538
539    /// Close a [`PartialElement`] which creates an [`Element`] and returns an [`Event`]
540    fn close_element(
541        &mut self,
542        head: PartialElement<'buf>,
543        token: &Token,
544    ) -> Result<Option<Event<'buf>>, Error> {
545        let event = self.stack.head_into_element(head, token);
546
547        match event {
548            Pop::Element { kind, parent_path } => Ok(Some(Event::Element { kind, parent_path })),
549            Pop::Complete(element) => {
550                if let Some(token) = self.lexer.next() {
551                    let token = token.try_into().with_root_path()?;
552                    return Err(unexpected_token(token).with_root_path());
553                }
554
555                Ok(Some(Event::Complete(element)))
556            }
557        }
558    }
559
560    fn exit_with_value(
561        &mut self,
562        token: Token,
563        value: Value<'buf>,
564    ) -> Result<Option<Event<'buf>>, PartialError> {
565        self.complete = true;
566        let span = element_span(&token, 0);
567        let elem = Element::new(self.next_elem_id(), Arc::new(PathNode::Root), span, value);
568
569        if let Some(token) = self.lexer.next() {
570            let token = token.try_into()?;
571            return Err(unexpected_token(token));
572        }
573
574        Ok(Some(Event::Complete(elem)))
575    }
576
577    fn peek(&mut self, token: &Token) -> Result<Token, PartialError> {
578        let Some(peek_token) = self.lexer.peek() else {
579            return Err(ErrorKind::UnexpectedEOF.into_partial_error(Some(*token)));
580        };
581
582        let peek_token = peek_token.try_into()?;
583        Ok(peek_token)
584    }
585
586    /// Perform comma position checks
587    ///
588    /// Return `Err(unexpected)` if a trailing or rogue comma is found.
589    fn comma_checks(&mut self, token: &Token) -> Result<bool, PartialError> {
590        trace!(?token, "comma_checks");
591        let is_comma = token.is_comma();
592
593        if is_comma {
594            let peek_token = self.peek(token)?;
595
596            // A comma can only be followed by an opening brace or a value.
597            if peek_token.is_closing() {
598                return Err(unexpected_token(*token));
599            }
600
601            if peek_token.is_comma() {
602                return Err(unexpected_token(peek_token));
603            }
604        } else if token.is_opening() {
605            let peek_token = self.peek(token)?;
606
607            // An opening brace should not be followed by a comma.
608            if peek_token.is_comma() {
609                return Err(unexpected_token(peek_token));
610            }
611        }
612
613        Ok(is_comma)
614    }
615}
616
617/// Create an [`PartialError`] with [`ErrorKind::UnexpectedToken`].
618#[track_caller]
619fn unexpected_token(token: Token) -> PartialError {
620    ErrorKind::UnexpectedToken.into_partial_error(Some(token))
621}
622
623impl<'buf> Iterator for Parser<'buf> {
624    type Item = Result<Event<'buf>, Error>;
625
626    fn next(&mut self) -> Option<Self::Item> {
627        match self.next_event() {
628            Ok(event) => event.map(Ok),
629            Err(err) => {
630                self.complete = true;
631                Some(Err(err))
632            }
633        }
634    }
635}
636
/// A partial `Element` that we descend into to parse its child `Element`s.
#[derive(Debug)]
struct PartialElement<'buf> {
    /// The Id of the [`Element`] to be created.
    elem_id: ElemId,

    /// The type of [`Element`].
    elem_type: ObjectKind,

    /// The child [`Element`]s.
    ///
    /// This is filled as we parse the current JSON [`Element`].
    elements: Vec<Element<'buf>>,

    /// The path up to the [`Element`].
    path: PathNodeRef<'buf>,

    /// The index of the [`Element`]'s first byte.
    span_start: usize,
}
657
658impl<'buf> PartialElement<'buf> {
659    fn parent_is_object(
660        &self,
661        path_pool: &mut PathPool<'buf>,
662        key: RawStr<'buf>,
663    ) -> Option<PathNodeRef<'buf>> {
664        path_pool.object(Arc::clone(&self.path), key)
665    }
666
667    fn parent_is_array(&self, path_pool: &mut PathPool<'buf>) -> Option<PathNodeRef<'buf>> {
668        path_pool.array(Arc::clone(&self.path), self.elements.len())
669    }
670
671    fn push_field(
672        &mut self,
673        elem_id: ElemId,
674        path: PathNodeRef<'buf>,
675        value: Value<'buf>,
676        token: &Token,
677    ) {
678        let span = element_span(token, token.span.start);
679        let elem = Element::new(elem_id, path, span, value);
680        self.elements.push(elem);
681    }
682
683    /// Resolve the `PartialElement` to an `Element`.
684    fn into_element(self, token: &Token) -> Element<'buf> {
685        let span = element_span(token, self.span_start);
686
687        let PartialElement {
688            elem_type,
689            span_start: _,
690            elements,
691            path,
692            elem_id,
693        } = self;
694
695        let value = match elem_type {
696            ObjectKind::Object => {
697                let fields = elements.into_iter().map(Field).collect();
698                Value::Object(fields)
699            }
700            ObjectKind::Array => Value::Array(elements),
701        };
702
703        Element::new(elem_id, path, span, value)
704    }
705}
706
/// A pool of pre-allocated `Path` slots.
///
/// `Path`s are added and never removed.
struct PathPool<'buf> {
    /// The index of the most recently assigned slot in `items`; slot `0` is the root path.
    index: usize,
    /// The path slots. Slots beyond `index` hold default values until they are assigned.
    items: Vec<PathNodeRef<'buf>>,
}
712
713impl Default for PathPool<'_> {
714    fn default() -> Self {
715        Self::with_capacity(1000)
716    }
717}
718
719impl<'buf> PathPool<'buf> {
720    fn with_capacity(capacity: usize) -> Self {
721        let capacity = capacity.max(1);
722        let mut items = Vec::with_capacity(capacity);
723        items.resize_with(capacity, Default::default);
724
725        Self { index: 0, items }
726    }
727
728    #[expect(
729        clippy::indexing_slicing,
730        reason = "The root Path is added in the constructor and the capacity is always at least 1"
731    )]
732    fn root(&self) -> PathNodeRef<'buf> {
733        Arc::clone(&self.items[0])
734    }
735
736    /// Add a new `Path::Array` with the given index.
737    fn array(&mut self, parent: PathNodeRef<'buf>, index: usize) -> Option<PathNodeRef<'buf>> {
738        self.push(PathNode::Array { parent, index })
739    }
740
741    /// Add a new `Path::Object` with the given index.
742    fn object(
743        &mut self,
744        parent: PathNodeRef<'buf>,
745        key: RawStr<'buf>,
746    ) -> Option<PathNodeRef<'buf>> {
747        self.push(PathNode::Object { parent, key })
748    }
749
750    fn push(&mut self, new_path: PathNode<'buf>) -> Option<PathNodeRef<'buf>> {
751        const GROWTH_FACTOR: usize = 2;
752
753        let Self { index, items } = self;
754        let next_index = index.checked_add(1)?;
755
756        if next_index >= items.len() {
757            items.reserve(items.len().saturating_mul(GROWTH_FACTOR));
758            items.resize_with(items.capacity(), Default::default);
759        }
760
761        let path = items.get_mut(next_index)?;
762        debug_assert_eq!(Arc::strong_count(path), 1, "Paths are only added");
763        let path = Arc::get_mut(path)?;
764        *path = new_path;
765
766        let path = items.get_mut(next_index)?;
767        let path_result = Arc::clone(path);
768
769        *index = next_index;
770        Some(path_result)
771    }
772}
773
/// The `Span` defines the range of bytes that delimits a JSON `Element`.
#[derive(Copy, Clone, Debug, Default, Eq, PartialEq, Ord, PartialOrd)]
pub struct Span {
    /// Index of the first byte
    pub start: usize,

    /// Index one past the last byte
    pub end: usize,
}
783
784impl TryFrom<&json_tools::Span> for Span {
785    type Error = TryFromIntError;
786
787    fn try_from(span: &json_tools::Span) -> Result<Self, Self::Error> {
788        let json_tools::Span { first, end } = span;
789        let start = usize::try_from(*first)?;
790        let end = usize::try_from(*end)?;
791        Ok(Span { start, end })
792    }
793}
794
/// The stack of [`PartialElement`]s that are currently open; the last item is the innermost.
struct Stack<'buf>(Vec<PartialElement<'buf>>);
796
/// The result of converting the head [`PartialElement`] of the [`Stack`] into an [`Element`].
enum Pop<'buf> {
    /// An [`Element`] has been created and added to its parent [`Element`].
    Element {
        /// The kind of JSON value the created [`Element`] is.
        kind: ValueKind,
        /// The path node of the created [`Element`], as returned by its `path_node` method.
        parent_path: PathNodeRef<'buf>,
    },

    /// The parse has completed creating the tree of [`Element`]s.
    Complete(Element<'buf>),
}
807
808impl<'buf> Stack<'buf> {
809    fn new() -> Self {
810        Self(vec![])
811    }
812
813    /// The head `PartialElement` is popped off the stack temporarily to avoid lifetime issues if the
814    /// stack `Vec` contains it.
815    fn pop_head(&mut self) -> Option<PartialElement<'buf>> {
816        self.0.pop()
817    }
818
819    /// The head `PartialElement` is popped off the stack temporarily to avoid lifetime issues if the
820    /// stack `Vec` contains it.
821    fn push_head(&mut self, head: PartialElement<'buf>) {
822        self.0.push(head);
823    }
824
825    /// Convert the head `PartialElement` into an `Element` using the parent to form the path.
826    fn head_into_element(&mut self, head: PartialElement<'buf>, token: &Token) -> Pop<'buf> {
827        let elem = head.into_element(token);
828
829        if let Some(parent) = self.0.last_mut() {
830            let event = Pop::Element {
831                kind: elem.value.kind(),
832                parent_path: elem.path_node(),
833            };
834            parent.elements.push(elem);
835            event
836        } else {
837            Pop::Complete(elem)
838        }
839    }
840
841    fn push_new_object(&mut self, elem_id: ElemId, parent_path: PathNodeRef<'buf>, token: &Token) {
842        self.push_new_elem(elem_id, parent_path, token, ObjectKind::Object);
843    }
844
845    fn push_new_array(&mut self, elem_id: ElemId, parent_path: PathNodeRef<'buf>, token: &Token) {
846        self.push_new_elem(elem_id, parent_path, token, ObjectKind::Array);
847    }
848
849    fn push_new_elem(
850        &mut self,
851        elem_id: ElemId,
852        parent_path: PathNodeRef<'buf>,
853        token: &Token,
854        elem_type: ObjectKind,
855    ) {
856        let partial = PartialElement {
857            elements: vec![],
858            elem_type,
859            path: parent_path,
860            span_start: token.span.start,
861            elem_id,
862        };
863        self.0.push(partial);
864    }
865}
866
/// A parsing Error that keeps track of the token being parsed when the Error occurred and
/// the slice of JSON surrounding the Error location.
///
/// The details are stored in a `Box`.
pub struct Error(Box<ErrorImpl>);
870
871impl crate::Warning for Error {
872    fn id(&self) -> warning::Id {
873        match self.0.kind {
874            ErrorKind::Internal(_) => warning::Id::from_static("internal"),
875            ErrorKind::UnexpectedEOF => warning::Id::from_static("unexpected_eof"),
876            ErrorKind::UnexpectedToken => warning::Id::from_static("unexpected_token"),
877        }
878    }
879}
880
881impl fmt::Debug for Error {
882    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
883        fmt::Debug::fmt(&self.0, f)
884    }
885}
886
887impl From<ErrorImpl> for Error {
888    fn from(err: ErrorImpl) -> Self {
889        Self(err.into())
890    }
891}
892
893struct ErrorImpl {
894    /// The kind of error that occurred.
895    kind: ErrorKind,
896
897    /// The location the [`Error`] happened in the source code.
898    loc: &'static std::panic::Location<'static>,
899
900    /// The path to the [`Element`] the error occurred in.
901    path: Path,
902
903    /// The span of the JSON string the error occurred in.
904    span: Span,
905
906    /// The token being parsed at the time of the [`Error`].
907    token: Option<Token>,
908}
909
910impl fmt::Debug for ErrorImpl {
911    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
912        f.debug_struct("Error")
913            .field("kind", &self.kind)
914            .field("loc", &self.loc)
915            .field("path", &self.path)
916            .field("span", &self.span)
917            .field("token", &self.token)
918            .finish()
919    }
920}
921
922impl Error {
923    /// The kind of error that occurred.
924    pub fn kind(&self) -> &ErrorKind {
925        &self.0.kind
926    }
927
928    /// The path to the element the error occurred in.
929    pub fn path(&self) -> &Path {
930        &self.0.path
931    }
932
933    /// The span of the [`Element`] the error occurred in.
934    pub fn span(&self) -> Span {
935        self.0.span
936    }
937
938    pub fn token(&self) -> Option<&Token> {
939        self.0.token.as_ref()
940    }
941
942    /// Break the Error into its constituent parts.
943    pub fn into_parts(self) -> (ErrorKind, Path, Span) {
944        let ErrorImpl {
945            kind,
946            loc: _,
947            path,
948            span,
949            token: _,
950        } = *self.0;
951        (kind, path, span)
952    }
953
954    /// Convert the Error into a more comprehensive report using the source JSON to provide
955    /// human readable context.
956    pub fn into_report(self, json: &str) -> ErrorReport<'_> {
957        ErrorReport::from_error(self, json)
958    }
959}
960
961/// A more comprehensive report on the [`Error`] using the source JSON `&str` to provide
962/// human readable context.
963#[derive(Debug)]
964pub struct ErrorReport<'buf> {
965    /// The [`Error`] that occurred.
966    error: Error,
967
968    /// The slice of JSON as defined by the `Error::span`.
969    json_context: &'buf str,
970
971    /// The slice of JSON as defined by the `Error::span` and expanded out to the
972    /// start and end of the line.
973    expanded_json_context: &'buf str,
974
975    /// The line and col indices of the start and end of the span.
976    span_bounds: SpanBounds,
977}
978
979impl<'buf> ErrorReport<'buf> {
980    /// Create the `ErrorReport` from the `Error` and source `&str`.
981    fn from_error(error: Error, json: &'buf str) -> Self {
982        let span = error.span();
983        debug!(?error, ?span, json, "from_error");
984        let json_context = &json.get(span.start..span.end).unwrap_or(json);
985
986        let start = {
987            let s = &json.get(0..span.start).unwrap_or_default();
988            line_col(s)
989        };
990        let end = {
991            let relative_end = line_col(json_context);
992            let line = start.line.saturating_add(relative_end.line);
993
994            if start.line == line {
995                LineCol {
996                    line,
997                    col: start.col.saturating_add(relative_end.col),
998                }
999            } else {
1000                LineCol {
1001                    line,
1002                    col: relative_end.col,
1003                }
1004            }
1005        };
1006        let (prev, next) = find_expanded_newlines(json, span.start);
1007        let expanded_json_context = &json.get(prev..next).unwrap_or(json_context);
1008
1009        let span_bounds = SpanBounds { start, end };
1010
1011        Self {
1012            error,
1013            json_context,
1014            expanded_json_context,
1015            span_bounds,
1016        }
1017    }
1018
1019    /// Return the slice of JSON as defined by the `Error::span`.
1020    pub fn json_context(&self) -> &'buf str {
1021        self.json_context
1022    }
1023
1024    /// Return the slice of JSON as defined by the `Error::span` and expanded out to the
1025    /// start and end of the line.
1026    pub fn expand_json_context(&self) -> &'buf str {
1027        self.expanded_json_context
1028    }
1029
1030    /// Return the line and col number of each end of the span
1031    pub fn span_bounds(&self) -> &SpanBounds {
1032        &self.span_bounds
1033    }
1034
1035    /// Discard the `Report` and take ownership of the `Error`.
1036    pub fn into_error(self) -> Error {
1037        self.error
1038    }
1039}
1040
/// Find the bounds of the line of `json` that contains `byte_index`.
///
/// Returns `(prev, next)` where:
///
/// * `prev` is the index just after the closest `\n` before `byte_index`, or `0` when
///   `byte_index` is on the first line.
/// * `next` is the index of the first `\n` at or after `byte_index`, or `json.len()` when
///   there is no further line break.
///
/// The range `prev..next` never contains a `\n` and both ends are char boundaries, so it can
/// be used to slice `json`. A `byte_index` past the end of `json` is treated as `json.len()`.
fn find_expanded_newlines(json: &str, byte_index: usize) -> (usize, usize) {
    // Searching the raw bytes is safe for any index: `\n` is a single ASCII byte and never
    // part of a multi-byte UTF-8 sequence.
    let bytes = json.as_bytes();
    let index = byte_index.min(bytes.len());
    let (pre, post) = bytes.split_at(index);

    // Without a preceding line break the line starts at the beginning of the input.
    let prev = pre
        .iter()
        .rposition(|b| *b == b'\n')
        .map_or(0, |idx| idx.saturating_add(1));

    // Without a following line break the line runs to the end of the input.
    let next = post
        .iter()
        .position(|b| *b == b'\n')
        .map_or(bytes.len(), |idx| index.saturating_add(idx));

    (prev, next)
}
1058
1059/// The line and col indices of the start and end of the span.
1060#[derive(Clone, Debug)]
1061pub struct SpanBounds {
1062    /// The start of the `Span` expressed as line and column index.
1063    pub start: LineCol,
1064
1065    /// The end of the `Span` expressed as line and column index.
1066    pub end: LineCol,
1067}
1068
/// A position in a file, given as a line and a column.
#[derive(Clone, Debug)]
pub struct LineCol {
    /// Zero-based line index.
    pub line: u32,

    /// Zero-based column index.
    pub col: u32,
}

/// Build a [`LineCol`] from a `(line, col)` pair.
impl From<(u32, u32)> for LineCol {
    fn from(value: (u32, u32)) -> Self {
        let (line, col) = value;
        Self { line, col }
    }
}

/// Turn a [`LineCol`] back into a `(line, col)` pair.
impl From<LineCol> for (u32, u32) {
    fn from(value: LineCol) -> Self {
        let LineCol { line, col } = value;
        (line, col)
    }
}

/// Allow a [`LineCol`] to be compared directly with a `(line, col)` pair.
impl PartialEq<(u32, u32)> for LineCol {
    fn eq(&self, other: &(u32, u32)) -> bool {
        (self.line, self.col) == *other
    }
}

/// Formats as `line:col`.
impl fmt::Display for LineCol {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self { line, col } = self;
        write!(f, "{line}:{col}")
    }
}
1105
1106/// Return the line and column indices of the end of the slice.
1107///
1108/// The line and column indices are zero based.
1109pub fn line_col(s: &str) -> LineCol {
1110    let mut chars = s.chars().rev();
1111    let mut line = 0_u32;
1112    let mut col = 0_u32;
1113
1114    // The col only needs to be calculated on the final line so we iterate from the last char
1115    // back to the start of the line and then only continue to count the lines after that.
1116    //
1117    // This is less work than continuously counting chars from the front of the slice.
1118    for c in chars.by_ref() {
1119        // If the `&str` is multiline, we count the line and stop accumulating the col count too.
1120        if c == '\n' {
1121            let Some(n) = line.checked_add(1) else {
1122                break;
1123            };
1124            line = n;
1125            break;
1126        }
1127        let Some(n) = col.checked_add(1) else {
1128            break;
1129        };
1130        col = n;
1131    }
1132
1133    // The col is now known, continue to the start of the str counting newlines as we go.
1134    for c in chars {
1135        if c == '\n' {
1136            let Some(n) = line.checked_add(1) else {
1137                break;
1138            };
1139            line = n;
1140        }
1141    }
1142
1143    LineCol { line, col }
1144}
1145
1146/// An error that has yet to be resolved with a [`Span`].
1147#[derive(Debug)]
1148pub struct PartialError {
1149    /// The location the [`PartialError`] happened in the source code.
1150    kind: ErrorKind,
1151
1152    /// The location the [`PartialError`] happened in the source code.
1153    loc: &'static std::panic::Location<'static>,
1154
1155    /// The token being parsed at the time of the [`PartialError`].
1156    token: Option<Token>,
1157}
1158
1159/// Convert a [`PartialError`] into an [`Error`] by providing a [`PartialElement`].
1160trait PartialIntoError<T> {
1161    /// Convert a [`PartialError`] into an [`Error`] with a path based on the given [`PartialElement`].
1162    fn with_head(self, head: &PartialElement<'_>) -> Result<T, Error>;
1163
1164    /// Converts a [`PartialError`] into an [`Error`] with a root path.
1165    ///
1166    /// This can be used If the path is unknown or the [`Error`] occurred at the root.
1167    fn with_root_path(self) -> Result<T, Error>;
1168}
1169
1170impl<T> PartialIntoError<T> for Result<T, PartialError> {
1171    fn with_head(self, head: &PartialElement<'_>) -> Result<T, Error> {
1172        match self {
1173            Ok(v) => Ok(v),
1174            Err(err) => Err(err.with_head(head)),
1175        }
1176    }
1177
1178    fn with_root_path(self) -> Result<T, Error> {
1179        match self {
1180            Ok(v) => Ok(v),
1181            Err(err) => Err(err.with_root_path()),
1182        }
1183    }
1184}
1185
1186impl PartialError {
1187    /// Convert a [`PartialError`] into an [`Error`] with a path based on the given [`PartialElement`].
1188    fn with_head(self, parent: &PartialElement<'_>) -> Error {
1189        let Self { loc, kind, token } = self;
1190        let span_end = token.map(|t| t.span.end).unwrap_or_default();
1191
1192        let (path, span) = if let Some(elem) = parent.elements.last() {
1193            (
1194                Path::from_node(Arc::clone(&elem.path_node)),
1195                Span {
1196                    start: elem.span.start,
1197                    end: span_end,
1198                },
1199            )
1200        } else {
1201            (
1202                Path::from_node(Arc::clone(&parent.path)),
1203                Span {
1204                    start: parent.span_start,
1205                    end: span_end,
1206                },
1207            )
1208        };
1209
1210        ErrorImpl {
1211            kind,
1212            loc,
1213            path,
1214            span,
1215            token,
1216        }
1217        .into()
1218    }
1219
1220    /// Converts a `PartialError` into an `Error` with a root path.
1221    ///
1222    /// This can be used If the path is unknown or the `Error` occurred at the root.
1223    pub fn with_root_path(self) -> Error {
1224        let Self { loc, kind, token } = self;
1225        let (span_start, span_end) = match (&kind, token) {
1226            (ErrorKind::UnexpectedToken, Some(t)) => (t.span.start, t.span.end),
1227            (_, Some(t)) => (0, t.span.end),
1228            (_, None) => (0, 0),
1229        };
1230        ErrorImpl {
1231            loc,
1232            kind,
1233            path: Path::root(),
1234            span: Span {
1235                start: span_start,
1236                end: span_end,
1237            },
1238            token,
1239        }
1240        .into()
1241    }
1242}
1243
/// The different ways parsing JSON can fail.
#[derive(Debug)]
pub enum ErrorKind {
    /// A programming error inside the parser itself.
    Internal(Box<dyn std::error::Error + Send + Sync + 'static>),

    /// More tokens were expected but the `Lexer` had none left.
    UnexpectedEOF,

    /// The `Lexer` emitted a token that was not expected at that point.
    UnexpectedToken,
}
1256
1257impl ErrorKind {
1258    #[track_caller]
1259    fn into_partial_error(self, token: Option<Token>) -> PartialError {
1260        PartialError {
1261            kind: self,
1262            loc: std::panic::Location::caller(),
1263            token,
1264        }
1265    }
1266
1267    #[track_caller]
1268    pub fn into_partial_error_without_token(self) -> PartialError {
1269        PartialError {
1270            kind: self,
1271            loc: std::panic::Location::caller(),
1272            token: None,
1273        }
1274    }
1275}
1276
1277impl std::error::Error for Error {}
1278
1279impl fmt::Display for Error {
1280    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1281        let ErrorImpl {
1282            kind,
1283            loc,
1284            path,
1285            span: _,
1286            token,
1287        } = &*self.0;
1288
1289        write!(
1290            f,
1291            "Error: code location: {loc}; while parsing element at `{path}`"
1292        )?;
1293
1294        if let Some(token) = token {
1295            write!(f, " token: `{}`", token.kind)?;
1296        }
1297
1298        match kind {
1299            ErrorKind::Internal(err) => write!(f, "Internal: {err}"),
1300            ErrorKind::UnexpectedEOF => f.write_str("Unexpected EOF"),
1301            ErrorKind::UnexpectedToken => write!(f, "unexpected token"),
1302        }
1303    }
1304}
1305
1306/// A programming Error resulting from faulty logic.
1307///
1308/// This should not be exposed on the public API.
1309#[derive(Debug)]
1310enum InternalError {
1311    /// Slicing into the JSON buf failed.
1312    ///
1313    /// This should not happen during parsing, as the parsing and `Span` calculations are all
1314    /// contained within the same callstack of functions.
1315    ///
1316    /// This can only happen if there's a mistake in the `Span` offset/range calculations.
1317    BufferSlice(Span),
1318
1319    /// The type of `Buffer` is invalid.
1320    ///
1321    /// The `json_tools::Lexer::next` is called in a few places and the `json_tools::Token` it
1322    /// emits is converted into a local `Token` with only a `Span` based buffer to avoid checking
1323    /// the buffer type each time it's used.
1324    ///
1325    /// The lexer is configured to only use a `Span` based buffer so the only way this Error can
1326    /// occur is if the code is changed so that the lexer uses a `String` based buffer.
1327    BufferType,
1328
1329    /// The `json_tools::Span` uses `u64` for the `start` and `end` indices which would involve
1330    /// conversion to `usize` each time they are used. To avoid this the `json_tools::Span` is
1331    /// converted to the locally defined `Span` that uses `usize` based fields.
1332    ///
1333    /// This conversion can fail if the binary is built for architectures other than `64` bit pointer width.
1334    FromInt(TryFromIntError),
1335
1336    /// A String was parsed without surrounding double quotes.
1337    ///
1338    /// This is only possible if the `json_tools` crate changes the implementation details of
1339    /// how they parse JSON strings.
1340    StringWithoutQuotes,
1341
1342    /// A `RawStr` was made using a token that is not a `String`.
1343    ///
1344    /// `RawStr`s are only creatable from inside the crate so the only way this can occur is
1345    /// through a programming error.
1346    RawStringFromInvalidToken,
1347}
1348
1349impl InternalError {
1350    #[track_caller]
1351    fn into_partial_error(self, token: Option<Token>) -> PartialError {
1352        ErrorKind::Internal(Box::new(self)).into_partial_error(token)
1353    }
1354}
1355
1356impl std::error::Error for InternalError {}
1357
1358/// The `json_tools::Span` uses `u64` for the `start` and `end` indices which would involve
1359/// conversion to `usize` each time they are used. To avoid this the `json_tools::Span` is
1360/// converted to the locally defined `Span` that uses `usize` based fields.
1361///
1362/// This conversion can fail if the binary is built for architectures other than `64` bit pointer width.
1363impl From<TryFromIntError> for InternalError {
1364    fn from(err: TryFromIntError) -> Self {
1365        InternalError::FromInt(err)
1366    }
1367}
1368
1369impl From<InternalError> for Error {
1370    #[track_caller]
1371    fn from(err: InternalError) -> Self {
1372        ErrorImpl {
1373            kind: ErrorKind::Internal(Box::new(err)),
1374            loc: std::panic::Location::caller(),
1375            path: Path::root(),
1376            span: Span { start: 0, end: 0 },
1377            token: None,
1378        }
1379        .into()
1380    }
1381}
1382
1383impl fmt::Display for InternalError {
1384    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1385        match self {
1386            InternalError::BufferSlice(span) => {
1387                write!(f, "Slice into buffer failed; span: {span:?}")
1388            }
1389            InternalError::BufferType => write!(f, "The tokens buffer is not a `Span`"),
1390            InternalError::FromInt(err) => write!(f, "{err}"),
1391            InternalError::StringWithoutQuotes => {
1392                write!(f, "A String was parsed without surrounding double quotes.")
1393            }
1394
1395            InternalError::RawStringFromInvalidToken => {
1396                write!(
1397                    f,
1398                    "A `RawString` was created using a `Token` that's not a `String`"
1399                )
1400            }
1401        }
1402    }
1403}
1404
1405trait InternalErrorIntoPartial<T> {
1406    #[track_caller]
1407    fn into_partial_error<F>(self, f: F) -> Result<T, PartialError>
1408    where
1409        F: FnOnce() -> Token;
1410}
1411
1412impl<T> InternalErrorIntoPartial<T> for Result<T, InternalError> {
1413    fn into_partial_error<F>(self, f: F) -> Result<T, PartialError>
1414    where
1415        F: FnOnce() -> Token,
1416    {
1417        match self {
1418            Ok(v) => Ok(v),
1419            Err(err) => {
1420                let token = f();
1421                Err(err.into_partial_error(Some(token)))
1422            }
1423        }
1424    }
1425}
1426
1427/// Create the `Span` of an `Element` given the start and the closing token.
1428fn element_span(token_end: &Token, start: usize) -> Span {
1429    Span {
1430        start,
1431        end: token_end.span.end,
1432    }
1433}
1434
1435/// Return the content of the `Token` as a `&str`.
1436///
1437/// This in only useful for `Token`'s that contain variable data, such as `String`, `Number` etc.
1438#[track_caller]
1439fn token_str<'buf>(json: &'buf str, token: &Token) -> Result<&'buf str, PartialError> {
1440    let start = token.span.start;
1441    let end = token.span.end;
1442    let s = &json
1443        .get(start..end)
1444        .ok_or(InternalError::BufferSlice(Span { start, end }))
1445        .into_partial_error(|| *token)?;
1446    Ok(s)
1447}
1448
1449/// A `&str` with surrounding quotes removed and it hasn't been analyzed for escapes codes.
1450#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Default)]
1451pub struct RawStr<'buf> {
1452    /// An unescaped `&str` with surrounding quotes removed.
1453    source: &'buf str,
1454
1455    /// The `String` token that produced the source `&str`.
1456    span: Span,
1457}
1458
1459/// Impl `Borrow` so `RawStr` plays well with hashed collections.
1460impl Borrow<str> for RawStr<'_> {
1461    fn borrow(&self) -> &str {
1462        self.source
1463    }
1464}
1465
1466/// Impl `Borrow` so `RawStr` plays well with hashed collections.
1467impl Borrow<str> for &RawStr<'_> {
1468    fn borrow(&self) -> &str {
1469        self.source
1470    }
1471}
1472
1473impl<'buf> RawStr<'buf> {
1474    pub(super) fn from_str(source: &'buf str, span: Span) -> Self {
1475        Self { source, span }
1476    }
1477
1478    /// Create new `RawStr` from a string with surrounding quotes.
1479    #[track_caller]
1480    pub(super) fn from_quoted_str(
1481        s: &'buf str,
1482        token: Token,
1483    ) -> Result<RawStr<'buf>, PartialError> {
1484        const QUOTE: char = '"';
1485
1486        if token.kind != TokenType::String {
1487            return Err(InternalError::RawStringFromInvalidToken.into_partial_error(Some(token)));
1488        }
1489
1490        // remove double quotes
1491        let (_, s) = s
1492            .split_once(QUOTE)
1493            .ok_or(InternalError::StringWithoutQuotes)
1494            .into_partial_error(|| token)?;
1495
1496        let (source, _) = s
1497            .rsplit_once(QUOTE)
1498            .ok_or(InternalError::StringWithoutQuotes)
1499            .into_partial_error(|| token)?;
1500
1501        Ok(Self {
1502            source,
1503            span: token.span,
1504        })
1505    }
1506
1507    /// Return the raw unescaped `&str`.
1508    pub(crate) fn as_raw(&self) -> &'buf str {
1509        self.source
1510    }
1511
1512    /// Return the `&str` with all escapes decoded.
1513    pub(crate) fn decode_escapes(
1514        &self,
1515        elem: &Element<'buf>,
1516    ) -> Caveat<Cow<'_, str>, decode::Warning> {
1517        unescape_str(self.source, elem)
1518    }
1519
1520    /// Return a `&str` marked as either having escapes or not.
1521    pub(crate) fn has_escapes(
1522        &self,
1523        elem: &Element<'buf>,
1524    ) -> Caveat<decode::PendingStr<'_>, decode::Warning> {
1525        decode::analyze(self.source, elem)
1526    }
1527
1528    /// Return the [`Span`] of the [`Token`] that generated this string.
1529    pub fn span(&self) -> Span {
1530        self.span
1531    }
1532}
1533
1534impl fmt::Display for RawStr<'_> {
1535    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1536        fmt::Display::fmt(self.source, f)
1537    }
1538}
1539
1540#[track_caller]
1541fn token_str_as_string(json: &str, token: Token) -> Result<RawStr<'_>, PartialError> {
1542    let s = token_str(json, &token)?;
1543    let raw = RawStr::from_quoted_str(s, token)?;
1544    Ok(raw)
1545}