ion_rs/text/non_blocking/
raw_text_reader.rs

1use std::fmt::Display;
2
3use crate::element::{Blob, Clob};
4use crate::types::Str;
5use nom::Err::{Error, Failure, Incomplete};
6
7use crate::raw_reader::{BufferedRawReader, Expandable, RawStreamItem};
8use crate::raw_symbol_token::RawSymbolToken;
9use crate::result::{
10    decoding_error, illegal_operation, illegal_operation_raw, incomplete_text_error, IonError,
11    IonResult, Position,
12};
13use crate::stream_reader::IonReader;
14use crate::text::non_blocking::text_buffer::TextBuffer;
15use crate::text::parent_container::ParentContainer;
16use crate::text::parse_result::IonParseResult;
17use crate::text::parsers::containers::{
18    list_delimiter, list_value_or_end, s_expression_delimiter, s_expression_value_or_end,
19    struct_delimiter, struct_field_name_or_end, struct_field_value,
20};
21use crate::text::parsers::top_level::{stream_item, RawTextStreamItem};
22use crate::text::text_value::{AnnotatedTextValue, TextValue};
23use crate::types::{Decimal, Int, Timestamp};
24use crate::IonType;
25
// Capacity reserved up front for the reader's `parents` stack when a reader is constructed.
26const INITIAL_PARENTS_CAPACITY: usize = 16;
27
/// Tracks whether the reader is idle or has an unfinished operation that must be resumed.
#[derive(Debug, PartialEq)]
enum ReaderState {
    /// Ready to read any value: no outstanding errors and no partial parses.
    Ready,
    /// An earlier `step_out` call did not finish, although it may have advanced past some
    /// values. The step-out has to be resumed before anything else is done.
    SteppingOut {
        /// The depth the reader is trying to step out to.
        target_depth: usize,
        /// When true, resuming jumps straight to searching for the parent container's
        /// delimiter. When false, resuming keeps exhausting the values that remain inside
        /// the container.
        finding_parent: bool,
    },
}

impl ReaderState {
    /// Reports whether this state is [`ReaderState::Ready`].
    fn is_ready(&self) -> bool {
        matches!(self, Self::Ready)
    }

    /// Puts the state back to [`ReaderState::Ready`].
    fn reset(&mut self) {
        *self = Self::Ready;
    }
}
55
56pub struct RawTextReader<A: AsRef<[u8]> + Expandable> {
57    buffer: TextBuffer<A>,
58    // If the reader is not positioned over a value inside a struct, this is None.
59    current_field_name: Option<RawSymbolToken>,
60    // If the reader has not yet begun reading at the current level or is positioned over an IVM,
61    // this is None.
62    current_value: Option<AnnotatedTextValue>,
63    // If the reader is positioned over an IVM instead of a value, this is:
64    //     Some(major_version, minor_version)
65    // Otherwise, it is None.
66    current_ivm: Option<(u8, u8)>,
67    // True if the current text buffer is exhausted.
68    is_eof: bool,
69    // True if the caller has indicated that all data has been read for the 'stream'.
70    is_eos: bool,
71    parents: Vec<ParentContainer>,
72    // Current state of the reader. This state, combined with `need_continue` will signal if an
73    // action needs to be completed (need_continue = true) or if we are attempting for the first
74    // time (need_continue = false).
75    state: ReaderState,
76    // The "call depth" of the current nested step_out.
77    step_out_nest: usize,
78    // Tracking whether or not we need to continue a previously failed state.
79    need_continue: bool,
80}
81
/// The final outcome of a [RawTextReader]'s attempt to parse the next value in the stream.
///
/// `IonParseResult<'a>` cannot play this role: its lifetime, `'a`, keeps the result borrowing
/// the [RawTextReader]'s buffer after parsing has finished, which gets in the way of the
/// bookkeeping that has to follow once the next value is found.
///
/// This type deliberately stores very little so that it does not need a lifetime.
#[derive(Debug, PartialEq, Eq)]
pub(crate) enum RootParseResult<O> {
    /// The parser matched and produced this output.
    Ok(O),
    /// The end of the available input was reached.
    Eof,
    /// The parser did not recognize the input.
    NoMatch,
    /// More data is required; carries the `(line, column)` text position.
    Incomplete(usize, usize),
    /// A fatal problem occurred; carries a description of it.
    Failure(String),
}
97
98impl From<Vec<u8>> for RawTextReader<Vec<u8>> {
99    fn from(source: Vec<u8>) -> Self {
100        RawTextReader::new(source)
101    }
102}
103
104impl<A: AsRef<[u8]> + Expandable> RawTextReader<A> {
105    pub fn new(input: A) -> RawTextReader<A> {
106        let expandable = input.expandable();
107        RawTextReader {
108            buffer: TextBuffer::new(input),
109            current_field_name: None,
110            current_value: None,
111            current_ivm: None,
112            is_eof: false,
113            is_eos: !expandable,
114            parents: Vec::with_capacity(INITIAL_PARENTS_CAPACITY),
115            state: ReaderState::Ready,
116            step_out_nest: 0,
117            need_continue: false,
118        }
119    }
120
121    fn load_next_value(&mut self) -> IonResult<()> {
122        // If the reader's current value is the beginning of a container and the user calls `next()`,
123        // we need to skip the entire container. We can do this by stepping into and then out of
124        // that container; `step_out()` has logic that will exhaust the remaining values.
125        let need_to_skip_container = !self.is_null()
126            && self
127                .current_value
128                .as_ref()
129                .map(|v| v.value().ion_type().is_container())
130                .unwrap_or(false);
131
132        if need_to_skip_container {
133            self.step_in()?;
134            self.step_out()?;
135        }
136
137        // Unset variables holding onto information about the previous position.
138        self.current_ivm = None;
139        self.current_value = None;
140        self.current_field_name = None;
141
142        if self.parents.is_empty() {
143            // The `parents` stack is empty. We're at the top level.
144
145            // If the reader has already found EOF (the end of the top level), there's no need to
146            // try to read more data. Return Ok(None).
147            if self.is_eof {
148                self.current_value = None;
149                return Ok(());
150            }
151
152            let next_stream_item = self.parse_next_nom(stream_item);
153            return self.process_stream_item(next_stream_item);
154        }
155
156        // Otherwise, the `parents` stack is not empty. We're inside a container.
157
158        // The `ParentLevel` type is only a couple of stack-allocated bytes. It's very cheap to clone.
159        let parent = *self.parents.last().unwrap();
160        // If the reader had already found the end of this container, return Ok(None).
161        if parent.is_exhausted() {
162            self.current_value = None;
163            return Ok(());
164        }
165        // Otherwise, try to read the next value. The syntax we expect will depend on the
166        // IonType of the parent container.
167        let value = match parent.ion_type() {
168            IonType::List => self.next_list_value(),
169            IonType::SExp => self.next_s_expression_value(),
170            IonType::Struct => {
171                self.buffer.checkpoint();
172                if let Some(field_name) = self.next_struct_field_name()? {
173                    // ...remember it and return the field value that follows.
174                    self.current_field_name = Some(field_name);
175                    let field_value_result = match self.next_struct_field_value() {
176                        Err(e) => {
177                            self.current_field_name = None;
178                            self.buffer.rollback();
179                            return Err(e);
180                        }
181                        Ok(v) => v,
182                    };
183                    Ok(Some(field_value_result))
184                } else {
185                    Ok(None)
186                }
187            }
188            other => unreachable!(
189                "The reader's `parents` stack contained a scalar value: {:?}",
190                other
191            ),
192        };
193
194        match value {
195            Ok(None) => {
196                // If the parser returns Ok(None), we've just encountered the end of the container for
197                // the first time. Set `is_exhausted` so we won't try to parse more until `step_out()` is
198                // called.
199                // We previously used a copy of the last `ParentLevel` in the stack to simplify reading.
200                // To modify it, we'll need to get a mutable reference to the original.
201                self.parents.last_mut().unwrap().set_exhausted(true);
202                self.current_value = None;
203            }
204            Ok(Some(value)) => {
205                // We successfully read a value. Set it as the current value.
206                self.current_value = Some(value);
207            }
208            Err(e) => return Err(e),
209        };
210
211        Ok(())
212    }
213
214    // This function implements the logic of `step_out`, but is extracted in order to make tracking
215    // of the step_out depth, which is handled by the actual `step_out` function, easier.
216    //
217    // See `step_out` for more info about how we handle errors when stepping out.
218    fn step_out_impl(&mut self) -> IonResult<()> {
219        let (target_depth, find_parent) = match self.state {
220            ReaderState::Ready => (self.depth() - 1, false),
221            ReaderState::SteppingOut {
222                target_depth,
223                finding_parent,
224            } => (target_depth, finding_parent),
225        };
226
227        // `find_parent` indicates that we previously failed to step_out and still need to find the
228        // delimiter, or end, for our parent container. We check it here, to see if we need to
229        // still exhaust the elements of our current container.
230        if !find_parent {
231            if self.parents.is_empty() {
232                return illegal_operation(
233                    "Cannot call `step_out()` when the reader is at the top level.",
234                );
235            }
236
237            // The container we're stepping out of.
238            let parent = self.parents.last().unwrap();
239
240            // If we're not at the end of the current container, advance the cursor until we are.
241            // Unlike the binary reader, which can skip-scan, the text reader must visit every value
242            // between its current position and the end of the container.
243            if !parent.is_exhausted() {
244                while let RawStreamItem::Value(_) | RawStreamItem::Null(_) = self.next()? {}
245            }
246
247            // Remove the parent container from the stack and clear the current value.
248            let _ = self.parents.pop();
249            self.current_value = None;
250
251            if self.parents.is_empty() {
252                // We're at the top level; nothing left to do.
253                return Ok(());
254            }
255            // We have reached a point where the original parent is no longer known, so if an error
256            // occurs while finishing up our step_out, we need to know where to start from.
257            self.state = ReaderState::SteppingOut {
258                target_depth,
259                finding_parent: true,
260            };
261        }
262
263        // We've stepped out, but the reader isn't at the top level. We're still inside another
264        // container. Make sure the container was followed by either the appropriate delimiter
265        // or the end of its parent.
266        let container_type = self.parents.last().unwrap().ion_type();
267        match container_type {
268            IonType::List => {
269                self.parse_expected("list delimiter or end", list_delimiter)?;
270            }
271            IonType::SExp => {
272                self.parse_expected("s-expression delimiter or end", s_expression_delimiter)?;
273            }
274            IonType::Struct => {
275                self.parse_expected("struct delimiter or end", struct_delimiter)?;
276            }
277            scalar => unreachable!("stepping out of a scalar type: {:?}", scalar),
278        };
279
280        self.state = ReaderState::SteppingOut {
281            target_depth,
282            finding_parent: false,
283        };
284
285        Ok(())
286    }
287
288    /// Continues any previously incomplete parsing attempt.
289    fn continue_state(&mut self) -> IonResult<()> {
290        if self.need_continue && self.step_out_nest == 0 {
291            self.need_continue = false;
292
293            match self.state {
294                // Previously Attempted to step_out, and failed.
295                ReaderState::SteppingOut { .. } => self.step_out(),
296                ReaderState::Ready => Ok(()),
297            }
298        } else {
299            Ok(())
300        }
301    }
302
303    fn process_stream_item(
304        &mut self,
305        read_result: RootParseResult<RawTextStreamItem>,
306    ) -> IonResult<()> {
307        match read_result {
308            RootParseResult::Ok(RawTextStreamItem::IonVersionMarker(1, 0)) => {
309                // We found an IVM; we currently only support Ion 1.0.
310                self.current_ivm = Some((1, 0));
311                Ok(())
312            }
313            RootParseResult::Ok(RawTextStreamItem::IonVersionMarker(major, minor)) => {
314                decoding_error(format!(
315                    "Unsupported Ion version: v{major}.{minor}. Only 1.0 is supported."
316                ))
317            }
318            RootParseResult::Ok(RawTextStreamItem::AnnotatedTextValue(value)) => {
319                // We read a value successfully; set it as our current value.
320                self.current_value = Some(value);
321                Ok(())
322            }
323            RootParseResult::Incomplete(line, column) => incomplete_text_error(
324                "text",
325                Position::with_offset(self.buffer.bytes_consumed())
326                    .with_text_position(line, column),
327            ),
328            RootParseResult::Eof => {
329                // We are only concerned with EOF behaviors when we are at the end of the stream,
330                // AND at the end of the buffer.
331                if self.is_eos {
332                    // The top level is the only depth at which EOF is legal. If we encounter an EOF,
333                    // double check that the buffer doesn't actually have a value in it. See the
334                    // comments in [parse_value_at_eof] for a detailed explanation of this.
335                    let item = self.parse_value_at_eof();
336                    if item == RootParseResult::Eof {
337                        // This is a genuine EOF; make a note of it and clear the current value.
338                        self.is_eof = true;
339                        self.current_value = None;
340                        return Ok(());
341                    }
342                    self.process_stream_item(item)
343                } else {
344                    // If we are not at the end of the stream, we need to get more data.
345                    incomplete_text_error("text", self.buffer.get_position())
346                }
347            }
348            RootParseResult::NoMatch => {
349                // The parser didn't recognize the text in the input buffer.
350                // Return an error that contains the text we were attempting to parse.
351                let error_message = format!(
352                    "unrecognized input near line {}: '{}'",
353                    self.buffer.lines_loaded(),
354                    self.buffer.remaining_text(),
355                );
356                decoding_error(error_message)
357            }
358            RootParseResult::Failure(error_message) => {
359                // A fatal error occurred while reading the next value.
360                // This could be an I/O error, malformed utf-8 data, or an invalid value.
361                decoding_error(error_message)
362            }
363        }
364    }
365
366    /// Assumes that the reader is inside a list and attempts to parse the next value.
367    /// If the next token in the stream is an end-of-list delimiter (`]`), returns Ok(None).
368    fn next_list_value(&mut self) -> IonResult<Option<AnnotatedTextValue>> {
369        self.parse_expected("a list", list_value_or_end)
370    }
371
372    /// Assumes that the reader is inside an s-expression and attempts to parse the next value.
373    /// If the next token in the stream is an end-of-s-expression delimiter (`)`), returns Ok(None).
374    fn next_s_expression_value(&mut self) -> IonResult<Option<AnnotatedTextValue>> {
375        self.parse_expected("an s-expression", s_expression_value_or_end)
376    }
377
378    /// Assumes that the reader is inside an struct and attempts to parse the next field name.
379    /// If the next token in the stream is an end-of-struct delimiter (`}`), returns Ok(None).
380    fn next_struct_field_name(&mut self) -> IonResult<Option<RawSymbolToken>> {
381        // If there isn't another value, this returns Ok(None).
382        self.parse_expected("a struct field name", struct_field_name_or_end)
383    }
384
385    /// Assumes that the reader is inside a struct AND that a field has already been successfully
386    /// parsed from input using [`next_struct_field_name`](Self::next_struct_field_name) and
387    /// attempts to parse the next value.
388    /// In this input position, only a value (or whitespace/comments) are legal. Anything else
389    /// (including EOF) will result in a decoding error.
390    fn next_struct_field_value(&mut self) -> IonResult<AnnotatedTextValue> {
391        // Only called after a call to [next_struct_field_name] that returns Some(field_name).
392        // It is not legal for a field name to be followed by a '}' or EOF.
393        // If there isn't another value, returns an Err.
394        self.parse_expected("a struct field value", struct_field_value)
395    }
396
397    /// Attempts to parse the next entity from the stream using the provided parser.
398    /// Returns a decoding error if EOF is encountered while parsing.
399    /// If the parser encounters an error, it will be returned as-is.
400    fn parse_expected<P, O>(&mut self, entity_name: &str, parser: P) -> IonResult<O>
401    where
402        P: Fn(&str) -> IonParseResult<O>,
403    {
404        match self.parse_next(parser) {
405            Ok(Some(value)) => Ok(value),
406            Ok(None) => {
407                if !self.is_eos {
408                    incomplete_text_error("text", self.buffer.get_position())
409                } else {
410                    decoding_error(format!(
411                        "unexpected end of input while reading {} on line {}: '{}'",
412                        entity_name,
413                        self.buffer.lines_loaded(),
414                        self.buffer.remaining_text()
415                    ))
416                }
417            }
418            Err(err @ IonError::Incomplete { .. }) => Err(err),
419            Err(e) => decoding_error(format!(
420                "Parsing error occurred while parsing {} near line {}:\n'{}'\n{}",
421                entity_name,
422                self.buffer.lines_loaded(),
423                self.buffer.remaining_text(),
424                e
425            )),
426        }
427    }
428
429    fn parse_next<P, O>(&mut self, parser: P) -> IonResult<Option<O>>
430    where
431        P: Fn(&str) -> IonParseResult<O>,
432    {
433        match self.parse_next_nom(parser) {
434            RootParseResult::Ok(item) => Ok(Some(item)),
435            RootParseResult::Incomplete(line, column) => incomplete_text_error(
436                "text",
437                Position::with_offset(self.buffer.bytes_consumed())
438                    .with_text_position(line, column),
439            ),
440            RootParseResult::Eof => Ok(None),
441            RootParseResult::NoMatch => {
442                // If we are not at the end of the stream we could be missing a match due to partial
443                // data.
444                if self.is_eos {
445                    // Return an error that contains the text currently in the buffer (i.e. what we
446                    // were attempting to parse.)
447                    let error_message = format!(
448                        "unrecognized input near line {}: '{}'",
449                        self.buffer.lines_loaded(),
450                        self.buffer.remaining_text(),
451                    );
452                    decoding_error(error_message)
453                } else {
454                    incomplete_text_error("text", self.buffer.get_position())
455                }
456            }
457            RootParseResult::Failure(error_message) => decoding_error(error_message),
458        }
459    }
460
461    /// Attempts to parse the next entity from the stream using the provided parser.
462    /// If there isn't enough data in the buffer for the parser to match its input conclusively,
463    /// more data will be loaded into the buffer and the parser will be called again.
464    /// If EOF is encountered, returns `Ok(None)`.
465    fn parse_next_nom<P, O>(&mut self, parser: P) -> RootParseResult<O>
466    where
467        P: Fn(&str) -> IonParseResult<O>,
468    {
469        use super::text_buffer::TextError;
470
471        let RawTextReader {
472            ref mut is_eof,
473            ref mut buffer,
474            ..
475        } = *self;
476
477        if *is_eof {
478            return RootParseResult::Eof;
479        }
480
481        loop {
482            // Note the number of bytes currently in the text buffer
483            let length_before_parse = buffer.remaining_text().len();
484            // Invoke the top_level_value() parser; this will attempt to recognize the next value
485            // in the stream and return a &str slice containing the remaining, not-yet-parsed text.
486            match parser(buffer.remaining_text()) {
487                // If `top_level_value` returns 'Incomplete', there wasn't enough text in the buffer
488                // to match the next value. No syntax errors have been encountered (yet?), but we
489                // need to load more text into the buffer before we try to parse it again.
490                Err(Incomplete(_needed)) => {
491                    // Ask the buffer to load another line of text.
492                    // TODO: Currently this loads a single line at a time for easier testing.
493                    //       We may wish to bump it to a higher number of lines at a time (8?)
494                    //       for efficiency once we're confident in the correctness.
495                    match buffer.load_next_line() {
496                        Ok(0) => {
497                            // If load_next_line() returns Ok(0), we've reached the end of our input.
498                            *is_eof = true;
499                            // The buffer had an `Incomplete` value in it; now that we know we're at EOF,
500                            // we can determine whether the buffer's contents should actually be
501                            // considered complete.
502                            return RootParseResult::Eof;
503                        }
504                        Ok(_bytes_loaded) => {
505                            // Retry the parser on the extended buffer in the next loop iteration
506                            continue;
507                        }
508                        Err(TextError::Incomplete { line, column }) => {
509                            // If load_next_line() returns TextError::Incomplete, then that means
510                            // it has incomplete UTF-8 data in the buffer, suggesting the user is
511                            // not done providing the ion data.
512                            return RootParseResult::Incomplete(line, column);
513                        }
514                        Err(e) => {
515                            let error_message = format!("I/O error, could not read more data: {e}");
516                            return RootParseResult::Failure(error_message);
517                        }
518                    }
519                }
520                Ok((remaining_text, value)) => {
521                    // Our parser successfully matched a value.
522                    // Note the length of the text that remains after parsing.
523                    let length_after_parse = remaining_text.len();
524                    // The difference in length tells us how many bytes were part of the
525                    // text representation of the value that we found.
526                    let bytes_consumed = length_before_parse - length_after_parse;
527                    buffer.consume(bytes_consumed);
528                    return RootParseResult::Ok(value);
529                }
530                Err(Error(_e)) => return RootParseResult::<O>::NoMatch,
531                Err(Failure(e)) => {
532                    let error_message = format!(
533                        "unrecognized input near line {}: {}: '{}'",
534                        buffer.lines_loaded(),
535                        e.description().unwrap_or("<no description>"),
536                        buffer.remaining_text(),
537                    );
538                    return RootParseResult::Failure(error_message);
539                }
540            };
541        }
542    }
543
544    // Parses the contents of the text buffer again with the knowledge that we're at the end of the
545    // input stream. This allows us to resolve a number of ambiguous cases.
546    // For a detailed description of the problem that this addresses, please see:
547    // https://github.com/amazon-ion/ion-rust/issues/318
548    // This method should only be called when the reader is at the top level. An EOF at any other
549    // depth is an error.
550    fn parse_value_at_eof(&mut self) -> RootParseResult<RawTextStreamItem> {
551        // An arbitrary, cheap-to-parse Ion value that we append to the buffer when its contents at
552        // EOF are ambiguous.
553        const SENTINEL_ION_TEXT: &str = "\n0\n";
554        // We unfortunately need to copy here in order to append the SENTINEL_ION_TEXT, since we
555        // aren't guaranteed a vector-backed reader.
556        let mut remaining_text = self.buffer.remaining_text().to_owned();
557        // Make a note of the text's length; we're about to modify it.
558        let original_length = remaining_text.len();
559
560        // Append our sentinel value to the end of the input buffer.
561        remaining_text.push_str(SENTINEL_ION_TEXT);
562
563        // If the buffer contained a value, the newline will indicate that the contents of the
564        // buffer were complete. For example:
565        // * the integer `7` becomes `7\n`; it wasn't the first digit in a truncated `755`.
566        // * the boolean `false` becomes `false\n`; it wasn't actually half of the
567        //   identifier `falseTeeth`.
568        //
569        // If the buffer contained a value that's written in segments, the extra `0` will indicate
570        // that no more segments are coming. For example:
571        // * `foo::bar` becomes `foo::bar\n0\n`; the parser can see that 'bar' is a value, not
572        //    another annotation in the sequence.
573        // * `'''long-form string'''` becomes `'''long-form string'''\n0\n`; the parser can see that
574        //   there aren't any more long-form string segments in the sequence.
575        //
576        // Attempt to parse the updated buffer.
577        let value = match stream_item(&remaining_text) {
578            Ok(("\n", RawTextStreamItem::AnnotatedTextValue(value)))
579                if value.annotations().is_empty()
580                    && *value.value() == TextValue::Int(Int::I64(0)) =>
581            {
582                // We found the unannotated zero that we appended to the end of the buffer.
583                // The "\n" in this pattern is the unparsed text left in the buffer,
584                // which indicates that our 0 was parsed.
585                RootParseResult::Eof
586            }
587            Ok((_remaining_text, value)) => {
588                // If we match, and try to consume the remaining buffer in its entirety before we
589                // know the data has been fully loaded we need to treat it as an incomplete error
590                // so that we do not inadvertently succeed on a partial parse.
591                if original_length == self.buffer.remaining_text().len() && !self.is_eos {
592                    return RootParseResult::Incomplete(
593                        self.buffer.lines_loaded(),
594                        self.buffer.line_offset(),
595                    );
596                }
597                // We found something else. The zero is still in the buffer; we can leave it there.
598                // The reader's `is_eof` flag has been set, so the text buffer will never be used
599                // again. Return the value we found.
600                RootParseResult::Ok(value)
601            }
602            Err(Incomplete(_needed)) => {
603                RootParseResult::Incomplete(self.buffer.lines_loaded(), self.buffer.line_offset())
604            }
605            Err(Error(ion_parse_error)) => {
606                RootParseResult::Failure(format!(
607                    "Parsing error occurred near line {}: '{}': '{:?}'",
608                    self.buffer.lines_loaded(),
609                    &self.buffer.remaining_text()[..original_length], /* Don't show the extra `\n0\n` */
610                    ion_parse_error
611                ))
612            }
613            Err(Failure(ion_parse_error)) => {
614                RootParseResult::Failure(format!(
615                    "A fatal error occurred while reading near line {}: '{}': '{:?}'",
616                    self.buffer.lines_loaded(),
617                    &self.buffer.remaining_text()[..original_length], /* Don't show the extra `\n0\n` */
618                    ion_parse_error
619                ))
620            }
621        };
622
623        value
624    }
625
626    /// Constructs an [IonError::IllegalOperation] which explains that the reader was asked to
627    /// perform an action that is only allowed when it is positioned over the item type described
628    /// by the parameter `expected`.
629    fn expected<E: Display>(&self, expected: E) -> IonError {
630        illegal_operation_raw(format!(
631            "type mismatch: expected a(n) {} but positioned over a(n) {}",
632            expected,
633            self.current()
634        ))
635    }
636}
637
638impl BufferedRawReader for RawTextReader<Vec<u8>> {
639    fn append_bytes(&mut self, bytes: &[u8]) -> IonResult<()> {
640        match self.buffer.append_bytes(bytes) {
641            Err(e) => decoding_error(e.to_string()),
642            Ok(()) => {
643                self.is_eof = false;
644                Ok(())
645            }
646        }
647    }
648
649    fn read_from<R: std::io::Read>(&mut self, source: R, length: usize) -> IonResult<usize> {
650        let res = self.buffer.read_from(source, length);
651        if res.is_ok() {
652            self.is_eof = false;
653        }
654        res
655    }
656
657    // Mark the data stream as being complete. This tells the reader that all data has been read
658    // into the reader.
659    fn stream_complete(&mut self) {
660        self.is_eos = true;
661    }
662
663    // Returns true if the stream has been marked as completely loaded via `stream_complete`.
664    fn is_stream_complete(&self) -> bool {
665        self.is_eos
666    }
667}
668
// An empty annotations slice. Returned by the `annotations()` method below when there is no
// current value, so that the "no value" case can be iterated exactly like a real (possibly
// empty) annotations slice.
const EMPTY_SLICE_RAW_SYMBOL_TOKEN: &[RawSymbolToken] = &[];
671
672// TODO: This implementation of the text reader eagerly materializes each value that it encounters
673//       in the stream and stores it in the reader as `current_value`. Each time a user requests
674//       a value via `read_i64`, `read_bool`, etc, a clone of `current_value` is returned (assuming
675//       its type is in alignment with the request).
676//       A better implementation would identify the input slice containing the next value without
677//       materializing it and then attempt to materialize it when the user calls `read_TYPE`. This
678//       would take less memory and would only materialize values that the user requests.
679//       See: https://github.com/amazon-ion/ion-rust/issues/322
680impl<A: AsRef<[u8]> + Expandable> IonReader for RawTextReader<A> {
681    type Item = RawStreamItem;
682    type Symbol = RawSymbolToken;
683
684    fn ion_version(&self) -> (u8, u8) {
685        (1, 0)
686    }
687
688    fn next(&mut self) -> IonResult<RawStreamItem> {
689        // Failures due to incomplete data can occur any time the reader needs to advance, which
690        // can occur in a call to `next()` or `step_out()`. (Note that in some cases the
691        // implementations of `next()` and `step_out()` may invoke each other transitively.)
692        self.continue_state()?;
693
694        // Parse the next value from the stream, storing it in `self.current_value`.
695        self.load_next_value()?;
696
697        // If we're positioned on an IVM, return the (major, minor) version tuple
698        if let Some((major, minor)) = self.current_ivm {
699            return Ok(RawStreamItem::VersionMarker(major, minor));
700        }
701
702        // If we're positioned on a value, return its IonType and whether it's null.
703        if let Some(value) = self.current_value.as_ref() {
704            Ok(RawStreamItem::nullable_value(
705                value.ion_type(),
706                value.is_null(),
707            ))
708        } else {
709            Ok(RawStreamItem::Nothing)
710        }
711    }
712
713    fn current(&self) -> RawStreamItem {
714        if let Some(ref value) = self.current_value {
715            RawStreamItem::nullable_value(value.ion_type(), value.is_null())
716        } else if let Some(ivm) = self.current_ivm {
717            RawStreamItem::VersionMarker(ivm.0, ivm.1)
718        } else {
719            RawStreamItem::Nothing
720        }
721    }
722
723    fn ion_type(&self) -> Option<IonType> {
724        if let Some(ref value) = self.current_value {
725            return Some(value.ion_type());
726        }
727        None
728    }
729
730    fn is_null(&self) -> bool {
731        if let Some(ref value) = self.current_value {
732            return value.is_null();
733        }
734        false
735    }
736
737    fn annotations<'a>(&'a self) -> Box<dyn Iterator<Item = IonResult<Self::Symbol>> + 'a> {
738        let iterator = self
739            .current_value
740            .as_ref()
741            .map(|value| value.annotations())
742            .unwrap_or(EMPTY_SLICE_RAW_SYMBOL_TOKEN)
743            .iter()
744            .cloned()
745            // The annotations are already in memory and are already resolved to text, so
746            // this step cannot fail. Map each token to Ok(token).
747            .map(Ok);
748        Box::new(iterator)
749    }
750
751    fn has_annotations(&self) -> bool {
752        self.current_value
753            .as_ref()
754            .map(|value| !value.annotations().is_empty())
755            .unwrap_or(false)
756    }
757
758    fn number_of_annotations(&self) -> usize {
759        self.current_value
760            .as_ref()
761            .map(|value| value.annotations().len())
762            .unwrap_or(0)
763    }
764
765    fn field_name(&self) -> IonResult<Self::Symbol> {
766        match self.current_field_name.as_ref() {
767            Some(name) => Ok(name.clone()),
768            None => illegal_operation(
769                "field_name() can only be called when the reader is positioned inside a struct",
770            ),
771        }
772    }
773
774    fn read_null(&mut self) -> IonResult<IonType> {
775        match self.current_value.as_ref().map(|current| current.value()) {
776            Some(TextValue::Null(ion_type)) => Ok(*ion_type),
777            _ => Err(self.expected("null value")),
778        }
779    }
780
781    fn read_bool(&mut self) -> IonResult<bool> {
782        match self.current_value.as_ref().map(|current| current.value()) {
783            Some(TextValue::Bool(value)) => Ok(*value),
784            _ => Err(self.expected("bool value")),
785        }
786    }
787
788    fn read_int(&mut self) -> IonResult<Int> {
789        match self.current_value.as_ref().map(|current| current.value()) {
790            Some(TextValue::Int(value)) => Ok(value.clone()),
791            _ => Err(self.expected("int value")),
792        }
793    }
794
795    fn read_i64(&mut self) -> IonResult<i64> {
796        match self.current_value.as_ref().map(|current| current.value()) {
797            Some(TextValue::Int(Int::I64(value))) => Ok(*value),
798            Some(TextValue::Int(Int::BigInt(value))) => {
799                decoding_error(format!("Integer {value} is too large to fit in an i64."))
800            }
801            _ => Err(self.expected("int value")),
802        }
803    }
804
805    fn read_f32(&mut self) -> IonResult<f32> {
806        match self.current_value.as_ref().map(|current| current.value()) {
807            Some(TextValue::Float(value)) => Ok(*value as f32),
808            _ => Err(self.expected("float value")),
809        }
810    }
811
812    fn read_f64(&mut self) -> IonResult<f64> {
813        match self.current_value.as_ref().map(|current| current.value()) {
814            Some(TextValue::Float(value)) => Ok(*value),
815            _ => Err(self.expected("float value")),
816        }
817    }
818
819    fn read_decimal(&mut self) -> IonResult<Decimal> {
820        match self.current_value.as_ref().map(|current| current.value()) {
821            Some(TextValue::Decimal(ref value)) => Ok(value.clone()),
822            _ => Err(self.expected("decimal value")),
823        }
824    }
825
826    fn read_string(&mut self) -> IonResult<Str> {
827        match self.current_value.as_ref().map(|current| current.value()) {
828            Some(TextValue::String(ref value)) => Ok(Str::from(value.as_str())),
829            _ => Err(self.expected("string value")),
830        }
831    }
832
833    fn read_str(&mut self) -> IonResult<&str> {
834        match self.current_value.as_ref().map(|current| current.value()) {
835            Some(TextValue::String(ref value)) => Ok(value.as_str()),
836            _ => Err(self.expected("string value")),
837        }
838    }
839
840    fn read_symbol(&mut self) -> IonResult<Self::Symbol> {
841        match self.current_value.as_ref().map(|current| current.value()) {
842            Some(TextValue::Symbol(ref value)) => Ok(value.clone()),
843            _ => Err(self.expected("symbol value")),
844        }
845    }
846
847    fn read_blob(&mut self) -> IonResult<Blob> {
848        match self.current_value.as_ref().map(|current| current.value()) {
849            Some(TextValue::Blob(ref value)) => Ok(Blob::from(value.as_slice())),
850            _ => Err(self.expected("blob value")),
851        }
852    }
853
854    fn read_clob(&mut self) -> IonResult<Clob> {
855        match self.current_value.as_ref().map(|current| current.value()) {
856            Some(TextValue::Clob(ref value)) => Ok(Clob::from(value.as_slice())),
857            _ => Err(self.expected("clob value")),
858        }
859    }
860
861    fn read_timestamp(&mut self) -> IonResult<Timestamp> {
862        match self.current_value.as_ref().map(|current| current.value()) {
863            Some(TextValue::Timestamp(ref value)) => Ok(value.clone()),
864            _ => Err(self.expected("timestamp value")),
865        }
866    }
867
868    fn step_in(&mut self) -> IonResult<()> {
869        match &self.current_value {
870            Some(value) if value.ion_type().is_container() => {
871                self.parents
872                    .push(ParentContainer::new(value.value().ion_type()));
873                self.current_value = None;
874                Ok(())
875            }
876            Some(value) => {
877                illegal_operation(format!("Cannot step_in() to a {:?}", value.ion_type()))
878            }
879            None => illegal_operation(format!(
880                "{} {}",
881                "Cannot `step_in`: the reader is not positioned on a value.",
882                "Try calling `next()` to advance first."
883            )),
884        }
885    }
886
887    fn step_out(&mut self) -> IonResult<()> {
888        let (target_depth, find_parent) = match self.state {
889            ReaderState::Ready => (self.depth().saturating_sub(1), false),
890            ReaderState::SteppingOut {
891                target_depth,
892                finding_parent,
893            } => (target_depth, finding_parent),
894        };
895        self.state = ReaderState::SteppingOut {
896            target_depth,
897            finding_parent: find_parent,
898        };
899
900        // If an incomplete error occurs during a step_out, we will re-enter step_out and:
901        //    Using the state information from the previous attempt, we will either continue
902        //    exhausting the container, or continue looking for the parent container's delimiter,
903        //    or end.
904        //
905        //    Once the failed step_out is done, we need to "bubble up" and step_out of any of
906        //    the parent containers that the failed step_out was nested in. Since we have lost
907        //    the call stack that we would normally lean on, we keep track of the depth we want
908        //    to step out to and the nesting of step_out calls. We handle all step_outs like
909        //    normal, until we have completed the exhaustion and delimiter steps for the
910        //    container. Then, if the current step_out call is nested, we return. Otherwise, we
911        //    loop back and continue executing step_outs for the parent container(s) until we have
912        //    reached the target depth.
913        loop {
914            self.step_out_nest += 1;
915            let result = self.step_out_impl();
916            self.step_out_nest -= 1;
917
918            if let Err(IonError::Incomplete { .. }) = result {
919                self.need_continue = true;
920            }
921            result?;
922
923            // If we are a nested call, we're done for now.
924            if self.step_out_nest != 0 {
925                break;
926            } else if target_depth == self.depth() {
927                // We've reached our desired depth, so we can reset our state, and target
928                // depth.
929                self.state = ReaderState::Ready;
930                break;
931            }
932        }
933        Ok(())
934    }
935
936    fn parent_type(&self) -> Option<IonType> {
937        self.parents.last().map(|parent| parent.ion_type())
938    }
939
940    fn depth(&self) -> usize {
941        self.parents.len()
942    }
943}
944
945#[cfg(test)]
946mod reader_tests {
947    use rstest::*;
948
949    use super::*;
950    use crate::raw_reader::RawStreamItem;
951    use crate::raw_symbol_token::{local_sid_token, text_token, RawSymbolToken};
952    use crate::result::{IonResult, Position};
953    use crate::stream_reader::IonReader;
954    use crate::text::non_blocking::raw_text_reader::RawTextReader;
955    use crate::text::text_value::{IntoRawAnnotations, TextValue};
956    use crate::types::{Decimal, Timestamp};
957    use crate::IonType;
958    use crate::RawStreamItem::Nothing;
959
960    fn next_type<T: AsRef<[u8]> + Expandable>(
961        reader: &mut RawTextReader<T>,
962        ion_type: IonType,
963        is_null: bool,
964    ) {
965        assert_eq!(
966            reader.next().unwrap(),
967            RawStreamItem::nullable_value(ion_type, is_null)
968        );
969    }
970
971    fn annotations_eq<I: IntoRawAnnotations>(reader: &mut RawTextReader<&str>, expected: I) {
972        let expected: Vec<RawSymbolToken> = expected.into_annotations();
973        let actual: Vec<RawSymbolToken> = reader
974            .annotations()
975            .map(|a| a.expect("annotation with unknown text"))
976            .collect();
977        assert_eq!(actual, expected);
978    }
979
#[test]
fn test_basic_incomplete() -> IonResult<()> {
    // A list that is missing its closing `]`.
    let partial_list = r#"
        [1, 2, 3
        "#;
    let mut text_reader = RawTextReader::new(partial_list.as_bytes().to_owned());
    next_type(&mut text_reader, IonType::List, false);
    text_reader.step_in()?;
    for expected_int in [1, 2] {
        next_type(&mut text_reader, IonType::Int, false);
        assert_eq!(text_reader.read_i64()?, expected_int);
    }
    match text_reader.next() {
        // The data runs out after the newline that follows `3`, so the error is reported at
        // the start of the following line: line index 2, column 0.
        Err(IonError::Incomplete {
            position:
                Position {
                    line_column: Some((line, column)),
                    ..
                },
            ..
        }) => {
            assert_eq!((line, column), (2, 0));
        }
        Err(e) => panic!("unexpected error when parsing partial data: {e}"),
        Ok(item) => panic!("unexpected successful parsing of partial data: {item:?}"),
    }
    // Once the closing bracket is supplied, the final element becomes readable.
    text_reader
        .append_bytes(b"]")
        .expect("Unable to append bytes");
    next_type(&mut text_reader, IonType::Int, false);
    assert_eq!(text_reader.read_i64()?, 3);
    Ok(())
}
1015
#[test]
fn test_utf8_incomplete() -> IonResult<()> {
    // A quoted string whose text ends with two multi-byte UTF-8 sequences.
    let source: &[u8] = &[
        0x22, 0x57, 0x65, 0x20, 0x4c, 0x6f, 0x76, 0x65, 0x20, 0x49, 0x6f, 0x6e, 0x21, 0xe2,
        0x9a, 0x9b, 0xef, 0xb8, 0x8f, 0x22,
    ];

    // Hand the reader everything except the last two bytes, so that the input stops just
    // short of the end of the UTF-8 data \xe29a9befb88f.
    let (initial, remainder) = source.split_at(18);
    let mut text_reader = RawTextReader::new(initial.to_owned());

    match text_reader.next() {
        Err(IonError::Incomplete {
            position:
                Position {
                    line_column: Some((line, column)),
                    ..
                },
            ..
        }) => {
            // No '\n' has been seen yet, so the line is still 0.
            assert_eq!(line, 0);
            // The failure is reported at the start of the multi-byte sequence.
            assert_eq!(column, 14);
        }
        Err(e) => panic!("unexpected error after partial utf-8 data: {e}"),
        Ok(item) => panic!("unexpected successful parsing of partial utf-8 data: {item:?}"),
    }
    // With the missing bytes supplied, the string parses.
    text_reader.append_bytes(remainder)?;
    next_type(&mut text_reader, IonType::String, false);
    Ok(())
}
1046
#[test]
fn test_skipping_containers() -> IonResult<()> {
    let input = r#"
            0 [1, 2, 3] (4 5) 6
        "#;
    let mut text_reader = RawTextReader::new(input);
    next_type(&mut text_reader, IonType::Int, false);
    assert_eq!(text_reader.read_i64()?, 0);

    // Enter the list, read only its first element, then leave early.
    next_type(&mut text_reader, IonType::List, false);
    text_reader.step_in()?;
    next_type(&mut text_reader, IonType::Int, false);
    assert_eq!(text_reader.read_i64()?, 1);
    text_reader.step_out()?;

    // Stepping out must have skipped the trailing `2, 3` of the list.
    next_type(&mut text_reader, IonType::SExp, false);

    // The s-expression is never entered; advancing skips over it entirely.
    next_type(&mut text_reader, IonType::Int, false);
    assert_eq!(text_reader.read_i64()?, 6);
    Ok(())
}
1068
#[test]
fn test_read_nested_containers() -> IonResult<()> {
    let input = r#"
            {
                foo: [
                    1,
                    [2, 3],
                    4
                ],
                bar: {
                    a: 5,
                    b: (true true true)
                }
            }
            11
        "#;
    let mut text_reader = RawTextReader::new(input);

    // Descend: struct -> 'foo' list -> nested list.
    next_type(&mut text_reader, IonType::Struct, false);
    text_reader.step_in()?;
    next_type(&mut text_reader, IonType::List, false);
    text_reader.step_in()?;
    next_type(&mut text_reader, IonType::Int, false);
    next_type(&mut text_reader, IonType::List, false);
    text_reader.step_in()?;
    next_type(&mut text_reader, IonType::Int, false);

    // Positioned on the '2' nested inside 'foo'; climb back up to the outer struct.
    text_reader.step_out()?;
    text_reader.step_out()?;

    // Descend: 'bar' struct -> 'b' s-expression.
    next_type(&mut text_reader, IonType::Struct, false);
    text_reader.step_in()?;
    next_type(&mut text_reader, IonType::Int, false);
    next_type(&mut text_reader, IonType::SExp, false);
    text_reader.step_in()?;
    next_type(&mut text_reader, IonType::Bool, false);
    next_type(&mut text_reader, IonType::Bool, false);

    // Positioned on the second 'true' in 'bar'/'b'; climb all the way to the top level.
    text_reader.step_out()?;
    text_reader.step_out()?;
    text_reader.step_out()?;

    next_type(&mut text_reader, IonType::Int, false);
    assert_eq!(text_reader.read_i64()?, 11);
    Ok(())
}
1112
#[test]
fn test_read_container_with_mixed_scalars_and_containers() -> IonResult<()> {
    let input = r#"
            {
                foo: 4,
                bar: {
                    a: 5,
                    b: (true true true)
                }
            }
            42
        "#;

    let mut text_reader = RawTextReader::new(input);
    next_type(&mut text_reader, IonType::Struct, false);
    text_reader.step_in()?;

    // A scalar field followed by a container field, each exposing its field name.
    next_type(&mut text_reader, IonType::Int, false);
    assert_eq!(text_reader.field_name()?, text_token("foo"));
    next_type(&mut text_reader, IonType::Struct, false);
    assert_eq!(text_reader.field_name()?, text_token("bar"));

    // Read only the first field of the nested struct, then leave it.
    text_reader.step_in()?;
    next_type(&mut text_reader, IonType::Int, false);
    assert_eq!(text_reader.read_i64()?, 5);
    text_reader.step_out()?;

    // The outer struct has nothing left after 'bar'.
    assert_eq!(text_reader.next()?, RawStreamItem::Nothing);
    text_reader.step_out()?;

    next_type(&mut text_reader, IonType::Int, false);
    assert_eq!(text_reader.read_i64()?, 42);
    Ok(())
}
1143
1144    #[rstest]
1145    #[case(" null ", TextValue::Null(IonType::Null))]
1146    #[case(" null.string ", TextValue::Null(IonType::String))]
1147    #[case(" true ", TextValue::Bool(true))]
1148    #[case(" false ", TextValue::Bool(false))]
1149    #[case(" 738 ", TextValue::Int(Int::I64(738)))]
1150    #[case(" 2.5e0 ", TextValue::Float(2.5))]
1151    #[case(" 2.5 ", TextValue::Decimal(Decimal::new(25, -1)))]
1152    #[case(" 2007-07-12T ", TextValue::Timestamp(Timestamp::with_ymd(2007, 7, 12).build().unwrap()))]
1153    #[case(" foo ", TextValue::Symbol(text_token("foo")))]
1154    #[case(" \"hi!\" ", TextValue::String("hi!".to_owned()))]
1155    #[case(" {{ZW5jb2RlZA==}} ", TextValue::Blob(Vec::from("encoded".as_bytes())))]
1156    #[case(" {{\"hello\"}} ", TextValue::Clob(Vec::from("hello".as_bytes())))]
1157    fn test_read_single_top_level_values(#[case] text: &str, #[case] expected_value: TextValue) {
1158        let reader = &mut RawTextReader::new(text);
1159        next_type(
1160            reader,
1161            expected_value.ion_type(),
1162            matches!(expected_value, TextValue::Null(_)),
1163        );
1164        // TODO: Redo (or remove?) this test. There's not an API that exposes the
1165        //       AnnotatedTextValue any more. We're directly accessing `current_value` as a hack.
1166        let actual_value = reader.current_value.clone();
1167        assert_eq!(actual_value.unwrap(), expected_value.without_annotations());
1168    }
1169
#[test]
fn test_text_read_multiple_top_level_values() -> IonResult<()> {
    let input = r#"
            null
            true
            5
            5e0
            5.5
            2021-09-25T
            '$ion_1_0' // A quoted symbol, not an IVM
            $ion_1_0   // An IVM, not a symbol
            foo
            "hello"
            {foo: bar}
            ["foo", "bar"]
            ('''foo''')
        "#;

    let mut text_reader = RawTextReader::new(input);

    // ===== SCALARS =====

    next_type(&mut text_reader, IonType::Null, true);

    next_type(&mut text_reader, IonType::Bool, false);
    assert!(text_reader.read_bool()?);

    next_type(&mut text_reader, IonType::Int, false);
    assert_eq!(text_reader.read_i64()?, 5);

    next_type(&mut text_reader, IonType::Float, false);
    assert_eq!(text_reader.read_f64()?, 5.0f64);

    next_type(&mut text_reader, IonType::Decimal, false);
    assert_eq!(text_reader.read_decimal()?, Decimal::new(55i32, -1i64));

    next_type(&mut text_reader, IonType::Timestamp, false);
    assert_eq!(
        text_reader.read_timestamp()?,
        Timestamp::with_ymd(2021, 9, 25).build().unwrap()
    );

    // The quoted form is an ordinary symbol...
    next_type(&mut text_reader, IonType::Symbol, false);
    assert_eq!(text_reader.read_symbol()?, text_token("$ion_1_0"));

    // ...while the unquoted form is a mid-stream Ion Version Marker.
    assert_eq!(text_reader.next()?, RawStreamItem::VersionMarker(1, 0));

    next_type(&mut text_reader, IonType::Symbol, false);
    assert_eq!(text_reader.read_symbol()?, text_token("foo"));

    next_type(&mut text_reader, IonType::String, false);
    assert_eq!(text_reader.read_string()?, "hello".to_string());

    // ===== CONTAINERS =====

    // The struct {foo: bar}
    next_type(&mut text_reader, IonType::Struct, false);
    text_reader.step_in()?;
    next_type(&mut text_reader, IonType::Symbol, false);
    assert_eq!(text_reader.read_symbol()?, text_token("bar"));
    assert_eq!(text_reader.field_name()?, text_token("foo"));

    assert_eq!(text_reader.next()?, Nothing);
    text_reader.step_out()?;

    // The list ["foo", "bar"]
    next_type(&mut text_reader, IonType::List, false);
    text_reader.step_in()?;
    next_type(&mut text_reader, IonType::String, false);
    assert_eq!(text_reader.read_string()?, String::from("foo"));
    next_type(&mut text_reader, IonType::String, false);
    assert_eq!(text_reader.read_string()?, String::from("bar"));
    assert_eq!(text_reader.next()?, Nothing);
    text_reader.step_out()?;

    // The s-expression ('''foo''')
    next_type(&mut text_reader, IonType::SExp, false);
    text_reader.step_in()?;
    next_type(&mut text_reader, IonType::String, false);
    assert_eq!(text_reader.read_string()?, String::from("foo"));
    assert_eq!(text_reader.next()?, Nothing);
    text_reader.step_out()?;

    // The top level is now exhausted.
    assert_eq!(text_reader.next()?, Nothing);

    // Asking again still yields `Nothing`.
    assert_eq!(text_reader.next()?, Nothing);

    Ok(())
}
1259
// Verifies that block comments and trailing line comments between top-level values are
// skipped, and that the annotation on the final value survives the surrounding comments.
#[test]
fn test_read_multiple_top_level_values_with_comments() -> IonResult<()> {
    let ion_data = r#"
            /*
                Arrokoth is a trans-Neptunian object in the Kuiper belt.
                It is a contact binary composed of two plenetesimals joined
                along their major axes.
            */
            "(486958) 2014 MU69" // Original designation
            2014-06-26T // Date of discovery
            km::36 // width
        "#;

    let reader = &mut RawTextReader::new(ion_data);

    next_type(reader, IonType::String, false);
    assert_eq!(reader.read_string()?, String::from("(486958) 2014 MU69"));

    next_type(reader, IonType::Timestamp, false);
    assert_eq!(
        reader.read_timestamp()?,
        Timestamp::with_ymd(2014, 6, 26).build()?
    );

    next_type(reader, IonType::Int, false);
    // The `km` annotation must be reported on the value it precedes.
    annotations_eq(reader, ["km"]);
    assert_eq!(reader.read_i64()?, 36);
    Ok(())
}
1288
#[test]
fn test_text_read_multiple_annotated_top_level_values() -> IonResult<()> {
    let input = r#"
            mercury::null
            venus::'earth'::true
            $17::mars::5
            jupiter::5e0
            'saturn'::5.5
            $100::$200::$300::2021-09-25T
            'uranus'::foo
            neptune::"hello"
            $55::{foo: $21::bar}
            pluto::[1, $77::2, 3]
            haumea::makemake::eris::ceres::(++ -- &&&&&)
        "#;

    let mut text_reader = RawTextReader::new(input);

    // ===== SCALARS =====

    next_type(&mut text_reader, IonType::Null, true);
    annotations_eq(&mut text_reader, ["mercury"]);

    next_type(&mut text_reader, IonType::Bool, false);
    assert!(text_reader.read_bool()?);
    annotations_eq(&mut text_reader, ["venus", "earth"]);

    // Annotations may mix symbol IDs and text.
    next_type(&mut text_reader, IonType::Int, false);
    assert_eq!(text_reader.read_i64()?, 5);
    annotations_eq(
        &mut text_reader,
        &[local_sid_token(17), text_token("mars")],
    );

    next_type(&mut text_reader, IonType::Float, false);
    assert_eq!(text_reader.read_f64()?, 5.0f64);
    annotations_eq(&mut text_reader, ["jupiter"]);

    next_type(&mut text_reader, IonType::Decimal, false);
    assert_eq!(text_reader.read_decimal()?, Decimal::new(55i32, -1i64));
    annotations_eq(&mut text_reader, ["saturn"]);

    next_type(&mut text_reader, IonType::Timestamp, false);
    assert_eq!(
        text_reader.read_timestamp()?,
        Timestamp::with_ymd(2021, 9, 25).build().unwrap()
    );
    annotations_eq(&mut text_reader, [100, 200, 300]);

    next_type(&mut text_reader, IonType::Symbol, false);
    assert_eq!(text_reader.read_symbol()?, text_token("foo"));
    annotations_eq(&mut text_reader, ["uranus"]);

    next_type(&mut text_reader, IonType::String, false);
    assert_eq!(text_reader.read_string()?, "hello".to_string());
    annotations_eq(&mut text_reader, ["neptune"]);

    // ===== CONTAINERS =====

    // The struct $55::{foo: $21::bar}
    next_type(&mut text_reader, IonType::Struct, false);
    annotations_eq(&mut text_reader, 55);
    text_reader.step_in()?;
    next_type(&mut text_reader, IonType::Symbol, false);
    assert_eq!(text_reader.field_name()?, text_token("foo"));
    annotations_eq(&mut text_reader, 21);
    assert_eq!(text_reader.read_symbol()?, text_token("bar"));
    assert_eq!(text_reader.next()?, Nothing);
    text_reader.step_out()?;

    // The list pluto::[1, $77::2, 3]; only the middle element is annotated.
    next_type(&mut text_reader, IonType::List, false);
    text_reader.step_in()?;
    next_type(&mut text_reader, IonType::Int, false);
    assert_eq!(text_reader.number_of_annotations(), 0);
    assert_eq!(text_reader.read_i64()?, 1);
    next_type(&mut text_reader, IonType::Int, false);
    annotations_eq(&mut text_reader, [77]);
    assert_eq!(text_reader.read_i64()?, 2);
    next_type(&mut text_reader, IonType::Int, false);
    assert_eq!(text_reader.number_of_annotations(), 0);
    assert_eq!(text_reader.read_i64()?, 3);
    assert_eq!(text_reader.next()?, Nothing);
    text_reader.step_out()?;

    // The s-expression haumea::makemake::eris::ceres::(++ -- &&&&&)
    next_type(&mut text_reader, IonType::SExp, false);
    annotations_eq(&mut text_reader, ["haumea", "makemake", "eris", "ceres"]);
    text_reader.step_in()?;
    next_type(&mut text_reader, IonType::Symbol, false);
    assert_eq!(text_reader.read_symbol()?, text_token("++"));
    next_type(&mut text_reader, IonType::Symbol, false);
    assert_eq!(text_reader.read_symbol()?, text_token("--"));
    next_type(&mut text_reader, IonType::Symbol, false);
    assert_eq!(text_reader.read_symbol()?, text_token("&&&&&"));
    assert_eq!(text_reader.next()?, Nothing);
    text_reader.step_out()?;

    // The top level is now exhausted.
    assert_eq!(text_reader.next()?, Nothing);

    // Asking again still yields `Nothing`.
    assert_eq!(text_reader.next()?, Nothing);

    Ok(())
}
1390
#[test]
fn structs_trailing_comma() -> IonResult<()> {
    let input = br#"
            // Structs with last field with/without trailing comma
            (
                {a:1, b:2,}     // with trailing comma
                {a:1, b:2 }     // without trailing comma
            )
        "#;
    let mut text_reader = RawTextReader::new(&input[..]);
    assert_eq!(text_reader.next()?, RawStreamItem::Value(IonType::SExp));
    text_reader.step_in()?;

    // First struct: its last field is followed by a comma.
    assert_eq!(text_reader.next()?, RawStreamItem::Value(IonType::Struct));
    text_reader.step_in()?;
    assert_eq!(text_reader.next()?, RawStreamItem::Value(IonType::Int));
    assert_eq!(
        text_reader.field_name()?,
        RawSymbolToken::Text("a".to_string())
    );
    assert_eq!(text_reader.read_i64()?, 1);
    assert_eq!(text_reader.next()?, RawStreamItem::Value(IonType::Int));
    assert_eq!(
        text_reader.field_name()?,
        RawSymbolToken::Text("b".to_string())
    );
    assert_eq!(text_reader.read_i64()?, 2);
    text_reader.step_out()?;

    // Second struct: no trailing comma. Reaching it shows the first struct was closed cleanly.
    assert_eq!(text_reader.next()?, RawStreamItem::Value(IonType::Struct));
    text_reader.step_out()?;
    Ok(())
}
1418
#[test]
fn annotation_false() -> IonResult<()> {
    // The unquoted boolean keyword 'false' is not a legal annotation, so reading must fail.
    let input = br#"
            false::23
        "#;
    let mut text_reader = RawTextReader::new(&input[..]);
    let result = text_reader.next();
    println!("{result:?}");
    assert!(result.is_err());
    Ok(())
}
1431
#[test]
fn annotation_nan() -> IonResult<()> {
    // The unquoted float keyword 'nan' is not a legal annotation, so reading must fail.
    let input = br#"
            nan::23
        "#;
    let mut text_reader = RawTextReader::new(&input[..]);
    let result = text_reader.next();
    println!("{result:?}");
    assert!(result.is_err());
    Ok(())
}
1444
#[test]
// A field name and its value must be consumed as one transaction: if parsing the value fails,
// the reader must not have moved past the field name. This only applies to scalar values;
// reading the start of a container ends the transaction.
fn rollback_field_name() -> IonResult<()> {
    // The reader starts with only the first 10 bytes (`{field: 10`). The value is ambiguous
    // at that point, so reading it produces an incomplete error.
    let source = r#"{field: 10}"#;
    let (initial, remainder) = source.split_at(10);

    let mut text_reader = RawTextReader::new(initial.as_bytes().to_owned());
    let result = text_reader.next()?;
    assert_eq!(result, RawStreamItem::Value(IonType::Struct));

    text_reader.step_in()?;
    assert_eq!(text_reader.depth(), 1);

    match text_reader.next() {
        Err(IonError::Incomplete { .. }) => {
            // The failed read must not have left a field name behind.
            assert!(text_reader.field_name().is_err());
            // Supply the rest of the input, mark the stream complete, and retry.
            let mut rest = remainder.as_bytes();
            text_reader.read_from(&mut rest, 3)?;
            text_reader.stream_complete();
            text_reader.next()?;
        }
        other => panic!("unexpected return from next: {other:?}"),
    }

    // The retry yields the value together with its field name.
    assert_eq!(text_reader.ion_type().unwrap(), IonType::Int);
    assert_eq!(text_reader.field_name()?.text(), Some("field"));

    Ok(())
}
1477
1478    #[test]
1479    // Test the happy path of reaching an IncompleteText error while stepping out of a container.
1480    // The happy part, is that we're just going to re-call `step_out` after feeding more data into
1481    // the buffer.
1482    fn resume_step_out() -> IonResult<()> {
1483        // The spacing on this is important, since we need to know where (approximately) the
1484        // incomplete error occurs. We provide the first 47 characters, which is everything in the
1485        // string up to, and including, the 'w' in 'somewhere'.
1486        //
1487        //              |    Initial Data Provided to the reader       | Added after Incomplete |
1488        //              [..............................................][.......................]
1489        let source = r#"{field:{another_field:{foo:"We should fail somewhere in this string.."}}}"#;
1490
1491        let mut reader = RawTextReader::new(source[..47].as_bytes().to_owned());
1492        let result = reader.next()?;
1493        assert_eq!(result, RawStreamItem::Value(IonType::Struct));
1494
1495        reader.step_in()?;
1496        assert_eq!(reader.depth(), 1);
1497
1498        reader.next()?;
1499
1500        reader.step_in()?;
1501        assert_eq!(reader.depth(), 2);
1502
1503        match reader.step_out() {
1504            Err(IonError::Incomplete { .. }) => {
1505                assert_eq!(reader.depth(), 3); // we should fail at depth 3, within the foo field.
1506                reader.read_from(&mut source[47..].as_bytes(), 512)?;
1507
1508                // Resume our initial step_out, this should bring us back to depth 1.
1509                reader.step_out()?;
1510                assert_eq!(reader.depth(), 1);
1511            }
1512            other => panic!("Expected to get an incomplete error: {other:?}"),
1513        }
1514        // Step out to the root of the document.
1515        reader.step_out()?;
1516        assert_eq!(0, reader.depth()); // we should be at the root level.
1517        Ok(())
1518    }
1519
1520    // Test stepping out of an inner struct which has trailing fields and ensure all fields are
1521    // exhausted.
1522    #[test]
1523    fn resume_step_out_exhaustion() -> IonResult<()> {
1524        // The spacing on this is important, since we need to know where (approximately) the
1525        // incomplete error occurs. We provide the first 47 characters, which is everything in the
1526        // string up to, and including, the 'w' in 'somewhere'.
1527        //
1528        //              |    Initial Data Provided to the reader       | Added after Incomplete |
1529        //              [..............................................][.......................]
1530        let source = r#"{field:{another_field:{foo:"We should fail somewhere in this string..", number: 3}}, other_field: 21}"#;
1531
1532        let mut reader = RawTextReader::new(source[..47].as_bytes().to_owned());
1533
1534        let result = reader.next()?;
1535        assert_eq!(result, RawStreamItem::Value(IonType::Struct));
1536
1537        // Stepping into the top level struct..
1538        reader.step_in()?;
1539        assert_eq!(reader.depth(), 1);
1540
1541        reader.next()?;
1542        assert_eq!(reader.field_name()?.text(), Some("field"));
1543
1544        // Stepping into the `field` struct.
1545        reader.step_in()?;
1546        assert_eq!(reader.depth(), 2);
1547
1548        reader.next()?;
1549        assert_eq!(reader.field_name()?.text(), Some("another_field"));
1550
1551        // Step out of `field`.. after this we should be at `other_Field`.
1552        match reader.step_out() {
1553            Err(IonError::Incomplete { .. }) => {
1554                assert_eq!(reader.depth(), 3); // we should fail at depth 3, within the foo field.
1555                reader.read_from(&mut source[47..].as_bytes(), 512)?;
1556                reader.stream_complete();
1557
1558                // Resume our initial step_out, this should bring us back to depth 1.
1559                reader.step_out()?;
1560                assert_eq!(reader.depth(), 1);
1561            }
1562            other => panic!("Expected to get an incomplete error: {other:?}"),
1563        }
1564        // We've stepped out of the inner structs, and we should be at the last field in the outter
1565        // most container.
1566        let result = reader.next()?;
1567        assert_eq!(result, RawStreamItem::Value(IonType::Int));
1568        assert_eq!(reader.field_name()?.text(), Some("other_field"));
1569        assert_eq!(reader.read_i64()?, 21);
1570
1571        // Step out to the root of the document.
1572        reader.step_out()?;
1573        assert_eq!(0, reader.depth()); // we should be at the root level.
1574        Ok(())
1575    }
1576
1577    #[test]
1578    fn resume_step_out_exhaustion2() -> IonResult<()> {
1579        // For this source, we're going to initialize the reader with a partial source, from the
1580        // first byte to the start of the line with "quux". The last character of the buffer will
1581        // be the end-of-line, from the "baz" definition (offset 62).
1582        let source = r#"{
1583            foo: 1,
1584            bar: 2,
1585            baz: 3,
1586            quux: 4,
1587        }"#;
1588
1589        // We first read up to the start of the line "quux" is on.
1590        let mut reader = RawTextReader::new(source[..62].as_bytes().to_owned());
1591
1592        // Advance the reader, so that we can step_in to the struct.
1593        let result = reader.next()?;
1594        assert_eq!(result, RawStreamItem::Value(IonType::Struct));
1595        reader.step_in()?; // Step into the top level struct.
1596
1597        // Read 'foo'..
1598        let result = reader.next()?;
1599        assert_eq!(result, RawStreamItem::Value(IonType::Int));
1600        assert_eq!(reader.field_name()?.text(), Some("foo"));
1601        assert_eq!(reader.read_i64()?, 1);
1602
1603        // Read "bar"
1604        let result = reader.next()?;
1605        assert_eq!(result, RawStreamItem::Value(IonType::Int));
1606        assert_eq!(reader.field_name()?.text(), Some("bar"));
1607        assert_eq!(reader.read_i64()?, 2);
1608
1609        // We have provided up to the "quux" definition in our buffer, and have read up to and
1610        // including the "bar" definition. Now we step_out, which should cause an incomplete error.
1611        match reader.step_out() {
1612            Err(IonError::Incomplete { .. }) => {
1613                // After receiving the incomplete error, the reader should not let us doing
1614                // anything other than completing the step_out. If we call next here, we should
1615                // still get an IncompleteText error.
1616                assert!(matches!(reader.next(), Err(IonError::Incomplete { .. })));
1617
1618                // After the incomplete error, we'll provide the rest of the buffer which should
1619                // let us complete our step_out.
1620                reader.read_from(&mut source[62..].as_bytes(), 512)?;
1621                // Since we've provided the entirety of the source, we'll mark the stream complete.
1622                reader.stream_complete();
1623                // Stepping out should succeed, and leave us back at the top level.
1624                reader.step_out()?;
1625                assert_eq!(reader.depth(), 0);
1626            }
1627            other => panic!("Expected to get an incomplete error: {other:?}"),
1628        }
1629        // We have stepped out, and should now be at the end of the stream.
1630        let result = reader.next()?;
1631        assert_eq!(result, RawStreamItem::Nothing);
1632
1633        Ok(())
1634    }
1635
1636    #[test]
1637    fn resume_failed_step_out_with_next() -> IonResult<()> {
1638        // This test is the same as resume_Step_out_exhaustion2, but we're going to make sure that
1639        // calling other functions (like next) after a failed step_out, continues the step_out and
1640        // avances like we'd expect.
1641        let source = r#"{
1642            foo: 1,
1643            bar: 2,
1644            baz: 3,
1645            quux: 4,
1646        }"#;
1647
1648        // We first read up to the start of the line "quux" is on.
1649        let mut reader = RawTextReader::new(source[..62].as_bytes().to_owned());
1650
1651        // Advance the reader, so that we can step_in to the struct.
1652        let result = reader.next()?;
1653        assert_eq!(result, RawStreamItem::Value(IonType::Struct));
1654        reader.step_in()?; // Step into the top level struct.
1655
1656        // Read 'foo'..
1657        let result = reader.next()?;
1658        assert_eq!(result, RawStreamItem::Value(IonType::Int));
1659        assert_eq!(reader.field_name()?.text(), Some("foo"));
1660        assert_eq!(reader.read_i64()?, 1);
1661
1662        // Read "bar"
1663        let result = reader.next()?;
1664        assert_eq!(result, RawStreamItem::Value(IonType::Int));
1665        assert_eq!(reader.field_name()?.text(), Some("bar"));
1666        assert_eq!(reader.read_i64()?, 2);
1667
1668        // We have provided up to the "quux" definition in our buffer, and have read up to and
1669        // including the "bar" definition. Now we step_out, which should cause an incomplete error.
1670        match reader.step_out() {
1671            Err(IonError::Incomplete { .. }) => {
1672                // After receiving the incomplete error, the reader should not let us do anything
1673                // that doesn't result in completing the failed step_out. If we call next now, we
1674                // should get another IncompleteText error.
1675                assert!(matches!(reader.next(), Err(IonError::Incomplete { .. })));
1676
1677                // After the incomplete error, we'll provide the rest of the buffer which should
1678                // let us complete our step_out.
1679                reader.read_from(&mut source[62..].as_bytes(), 512)?;
1680                // Since we've provided the entirety of the source, we'll mark the stream complete.
1681                reader.stream_complete();
1682                // Calling next will continue the step_out, and follow up by moving to the next
1683                // value.
1684                reader.next()?;
1685                assert_eq!(reader.depth(), 0);
1686            }
1687            other => panic!("Expected to get an incomplete error: {other:?}"),
1688        }
1689        // At this point, we should have successfully stepped out, and advanced with the `next`,
1690        // reaching the end of the document.
1691        //let result = reader.next()?;
1692        assert_eq!(reader.current(), RawStreamItem::Nothing);
1693
1694        Ok(())
1695    }
1696
1697    #[test]
1698    fn resume_step_out_from_failed_next() -> IonResult<()> {
1699        // This test is the same as resume_step_out_exhaustion2, but is a bit more simple. Rather
1700        // than stepping out, we're going to get to the struct, and then call `next`. This should
1701        // bring us to the end of the document, but we'll fail part way through and the user will
1702        // expect to have to call `next` again.
1703        let source = r#"{
1704            foo: 1,
1705            bar: 2,
1706            baz: 3,
1707            quux: 4,
1708        }"#;
1709
1710        // We first read up to the start of the line "quux" is on.
1711        let mut reader = RawTextReader::new(source[..62].as_bytes().to_owned());
1712
1713        // Advance the reader, so that we can step_in to the struct.
1714        let mut result = reader.next()?;
1715        assert_eq!(result, RawStreamItem::Value(IonType::Struct));
1716
1717        match reader.next() {
1718            Err(IonError::Incomplete { .. }) => {
1719                reader.read_from(&mut source[62..].as_bytes(), 512)?;
1720                reader.stream_complete();
1721                result = reader.next()?;
1722            }
1723            other => panic!("unexpected result from next: {other:?}"),
1724        }
1725
1726        // At this point, we should have successfully stepped out, and advanced with the `next`,
1727        // reaching the end of the document.
1728        //let result = reader.next()?;
1729        assert_eq!(result, RawStreamItem::Nothing);
1730
1731        Ok(())
1732    }
1733
1734    #[test]
1735    fn failed_step_out_no_values() -> IonResult<()> {
1736        // This test is just to ensure that once we fail, and need to continue a step_out, the
1737        // reader doesn't provide any functionality to read values, whether they are previous
1738        // values, or parsing new.
1739        let source = r#"{
1740            foo: 1,
1741            bar: 2,
1742            baz: 3,
1743            quux: 4,
1744        }"#;
1745
1746        // We first read up to the start of the line "quux" is on.
1747        let mut reader = RawTextReader::new(source[..62].as_bytes().to_owned());
1748
1749        // Advance the reader, so that we can step_in to the struct.
1750        let result = reader.next()?;
1751        assert_eq!(result, RawStreamItem::Value(IonType::Struct));
1752        reader.step_in()?; // Step in, so we can read a value first..
1753
1754        let mut result = reader.next()?;
1755        assert_eq!(result, RawStreamItem::Value(IonType::Int));
1756        assert_eq!(reader.field_name()?.text(), Some("foo"));
1757
1758        match reader.step_out() {
1759            Err(IonError::Incomplete { .. }) => {
1760                // We received the incomplete, and now we want to make sure that no previously
1761                // parsed data is available, and that the reader won't try to start parsing more.
1762                match reader.read_i64() {
1763                    Err(IonError::IllegalOperation { .. }) => (),
1764                    other => panic!("unexpected result from read_i64: {other:?}"),
1765                }
1766
1767                reader.read_from(&mut source[62..].as_bytes(), 512)?;
1768                reader.stream_complete();
1769                result = reader.next()?;
1770            }
1771            other => panic!("unexpected result from next: {other:?}"),
1772        }
1773        assert_eq!(result, RawStreamItem::Nothing);
1774
1775        Ok(())
1776    }
1777
1778    #[test]
1779    fn generate_incomplete_on_truncated_escape() -> IonResult<()> {
1780        let source = "\"123456\\u269b\"";
1781        //                       ^-- First read stops here. (offset 9)
1782        let mut reader = RawTextReader::new(source.as_bytes()[..10].to_owned());
1783
1784        match reader.next() {
1785            Err(IonError::Incomplete {
1786                position:
1787                    Position {
1788                        line_column: Some((line, column)),
1789                        ..
1790                    },
1791                ..
1792            }) => {
1793                assert_eq!(line, 0); // Line is still 0 since we haven't actually seen a '\n' yet.
1794                assert_eq!(column, 0); // start of the string; the value being parsed.
1795            }
1796            Err(e) => panic!("unexpected error after partial escaped sequence: {e}"),
1797            Ok(item) => {
1798                panic!("unexpected successful parsing of partial escaped sequence data: {item:?}")
1799            }
1800        }
1801        reader.append_bytes(source[10..].as_bytes())?;
1802        next_type(&mut reader, IonType::String, false);
1803        Ok(())
1804    }
1805}