ion_rs/element/
reader.rs

1// Copyright Amazon.com, Inc. or its affiliates.
2
3//! Provides APIs to read Ion data into [Element] from different sources such
4//! as slices or files.
5
6use crate::element::{Annotations, Element, Sequence, Struct, Value};
7use crate::result::{decoding_error, IonResult};
8use crate::{IonReader, StreamItem, Symbol};
9
10/// Reads Ion data into [`Element`] instances.
11///
12/// This trait is automatically implemented by all Ion reader implementations that operate
13/// at the highest layer of abstraction, sometimes called the 'user' layer.
14pub trait ElementReader {
15    type ElementIterator<'a>: Iterator<Item = IonResult<Element>>
16    where
17        Self: 'a;
18
19    /// Recursively materializes the next Ion value, returning it as an `Ok(Element)`.
20    /// If there is no more data left to be read, returns `Ok(None)`.
21    /// If an error occurs while the data is being read, returns `Err(IonError)`.
22    fn read_next_element(&mut self) -> IonResult<Option<Element>>;
23
24    /// Returns an iterator over the [Element]s in the data stream.
25    fn elements(&mut self) -> Self::ElementIterator<'_>;
26
27    /// Like [Self::read_next_element], this method reads the next Ion value in the input stream,
28    /// returning it as an `Ok(Element)`. However, it also requires that the stream contain exactly
29    /// one value.
30    ///
31    /// If the stream's data is valid and it contains one value, returns `Ok(Element)`.
32    /// If the stream's data is invalid or the stream does not contain exactly one value,
33    /// returns `Err(IonError)`.
34    fn read_one_element(&mut self) -> IonResult<Element> {
35        let mut iter = self.elements();
36        let only_element = match iter.next() {
37            Some(Ok(element)) => element,
38            Some(Err(e)) => return Err(e),
39            None => return decoding_error("expected 1 value, found 0"),
40        };
41        // See if there is a second, unexpected value.
42        match iter.next() {
43            Some(Ok(element)) => {
44                return decoding_error(format!(
45                    "found more than one value; second value: {}",
46                    element
47                ))
48            }
49            Some(Err(e)) => return decoding_error(format!("error after expected value: {}", e)),
50            None => {}
51        };
52        Ok(only_element)
53    }
54
55    /// Reads all of the values in the input stream, materializing each into an [Element] and
56    /// returning the complete sequence as a `Vec<Element>`.
57    ///
58    /// If an error occurs while reading, returns `Err(IonError)`.
59    fn read_all_elements(&mut self) -> IonResult<Vec<Element>> {
60        self.elements().collect()
61    }
62}
63
64impl<R> ElementReader for R
65where
66    R: IonReader<Item = StreamItem, Symbol = Symbol> + ?Sized,
67{
68    type ElementIterator<'a> = ElementIterator<'a, R> where R: 'a;
69
70    fn read_next_element(&mut self) -> IonResult<Option<Element>> {
71        ElementLoader::for_reader(self).materialize_next()
72    }
73
74    fn elements(&mut self) -> ElementIterator<R> {
75        ElementIterator { reader: self }
76    }
77}
78
79/// Holds a reference to a given [ElementReader] implementation and yields one [Element] at a time
80/// until the stream is exhausted or invalid data is encountered.
81pub struct ElementIterator<'a, R: ElementReader + ?Sized> {
82    reader: &'a mut R,
83}
84
85impl<'a, R: ElementReader + ?Sized> Iterator for ElementIterator<'a, R> {
86    type Item = IonResult<Element>;
87
88    fn next(&mut self) -> Option<Self::Item> {
89        match self.reader.read_next_element() {
90            Ok(Some(element)) => Some(Ok(element)),
91            Ok(None) => None,
92            Err(error) => Some(Err(error)),
93        }
94    }
95}
96
/// Internal helper that mutably borrows a reader and recursively materializes the next value
/// in that reader's input, surfacing any error encountered along the way.
struct ElementLoader<'a, R>
where
    R: ?Sized,
{
    /// The reader whose current position determines what gets materialized.
    reader: &'a mut R,
}
102
103impl<'a, R: IonReader<Item = StreamItem, Symbol = Symbol> + ?Sized> ElementLoader<'a, R> {
104    pub(crate) fn for_reader(reader: &mut R) -> ElementLoader<R> {
105        ElementLoader { reader }
106    }
107
108    /// Advances the reader to the next value in the stream and uses [Self::materialize_current]
109    /// to materialize it.
110    pub(crate) fn materialize_next(&mut self) -> IonResult<Option<Element>> {
111        // Advance the reader to the next value
112        let _ = self.reader.next()?;
113        self.materialize_current()
114    }
115
116    /// Recursively materialize the reader's current Ion value and returns it as `Ok(Some(value))`.
117    /// If there are no more values at this level, returns `Ok(None)`.
118    /// If an error occurs while materializing the value, returns an `Err`.
119    /// Calling this method advances the reader and consumes the current value.
120    fn materialize_current(&mut self) -> IonResult<Option<Element>> {
121        // Collect this item's annotations into a Vec. We have to do this before materializing the
122        // value itself because materializing a collection requires advancing the reader further.
123        let mut annotations = Vec::new();
124        // Current API limitations require `self.reader.annotations()` to heap allocate its
125        // iterator even if there aren't annotations. `self.reader.has_annotations()` is trivial
126        // and allows us to skip the heap allocation in the common case.
127        if self.reader.has_annotations() {
128            for annotation in self.reader.annotations() {
129                annotations.push(annotation?);
130            }
131        }
132
133        let value = match self.reader.current() {
134            // No more values at this level of the stream
135            StreamItem::Nothing => return Ok(None),
136            // This is a typed null
137            StreamItem::Null(ion_type) => Value::Null(ion_type),
138            // This is a non-null value
139            StreamItem::Value(ion_type) => {
140                use crate::IonType::*;
141                match ion_type {
142                    Null => unreachable!("non-null value had IonType::Null"),
143                    Bool => Value::Bool(self.reader.read_bool()?),
144                    Int => Value::Int(self.reader.read_int()?),
145                    Float => Value::Float(self.reader.read_f64()?),
146                    Decimal => Value::Decimal(self.reader.read_decimal()?),
147                    Timestamp => Value::Timestamp(self.reader.read_timestamp()?),
148                    Symbol => Value::Symbol(self.reader.read_symbol()?),
149                    String => Value::String(self.reader.read_string()?),
150                    Clob => Value::Clob(self.reader.read_clob()?.into()),
151                    Blob => Value::Blob(self.reader.read_blob()?.into()),
152                    // It's a collection; recursively materialize all of this value's children
153                    List => Value::List(self.materialize_sequence()?),
154                    SExp => Value::SExp(self.materialize_sequence()?),
155                    Struct => Value::Struct(self.materialize_struct()?),
156                }
157            }
158        };
159        Ok(Some(Element::new(Annotations::new(annotations), value)))
160    }
161
162    /// Steps into the current sequence and materializes each of its children to construct
163    /// an [`Vec<Element>`]. When all of the the children have been materialized, steps out.
164    /// The reader MUST be positioned over a list or s-expression when this is called.
165    fn materialize_sequence(&mut self) -> IonResult<Sequence> {
166        let mut child_elements = Vec::new();
167        self.reader.step_in()?;
168        while let Some(element) = self.materialize_next()? {
169            child_elements.push(element);
170        }
171        self.reader.step_out()?;
172        Ok(child_elements.into())
173    }
174
175    /// Steps into the current struct and materializes each of its fields to construct
176    /// an [`Struct`]. When all of the the fields have been materialized, steps out.
177    /// The reader MUST be positioned over a struct when this is called.
178    fn materialize_struct(&mut self) -> IonResult<Struct> {
179        let mut child_elements = Vec::new();
180        self.reader.step_in()?;
181        while let StreamItem::Value(_) | StreamItem::Null(_) = self.reader.next()? {
182            let field_name = self.reader.field_name()?;
183            let value = self
184                .materialize_current()?
185                .expect("materialize_current() returned None for user data");
186            child_elements.push((field_name, value));
187        }
188        self.reader.step_out()?;
189        Ok(Struct::from_iter(child_elements.into_iter()))
190    }
191}
192
// Tests that read Ion input (text and binary) into `Element`s and compare the result against
// hand-built expected values.
#[cfg(test)]
mod reader_tests {
    use super::*;
    use crate::element::builders::{ion_list, ion_sexp, ion_struct};
    use crate::element::Value::*;
    use crate::element::{Element, IntoAnnotatedElement};
    use crate::ion_data::IonEq;
    use crate::types::{Int, Timestamp as TS};
    use crate::{IonType, Symbol};
    use bigdecimal::BigDecimal;
    use num_bigint::BigInt;
    use rstest::*;
    use std::str::FromStr;

    // Parameterised test: each `#[case]` supplies the raw input bytes followed by the
    // `Vec<Element>` that reading those bytes is expected to produce.
    #[rstest]
    // One null of every Ion type, starting with the untyped `null`.
    #[case::nulls(
        br#"
           null
           null.bool
           null.int
           null.float
           null.decimal
           null.timestamp
           null.symbol
           null.string
           null.clob
           null.blob
           null.list
           null.sexp
           null.struct
        "#,
        vec![
            Null(IonType::Null),
            Null(IonType::Bool),
            Null(IonType::Int),
            Null(IonType::Float),
            Null(IonType::Decimal),
            Null(IonType::Timestamp),
            Null(IonType::Symbol),
            Null(IonType::String),
            Null(IonType::Clob),
            Null(IonType::Blob),
            Null(IonType::List),
            Null(IonType::SExp),
            Null(IonType::Struct),
        ].into_iter().map(|v| v.into()).collect(),
    )]
    // Integers of increasing magnitude: the first seven expected values are built as
    // `Int::I64`, the remaining four as `Int::BigInt` parsed from decimal strings.
    #[case::ints(
        br#"
            0
            -65536 65535
            -4294967296 4294967295
            -9007199254740992 9007199254740991
            -18446744073709551616 18446744073709551615
            -79228162514264337593543950336 79228162514264337593543950335
        "#,
        vec![
            0,
            -65536, 65535,
            -4294967296, 4294967295,
            -9007199254740992, 9007199254740991,
        ].into_iter().map(Int::I64).chain(
            vec![
                "-18446744073709551616", "18446744073709551615",
                "-79228162514264337593543950336", "79228162514264337593543950335",
            ].into_iter()
            .map(|v| Int::BigInt(BigInt::parse_bytes(v.as_bytes(), 10).unwrap()))
        ).map(|ai| Int(ai).into()).collect(),
    )]
    // Binary input. NOTE(review): `E0 01 00 EA` looks like the Ion 1.0 binary version marker
    // and `0x28` like a positive-int descriptor followed by 8 magnitude bytes — confirm
    // against the Ion binary encoding spec.
    #[case::int64_threshold_as_big_int(
        &[0xE0, 0x01, 0x00, 0xEA, 0x28, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF],
        vec![
            "18446744073709551615",
        ].into_iter()
        .map(|v| Int::BigInt(BigInt::parse_bytes(v.as_bytes(), 10).unwrap())).map(|ai| Int(ai).into()).collect(),
    )]
    // Binary input. NOTE(review): `0x38` looks like a negative-int descriptor followed by 8
    // magnitude bytes — confirm. The expected value is built as `Int::BigInt` even though the
    // case name says `int64`; the assertion uses `ion_eq`, so the integer representation is
    // presumably not what is being compared — confirm.
    #[case::int64_threshold_as_int64(
        &[0xE0, 0x01, 0x00, 0xEA, 0x38, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00],
        vec![
            "-9223372036854775808",
        ].into_iter()
        .map(|v| Int::BigInt(BigInt::parse_bytes(v.as_bytes(), 10).unwrap())).map(|ai| Int(ai).into()).collect(),
    )]
    // NOTE(review): `nan` is expected to match `f64::NAN`; this presumably relies on `ion_eq`
    // treating NaN as equal to NaN — confirm.
    #[case::floats(
        br#"
           1e0 +inf -inf nan
        "#,
        vec![
            1f64, f64::INFINITY, f64::NEG_INFINITY, f64::NAN
        ].into_iter().map(|v| Float(v).into()).collect(),
    )]
    // Ion text writes the decimal exponent with `d`; the expected values are parsed by
    // `BigDecimal::from_str` from the same numbers written with `e`.
    #[case::decimals(
        br#"
            1d0 100d10 -2.1234567d-100
        "#,
        vec![
            "1e0", "100e10", "-2.1234567e-100",
        ].into_iter().map(|s| Decimal(BigDecimal::from_str(s).unwrap().into()).into()).collect(),
    )]
    // Timestamps at several precisions. The `-00:00` input is paired with
    // `build_at_unknown_offset()` and the `Z` input with `build_at_offset(0)`.
    #[case::timestamps(
        br#"
            2020T
            2020-02-27T
            2020-02-27T14:16:33-00:00
            2020-02-27T14:16:33.123Z
        "#,
        vec![
            TS::with_year(2020).build(),
            TS::with_ymd(2020, 2, 27).build(),
            TS::with_ymd(2020, 2, 27)
                .with_hms(14, 16, 33)
                .build_at_unknown_offset(),
            TS::with_ymd(2020, 2, 27)
                .with_hms(14, 16, 33)
                .with_milliseconds(123)
                .build_at_offset(0),
        ].into_iter().map(|ts_res| Timestamp(ts_res.unwrap()).into()).collect(),
    )]
    // An identifier symbol and a quoted symbol.
    #[case::text_symbols(
        br#"
            foo
            'bar'
        "#,
        vec![
            "foo", "bar",
        ].into_iter().map(|s| Symbol(s.into()).into()).collect(),
    )]
    // A triple-quoted (long) string and a double-quoted string.
    #[case::strings(
        br#"
            '''hello'''
            "world"
        "#,
        vec![
            "hello", "world",
        ].into_iter().map(|s| String(s.into()).into()).collect(),
    )]
    // Clobs written with both string forms inside `{{ }}`.
    #[case::clobs(
        br#"
            {{'''goodbye'''}}
            {{"moon"}}
        "#,
        {
            // XXX annotate a vector otherwise inference gets a bit confused
            let lobs: Vec<&[u8]> = vec![
                b"goodbye", b"moon",
            ];
            lobs
        }.into_iter().map(|b| Clob(b.into()).into()).collect(),
    )]
    // `bW9v` is the base64 encoding of the bytes `moo`.
    #[case::blobs(
        br#"
           {{bW9v}}
        "#,
        {
            // XXX annotate a vector otherwise inference gets a bit confused
            let lobs: Vec<&[u8]> = vec![
                b"moo",
            ];
            lobs
        }.into_iter().map(|b| Blob(b.into()).into()).collect(),
    )]
    // A list containing two strings.
    #[case::lists(
        br#"
            ["a", "b"]
        "#,
        vec![
            ion_list!["a", "b"].into()
        ]
    )]
    // An s-expression containing three symbols.
    #[case::sexps(
        br#"
            (e f g)
        "#,
        vec![
            ion_sexp!(Symbol::owned("e") Symbol::owned("f") Symbol::owned("g")).into()
        ]
    )]
    // A struct with annotated values and a repeated field name. The expected fields are
    // listed in a different order than in the input. NOTE(review): presumably `ion_eq`
    // compares struct fields without regard to order — confirm.
    #[case::structs(
        br#"
            {
                bool_field: a::true,
                string_field: a::"moo!",
                string_field: a::"oink!",
            }
        "#,
        vec![
            ion_struct! {
                "string_field": "oink!".with_annotations(["a"]),
                "string_field": "moo!".with_annotations(["a"]),
                "bool_field": true.with_annotations(["a"])
            }.into()
        ]
    )]
    // Reads every value in `input` with `Element::read_all` and asserts that the result is
    // equivalent to `expected` according to `IonEq::ion_eq`. A read error fails the test via
    // the `?` on the returned `IonResult`.
    fn read_and_compare(#[case] input: &[u8], #[case] expected: Vec<Element>) -> IonResult<()> {
        let actual = Element::read_all(input)?;
        assert!(expected.ion_eq(&actual));
        Ok(())
    }
}