ion-rs 1.0.0

Implementation of Amazon Ion
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
#![allow(non_camel_case_types)]

use crate::element::reader::ElementReader;
use crate::element::Element;
use crate::lazy::decoder::Decoder;
use crate::lazy::streaming_raw_reader::IonInput;
use crate::lazy::system_reader::SystemReader;
use crate::lazy::value::LazyValue;
use crate::read_config::ReadConfig;
use crate::result::IonFailure;
use crate::{IonError, IonResult, MacroTable, SymbolTable};

/// An Ion reader that only reads each value that it visits upon request (that is: lazily).
///
/// Each time [`Reader::next`] is called, the reader will advance to the next top-level value
/// in the input stream. Once positioned on a top-level value, users may visit nested values by
/// calling [`LazyValue::read`] and working with the resulting [`crate::lazy::value_ref::ValueRef`],
/// which may contain either a scalar value or a lazy container that may itself be traversed.
///
/// The values that the reader yields ([`LazyValue`],
/// [`LazyList`](crate::lazy::sequence::LazyList), [`LazySExp`](crate::lazy::sequence::LazySExp),
/// and [`LazyStruct`](crate::lazy::struct::LazyStruct)) are immutable references to the data
/// stream, and remain valid until [`Reader::next`] is called again to advance the
/// reader to the next top-level value. This means that these references can be stored, read, and
/// re-read as long as the reader remains on the same top-level value.
///
/// # Examples
///
/// ```
///# use ion_rs::IonResult;
///# #[cfg(feature = "experimental-reader-writer")]
///# fn main() -> IonResult<()> {
///
/// // Construct an Element and serialize it as binary Ion.
/// use ion_rs::{Element, ion_list, Reader};
/// use ion_rs::v1_0::Binary;
///
/// let element: Element = ion_list! [10, 20, 30].into();
/// let binary_ion = element.encode_as(Binary)?;
///
/// let mut lazy_reader = Reader::new(Binary, binary_ion)?;
///
/// // Get the first value from the stream and confirm that it's a list.
/// let lazy_list = lazy_reader.expect_next()?.read()?.expect_list()?;
///
/// // Visit the values in the list
/// let mut sum = 0;
/// for lazy_value in &lazy_list {
///     // Read each lazy value in the lazy list as an int (i64) and
///     // add it to the running total
///     sum += lazy_value?.read()?.expect_i64()?;
/// }
///
/// assert_eq!(sum, 60);
///
/// // Note that we can re-read any of the lazy values. Here we'll step into the list again and
/// // read the first child value.
/// let first_int = lazy_list.iter().next().unwrap()?.read()?.expect_i64()?;
/// assert_eq!(first_int, 10);
///
///# Ok(())
///# }
///# #[cfg(not(feature = "experimental-reader-writer"))]
///# fn main() -> IonResult<()> { Ok(()) }
/// ```
pub struct Reader<Encoding: Decoder, Input: IonInput> {
    // The underlying system-level reader. The `Reader` methods in this file obtain values,
    // the symbol table, and the macro table by delegating to it.
    system_reader: SystemReader<Encoding, Input>,
}

impl<Encoding: Decoder, Input: IonInput> Reader<Encoding, Input> {
    /// Advances the reader to the next top-level value in the input stream.
    ///
    /// The outcome is one of:
    /// * `Ok(Some(lazy_value))`: another top-level value was found.
    /// * `Ok(None)`: the stream contains no more top-level values.
    /// * `Err(ion_error)`: the next value is incomplete (only part of it is in the input
    ///   buffer), or the input buffer contains invalid data.
    ///
    /// <div class="warning">A warning when reading from growing input streams</div>
    ///
    /// Once the reader's [`IonInput`] reports that the stream is complete, the reader treats
    /// whatever data remains available as complete too. In a few situations (specifically,
    /// reading top-level text Ion scalars or keywords from an input stream that keeps growing
    /// over time) this can produce surprising results.
    ///
    /// Take the case of following a growing file, as `tail -f` would. When the reader reaches
    /// the (temporary!) end of the file and a [`std::io::Read::read`] call returns `Ok(0)`, the
    /// reader regards the contents of its input buffer as final. If the data that happened to be
    /// available is itself legal Ion, the reader can yield **incorrect data**. Some examples:
    ///
    /// | On `Ok(0)`, `next()` returns... | A later call to `next()` returns... |
    /// | ------------------------------- | ------------------------------------|
    /// | `false`                         | `_teeth`                            |
    /// | `123`                           | `456`                               |
    /// | `null`                          | `.struct`                           |
    /// | `$ion`                          | `_1_0`                              |
    /// | `2024-03-14T`                   | `12:00:30.000Z`                     |
    /// | `// Discarded start of comment` | ` with words treated as symbols`    |
    ///
    /// Binary Ion does not have this problem because incomplete items can always be detected.
    /// When following a text Ion data source, it is recommended that you only trust values
    /// returned after an `Ok(container_value)`, since incomplete containers can be detected
    /// reliably. Only attempt this when you control the format of the data being read.
    #[allow(clippy::should_implement_trait)]
    // ^-- Clippy objects that the method name `next` will be confused for `Iterator::next()`
    pub fn next(&mut self) -> IonResult<Option<LazyValue<'_, Encoding>>> {
        self.system_reader.next_value()
    }

    /// Behaves like [`Self::next`], except that reaching the end of the stream is reported as
    /// an `IonError` instead of `Ok(None)`.
    pub fn expect_next(&mut self) -> IonResult<LazyValue<'_, Encoding>> {
        match self.next()? {
            Some(lazy_value) => Ok(lazy_value),
            None => Err(IonError::decoding_error(
                "expected another top-level value",
            )),
        }
    }

    /// Returns a reference to the symbol table held by the underlying system reader.
    #[allow(dead_code)]
    pub fn symbol_table(&self) -> &SymbolTable {
        self.system_reader.symbol_table()
    }

    /// Returns a reference to the macro table held by the underlying system reader.
    #[allow(dead_code)]
    pub fn macro_table(&self) -> &MacroTable {
        self.system_reader.macro_table()
    }
}

impl<Encoding: Decoder, Input: IonInput> Reader<Encoding, Input> {
    /// Builds a `Reader` that pulls Ion data from `ion_data`, using `config` to configure the
    /// underlying [`SystemReader`].
    ///
    /// The body shown here has no failing path; the `IonResult` return type is part of the
    /// public signature and is kept as-is.
    pub fn new(
        config: impl Into<ReadConfig<Encoding>>,
        ion_data: Input,
    ) -> IonResult<Reader<Encoding, Input>> {
        Ok(Self {
            system_reader: SystemReader::new(config, ion_data),
        })
    }
}

use crate::lazy::expanded::lazy_element::LazyElement;
use crate::lazy::{expanded::template::TemplateMacro, text::raw::v1_1::reader::MacroAddress};

// TODO: The Reader is now able to understand encoding directives, so it would be good to
//       conditionally compile these using `#[cfg(test)]`. However, these methods are still used by
//       some of the benchmarks, which are not `cfg`-detectable. The benchmarks need to be updated
//       to include encoding directives in each data stream.
#[allow(dead_code)]
impl<Encoding: Decoder, Input: IonInput> Reader<Encoding, Input> {
    // TODO: Remove this when the reader can understand 1.1 encoding directives.
    // NOTE(review): the TODO above this `impl` says the reader already understands encoding
    //               directives, so this TODO may be stale -- confirm before relying on either.
    /// Registers the template macro described by the source text `template_definition` by
    /// forwarding it to the system reader's expanding reader, and returns the resulting
    /// [`MacroAddress`] (or the error the expanding reader reported).
    pub fn register_template_src(&mut self, template_definition: &str) -> IonResult<MacroAddress> {
        self.system_reader
            .expanding_reader
            .register_template_src(template_definition)
    }

    /// Registers an already-constructed [`TemplateMacro`] by forwarding it to the system
    /// reader's expanding reader, and returns the resulting [`MacroAddress`] (or the error the
    /// expanding reader reported).
    pub fn register_template(&mut self, template_macro: TemplateMacro) -> IonResult<MacroAddress> {
        self.system_reader
            .expanding_reader
            .register_template(template_macro)
    }
}

impl<Encoding: Decoder, Input: IonInput> Iterator for Reader<Encoding, Input> {
    type Item = IonResult<LazyElement<Encoding>>;

    /// Yields each remaining top-level value as an owned [`LazyElement`].
    ///
    /// Iteration ends (`None`) when the reader reports no more values; a read error is passed
    /// through as `Some(Err(_))`.
    fn next(&mut self) -> Option<Self::Item> {
        // `self.next()` here is the inherent `Reader::next`, which returns
        // `IonResult<Option<LazyValue>>`. Flipping it with `transpose` gives the
        // `Option<IonResult<_>>` shape that `Iterator` expects.
        self.next()
            .transpose()
            .map(|outcome| outcome.map(|lazy_value| lazy_value.to_owned()))
    }
}

/// An iterator that borrows a [`Reader`] mutably and yields each of its remaining top-level
/// values as an owned [`LazyElement`].
// NOTE(review): nothing in this part of the file constructs this type; confirm whether it is
//               still needed (see the `TODO: Evaluate` below).
#[allow(dead_code)] // TODO: Evaluate
pub struct LazyElementIterator<'iter, Encoding: Decoder, Input: IonInput> {
    // The reader that is advanced each time the iterator is polled.
    lazy_reader: &'iter mut Reader<Encoding, Input>,
}

impl<Encoding: Decoder, Input: IonInput> Iterator for LazyElementIterator<'_, Encoding, Input> {
    type Item = IonResult<LazyElement<Encoding>>;

    /// Advances the borrowed reader and converts the value it lands on into an owned
    /// [`LazyElement`]. Returns `None` at end of stream and `Some(Err(_))` on a read error.
    fn next(&mut self) -> Option<Self::Item> {
        // `transpose` turns `Ok(None)` into `None`, so `?` ends the iteration at end of stream.
        let outcome = self.lazy_reader.next().transpose()?;
        Some(outcome.map(|lazy_value| lazy_value.to_owned()))
    }
}

/// An iterator that borrows a [`Reader`] mutably and yields each of its remaining top-level
/// values as a fully materialized [`Element`].
///
/// This is the iterator type returned by the reader's [`ElementReader::elements`]
/// implementation.
pub struct ElementIterator<'iter, Encoding: Decoder, Input: IonInput> {
    // The reader that is advanced each time the iterator is polled.
    lazy_reader: &'iter mut Reader<Encoding, Input>,
}

impl<Encoding: Decoder, Input: IonInput> Iterator for ElementIterator<'_, Encoding, Input> {
    type Item = IonResult<Element>;

    /// Advances the borrowed reader and materializes the value it lands on as an [`Element`].
    /// Returns `None` at end of stream; read errors and conversion errors are both surfaced as
    /// `Some(Err(_))`.
    fn next(&mut self) -> Option<Self::Item> {
        // `transpose` turns `Ok(None)` into `None`, so `?` ends the iteration at end of stream.
        let outcome = self.lazy_reader.next().transpose()?;
        Some(outcome.and_then(|lazy_value| lazy_value.try_into()))
    }
}

impl<Encoding: Decoder, Input: IonInput> ElementReader for Reader<Encoding, Input> {
    type ElementIterator<'a>
        = ElementIterator<'a, Encoding, Input>
    where
        Self: 'a;

    /// Reads the next top-level value and materializes it as an [`Element`].
    ///
    /// Returns `Ok(None)` when the reader has no more values; read and conversion failures are
    /// returned as `Err`.
    fn read_next_element(&mut self) -> IonResult<Option<Element>> {
        let Some(lazy_value) = self.next()? else {
            return Ok(None);
        };
        lazy_value.try_into().map(Some)
    }

    /// Returns an iterator that materializes each remaining top-level value as an [`Element`],
    /// borrowing this reader for as long as the iterator lives.
    fn elements(&mut self) -> Self::ElementIterator<'_> {
        let lazy_reader = self;
        ElementIterator { lazy_reader }
    }
}

#[cfg(test)]
mod tests {
    use crate::element::element_writer::ElementWriter;
    use crate::element::Element;
    use crate::lazy::encoder::writer::Writer;
    use crate::lazy::encoding::BinaryEncoding_1_0;
    use crate::lazy::value_ref::ValueRef;
    use crate::write_config::WriteConfig;
    use crate::{ion_list, ion_sexp, ion_struct, v1_0, Int, IonResult, IonType};

    use super::*;

    /// Test helper: parses `text_ion` into `Element`s and re-encodes them as a binary Ion 1.0
    /// stream, returning the encoded bytes.
    fn to_binary_ion(text_ion: &str) -> IonResult<Vec<u8>> {
        let buffer = Vec::new();
        let config = WriteConfig::<BinaryEncoding_1_0>::new();
        let mut writer = Writer::new(config, buffer)?;
        let elements = Element::read_all(text_ion)?;
        writer.write_elements(&elements)?;
        writer.flush()?;
        writer.close()
    }

    /// Walks every top-level S-expression in a binary stream, visits each of its children, and
    /// checks that none of them were skipped.
    #[test]
    fn sequence_iter() -> IonResult<()> {
        let ion_data = to_binary_ion(
            r#"
                (foo baz baz)
                (1 2 3)
                (a b c)
        "#,
        )?;
        let mut reader = Reader::new(v1_0::Binary, ion_data)?;
        // Tallies used to confirm that the traversal below saw everything in the input.
        let mut sexp_count = 0;
        let mut child_count = 0;
        // For each top-level value...
        while let Some(top_level_value) = reader.next()? {
            // ...see if it's an S-expression...
            if let ValueRef::SExp(sexp) = top_level_value.read()? {
                sexp_count += 1;
                //...and if it is, print its child values.
                for lazy_value in &sexp {
                    println!("{:?}", lazy_value?.read()?);
                    child_count += 1;
                }
            }
        }
        // The input holds three S-expressions with three children apiece.
        assert_eq!(sexp_count, 3, "expected three top-level S-expressions");
        assert_eq!(child_count, 9, "expected nine child values in total");
        Ok(())
    }

    /// Collects the lazy children of a list and then reads them out of order, confirming that
    /// lazy values stay readable while the reader remains on the same top-level value.
    #[test]
    fn test_rewind() -> IonResult<()> {
        let data = to_binary_ion(
            r#"
            [
                "yo",
                77,
                true,
                {name:"hi", name: "hello"},
            ]
        "#,
        )?;
        let mut reader = Reader::new(v1_0::Binary, data)?;

        let first_value = reader.expect_next()?;
        let list = first_value.read()?.expect_list()?;
        let lazy_values = list.iter().collect::<IonResult<Vec<_>>>()?;

        assert_eq!(lazy_values[1].read()?.expect_int()?, Int::from(77));
        assert!(lazy_values[2].read()?.expect_bool()?);
        Ok(())
    }

    /// Confirms that `read_next_element` materializes each top-level value as the expected
    /// `Element` and reports `None` once the stream is exhausted.
    #[test]
    fn materialize() -> IonResult<()> {
        let data = to_binary_ion(
            r#"
            [
                "yo",
                77,
                true,
                {name:"hi", name: "hello"},
            ]
            null.int
            (null null.string)
        "#,
        )?;
        let mut reader = Reader::new(v1_0::Binary, data)?;
        let list: Element = ion_list![
            "yo",
            77,
            true,
            ion_struct! {
                "name": "hi",
                "name": "hello"
            }
        ]
        .into();
        assert_eq!(reader.read_next_element()?, Some(list));
        assert_eq!(
            reader.read_next_element()?,
            Some(Element::null(IonType::Int))
        );
        let sexp: Element = ion_sexp!(IonType::Null IonType::String).into();
        assert_eq!(reader.read_next_element()?, Some(sexp));
        assert_eq!(reader.read_next_element()?, None);
        Ok(())
    }

    /// Regression test for https://github.com/amazon-ion/ion-rust/issues/1020
    /// Verifies that the symbol table is reset at each IVM when reading concatenated
    /// Ion 1.0 binary documents.
    #[test]
    fn ivm_resets_symbol_table_in_concatenated_documents() -> IonResult<()> {
        use crate::v1_0::Binary;

        // Each `encode_to` call appends one more encoded document to the same buffer.
        let mut output = Vec::new();
        output = Element::read_one("annotation1::{id1: \"one\"}")?.encode_to(output, Binary)?;
        output = Element::read_one("annotation2::{id2: \"two\"}")?.encode_to(output, Binary)?;
        output = Element::read_one("annotation3::{id3: \"three\"}")?.encode_to(output, Binary)?;

        let elements: Vec<Element> = Element::read_all(&output)?.into_iter().collect();
        assert_eq!(elements.len(), 3);

        // Each struct must expose its own field name and none from a neighboring document.
        assert_eq!(elements[0].annotations().first(), Some("annotation1"));
        let s0 = elements[0].as_struct().unwrap();
        assert!(s0.get("id1").is_some());
        assert!(s0.get("id2").is_none());

        assert_eq!(elements[1].annotations().first(), Some("annotation2"));
        let s1 = elements[1].as_struct().unwrap();
        assert!(s1.get("id2").is_some());
        assert!(s1.get("id1").is_none());

        assert_eq!(elements[2].annotations().first(), Some("annotation3"));
        let s2 = elements[2].as_struct().unwrap();
        assert!(s2.get("id3").is_some());
        assert!(s2.get("id1").is_none());

        Ok(())
    }
}

#[cfg(all(test, feature = "experimental-ion-1-1"))]
mod tests_1_1 {
    use crate::lazy::text::raw::v1_1::reader::MacroAddress;
    use crate::{v1_1, IonResult, MacroTable, Reader};

    /// Shared driver for the macro-expansion tests in this module.
    ///
    /// * `macro_source`: text definition of the template macro to register with the reader.
    /// * `encode_macro_fn`: given the macro's address, produces the binary Ion 1.1 bytes that
    ///   invoke it.
    /// * `test_fn`: receives the prepared reader and asserts on the values it expands to.
    ///
    /// Fails if the address returned by registration differs from the pre-computed one, or if
    /// `test_fn` returns an error.
    fn expand_macro_test(
        macro_source: &str,
        encode_macro_fn: impl FnOnce(MacroAddress) -> Vec<u8>,
        test_fn: impl FnOnce(Reader<v1_1::Binary, &[u8]>) -> IonResult<()>,
    ) -> IonResult<()> {
        // Because readers do not yet understand encoding directives, we'll pre-calculate the
        // macro ID that will be assigned.
        // NOTE(review): a TODO elsewhere in this file says the Reader now understands encoding
        //               directives; confirm whether the sentence above is stale.
        let macro_address = MacroTable::FIRST_USER_MACRO_ID;
        // The address doubles as the invocation's first byte, so it has to fit in a `u8`.
        let opcode_byte = u8::try_from(macro_address).unwrap();
        // Using that ID, encode a binary stream containing an invocation of the new macro.
        // This function must add an IVM and the encoded e-expression ID, followed by any number
        // of arguments that matches the provided signature.
        // NOTE(review): the closures passed by the tests below emit only the macro ID and its
        //               arguments, with no IVM; confirm whether the IVM requirement still holds.
        let binary_ion = encode_macro_fn(opcode_byte as usize);
        // Construct a reader for the encoded data.
        let mut reader = Reader::new(v1_1::Binary, binary_ion.as_slice())?;
        // Register the template definition, getting the same ID we used earlier.
        let actual_address = reader.register_template_src(macro_source)?;
        assert_eq!(
            macro_address, actual_address,
            "Assigned macro address did not match expected address."
        );
        // Use the provided test function to confirm that the data expands to the expected stream.
        test_fn(reader)
    }

    /// A zero-argument template macro expands to its constant body (`17`).
    #[test]
    fn expand_binary_template_macro() -> IonResult<()> {
        let macro_source = "(macro seventeen () 17)";
        let encode_macro_fn = |address| vec![address as u8];
        expand_macro_test(macro_source, encode_macro_fn, |mut reader| {
            assert_eq!(reader.expect_next()?.read()?.expect_i64()?, 17);
            Ok(())
        })
    }

    /// A one-argument template macro splices its string argument into the output string.
    #[test]
    fn expand_binary_template_macro_with_one_arg() -> IonResult<()> {
        let macro_source = r#"
            (macro greet (name)
                (.make_string "Hello, " (%name) "!")
            )
        "#;
        #[rustfmt::skip]
        let encode_macro_fn = |address| vec![
            // === Macro ID ===
            address as u8,
            // === Arg 1 ===
            // 8-byte string
            0x98,
            // M     i     c     h     e     l     l     e
            0x4D, 0x69, 0x63, 0x68, 0x65, 0x6C, 0x6C, 0x65,
        ];
        expand_macro_test(macro_source, encode_macro_fn, |mut reader| {
            assert_eq!(
                reader.expect_next()?.read()?.expect_string()?,
                "Hello, Michelle!"
            );
            Ok(())
        })
    }

    /// A single macro invocation that expands to several values surfaces each of them as its
    /// own top-level value, in order.
    #[test]
    fn expand_binary_template_macro_with_multiple_outputs() -> IonResult<()> {
        let macro_source = r#"
            (macro questions (food)
                (.values
                    (.make_string "What color is a " (%food) "?")
                    (.make_string "How much potassium is in a " (%food) "?")
                    (.make_string "What wine should I pair with a " (%food) "?")))
        "#;
        #[rustfmt::skip]
            let encode_macro_fn = |address| vec![
            // === Macro ID ===
            address as u8,
            // === Arg 1 ===
            // 6-byte string
            0x96,
            // b     a     n     a     n     a
            0x62, 0x61, 0x6E, 0x61, 0x6E, 0x61
        ];
        expand_macro_test(macro_source, encode_macro_fn, |mut reader| {
            assert_eq!(
                reader.expect_next()?.read()?.expect_string()?,
                "What color is a banana?"
            );
            assert_eq!(
                reader.expect_next()?.read()?.expect_string()?,
                "How much potassium is in a banana?"
            );
            assert_eq!(
                reader.expect_next()?.read()?.expect_string()?,
                "What wine should I pair with a banana?"
            );
            Ok(())
        })
    }
}