use std::io;
use std::ops::Range;
use delegate::delegate;
use crate::constants::v1_0::{system_symbol_ids, SYSTEM_SYMBOLS};
use crate::raw_reader::{RawReader, RawStreamItem};
use crate::raw_symbol_token::RawSymbolToken;
use crate::result::{decoding_error, decoding_error_raw, illegal_operation, IonError, IonResult};
use crate::symbol::Symbol;
use crate::system_reader::LstPosition::*;
use crate::types::decimal::Decimal;
use crate::types::integer::Integer;
use crate::types::timestamp::Timestamp;
use crate::{IonReader, IonType, RawBinaryReader, SymbolTable};
/// Tracks where the [SystemReader] is in the process of reading a local symbol table (LST).
///
/// The position is updated whenever the reader advances with `next()`, descends with
/// `step_in()`, or ascends with `step_out()`.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
enum LstPosition {
// Each variant below describes where the reader is currently positioned.
/// Not positioned at or inside of an LST. Examples include when the reader is
/// positioned over:
/// * The start of the stream (i.e. nothing)
/// * An IVM
/// * A user value
NotReadingAnLst,
/// Positioned on an $ion_symbol_table struct but have not yet stepped in
AtLstStart,
/// Inside an LST but between fields at depth 1. This occurs when:
/// * the reader has stepped into the LST but has not yet read a field
/// * after the reader has stepped out of a field in the LST and is back at depth 1
BetweenLstFields,
/// Inside an $ion_symbol_table and positioned at the `imports` field but have not yet stepped in
AtLstImports,
/// Inside the `imports` field
ProcessingLstImports,
/// Inside an $ion_symbol_table and positioned at the `symbols` field but have not yet stepped in
AtLstSymbols,
/// Inside the `symbols` field
ProcessingLstSymbols,
/// Inside an $ion_symbol_table and positioned at a field whose behavior is not defined by the
/// spec (that is, any field other than `imports` or `symbols`).
AtLstOpenContent,
/// Inside an $ion_symbol_table but processing "open" content--data defined by the user but not
/// required or recognized by the spec.
ProcessingLstOpenContent,
}
#[derive(Debug, Eq, PartialEq, Copy, Clone)]
/// Stream elements that a [SystemReader] may report from `next()` or `current()`.
///
/// Unlike the raw reader's stream items, these distinguish values that belong to an encoded
/// local symbol table from ordinary user values.
pub enum SystemStreamItem {
/// An Ion Version Marker (IVM) indicating the Ion major and minor version that were used to
/// encode the values that follow.
VersionMarker(u8, u8),
/// A non-null Ion value that is part of an encoded local symbol table.
/// This includes:
/// * Top-level structs annotated with $ion_symbol_table::
/// * Any fields nested inside such structs, but especially `imports` and `symbols`
SymbolTableValue(IonType),
/// A null Ion value that is part of an encoded local symbol table.
SymbolTableNull(IonType),
/// A non-null user-level Ion value and its corresponding Ion data type.
Value(IonType),
/// A null user-level Ion value and its corresponding Ion data type.
Null(IonType),
/// Indicates that the reader is not positioned over anything. This can happen:
/// * before the reader has begun processing the stream.
/// * after the reader has stepped into a container, but before the reader has called next()
/// * after the reader has stepped out of a container, but before the reader has called next()
/// * after the reader has read the last item in a container
Nothing,
}
// Stores information that has been read from a local symbol table (LST) that is currently being
// processed. The buffered symbols are applied to the reader's symbol table when the reader
// steps out of the LST.
// TODO: Support shared symbol table imports.
struct LstData {
// The reader's position within the LST.
state: LstPosition,
// Whether this LST's symbols should be appended to the current symbol table.
// If this is false, the current symbol table will be cleared before the new symbols are
// added to the table. `is_append` is only set to true if the LST's `imports` field has
// the symbol `$ion_symbol_table` as its value.
is_append: bool,
// All of the new symbols being defined in this LST. These pending symbols are buffered in a Vec
// because the `symbols` field of the LST can appear before the `imports` field but the `imports`
// field MUST be processed first.
// A `None` entry stands for a symbol with unknown text (a null or non-string list element).
symbols: Vec<Option<String>>,
// At present, RawTextReader and RawBinaryReader cannot read the same value more than once.
// When the SystemReader needs to read the current value as part of processing a local symbol
// table, it must store a copy of that value in case the user requests it via `read_string()`,
// `read_i64()`, etc. The fields below are used to store such copies.
//
// Copy of the most recently loaded `imports` symbol value.
current_symbol: RawSymbolToken,
// Copy of the most recently loaded string from the `symbols` list.
current_string: String,
// Copy of the most recently loaded integer.
// NOTE(review): no code visible in this file reads this field back -- confirm whether it is
// still needed.
current_int: i64,
}
impl LstData {
    // Creates the bookkeeping used when no LST is being read: no pending symbols, not an
    // append, and placeholder values in each of the "current value" slots.
    fn new() -> Self {
        let state = LstPosition::NotReadingAnLst;
        let symbols: Vec<Option<String>> = Vec::new();
        Self {
            state,
            is_append: false,
            symbols,
            current_symbol: RawSymbolToken::SymbolId(0),
            current_string: String::new(),
            current_int: 0,
        }
    }
}
/// A streaming Ion reader that:
/// * maintains a symbol table
/// * processes local symbol tables as they are encountered
/// * can resolve symbol IDs into their associated text
/// * does NOT filter out encoding data like IVMs and symbol tables
///
/// SystemReader itself is format-agnostic; all format-specific logic is handled by the
/// wrapped [RawReader] implementation.
///
/// If the user skips over some or all of a local symbol table in the stream (via either `next()`
/// or `step_out()`), the SystemReader will automatically process the rest of the local symbol
/// table before advancing. This allows users to visit Ion values used to encode local symbol tables
/// without also being responsible for interpreting them (as they would with an implementation of
/// the [RawReader] trait.)
pub struct SystemReader<R: RawReader> {
// The format-specific reader that this SystemReader wraps and drives.
raw_reader: R,
// The current symbol table
symbol_table: SymbolTable,
// Information about the local symbol table we're currently reading, if any
lst: LstData,
// The item produced by the most recent call to `next()`; `Nothing` until `next()` is called.
current_item: SystemStreamItem,
}
impl<R: RawReader> SystemReader<R> {
pub fn new(raw_reader: R) -> SystemReader<R> {
SystemReader {
raw_reader,
symbol_table: SymbolTable::new(),
lst: LstData::new(),
current_item: SystemStreamItem::Nothing,
}
}
// Reports whether the raw reader is currently positioned on a local symbol table: a non-null
// struct at the top level (depth 0) whose first annotation is $ion_symbol_table.
fn current_value_is_symbol_table(&self) -> bool {
    if self.raw_reader.current() != RawStreamItem::Value(IonType::Struct) {
        return false;
    }
    if self.depth() != 0 {
        return false;
    }
    // Only the first annotation is considered; a value with no annotations is not an LST.
    match self.raw_annotations().next() {
        Some(first_annotation) => first_annotation.matches(
            system_symbol_ids::ION_SYMBOL_TABLE,
            SYSTEM_SYMBOLS[system_symbol_ids::ION_SYMBOL_TABLE],
        ),
        None => false,
    }
}
// When next() is called, process any LST-related data in the reader's current position before
// advancing to the next value. This may involve stepping into and processing container values.
fn before_next(&mut self) -> IonResult<()> {
match self.lst.state {
AtLstStart => {
// If the reader is positioned over the beginning of an LST when next() is called,
// we need to process the entire LST instead of just skipping to the next value.
self.step_in()?;
self.finish_reading_current_lst()?;
}
BetweenLstFields => {
// If the reader is at depth=1 inside an LST but not positioned over a value,
// do nothing and allow the call to next() to advance the cursor.
}
AtLstImports | AtLstSymbols | AtLstOpenContent => {
// If the reader is positioned over an LST field at depth=1 when next() is called,
// we need to process the value of that field instead of just skipping
// to the next field.
// If the field value is a scalar, it will have already been processed the last time
// that the reader advanced. (See the 'after_next()` method.) If it's a
// list, however, we need to process it now. (Other container types in LST fields
// are ignored.)
if self
.raw_reader
.ion_type()
.map(|t| t == IonType::List)
.unwrap_or(false)
{
self.step_in()?;
self.finish_reading_current_level()?;
}
}
_ => {
// Allow values at depths > 1 to be skipped.
// TODO: Process import structs that live at depth=2
// imports: [{name:_, version:_, max_id:_}, {name:_, version:_, max_id:_}]
}
}
Ok(())
}
// After the raw reader is advanced by a call to `next()`, take stock of the reader's new
// position. Returns a SystemStreamItem indicating whether the reader is now positioned over
// a user value or part of a local symbol table.
//
// `ion_type` and `is_null` describe the value that the raw reader just advanced to.
//
// Any error reported by the raw reader while inspecting the new value (reading its field name
// or eagerly loading its symbol/string content) is returned to the caller.
fn after_next(&mut self, ion_type: IonType, is_null: bool) -> IonResult<SystemStreamItem> {
    // At this point, `self.lst.state` represents the reader's _previous_ position.
    // We have now advanced to the next value at the same depth and need to update
    // `self.lst.state` to reflect that.
    match self.lst.state {
        NotReadingAnLst | AtLstStart => {
            // If we're at the top level and the next element we encounter is a struct whose
            // first annotation is $ion_symbol_table, then set the state to `AtLstStart`
            if self.current_value_is_symbol_table() {
                self.lst.state = AtLstStart;
            } else {
                // Otherwise, it's just a plain user value.
                // This is the only branch in this method that returns a user value.
                self.lst.state = NotReadingAnLst;
                return Ok(if is_null {
                    SystemStreamItem::Null(ion_type)
                } else {
                    SystemStreamItem::Value(ion_type)
                });
            }
        }
        AtLstImports | AtLstSymbols | AtLstOpenContent | BetweenLstFields => {
            // The reader is inside the LST at depth=1. Figure out which field we're on and
            // update the state.
            match self.raw_reader.field_name() {
                Ok(field_name_token) => {
                    if field_name_token.matches(system_symbol_ids::IMPORTS, "imports") {
                        self.move_to_lst_imports_field(ion_type)?;
                    } else if field_name_token.matches(system_symbol_ids::SYMBOLS, "symbols") {
                        self.lst.state = AtLstSymbols;
                    } else {
                        // This field has no effect on our handling of the LST.
                        self.lst.state = AtLstOpenContent;
                    }
                }
                Err(IonError::IllegalOperation { .. }) => {
                    // TODO: Check this with self.raw_reader.current() == RawStreamItem::Nothing instead
                    // There's no field name; we're between fields.
                    self.lst.state = BetweenLstFields;
                }
                // Any other failure (e.g. an I/O or decoding problem) is reported to the
                // caller rather than aborting the process.
                Err(error) => return Err(error),
            }
        }
        ProcessingLstImports => {
            todo!("Support shared symbol table imports.");
        }
        ProcessingLstSymbols => {
            // Only the direct elements of the `symbols` list (which live at depth 2) define
            // symbols. Values nested inside a container element are part of that single
            // non-string element and must not claim symbol IDs of their own.
            let is_symbols_list_element = self.depth() == 2;
            if let (IonType::String, false) = (ion_type, is_null) {
                // The string is loaded at any depth because the user may still ask for its
                // value via read_string(), which serves the buffered copy while the reader
                // is inside the `symbols` field.
                self.load_current_string()?;
                if is_symbols_list_element {
                    // We clone the current string so the buffered copy remains available.
                    self.lst.symbols.push(Some(self.lst.current_string.clone()));
                }
            } else if is_symbols_list_element {
                // Non-string values and nulls are treated as symbols with unknown text.
                self.lst.symbols.push(None);
            }
        }
        ProcessingLstOpenContent => {
            // We were in open content before and haven't stepped out yet. Do nothing.
        }
    };
    if is_null {
        Ok(SystemStreamItem::SymbolTableNull(ion_type))
    } else {
        Ok(SystemStreamItem::SymbolTableValue(ion_type))
    }
}
// Called when the system reader advances to the `imports` field of an LST. `ion_type` is the
// Ion type of the field's value.
//
// Sets the state to `AtLstImports`. If the value is the non-null symbol `$ion_symbol_table`,
// the LST is marked as an append. Returns an error if the raw reader fails to read the symbol.
fn move_to_lst_imports_field(&mut self, ion_type: IonType) -> IonResult<()> {
    self.lst.state = AtLstImports;
    match ion_type {
        // A null symbol carries no value to load, so it falls through to the "ignored" arm
        // below. This matches `read_raw_symbol()`, which only serves the buffered copy for
        // non-null symbols.
        IonType::Symbol if !self.raw_reader.is_null() => {
            // If the `imports` field value is the symbol '$ion_symbol_table', then this is an
            // LST append.
            self.load_current_symbol()?;
            if self
                .lst
                .current_symbol
                .matches(system_symbol_ids::ION_SYMBOL_TABLE, "$ion_symbol_table")
            {
                self.lst.is_append = true;
            }
        }
        IonType::List => {
            // Once this is supported, this branch will do nothing because the list will either
            // be processed when the user steps into/through it or when they try to skip over
            // it, not when it's first encountered. For now though, we fail because this
            // feature is not yet supported.
            todo!("Support shared symbol table imports.");
        }
        _ => {
            // Null symbols and non-list, non-symbol values for the `imports` field are ignored.
        }
    };
    Ok(())
}
// Reads the raw reader's current value as a symbol and keeps a copy of it in
// `self.lst.current_symbol` so it can be handed back if the user asks for it later.
// If the read fails, the error is returned and the stored symbol is left untouched.
fn load_current_symbol(&mut self) -> IonResult<()> {
    self.lst.current_symbol = self.raw_reader.read_symbol()?;
    Ok(())
}
// Reads the raw reader's current value expecting a string. Stores the value in
// `self.lst.current_string` so it can be returned if the user requests it.
fn load_current_string(&mut self) -> IonResult<()> {
self.lst.current_string.clear();
let SystemReader {
ref mut raw_reader,
ref mut lst,
..
} = *self;
raw_reader.map_string(|s| lst.current_string.push_str(s))?;
Ok(())
}
// Reads the raw reader's current value expecting an integer. Stores the value in
// `self.lst.current_int` so it can be returned if the user requests it.
//
// Returns the raw reader's error (leaving `self.lst.current_int` unchanged) if the current
// value cannot be read as an `i64`.
fn load_current_int(&mut self) -> IonResult<()> {
    // Note: This method will only be called on integers found inside of local symbol tables.
    // If an LST has an integer that's too big to fit in an i64, this will fail.
    self.lst.current_int = self.raw_reader.read_i64()?;
    Ok(())
}
// Handles an Ion Version Marker reported by the raw reader. An IVM is only legal at the top
// level; anywhere else it is a decoding error. A top-level IVM resets the symbol table and
// marks the reader as not being inside an LST.
fn process_ivm(&mut self, major: u8, minor: u8) -> IonResult<SystemStreamItem> {
    if self.depth() == 0 {
        self.lst.state = NotReadingAnLst;
        self.symbol_table.reset();
        Ok(SystemStreamItem::VersionMarker(major, minor))
    } else {
        decoding_error("Encountered an IVM at a depth > 0")
    }
}
// Called when the reader steps out of an LST. Moves the symbols buffered in
// `self.lst.symbols` into the current symbol table, resetting the table first unless the LST
// was an append. `self.lst.symbols` is empty afterwards.
fn add_lst_symbols_to_current_symbol_table(&mut self) {
    let is_append = self.lst.is_append;
    if !is_append {
        // Not an append: the new symbols replace whatever was defined before.
        self.symbol_table.reset();
    }
    // TODO: support importing shared symbol tables
    // Draining hands us ownership of each `String` and empties the buffer.
    for pending_symbol in self.lst.symbols.drain(..) {
        match pending_symbol {
            // Defined text: add it to the symbol table.
            Some(text) => {
                self.symbol_table.intern(text);
            }
            // Null or non-string value: reserve its symbol ID with a placeholder.
            None => {
                self.symbol_table.add_placeholder();
            }
        }
    }
}
// User-level values may be skipped freely, but Local Symbol Tables (LSTs) may not: if the
// application tries to move past an LST with `next()` or `step_out()`, the remainder of the
// LST must still be read so that its imported or newly declared symbols reach the current
// symbol table.
// This method reads everything that is left of the LST and steps out until the reader is back
// at the top level. On return the reader is positioned just after the LST and the symbol table
// is up to date; the caller must then call `next()` to advance to the next item in the stream.
fn finish_reading_current_lst(&mut self) -> IonResult<()> {
    loop {
        if self.depth() == 0 {
            return Ok(());
        }
        self.finish_reading_current_level()?;
        self.step_out()?;
    }
}
// Visit every value at or below the current level of nesting. For example, given this Ion data:
//
// {
// foo: [1, 2, 3]
// bar: [(1), (2), (3)]
// // ^-- The reader is positioned here, at the beginning of this s-expression.
// baz: true
// }
//
// If the reader were positioned at the first s-expression in the `bar`, this function would
// advance the reader to the end of `bar`, visiting every nested value along the way.
// When the function returned, the reader would need to call `step_out()` and `next()` to start
// reading field `baz`.
fn finish_reading_current_level(&mut self) -> IonResult<()> {
use SystemStreamItem::*;
let starting_depth = self.depth();
// We need to visit every value in the LST (however deeply nested) so the current symbol
// table will be updated with any new symbols that the LST imports/defines.
loop {
match self.next()? {
VersionMarker(major, minor) => {
return decoding_error(format!(
"Encountered an IVM for v{}.{} inside an LST.",
major, minor
))
}
Value(_) | Null(_) => {
// Any value inside an LST should be considered a `SymbolTableValue`; it
// shouldn't be possible to encounter a user-level `Value`.
unreachable!("Cannot encounter a user-level value inside an LST.")
}
SymbolTableValue(ion_type) => {
// We've encountered another value in the LST. If's a container, step into it.
if ion_type.is_container() {
self.step_in()?;
}
// The logic that handles interpreting each value in the LST lives inside
// the `process_raw_value` helper function. The act of calling `next()` and
// `step_in()` here is enough to trigger it.
}
SymbolTableNull(_ion_type) => {
// We've encountered a null value in the LST. Do nothing.
}
Nothing if self.depth() > starting_depth => {
// We've run out of values, but we're not back to the starting depth yet.
// Step out a level and let the loop call next() again.
self.step_out()?;
}
// We've run out of values and we're at the starting depth, so we're done.
Nothing => return Ok(()),
}
}
}
/// Iterates over the current value's annotations as unresolved [RawSymbolToken]s.
///
/// # Panics
///
/// The returned iterator panics if the raw reader yields an `Err` for an annotation.
pub fn raw_annotations(&self) -> impl Iterator<Item = RawSymbolToken> + '_ {
    // RawReader implementations do not try to resolve annotations into text, and they do all
    // annotation-related I/O inside of Reader::next. Unwrapping each raw annotation is
    // therefore expected to be safe.
    let raw_tokens = self.raw_reader.annotations();
    raw_tokens.map(Result::unwrap)
}
/// Returns a reference to the reader's current symbol table.
pub fn symbol_table(&self) -> &SymbolTable {
&self.symbol_table
}
/// Reads the current value as a symbol without resolving it against the symbol table.
///
/// Returns an illegal-operation error if the reader is not positioned over a value; any other
/// error comes from the wrapped raw reader.
pub fn read_raw_symbol(&mut self) -> IonResult<RawSymbolToken> {
    // When the reader sits on a non-null symbol in the `imports` field of an LST, the symbol
    // was already read eagerly to check whether it is $ion_symbol_table.
    let symbol_was_preloaded = self.lst.state == AtLstImports
        && self.raw_reader.ion_type() == Some(IonType::Symbol)
        && !self.raw_reader.is_null();
    if symbol_was_preloaded {
        // Hand back a copy of the materialized symbol value.
        return Ok(self.lst.current_symbol.clone());
    }
    // Otherwise, delegate to the raw reader
    match self.raw_reader.current() {
        RawStreamItem::Nothing => {
            illegal_operation("called `read_raw_symbol`, but reader is not over a value")
        }
        _ => self.raw_reader.read_symbol(),
    }
}
/// Returns the current value's field name as an unresolved [RawSymbolToken], exactly as
/// reported by the wrapped raw reader (including any error it returns).
pub fn raw_field_name_token(&mut self) -> IonResult<RawSymbolToken> {
self.raw_reader.field_name()
}
// Returns true if the system reader has already consumed the current string from input as
// part of processing the current local symbol table. When this is the case, the text is
// available in `self.lst.current_string`.
pub fn current_string_was_consumed(&self) -> bool {
    // Non-null strings inside the `symbols` field of an LST are loaded eagerly so their text
    // can eventually be stored in the current symbol table.
    let inside_symbols_field = self.lst.state == ProcessingLstSymbols;
    inside_symbols_field
        && self.raw_reader.ion_type() == Some(IonType::String)
        && !self.raw_reader.is_null()
}
}
impl<R: RawReader> IonReader for SystemReader<R> {
type Item = SystemStreamItem;
type Symbol = Symbol;
/// Advances the system reader to the next raw stream element and reports what it found.
// `next` resembles `Iterator::next()`
#[allow(clippy::should_implement_trait)]
fn next(&mut self) -> IonResult<Self::Item> {
    // A container that makes up part of an LST cannot simply be skipped; the reader has to
    // descend into it and read each value before moving on.
    self.before_next()?;
    let raw_item = self.raw_reader.next()?;
    let system_item = match raw_item {
        RawStreamItem::Nothing => SystemStreamItem::Nothing,
        RawStreamItem::VersionMarker(major, minor) => self.process_ivm(major, minor)?,
        // For values and nulls, context determines whether this is a user-level value or
        // part of a system value.
        RawStreamItem::Null(ion_type) => self.after_next(ion_type, true)?,
        RawStreamItem::Value(ion_type) => self.after_next(ion_type, false)?,
    };
    self.current_item = system_item;
    Ok(system_item)
}
/// Returns the item produced by the most recent call to `next()`, or `Nothing` if `next()`
/// has not been called yet.
fn current(&self) -> Self::Item {
self.current_item
}
/// Steps into the container the reader is positioned on and updates the LST position to
/// reflect what was entered. Returns the raw reader's error if it cannot step in.
fn step_in(&mut self) -> IonResult<()> {
    // If the reader isn't positioned on a container, the raw reader returns an error here
    // and the LST state is left untouched.
    self.raw_reader.step_in()?;
    self.lst.state = match self.lst.state {
        // Entered the LST struct itself; not yet positioned on a field.
        AtLstStart => BetweenLstFields,
        // Entered the `imports` field of an LST.
        AtLstImports => ProcessingLstImports,
        // Entered the `symbols` field of an LST.
        AtLstSymbols => ProcessingLstSymbols,
        // Entered a container on a user-defined field that has no meaning to the reader.
        AtLstOpenContent => ProcessingLstOpenContent,
        // Diving deeper into user data changes nothing.
        NotReadingAnLst => NotReadingAnLst,
        // Diving deeper into the imports changes nothing.
        ProcessingLstImports => ProcessingLstImports,
        // Diving deeper into the symbols is weird but not illegal; nothing changes.
        ProcessingLstSymbols => ProcessingLstSymbols,
        // Still user data with no meaning to the reader; nothing changes.
        ProcessingLstOpenContent => ProcessingLstOpenContent,
        BetweenLstFields => {
            // raw_reader.step_in() above should have returned an error.
            unreachable!("The raw reader stepped in but the LST state was BetweenLstFields.");
        }
    };
    Ok(())
}
/// Steps out of the current container. If the reader is inside a local symbol table, the rest
/// of the current level is processed first so that no symbol definitions are skipped.
///
/// Returns an illegal-operation error when positioned on an LST at the top level.
fn step_out(&mut self) -> IonResult<()> {
    // Snapshot the state before any processing below has a chance to change it.
    let state_on_entry = self.lst.state;
    // Work out the state to apply once the raw reader has stepped out successfully.
    let state_after_step_out = match state_on_entry {
        // Stepping out of user data can never change the LST state.
        NotReadingAnLst => state_on_entry,
        AtLstStart => {
            // Symbol tables are always at the top level.
            return illegal_operation("Cannot step out when the reader is at the top level.");
        }
        BetweenLstFields | AtLstSymbols | AtLstImports | AtLstOpenContent => {
            // Leaving the local symbol table altogether. Finish processing the LST rather
            // than skipping its remaining contents, then apply what it defined.
            self.finish_reading_current_level()?;
            self.add_lst_symbols_to_current_symbol_table();
            self.lst.is_append = false;
            NotReadingAnLst
        }
        ProcessingLstImports | ProcessingLstSymbols | ProcessingLstOpenContent => {
            // Inside one of the LST fields. Finish processing the current level before
            // stepping out.
            self.finish_reading_current_level()?;
            // If the upcoming raw step_out() leaves the field altogether, we'll be back at
            // depth=1; otherwise we stay inside the same field.
            if self.depth() == 2 {
                BetweenLstFields
            } else {
                state_on_entry
            }
        }
    };
    self.raw_reader.step_out()?;
    self.lst.state = state_after_step_out;
    Ok(())
}
/// Returns the current value's field name, resolving symbol IDs through the current symbol
/// table. Fails with a decoding error if the ID has no defined text; errors from the raw
/// reader are passed through unchanged.
fn field_name(&self) -> IonResult<Symbol> {
    match self.raw_reader.field_name()? {
        RawSymbolToken::Text(text) => Ok(Symbol::owned(text)),
        RawSymbolToken::SymbolId(sid) => match self.symbol_table.symbol_for(sid) {
            Some(symbol) => Ok(symbol.clone()),
            None => Err(decoding_error_raw(format!(
                "encountered field ID with undefined text: ${}",
                sid
            ))),
        },
    }
}
fn annotations<'a>(&'a self) -> Box<dyn Iterator<Item = IonResult<Symbol>> + 'a> {
let iter = self
.raw_reader
.annotations()
.map(|raw_token| match raw_token {
// If the annotation was a symbol ID, try to resolve it
Ok(RawSymbolToken::SymbolId(sid)) => {
self.symbol_table.symbol_for(sid).cloned().ok_or_else(|| {
decoding_error_raw(format!(
"Found annotation with undefined symbol ${}",
sid
))
})
}
// If the annotation was a text literal, turn it into a `Symbol`
Ok(RawSymbolToken::Text(text)) => Ok(Symbol::owned(text)),
// If the raw reader couldn't provide the annotation, propagate the error
Err(error) => Err(error),
});
Box::new(iter)
}
/// Reads the current value as a symbol, resolving symbol IDs through the current symbol table.
/// Fails with a decoding error if the ID is out of range or has unknown text.
fn read_symbol(&mut self) -> IonResult<Self::Symbol> {
    match self.read_raw_symbol()? {
        RawSymbolToken::Text(text) => Ok(Symbol::owned(text)),
        RawSymbolToken::SymbolId(sid) => match self.symbol_table.symbol_for(sid) {
            // Make a cheap clone of the Rc<str> in the symbol table
            Some(symbol) => Ok(symbol.clone()),
            None if self.symbol_table.sid_is_valid(sid) => {
                decoding_error(format!("Symbol ID ${} has unknown text.", sid))
            }
            None => decoding_error(format!("Symbol ID ${} is out of range.", sid)),
        },
    }
}
/// Reads the current value as an owned `String`.
///
/// If the string was already consumed while processing a local symbol table, a copy of the
/// buffered text is returned. Otherwise the read is delegated to the raw reader, after
/// checking that the reader is positioned over a value.
fn read_string(&mut self) -> IonResult<String> {
    if self.current_string_was_consumed() {
        Ok(self.lst.current_string.clone())
    } else if self.raw_reader.current() == RawStreamItem::Nothing {
        illegal_operation("called `read_string` when reader was not positioned on a value")
    } else {
        // Nothing was buffered; the raw reader still holds the value.
        self.raw_reader.read_string()
    }
}
/// Applies `f` to the text of the current string value and returns its result.
///
/// If the string was already consumed while processing a local symbol table, `f` receives the
/// buffered text. Otherwise the call is delegated to the raw reader, after checking that the
/// reader is positioned over a value.
fn map_string<F, U>(&mut self, f: F) -> IonResult<U>
where
    F: FnOnce(&str) -> U,
{
    if self.current_string_was_consumed() {
        Ok(f(&self.lst.current_string))
    } else if self.raw_reader.current() == RawStreamItem::Nothing {
        illegal_operation("called `map_string` when reader was not positioned on a value")
    } else {
        self.raw_reader.map_string(f)
    }
}
/// Applies `f` to the bytes of the current string value and returns its result.
///
/// If the string was already consumed while processing a local symbol table, `f` receives the
/// bytes of the buffered text. Otherwise the call is delegated to the raw reader, after
/// checking that the reader is positioned over a value.
fn map_string_bytes<F, U>(&mut self, f: F) -> IonResult<U>
where
    F: FnOnce(&[u8]) -> U,
{
    if self.current_string_was_consumed() {
        Ok(f(self.lst.current_string.as_bytes()))
    } else if self.raw_reader.current() == RawStreamItem::Nothing {
        illegal_operation("called `map_string_bytes` when reader was not positioned on a value")
    } else {
        self.raw_reader.map_string_bytes(f)
    }
}
// The SystemReader needs to expose many of the same functions as the wrapped raw reader, but
// only some of those need to be re-defined to allow for system value processing. Any method
// listed here is forwarded to self.raw_reader unchanged, with no LST-specific handling.
delegate! {
to self.raw_reader {
fn is_null(&self) -> bool;
fn ion_version(&self) -> (u8, u8);
fn ion_type(&self) -> Option<IonType>;
fn read_null(&mut self) -> IonResult<IonType>;
fn read_bool(&mut self) -> IonResult<bool>;
fn read_integer(&mut self) -> IonResult<Integer>;
fn read_i64(&mut self) -> IonResult<i64>;
fn read_f32(&mut self) -> IonResult<f32>;
fn read_f64(&mut self) -> IonResult<f64>;
fn read_decimal(&mut self) -> IonResult<Decimal>;
fn read_blob(&mut self) -> IonResult<Vec<u8>>;
fn map_blob<F, U>(&mut self, f: F) -> IonResult<U> where F: FnOnce(&[u8]) -> U;
fn read_clob(&mut self) -> IonResult<Vec<u8>>;
fn map_clob<F, U>(&mut self, f: F) -> IonResult<U> where F: FnOnce(&[u8]) -> U;
fn read_timestamp(&mut self) -> IonResult<Timestamp>;
fn depth(&self) -> usize;
fn parent_type(&self) -> Option<IonType>;
}
}
}
/// Functionality that is only available if the data source we're reading from is in-memory, like
/// a `Vec<u8>` or `&[u8]`.
///
/// Every method in this block is forwarded unchanged to the wrapped [RawBinaryReader].
impl<T: AsRef<[u8]>> SystemReader<RawBinaryReader<io::Cursor<T>>> {
delegate! {
to self.raw_reader {
// Byte slices for the current value and its component parts.
pub fn raw_bytes(&self) -> Option<&[u8]>;
pub fn raw_field_id_bytes(&self) -> Option<&[u8]>;
pub fn raw_header_bytes(&self) -> Option<&[u8]>;
pub fn raw_value_bytes(&self) -> Option<&[u8]>;
pub fn raw_annotations_bytes(&self) -> Option<&[u8]>;
// Length, offset, and range of the field ID, if any.
pub fn field_id_length(&self) -> Option<usize>;
pub fn field_id_offset(&self) -> Option<usize>;
pub fn field_id_range(&self) -> Option<Range<usize>>;
// Length, offset, and range of the annotations, if any.
pub fn annotations_length(&self) -> Option<usize>;
pub fn annotations_offset(&self) -> Option<usize>;
pub fn annotations_range(&self) -> Option<Range<usize>>;
// Length, offset, and range of the header.
pub fn header_length(&self) -> usize;
pub fn header_offset(&self) -> usize;
pub fn header_range(&self) -> Range<usize>;
// Length, offset, and range of the value.
pub fn value_length(&self) -> usize;
pub fn value_offset(&self) -> usize;
pub fn value_range(&self) -> Range<usize>;
}
}
}
#[cfg(test)]
mod tests {
use super::SystemStreamItem::*;
use crate::text::raw_text_reader::RawTextReader;
use super::*;
// Builds a SystemReader over the provided Ion text. Panics if the text reader cannot be
// initialized.
fn system_reader_for(ion: &str) -> SystemReader<RawTextReader<&str>> {
    SystemReader::new(RawTextReader::new(ion).expect("unable to initialize reader"))
}
#[test]
fn basic_symbol_table() -> IonResult<()> {
    // The stream contains a local symbol table that is not an append.
    let ion_data = r#"
$ion_symbol_table::{
symbols: ["foo", "bar", "baz"],
}
$10 // "foo"
$11 // "bar"
$12 // "baz"
"#;
    let mut reader = system_reader_for(ion_data);
    // We step over the LST...
    assert_eq!(reader.next()?, SymbolTableValue(IonType::Struct));
    // ...but every symbol that follows must resolve through the symbol table, which shows
    // that the SystemReader processed the LST even though we skipped it with `next()`
    for expected_text in &["foo", "bar", "baz"] {
        assert_eq!(reader.next()?, Value(IonType::Symbol));
        assert_eq!(reader.read_symbol()?, *expected_text);
    }
    Ok(())
}
#[test]
fn symbol_table_append() -> IonResult<()> {
    // The stream contains multiple LST appends
    let ion_data = r#"
$ion_symbol_table::{
imports: $ion_symbol_table,
symbols: ["foo"],
}
$ion_symbol_table::{
imports: $ion_symbol_table,
symbols: ["bar"],
}
$ion_symbol_table::{
imports: $ion_symbol_table,
symbols: ["baz"],
}
$10 // "foo"
$11 // "bar"
$12 // "baz"
"#;
    let mut reader = system_reader_for(ion_data);
    // Expect 3 symbol tables in a row, stepping over each one
    let mut tables_seen = 0;
    while tables_seen < 3 {
        assert_eq!(reader.next()?, SymbolTableValue(IonType::Struct));
        tables_seen += 1;
    }
    // Confirm that the symbols defined in each append map to the expected text.
    for expected_text in &["foo", "bar", "baz"] {
        assert_eq!(reader.next()?, Value(IonType::Symbol));
        assert_eq!(reader.read_symbol()?, *expected_text);
    }
    Ok(())
}
#[test]
fn symbol_table_reset() -> IonResult<()> {
    // The stream contains multiple symbol tables that are not appends. Verify that the
    // current symbol table is reset each time it encounters a non-append LST.
    let ion_data = r#"
$ion_symbol_table::{
symbols: ["foo"],
}
$10 // "foo"
$ion_1_0 // Reset the table on IVM
$ion_symbol_table::{
symbols: ["bar"],
}
$ion_symbol_table::{
// Reset the table because this isn't an LST append
symbols: ["baz"],
}
$10 // "baz"
"#;
    let mut reader = system_reader_for(ion_data);

    // Positioned on the first LST, which has not been processed yet...
    assert_eq!(reader.next()?, SymbolTableValue(IonType::Struct));
    // ...so only the system symbols are defined
    assert_eq!(reader.symbol_table.len(), 10);

    // Moving on to the symbol $10 loads the LST as we pass it
    assert_eq!(reader.next()?, Value(IonType::Symbol));
    assert_eq!(reader.symbol_table.len(), 11);
    assert_eq!(reader.read_symbol()?, "foo");

    // The IVM resets the table, so the symbol we defined is gone
    assert_eq!(reader.next()?, VersionMarker(1, 0));
    assert_eq!(reader.symbol_table.len(), 10);

    // Step over the two symbol tables that follow
    for _ in 0..2 {
        assert_eq!(reader.next()?, SymbolTableValue(IonType::Struct));
    }

    // Advance to the symbol $10 again, but this time it's 'baz'
    assert_eq!(reader.next()?, Value(IonType::Symbol));
    assert_eq!(reader.symbol_table.len(), 11);
    assert_eq!(reader.read_symbol()?, "baz");
    Ok(())
}
#[test]
fn manually_step_through_lst() -> IonResult<()> {
    // The stream contains an LST
    let ion_data = r#"
$ion_1_0
$ion_symbol_table::{
imports: $ion_symbol_table,
symbols: ["foo", "bar", "baz"],
}
$10
$11
$12
"#;
    let mut reader = system_reader_for(ion_data);
    let expected_symbols = ["foo", "bar", "baz"];

    // IVM
    assert_eq!(reader.next()?, VersionMarker(1, 0));
    // Symbol table
    assert_eq!(reader.next()?, SymbolTableValue(IonType::Struct));
    // Rather than stepping _over_ the LST as the other tests do, step into it and visit/read
    // every value it contains. Afterwards, we'll confirm that the SystemReader correctly
    // processed the LST under the hood while we were stepping through it ourselves.
    reader.step_in()?;

    // Advance to `imports`, confirm its value is the system symbol "$ion_symbol_table"
    assert_eq!(reader.next()?, SymbolTableValue(IonType::Symbol));
    assert_eq!(reader.field_name()?, "imports");
    assert_eq!(reader.read_symbol()?, "$ion_symbol_table".to_string());

    // Advance to `symbols`, visit each string in the list
    assert_eq!(reader.next()?, SymbolTableValue(IonType::List));
    assert_eq!(reader.field_name()?, "symbols");
    reader.step_in()?;
    for expected_text in &expected_symbols {
        assert_eq!(reader.next()?, SymbolTableValue(IonType::String));
        assert_eq!(reader.read_string()?, *expected_text);
    }
    // No more strings
    assert_eq!(reader.next()?, Nothing);
    reader.step_out()?;

    // No more LST fields
    assert_eq!(reader.next()?, Nothing);
    reader.step_out()?;

    // Read the user-level symbol values in the stream to confirm that the LST was processed
    // successfully by the SystemReader.
    for expected_text in &expected_symbols {
        assert_eq!(reader.next()?, Value(IonType::Symbol));
        assert_eq!(reader.read_symbol()?, *expected_text);
    }
    Ok(())
}
}