// Stream deserializer: iterates over the consecutive EnCom values found in a single input.
use super::deserializer::Deserializer;
use super::read::Fused;
use super::read::Read;
use crate::error::{Error, ErrorCode, Result};
use core::iter::FusedIterator;
use core::marker::PhantomData;
use serde::de;
// NOTE(review): the example input below places `"`, `[` and `,` directly after
// values. The iterator only accepts whitespace, `{`, `}`, `:` or end of input
// after a value that does not start with `{`, so e.g. `1"cool"` looks like it
// would yield a trailing-characters error — confirm this doctest against real
// EnCom syntax.
/// Iterator that deserializes a stream into multiple EnCom values.
///
/// A stream deserializer can be created from any EnCom deserializer using the
/// `Deserializer::into_iter` method.
///
/// The data can consist of any EnCom value. A value that starts with `{` is
/// treated as self-delineating. Any other value must be followed by
/// whitespace, by one of `{`, `}` or `:`, or by the end of the input;
/// otherwise the iterator yields a trailing-characters error for that item.
///
/// ```
/// use serde_encom::{Deserializer, Value};
///
/// fn main() {
/// let data = "{\"k\": 3}1\"cool\"\"stuff\" 3{} [0, 1, 2]";
///
/// let stream = Deserializer::from_str(data).into_iter::<Value>();
///
/// for value in stream {
/// println!("{}", value.unwrap());
/// }
/// }
/// ```
pub struct StreamDeserializer<'de, R, T> {
/// Underlying deserializer that every value is read from.
pub(crate) de: Deserializer<R>,
/// Reader byte offset last recorded by the iterator; this is the value
/// reported by `byte_offset`.
pub(crate) offset: usize,
/// Flag handed to the reader's `set_failed` when an error occurs; checked
/// at the start of `next` for readers that request an early return.
pub(crate) failed: bool,
/// Marker for the output type `T`; no `T` is stored in the struct.
pub(crate) output: PhantomData<T>,
/// Marker that ties the struct to the `'de` lifetime.
pub(crate) lifetime: PhantomData<&'de ()>,
}
impl<'de, R, T> StreamDeserializer<'de, R, T>
where
    R: Read<'de>,
    T: de::Deserialize<'de>,
{
    /// Builds an EnCom stream deserializer on top of one of the serde_encom
    /// input sources.
    ///
    /// The reader's current byte offset becomes the stream's initial offset.
    ///
    /// In most cases one of these shortcuts is more convenient:
    ///
    /// - Deserializer::from_str(...).into_iter()
    /// - Deserializer::from_slice(...).into_iter()
    /// - Deserializer::from_reader(...).into_iter()
    pub fn new(read: R) -> Self {
        Self {
            // Field initialisers run in source order, so the offset is read
            // before `read` is moved into the deserializer on the next line.
            offset: read.byte_offset(),
            de: Deserializer::new(read),
            failed: false,
            output: PhantomData,
            lifetime: PhantomData,
        }
    }

    // NOTE(review): `data` in the example below is only 4 bytes long, yet the
    // example asserts offsets 3, 7 and 8 and then slices `data[8..]`. The
    // literal looks like it lost its array delimiters (the comments mention
    // `[0]` / `[1]`) — confirm against real EnCom syntax and fix the doctest.
    /// Reports how many bytes have so far been deserialized into a successful
    /// `T`.
    ///
    /// When the stream yields an EOF error, fresh data can be appended to
    /// `old_data[stream.byte_offset()..]` and parsing retried.
    ///
    /// ```
    /// let data = b"0 1 ";
    ///
    /// let de = serde_encom::Deserializer::from_slice(data);
    /// let mut stream = de.into_iter::<Vec<i32>>();
    /// assert_eq!(0, stream.byte_offset());
    ///
    /// println!("{:?}", stream.next()); // [0]
    /// assert_eq!(3, stream.byte_offset());
    ///
    /// println!("{:?}", stream.next()); // [1]
    /// assert_eq!(7, stream.byte_offset());
    ///
    /// println!("{:?}", stream.next()); // error
    /// assert_eq!(8, stream.byte_offset());
    ///
    /// // If err.is_eof(), can join the remaining data to new data and continue.
    /// let remaining = &data[stream.byte_offset()..];
    /// ```
    ///
    /// *Note:* A future version may instead report the number of bytes
    /// deserialized into a successful T *or* skipped as syntactically valid
    /// EnCom after a type error. [serde-rs/json#70] shows an example of the
    /// difference.
    ///
    /// [serde-rs/json#70]: https://github.com/serde-rs/json/issues/70
    pub fn byte_offset(&self) -> usize {
        self.offset
    }

    /// Checks that the byte following a just-parsed value may legally end it.
    ///
    /// End of input, whitespace (` `, `\n`, `\t`, `\r`) and the bytes `{`, `}`
    /// and `:` are accepted. Any other byte produces a
    /// `TrailingCharacters` syntax error located at the reader's peek
    /// position. Errors from peeking itself are propagated unchanged.
    fn peek_end_of_value(&mut self) -> Result<()> {
        let upcoming = self.de.peek()?;
        let ends_value = matches!(
            upcoming,
            None | Some(b' ' | b'\n' | b'\t' | b'\r' | b'{' | b'}' | b':')
        );
        if ends_value {
            return Ok(());
        }

        let at = self.de.read.peek_position();
        Err(Error::syntax(
            ErrorCode::TrailingCharacters,
            at.line,
            at.column,
        ))
    }
}
impl<'de, R, T> Iterator for StreamDeserializer<'de, R, T>
where
    R: Read<'de>,
    T: de::Deserialize<'de>,
{
    type Item = Result<T>;

    /// Deserializes the next value of the stream.
    ///
    /// Returns `None` once only whitespace remains, or immediately when the
    /// reader asks for an early return after a recorded failure. Otherwise
    /// returns `Some` with either the parsed value or the error encountered.
    fn next(&mut self) -> Option<Result<T>> {
        if R::SHOULD_EARLY_RETURN_IF_FAILED && self.failed {
            return None;
        }

        // Consume leading whitespace. Whitespace between values is dealt with
        // here as well, which also takes care of trailing whitespace at the
        // end of the input.
        let first_byte = match self.de.parse_whitespace() {
            Ok(Some(byte)) => byte,
            Ok(None) => {
                self.offset = self.de.read.byte_offset();
                return None;
            }
            Err(err) => {
                self.de.read.set_failed(&mut self.failed);
                return Some(Err(err));
            }
        };

        // Only a value opening with `{` marks its own end. Anything else has
        // no terminator of its own, so the byte after it must be checked.
        let needs_end_check = first_byte != b'{';

        self.offset = self.de.read.byte_offset();
        let parsed: T = match de::Deserialize::deserialize(&mut self.de) {
            Ok(value) => value,
            Err(err) => {
                self.de.read.set_failed(&mut self.failed);
                return Some(Err(err));
            }
        };

        // The offset advances as soon as the value itself parsed, before the
        // terminator check runs.
        self.offset = self.de.read.byte_offset();
        if needs_end_check {
            Some(self.peek_end_of_value().map(|()| parsed))
        } else {
            Some(Ok(parsed))
        }
    }
}
/// Marks the stream as a `FusedIterator` whenever the reader `R` also
/// implements `Fused`. The impl is a pure marker and adds no methods.
// NOTE(review): presumably `Fused` is implemented only for readers whose
// stream keeps returning `None` once it has ended — confirm in `super::read`.
impl<'de, R, T> FusedIterator for StreamDeserializer<'de, R, T>
where
R: Read<'de> + Fused,
T: de::Deserialize<'de>,
{
}