preserves/value/
reader.rs

1//! Generic [Reader] trait for parsing Preserves [Value][crate::value::repr::Value]s,
2//! implemented by code that provides each specific transfer syntax.
3
4use crate::error::{self, io_eof, ExpectedKind, Received};
5
6use std::borrow::Cow;
7use std::io;
8use std::marker::PhantomData;
9
10use super::boundary as B;
11use super::signed_integer::SignedInteger;
12use super::CompoundClass;
13use super::DomainDecode;
14use super::DomainParse;
15use super::Double;
16use super::IOValue;
17use super::IOValueDomainCodec;
18use super::NestedValue;
19use super::ViaCodec;
20
21pub type ReaderResult<T> = std::result::Result<T, error::Error>;
22
23/// Tokens produced when performing
24/// [SAX](https://en.wikipedia.org/wiki/Simple_API_for_XML)-style reading of terms.
25pub enum Token<N: NestedValue> {
26    /// An embedded value was seen and completely decoded.
27    Embedded(N::Embedded),
28    /// An atomic value was seen and completely decoded.
29    Atom(N),
30    /// A compound value has been opened; its contents follow, and it will be terminated by
31    /// [Token::End].
32    Compound(CompoundClass),
33    /// Closes a previously-opened compound value.
34    End,
35}
36
37/// Generic parser for Preserves.
38pub trait Reader<'de, N: NestedValue> {
39    /// Retrieve the next parseable value or an indication of end-of-input.
40    ///
41    /// Yields `Ok(Some(...))` if a complete value is available, `Ok(None)` if the end of
42    /// stream has been reached, or `Err(...)` for parse or IO errors, including
43    /// incomplete/partial input. See also [Reader::demand_next].
44    fn next(&mut self, read_annotations: bool) -> io::Result<Option<N>>;
45
46    // Hiding these from the documentation for the moment because I don't want to have to
47    // document the whole Boundary thing.
48    #[doc(hidden)]
49    fn open_record(&mut self, arity: Option<usize>) -> ReaderResult<()>;
50    #[doc(hidden)]
51    fn open_sequence_or_set(&mut self) -> ReaderResult<B::Item>;
52    #[doc(hidden)]
53    fn open_sequence(&mut self) -> ReaderResult<()>;
54    #[doc(hidden)]
55    fn open_set(&mut self) -> ReaderResult<()>;
56    #[doc(hidden)]
57    fn open_dictionary(&mut self) -> ReaderResult<()>;
58    #[doc(hidden)]
59    fn boundary(&mut self, b: &B::Type) -> ReaderResult<()>;
60
61    #[doc(hidden)]
62    // close_compound implies a b.shift(None) and a self.boundary(b) *iff* an actual
63    // close marker was consumed and true returned.
64    fn close_compound(&mut self, b: &mut B::Type) -> ReaderResult<bool>;
65
66    #[doc(hidden)]
67    fn open_embedded(&mut self) -> ReaderResult<()>;
68    #[doc(hidden)]
69    fn close_embedded(&mut self) -> ReaderResult<()>;
70
71    /// Allows structured backtracking to an earlier stage in a parse. Useful for layering
72    /// parser combinators atop a Reader.
73    type Mark;
74    /// Retrieve a marker for the current position in the input.
75    fn mark(&mut self) -> io::Result<Self::Mark>;
76    /// Seek the input to a previously-saved position.
77    fn restore(&mut self, mark: &Self::Mark) -> io::Result<()>;
78
79    /// Get the next [SAX](https://en.wikipedia.org/wiki/Simple_API_for_XML)-style event,
80    /// discarding annotations.
81    ///
82    /// The `read_embedded_annotations` controls whether annotations are also skipped on
83    /// *embedded* values or not.
84    fn next_token(&mut self, read_embedded_annotations: bool) -> io::Result<Token<N>>;
85    /// Get the next [SAX](https://en.wikipedia.org/wiki/Simple_API_for_XML)-style event, plus
86    /// a vector containing any annotations that preceded it.
87    fn next_annotations_and_token(&mut self) -> io::Result<(Vec<N>, Token<N>)>;
88
89    //---------------------------------------------------------------------------
90
91    /// Skips the next available complete value. Yields an error if no such value exists.
92    fn skip_value(&mut self) -> io::Result<()> {
93        // TODO efficient skipping in specific impls of this trait
94        let _ = self.demand_next(false)?;
95        Ok(())
96    }
97
98    /// Retrieve the next parseable value, treating end-of-input as an error.
99    ///
100    /// Yields `Ok(...)` if a complete value is available or `Err(...)` for parse or IO errors,
101    /// including incomplete/partial input or end of stream. See also [Reader::next].
102    fn demand_next(&mut self, read_annotations: bool) -> io::Result<N> {
103        self.next(read_annotations)?.ok_or_else(io_eof)
104    }
105
106    /// Yields the next value, if it is a `Boolean`, or an error otherwise.
107    fn next_boolean(&mut self) -> ReaderResult<bool> {
108        self.demand_next(false)?.value().to_boolean()
109    }
110
111    /// Yields the next value, if it is a `Double`, or an error otherwise.
112    fn next_double(&mut self) -> ReaderResult<Double> {
113        Ok(self.demand_next(false)?.value().to_double()?.to_owned())
114    }
115
116    /// Yields the next value, if it is a `SignedInteger`, or an error otherwise.
117    fn next_signedinteger(&mut self) -> ReaderResult<SignedInteger> {
118        Ok(self
119            .demand_next(false)?
120            .value()
121            .to_signedinteger()?
122            .to_owned())
123    }
124
125    /// Yields the next value, if it is a `SignedInteger` that fits in [i8], or an error
126    /// otherwise.
127    fn next_i8(&mut self) -> ReaderResult<i8> {
128        self.demand_next(false)?.value().to_i8()
129    }
130    /// Yields the next value, if it is a `SignedInteger` that fits in [u8], or an error
131    /// otherwise.
132    fn next_u8(&mut self) -> ReaderResult<u8> {
133        self.demand_next(false)?.value().to_u8()
134    }
135    /// Yields the next value, if it is a `SignedInteger` that fits in [i16], or an error
136    /// otherwise.
137    fn next_i16(&mut self) -> ReaderResult<i16> {
138        self.demand_next(false)?.value().to_i16()
139    }
140    /// Yields the next value, if it is a `SignedInteger` that fits in [u16], or an error
141    /// otherwise.
142    fn next_u16(&mut self) -> ReaderResult<u16> {
143        self.demand_next(false)?.value().to_u16()
144    }
145    /// Yields the next value, if it is a `SignedInteger` that fits in [i32], or an error
146    /// otherwise.
147    fn next_i32(&mut self) -> ReaderResult<i32> {
148        self.demand_next(false)?.value().to_i32()
149    }
150    /// Yields the next value, if it is a `SignedInteger` that fits in [u32], or an error
151    /// otherwise.
152    fn next_u32(&mut self) -> ReaderResult<u32> {
153        self.demand_next(false)?.value().to_u32()
154    }
155    /// Yields the next value, if it is a `SignedInteger` that fits in [i64], or an error
156    /// otherwise.
157    fn next_i64(&mut self) -> ReaderResult<i64> {
158        self.demand_next(false)?.value().to_i64()
159    }
160    /// Yields the next value, if it is a `SignedInteger` that fits in [u64], or an error
161    /// otherwise.
162    fn next_u64(&mut self) -> ReaderResult<u64> {
163        self.demand_next(false)?.value().to_u64()
164    }
165    /// Yields the next value, if it is a `SignedInteger` that fits in [i128], or an error
166    /// otherwise.
167    fn next_i128(&mut self) -> ReaderResult<i128> {
168        self.demand_next(false)?.value().to_i128()
169    }
170    /// Yields the next value, if it is a `SignedInteger` that fits in [u128], or an error
171    /// otherwise.
172    fn next_u128(&mut self) -> ReaderResult<u128> {
173        self.demand_next(false)?.value().to_u128()
174    }
175    /// Yields the next value as an [f64], if it is a `Double`, or an error otherwise.
176    fn next_f64(&mut self) -> ReaderResult<f64> {
177        self.demand_next(false)?.value().to_f64()
178    }
179    /// Yields the next value as a [char], if it is parseable by
180    /// [Value::to_char][crate::value::Value::to_char], or an error otherwise.
181    fn next_char(&mut self) -> ReaderResult<char> {
182        self.demand_next(false)?.value().to_char()
183    }
184
185    /// Yields the next value, if it is a `String`, or an error otherwise.
186    fn next_str(&mut self) -> ReaderResult<Cow<'de, str>> {
187        Ok(Cow::Owned(
188            self.demand_next(false)?.value().to_string()?.to_owned(),
189        ))
190    }
191
192    /// Yields the next value, if it is a `ByteString`, or an error otherwise.
193    fn next_bytestring(&mut self) -> ReaderResult<Cow<'de, [u8]>> {
194        Ok(Cow::Owned(
195            self.demand_next(false)?.value().to_bytestring()?.to_owned(),
196        ))
197    }
198
199    /// Yields the next value, if it is a `Symbol`, or an error otherwise.
200    fn next_symbol(&mut self) -> ReaderResult<Cow<'de, str>> {
201        Ok(Cow::Owned(
202            self.demand_next(false)?.value().to_symbol()?.to_owned(),
203        ))
204    }
205
206    #[doc(hidden)]
207    fn open_option(&mut self) -> ReaderResult<Option<B::Type>> {
208        self.open_record(None)?;
209        let mut b = B::Type::default();
210        self.ensure_boundary(&mut b, &B::Item::RecordLabel)?;
211        let label: &str = &self.next_symbol()?;
212        match label {
213            "None" => {
214                self.ensure_complete(b)?;
215                Ok(None)
216            }
217            "Some" => {
218                Ok(Some(b))
219            }
220            _ => Err(error::Error::Expected(
221                ExpectedKind::Option,
222                Received::ReceivedRecordWithLabel(label.to_owned()),
223            )),
224        }
225    }
226
227    #[doc(hidden)]
228    fn open_simple_record(&mut self, name: &str, arity: Option<usize>) -> ReaderResult<B::Type> {
229        self.open_record(arity)?;
230        let mut b = B::Type::default();
231        self.ensure_boundary(&mut b, &B::Item::RecordLabel)?;
232        let label: &str = &self.next_symbol()?;
233        if label == name {
234            Ok(b)
235        } else {
236            Err(error::Error::Expected(
237                ExpectedKind::SimpleRecord(name.to_owned(), arity),
238                Received::ReceivedRecordWithLabel(label.to_owned()),
239            ))
240        }
241    }
242
243    /// Constructs a [ConfiguredReader] set with the given value for `read_annotations`.
244    fn configured(self, read_annotations: bool) -> ConfiguredReader<'de, N, Self>
245    where
246        Self: std::marker::Sized,
247    {
248        ConfiguredReader {
249            reader: self,
250            read_annotations,
251            phantom: PhantomData,
252        }
253    }
254
255    #[doc(hidden)]
256    #[inline]
257    fn expect_boundary(&mut self, b: &mut B::Type, i: &B::Item) -> ReaderResult<bool> {
258        if self.close_compound(b)? {
259            Ok(false)
260        } else {
261            b.shift(Some(i.clone()));
262            self.boundary(b)?;
263            Ok(true)
264        }
265    }
266
267    #[doc(hidden)]
268    fn ensure_boundary(&mut self, b: &mut B::Type, i: &B::Item) -> ReaderResult<()> {
269        if self.expect_boundary(b, i)? {
270            Ok(())
271        } else {
272            Err(error::Error::MissingItem)
273        }
274    }
275
276    #[doc(hidden)]
277    fn ensure_complete(&mut self, mut b: B::Type) -> ReaderResult<()> {
278        if !self.close_compound(&mut b)? {
279            Err(error::Error::MissingCloseDelimiter)
280        } else {
281            Ok(())
282        }
283    }
284}
285
286impl<'r, 'de, N: NestedValue, R: Reader<'de, N>> Reader<'de, N> for &'r mut R {
287    fn next(&mut self, read_annotations: bool) -> io::Result<Option<N>> {
288        (*self).next(read_annotations)
289    }
290
291    fn open_record(&mut self, arity: Option<usize>) -> ReaderResult<()> {
292        (*self).open_record(arity)
293    }
294
295    fn open_sequence_or_set(&mut self) -> ReaderResult<B::Item> {
296        (*self).open_sequence_or_set()
297    }
298
299    fn open_sequence(&mut self) -> ReaderResult<()> {
300        (*self).open_sequence()
301    }
302
303    fn open_set(&mut self) -> ReaderResult<()> {
304        (*self).open_set()
305    }
306
307    fn open_dictionary(&mut self) -> ReaderResult<()> {
308        (*self).open_dictionary()
309    }
310
311    fn boundary(&mut self, b: &B::Type) -> ReaderResult<()> {
312        (*self).boundary(b)
313    }
314
315    fn close_compound(&mut self, b: &mut B::Type) -> ReaderResult<bool> {
316        (*self).close_compound(b)
317    }
318
319    fn open_embedded(&mut self) -> ReaderResult<()> {
320        (*self).open_embedded()
321    }
322
323    fn close_embedded(&mut self) -> ReaderResult<()> {
324        (*self).close_embedded()
325    }
326
327    type Mark = R::Mark;
328
329    fn mark(&mut self) -> io::Result<Self::Mark> {
330        (*self).mark()
331    }
332
333    fn restore(&mut self, mark: &Self::Mark) -> io::Result<()> {
334        (*self).restore(mark)
335    }
336
337    fn next_token(&mut self, read_embedded_annotations: bool) -> io::Result<Token<N>> {
338        (*self).next_token(read_embedded_annotations)
339    }
340
341    fn next_annotations_and_token(&mut self) -> io::Result<(Vec<N>, Token<N>)> {
342        (*self).next_annotations_and_token()
343    }
344}
345
346/// Generic seekable stream of input bytes.
347pub trait BinarySource<'de>: Sized {
348    /// Allows structured backtracking to an earlier position in an input.
349    type Mark;
350    /// Retrieve a marker for the current position in the input.
351    fn mark(&mut self) -> io::Result<Self::Mark>;
352    /// Seek the input to a previously-saved position.
353    fn restore(&mut self, mark: &Self::Mark) -> io::Result<()>;
354
355    /// Skip the next byte.
356    fn skip(&mut self) -> io::Result<()>;
357    /// Returns the next byte without advancing over it.
358    fn peek(&mut self) -> io::Result<u8>;
359    /// Returns and consumes the next `count` bytes, which must all be available. Always yields
360    /// exactly `count` bytes or an error.
361    fn readbytes(&mut self, count: usize) -> io::Result<Cow<'de, [u8]>>;
362    /// As [BinarySource::readbytes], but uses `bs` as destination for the read bytes as well
363    /// as taking the size of `bs` as the count of bytes to read.
364    fn readbytes_into(&mut self, bs: &mut [u8]) -> io::Result<()>;
365
366    /// Constructs a [PackedReader][super::PackedReader] that will read from `self`.
367    fn packed<N: NestedValue, Dec: DomainDecode<N::Embedded>>(
368        &mut self,
369        decode_embedded: Dec,
370    ) -> super::PackedReader<'de, '_, N, Dec, Self> {
371        super::PackedReader::new(self, decode_embedded)
372    }
373
374    /// Constructs a [PackedReader][super::PackedReader] that will read [IOValue]s from `self`.
375    fn packed_iovalues(
376        &mut self,
377    ) -> super::PackedReader<'de, '_, IOValue, IOValueDomainCodec, Self> {
378        self.packed(IOValueDomainCodec)
379    }
380
381    /// Constructs a [TextReader][super::TextReader] that will read from `self`.
382    fn text<N: NestedValue, Dec: DomainParse<N::Embedded>>(
383        &mut self,
384        decode_embedded: Dec,
385    ) -> super::TextReader<'de, '_, N, Dec, Self> {
386        super::TextReader::new(self, decode_embedded)
387    }
388
389    /// Constructs a [TextReader][super::TextReader] that will read [IOValue]s from `self`.
390    fn text_iovalues(
391        &mut self,
392    ) -> super::TextReader<'de, '_, IOValue, ViaCodec<IOValueDomainCodec>, Self> {
393        self.text::<IOValue, _>(ViaCodec::new(IOValueDomainCodec))
394    }
395}
396
/// A [BinarySource] that draws its bytes from any [`io::Read`]` + `[`io::Seek`] value.
pub struct IOBinarySource<R: io::Read + io::Seek> {
    /// The reader the bytes come from.
    pub read: R,
    #[doc(hidden)]
    /// Holds at most one byte that has been peeked but not yet consumed.
    pub buf: Option<u8>,
}
405
406impl<R: io::Read + io::Seek> IOBinarySource<R> {
407    /// Constructs an [IOBinarySource] from the given [`io::Read`]` + `[`io::Seek`]
408    /// implementation.
409    #[inline(always)]
410    pub fn new(read: R) -> Self {
411        IOBinarySource { read, buf: None }
412    }
413}
414
415impl<'de, R: io::Read + io::Seek> BinarySource<'de> for IOBinarySource<R> {
416    type Mark = u64;
417
418    #[inline(always)]
419    fn mark(&mut self) -> io::Result<Self::Mark> {
420        Ok(self.read.stream_position()? - (if self.buf.is_some() { 1 } else { 0 }))
421    }
422
423    #[inline(always)]
424    fn restore(&mut self, mark: &Self::Mark) -> io::Result<()> {
425        self.read.seek(io::SeekFrom::Start(*mark))?;
426        self.buf = None;
427        Ok(())
428    }
429
430    #[inline(always)]
431    fn skip(&mut self) -> io::Result<()> {
432        if self.buf.is_none() {
433            unreachable!();
434        }
435        self.buf = None;
436        Ok(())
437    }
438
439    #[inline(always)]
440    fn peek(&mut self) -> io::Result<u8> {
441        match self.buf {
442            Some(b) => Ok(b),
443            None => {
444                let b = &mut [0];
445                match self.read.read(b)? {
446                    0 => Err(io_eof()),
447                    1 => {
448                        self.buf = Some(b[0]);
449                        Ok(b[0])
450                    }
451                    _ => unreachable!(),
452                }
453            }
454        }
455    }
456
457    #[inline(always)]
458    fn readbytes(&mut self, count: usize) -> io::Result<Cow<'de, [u8]>> {
459        if self.buf.is_some() {
460            unreachable!();
461        }
462        let mut bs = vec![0; count];
463        self.read.read_exact(&mut bs)?;
464        Ok(Cow::Owned(bs))
465    }
466
467    #[inline(always)]
468    fn readbytes_into(&mut self, bs: &mut [u8]) -> io::Result<()> {
469        if self.buf.is_some() {
470            unreachable!();
471        }
472        self.read.read_exact(bs)
473    }
474}
475
/// A [BinarySource] that reads directly out of a borrowed slice of [u8].
pub struct BytesBinarySource<'de> {
    /// The slice the bytes come from.
    pub bytes: &'de [u8],
    #[doc(hidden)]
    /// Offset into `bytes` of the next byte to be read.
    pub index: usize,
}
484
485impl<'de> BytesBinarySource<'de> {
486    /// Constructs a [BytesBinarySource] from the given `u8` slice.
487    #[inline(always)]
488    pub fn new(bytes: &'de [u8]) -> Self {
489        BytesBinarySource { bytes, index: 0 }
490    }
491}
492
493impl<'de> BinarySource<'de> for BytesBinarySource<'de> {
494    type Mark = usize;
495
496    #[inline(always)]
497    fn mark(&mut self) -> io::Result<Self::Mark> {
498        Ok(self.index)
499    }
500
501    #[inline(always)]
502    fn restore(&mut self, mark: &Self::Mark) -> io::Result<()> {
503        self.index = *mark;
504        Ok(())
505    }
506
507    #[inline(always)]
508    fn skip(&mut self) -> io::Result<()> {
509        if self.index >= self.bytes.len() {
510            unreachable!();
511        }
512        self.index += 1;
513        Ok(())
514    }
515
516    #[inline(always)]
517    fn peek(&mut self) -> io::Result<u8> {
518        if self.index >= self.bytes.len() {
519            Err(io_eof())
520        } else {
521            Ok(self.bytes[self.index])
522        }
523    }
524
525    #[inline(always)]
526    fn readbytes(&mut self, count: usize) -> io::Result<Cow<'de, [u8]>> {
527        if self.index + count > self.bytes.len() {
528            Err(io_eof())
529        } else {
530            let bs = &self.bytes[self.index..self.index + count];
531            self.index += count;
532            Ok(Cow::Borrowed(bs))
533        }
534    }
535
536    #[inline(always)]
537    fn readbytes_into(&mut self, bs: &mut [u8]) -> io::Result<()> {
538        let count = bs.len();
539        if self.index + count > self.bytes.len() {
540            Err(io_eof())
541        } else {
542            bs.copy_from_slice(&self.bytes[self.index..self.index + count]);
543            self.index += count;
544            Ok(())
545        }
546    }
547}
548
549/// A combination of a [Reader] with presets governing its operation.
550pub struct ConfiguredReader<'de, N: NestedValue, R: Reader<'de, N>> {
551    /// The underlying [Reader].
552    pub reader: R,
553    /// Configuration as to whether to include or discard annotations while reading.
554    pub read_annotations: bool,
555    phantom: PhantomData<&'de N>,
556}
557
558impl<'de, N: NestedValue, R: Reader<'de, N>> ConfiguredReader<'de, N, R> {
559    /// Constructs a [ConfiguredReader] based on the given `reader`.
560    pub fn new(reader: R) -> Self {
561        reader.configured(true)
562    }
563
564    /// Updates the `read_annotations` field of `self`.
565    pub fn set_read_annotations(&mut self, read_annotations: bool) {
566        self.read_annotations = read_annotations
567    }
568
569    /// Retrieve the next parseable value, treating end-of-input as an error.
570    ///
571    /// Delegates directly to [Reader::demand_next].
572    pub fn demand_next(&mut self) -> io::Result<N> {
573        self.reader.demand_next(self.read_annotations)
574    }
575}
576
577impl<'de, N: NestedValue, R: Reader<'de, N>> std::iter::Iterator for ConfiguredReader<'de, N, R> {
578    type Item = io::Result<N>;
579    fn next(&mut self) -> Option<Self::Item> {
580        match self.reader.next(self.read_annotations) {
581            Err(e) => Some(Err(e)),
582            Ok(None) => None,
583            Ok(Some(v)) => Some(Ok(v)),
584        }
585    }
586}