1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
//! Generic [Reader] trait for parsing Preserves [Value][crate::value::repr::Value]s,
//! implemented by code that provides each specific transfer syntax.

use crate::error::{self, io_eof, ExpectedKind, Received};

use std::borrow::Cow;
use std::io;
use std::marker::PhantomData;

use super::boundary as B;
use super::signed_integer::SignedInteger;
use super::CompoundClass;
use super::DomainDecode;
use super::DomainParse;
use super::Double;
use super::Float;
use super::IOValue;
use super::IOValueDomainCodec;
use super::NestedValue;
use super::ViaCodec;

/// Result type returned by many [Reader] methods: either a `T` or an [error::Error].
pub type ReaderResult<T> = std::result::Result<T, error::Error>;

/// Tokens produced when performing
/// [SAX](https://en.wikipedia.org/wiki/Simple_API_for_XML)-style reading of terms.
///
/// Values of this type are yielded by [Reader::next_token] and
/// [Reader::next_annotations_and_token]. The type parameter `N` is the [NestedValue]
/// representation used for atoms; embedded values are carried as `N::Embedded`.
pub enum Token<N: NestedValue> {
    /// An embedded value was seen and completely decoded.
    Embedded(N::Embedded),
    /// An atomic value was seen and completely decoded.
    Atom(N),
    /// A compound value has been opened; its contents follow, and it will be terminated by
    /// [Token::End]. The payload identifies which class of compound was opened.
    Compound(CompoundClass),
    /// Closes a previously-opened compound value.
    End,
}

/// Generic parser for Preserves.
///
/// Implementors provide the primitive operations ([Reader::next], the hidden
/// `open_*`/`close_*` family, [Reader::mark]/[Reader::restore] and the token-level
/// readers). Every other method has a default implementation expressed in terms of those
/// primitives.
pub trait Reader<'de, N: NestedValue> {
    /// Reads the next complete value from the input, if there is one.
    ///
    /// The result is `Ok(Some(...))` when a whole value was available, `Ok(None)` when the
    /// end of the stream has been reached, and `Err(...)` for parse or IO errors, which
    /// includes incomplete/partial input. See also [Reader::demand_next].
    fn next(&mut self, read_annotations: bool) -> io::Result<Option<N>>;

    // The following are hidden from the documentation for now, to avoid having to document
    // the whole Boundary mechanism.
    #[doc(hidden)]
    fn open_record(&mut self, arity: Option<usize>) -> ReaderResult<B::Type>;
    #[doc(hidden)]
    fn open_sequence_or_set(&mut self) -> ReaderResult<B::Item>;
    #[doc(hidden)]
    fn open_sequence(&mut self) -> ReaderResult<()>;
    #[doc(hidden)]
    fn open_set(&mut self) -> ReaderResult<()>;
    #[doc(hidden)]
    fn open_dictionary(&mut self) -> ReaderResult<()>;
    #[doc(hidden)]
    fn boundary(&mut self, b: &B::Type) -> ReaderResult<()>;

    #[doc(hidden)]
    // close_compound implies a b.shift(...) and a self.boundary(b).
    // Going by its use in ensure_more_expected/ensure_complete, `Ok(true)` means the
    // compound has ended and `Ok(false)` means another item follows.
    fn close_compound(&mut self, b: &mut B::Type, i: &B::Item) -> ReaderResult<bool>;

    #[doc(hidden)]
    fn open_embedded(&mut self) -> ReaderResult<()>;
    #[doc(hidden)]
    fn close_embedded(&mut self) -> ReaderResult<()>;

    /// Position marker type, enabling structured backtracking to an earlier stage in a
    /// parse. Useful for layering parser combinators atop a Reader.
    type Mark;
    /// Captures a marker for the current position in the input.
    fn mark(&mut self) -> io::Result<Self::Mark>;
    /// Moves the input back to a position previously captured with [Reader::mark].
    fn restore(&mut self, mark: &Self::Mark) -> io::Result<()>;

    /// Reads the next [SAX](https://en.wikipedia.org/wiki/Simple_API_for_XML)-style event,
    /// discarding annotations.
    ///
    /// The `read_embedded_annotations` flag controls whether annotations are also skipped on
    /// *embedded* values or not.
    fn next_token(&mut self, read_embedded_annotations: bool) -> io::Result<Token<N>>;
    /// Reads the next [SAX](https://en.wikipedia.org/wiki/Simple_API_for_XML)-style event,
    /// together with a vector holding any annotations that preceded it.
    fn next_annotations_and_token(&mut self) -> io::Result<(Vec<N>, Token<N>)>;

    //---------------------------------------------------------------------------

    /// Consumes and discards the next complete value. Fails if no such value exists.
    fn skip_value(&mut self) -> io::Result<()> {
        // TODO efficient skipping in specific impls of this trait
        self.demand_next(false).map(|_| ())
    }

    /// Reads the next value, treating end-of-input as an error.
    ///
    /// Yields `Ok(...)` when a complete value is available and `Err(...)` for parse or IO
    /// errors, including incomplete/partial input or end of stream. See also
    /// [Reader::next].
    fn demand_next(&mut self, read_annotations: bool) -> io::Result<N> {
        match self.next(read_annotations)? {
            Some(term) => Ok(term),
            None => Err(io_eof()),
        }
    }

    /// Reads the next value and yields it if it is a `Boolean`; fails otherwise.
    fn next_boolean(&mut self) -> ReaderResult<bool> {
        let term = self.demand_next(false)?;
        term.value().to_boolean()
    }

    /// Reads the next value and yields it if it is a `Float`; fails otherwise.
    fn next_float(&mut self) -> ReaderResult<Float> {
        let term = self.demand_next(false)?;
        let float = term.value().to_float()?;
        Ok(float.to_owned())
    }

    /// Reads the next value and yields it if it is a `Double`; fails otherwise.
    fn next_double(&mut self) -> ReaderResult<Double> {
        let term = self.demand_next(false)?;
        let double = term.value().to_double()?;
        Ok(double.to_owned())
    }

    /// Reads the next value and yields it if it is a `SignedInteger`; fails otherwise.
    fn next_signedinteger(&mut self) -> ReaderResult<SignedInteger> {
        let term = self.demand_next(false)?;
        let integer = term.value().to_signedinteger()?;
        Ok(integer.to_owned())
    }

    /// Reads the next value and yields it if it is a `SignedInteger` that fits in [i8];
    /// fails otherwise.
    fn next_i8(&mut self) -> ReaderResult<i8> {
        let term = self.demand_next(false)?;
        term.value().to_i8()
    }
    /// Reads the next value and yields it if it is a `SignedInteger` that fits in [u8];
    /// fails otherwise.
    fn next_u8(&mut self) -> ReaderResult<u8> {
        let term = self.demand_next(false)?;
        term.value().to_u8()
    }
    /// Reads the next value and yields it if it is a `SignedInteger` that fits in [i16];
    /// fails otherwise.
    fn next_i16(&mut self) -> ReaderResult<i16> {
        let term = self.demand_next(false)?;
        term.value().to_i16()
    }
    /// Reads the next value and yields it if it is a `SignedInteger` that fits in [u16];
    /// fails otherwise.
    fn next_u16(&mut self) -> ReaderResult<u16> {
        let term = self.demand_next(false)?;
        term.value().to_u16()
    }
    /// Reads the next value and yields it if it is a `SignedInteger` that fits in [i32];
    /// fails otherwise.
    fn next_i32(&mut self) -> ReaderResult<i32> {
        let term = self.demand_next(false)?;
        term.value().to_i32()
    }
    /// Reads the next value and yields it if it is a `SignedInteger` that fits in [u32];
    /// fails otherwise.
    fn next_u32(&mut self) -> ReaderResult<u32> {
        let term = self.demand_next(false)?;
        term.value().to_u32()
    }
    /// Reads the next value and yields it if it is a `SignedInteger` that fits in [i64];
    /// fails otherwise.
    fn next_i64(&mut self) -> ReaderResult<i64> {
        let term = self.demand_next(false)?;
        term.value().to_i64()
    }
    /// Reads the next value and yields it if it is a `SignedInteger` that fits in [u64];
    /// fails otherwise.
    fn next_u64(&mut self) -> ReaderResult<u64> {
        let term = self.demand_next(false)?;
        term.value().to_u64()
    }
    /// Reads the next value and yields it if it is a `SignedInteger` that fits in [i128];
    /// fails otherwise.
    fn next_i128(&mut self) -> ReaderResult<i128> {
        let term = self.demand_next(false)?;
        term.value().to_i128()
    }
    /// Reads the next value and yields it if it is a `SignedInteger` that fits in [u128];
    /// fails otherwise.
    fn next_u128(&mut self) -> ReaderResult<u128> {
        let term = self.demand_next(false)?;
        term.value().to_u128()
    }
    /// Reads the next value and yields it as an [f32] if it is a `Float`; fails otherwise.
    fn next_f32(&mut self) -> ReaderResult<f32> {
        let term = self.demand_next(false)?;
        term.value().to_f32()
    }
    /// Reads the next value and yields it as an [f64] if it is a `Double`; fails otherwise.
    fn next_f64(&mut self) -> ReaderResult<f64> {
        let term = self.demand_next(false)?;
        term.value().to_f64()
    }
    /// Reads the next value and yields it as a [char] if it is parseable by
    /// [Value::to_char][crate::value::Value::to_char]; fails otherwise.
    fn next_char(&mut self) -> ReaderResult<char> {
        let term = self.demand_next(false)?;
        term.value().to_char()
    }

    /// Reads the next value and yields it if it is a `String`; fails otherwise.
    ///
    /// This default implementation always produces an owned copy of the text.
    fn next_str(&mut self) -> ReaderResult<Cow<'de, str>> {
        let term = self.demand_next(false)?;
        let text = term.value().to_string()?;
        Ok(Cow::Owned(text.to_owned()))
    }

    /// Reads the next value and yields it if it is a `ByteString`; fails otherwise.
    ///
    /// This default implementation always produces an owned copy of the bytes.
    fn next_bytestring(&mut self) -> ReaderResult<Cow<'de, [u8]>> {
        let term = self.demand_next(false)?;
        let bytes = term.value().to_bytestring()?;
        Ok(Cow::Owned(bytes.to_owned()))
    }

    /// Reads the next value and yields it if it is a `Symbol`; fails otherwise.
    ///
    /// This default implementation always produces an owned copy of the symbol text.
    fn next_symbol(&mut self) -> ReaderResult<Cow<'de, str>> {
        let term = self.demand_next(false)?;
        let symbol = term.value().to_symbol()?;
        Ok(Cow::Owned(symbol.to_owned()))
    }

    // Opens a record and inspects its label symbol:
    //  - label `None`: checks that the record is complete and yields `Ok(None)`;
    //  - label `Some`: yields `Ok(Some(b))`, leaving the record open for the caller;
    //  - any other label: yields an `Expected(Option, ...)` error.
    #[doc(hidden)]
    fn open_option(&mut self) -> ReaderResult<Option<B::Type>> {
        let b = self.open_record(None)?;
        let label = self.next_symbol()?;
        match &*label {
            "Some" => Ok(Some(b)),
            "None" => {
                self.ensure_complete(b, &B::Item::RecordField)?;
                Ok(None)
            }
            other => Err(error::Error::Expected(
                ExpectedKind::Option,
                Received::ReceivedRecordWithLabel(other.to_owned()),
            )),
        }
    }

    // Opens a record and checks that its label symbol equals `name`; on a mismatch an
    // `Expected(SimpleRecord(name, arity), ...)` error is produced.
    #[doc(hidden)]
    fn open_simple_record(&mut self, name: &str, arity: Option<usize>) -> ReaderResult<B::Type> {
        let b = self.open_record(arity)?;
        let label = self.next_symbol()?;
        if &*label != name {
            return Err(error::Error::Expected(
                ExpectedKind::SimpleRecord(name.to_owned(), arity),
                Received::ReceivedRecordWithLabel(label.into_owned()),
            ));
        }
        Ok(b)
    }

    /// Wraps `self` in a [ConfiguredReader] using the given value for `read_annotations`.
    fn configured(self, read_annotations: bool) -> ConfiguredReader<'de, N, Self>
    where
        Self: Sized,
    {
        ConfiguredReader {
            phantom: PhantomData,
            read_annotations,
            reader: self,
        }
    }

    // Fails with `MissingItem` when `close_compound` reports that the compound has ended.
    #[doc(hidden)]
    fn ensure_more_expected(&mut self, b: &mut B::Type, i: &B::Item) -> ReaderResult<()> {
        if self.close_compound(b, i)? {
            Err(error::Error::MissingItem)
        } else {
            Ok(())
        }
    }

    // Fails with `MissingCloseDelimiter` unless `close_compound` reports that the compound
    // has ended.
    #[doc(hidden)]
    fn ensure_complete(&mut self, mut b: B::Type, i: &B::Item) -> ReaderResult<()> {
        if self.close_compound(&mut b, i)? {
            Ok(())
        } else {
            Err(error::Error::MissingCloseDelimiter)
        }
    }
}

impl<'r, 'de, N: NestedValue, R: Reader<'de, N>> Reader<'de, N> for &'r mut R {
    fn next(&mut self, read_annotations: bool) -> io::Result<Option<N>> {
        (*self).next(read_annotations)
    }

    fn open_record(&mut self, arity: Option<usize>) -> ReaderResult<B::Type> {
        (*self).open_record(arity)
    }

    fn open_sequence_or_set(&mut self) -> ReaderResult<B::Item> {
        (*self).open_sequence_or_set()
    }

    fn open_sequence(&mut self) -> ReaderResult<()> {
        (*self).open_sequence()
    }

    fn open_set(&mut self) -> ReaderResult<()> {
        (*self).open_set()
    }

    fn open_dictionary(&mut self) -> ReaderResult<()> {
        (*self).open_dictionary()
    }

    fn boundary(&mut self, b: &B::Type) -> ReaderResult<()> {
        (*self).boundary(b)
    }

    fn close_compound(&mut self, b: &mut B::Type, i: &B::Item) -> ReaderResult<bool> {
        (*self).close_compound(b, i)
    }

    fn open_embedded(&mut self) -> ReaderResult<()> {
        (*self).open_embedded()
    }

    fn close_embedded(&mut self) -> ReaderResult<()> {
        (*self).close_embedded()
    }

    type Mark = R::Mark;

    fn mark(&mut self) -> io::Result<Self::Mark> {
        (*self).mark()
    }

    fn restore(&mut self, mark: &Self::Mark) -> io::Result<()> {
        (*self).restore(mark)
    }

    fn next_token(&mut self, read_embedded_annotations: bool) -> io::Result<Token<N>> {
        (*self).next_token(read_embedded_annotations)
    }

    fn next_annotations_and_token(&mut self) -> io::Result<(Vec<N>, Token<N>)> {
        (*self).next_annotations_and_token()
    }
}

/// Generic seekable stream of input bytes.
///
/// Besides the primitive byte-level operations, the trait offers convenience constructors
/// for the packed and text readers that consume a `BinarySource`.
pub trait BinarySource<'de>: Sized {
    /// Position marker type, enabling structured backtracking to an earlier position in an
    /// input.
    type Mark;
    /// Captures a marker for the current position in the input.
    fn mark(&mut self) -> io::Result<Self::Mark>;
    /// Moves the input back to a position previously captured with [BinarySource::mark].
    fn restore(&mut self, mark: &Self::Mark) -> io::Result<()>;

    /// Advances past the next byte.
    fn skip(&mut self) -> io::Result<()>;
    /// Yields the next byte while leaving it in place for a subsequent read.
    fn peek(&mut self) -> io::Result<u8>;
    /// Consumes and yields the next `count` bytes, all of which must be available. The
    /// result is always either exactly `count` bytes or an error.
    fn readbytes(&mut self, count: usize) -> io::Result<Cow<'de, [u8]>>;
    /// Like [BinarySource::readbytes], except that the bytes are written into `bs`, whose
    /// length determines how many bytes are read.
    fn readbytes_into(&mut self, bs: &mut [u8]) -> io::Result<()>;

    /// Builds a [PackedReader][super::PackedReader] reading from `self`, using
    /// `decode_embedded` for embedded values.
    fn packed<N: NestedValue, Dec: DomainDecode<N::Embedded>>(
        &mut self,
        decode_embedded: Dec,
    ) -> super::PackedReader<'de, '_, N, Dec, Self> {
        super::PackedReader::new(self, decode_embedded)
    }

    /// Builds a [PackedReader][super::PackedReader] that reads [IOValue]s from `self`.
    fn packed_iovalues(
        &mut self,
    ) -> super::PackedReader<'de, '_, IOValue, IOValueDomainCodec, Self> {
        self.packed::<IOValue, IOValueDomainCodec>(IOValueDomainCodec)
    }

    /// Builds a [TextReader][super::TextReader] reading from `self`, using
    /// `decode_embedded` for embedded values.
    fn text<N: NestedValue, Dec: DomainParse<N::Embedded>>(
        &mut self,
        decode_embedded: Dec,
    ) -> super::TextReader<'de, '_, N, Dec, Self> {
        super::TextReader::new(self, decode_embedded)
    }

    /// Builds a [TextReader][super::TextReader] that reads [IOValue]s from `self`.
    fn text_iovalues(
        &mut self,
    ) -> super::TextReader<'de, '_, IOValue, ViaCodec<IOValueDomainCodec>, Self> {
        let codec = ViaCodec::new(IOValueDomainCodec);
        self.text::<IOValue, ViaCodec<IOValueDomainCodec>>(codec)
    }
}

/// A [BinarySource] that draws its bytes from any [`io::Read`]` + `[`io::Seek`]
/// implementation.
pub struct IOBinarySource<R: io::Read + io::Seek> {
    /// The wrapped reader that supplies the bytes.
    pub read: R,
    #[doc(hidden)]
    /// Holds at most one byte that has been peeked but not yet consumed.
    pub buf: Option<u8>,
}

impl<R: io::Read + io::Seek> IOBinarySource<R> {
    /// Wraps the given [`io::Read`]` + `[`io::Seek`] implementation in an
    /// [IOBinarySource]. The peek buffer starts out empty.
    #[inline(always)]
    pub fn new(read: R) -> Self {
        Self { buf: None, read }
    }
}

/// [BinarySource] implementation over an [`io::Read`]` + `[`io::Seek`] stream, using the
/// one-byte `buf` field to support `peek`.
impl<'de, R: io::Read + io::Seek> BinarySource<'de> for IOBinarySource<R> {
    /// Byte offset as reported by `stream_position`, adjusted for any peeked byte.
    type Mark = u64;

    /// Yields the logical position in the stream: the underlying stream position, minus one
    /// if a byte has been peeked (read from the stream) but not yet consumed.
    #[inline(always)]
    fn mark(&mut self) -> io::Result<Self::Mark> {
        Ok(self.read.stream_position()? - (if self.buf.is_some() { 1 } else { 0 }))
    }

    /// Seeks the underlying stream to `mark` and discards any peeked byte.
    #[inline(always)]
    fn restore(&mut self, mark: &Self::Mark) -> io::Result<()> {
        self.read.seek(io::SeekFrom::Start(*mark))?;
        self.buf = None;
        Ok(())
    }

    /// Consumes the byte most recently returned by `peek`.
    ///
    /// # Panics
    ///
    /// Panics if no byte is currently buffered, i.e. if `peek` has not been called since
    /// the last consuming operation.
    #[inline(always)]
    fn skip(&mut self) -> io::Result<()> {
        if self.buf.is_none() {
            unreachable!();
        }
        self.buf = None;
        Ok(())
    }

    /// Yields the next byte without consuming it, reading it from the underlying stream
    /// into the one-byte buffer if necessary.
    ///
    /// Yields the `io_eof()` error when the stream has no more bytes. Reads that fail with
    /// [`io::ErrorKind::Interrupted`] are retried, matching the behaviour of the
    /// `read_exact` calls used by the other reading methods.
    #[inline(always)]
    fn peek(&mut self) -> io::Result<u8> {
        if let Some(b) = self.buf {
            return Ok(b);
        }
        let mut b = [0u8; 1];
        loop {
            match self.read.read(&mut b) {
                Ok(0) => return Err(io_eof()),
                Ok(1) => {
                    self.buf = Some(b[0]);
                    return Ok(b[0]);
                }
                // A conforming reader cannot report more bytes than the buffer holds.
                Ok(_) => unreachable!(),
                // Interrupted reads are non-fatal; simply try again.
                Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
                Err(e) => return Err(e),
            }
        }
    }

    /// Reads exactly `count` bytes into a freshly-allocated buffer.
    ///
    /// # Panics
    ///
    /// Panics if a peeked byte is still buffered; callers must `skip` it first.
    #[inline(always)]
    fn readbytes(&mut self, count: usize) -> io::Result<Cow<'de, [u8]>> {
        if self.buf.is_some() {
            unreachable!();
        }
        let mut bs = vec![0; count];
        self.read.read_exact(&mut bs)?;
        Ok(Cow::Owned(bs))
    }

    /// Reads exactly `bs.len()` bytes into `bs`.
    ///
    /// # Panics
    ///
    /// Panics if a peeked byte is still buffered; callers must `skip` it first.
    #[inline(always)]
    fn readbytes_into(&mut self, bs: &mut [u8]) -> io::Result<()> {
        if self.buf.is_some() {
            unreachable!();
        }
        self.read.read_exact(bs)
    }
}

/// A [BinarySource] that reads directly out of a borrowed slice of [u8].
pub struct BytesBinarySource<'de> {
    /// The slice supplying the bytes.
    pub bytes: &'de [u8],
    #[doc(hidden)]
    /// Offset into `bytes` of the next byte to be read.
    pub index: usize,
}

impl<'de> BytesBinarySource<'de> {
    /// Wraps the given `u8` slice in a [BytesBinarySource] positioned at its first byte.
    #[inline(always)]
    pub fn new(bytes: &'de [u8]) -> Self {
        Self { index: 0, bytes }
    }
}

impl<'de> BinarySource<'de> for BytesBinarySource<'de> {
    type Mark = usize;

    #[inline(always)]
    fn mark(&mut self) -> io::Result<Self::Mark> {
        Ok(self.index)
    }

    #[inline(always)]
    fn restore(&mut self, mark: &Self::Mark) -> io::Result<()> {
        self.index = *mark;
        Ok(())
    }

    #[inline(always)]
    fn skip(&mut self) -> io::Result<()> {
        if self.index >= self.bytes.len() {
            unreachable!();
        }
        self.index += 1;
        Ok(())
    }

    #[inline(always)]
    fn peek(&mut self) -> io::Result<u8> {
        if self.index >= self.bytes.len() {
            Err(io_eof())
        } else {
            Ok(self.bytes[self.index])
        }
    }

    #[inline(always)]
    fn readbytes(&mut self, count: usize) -> io::Result<Cow<'de, [u8]>> {
        if self.index + count > self.bytes.len() {
            Err(io_eof())
        } else {
            let bs = &self.bytes[self.index..self.index + count];
            self.index += count;
            Ok(Cow::Borrowed(bs))
        }
    }

    #[inline(always)]
    fn readbytes_into(&mut self, bs: &mut [u8]) -> io::Result<()> {
        let count = bs.len();
        if self.index + count > self.bytes.len() {
            Err(io_eof())
        } else {
            bs.copy_from_slice(&self.bytes[self.index..self.index + count]);
            self.index += count;
            Ok(())
        }
    }
}

/// Pairs a [Reader] with preset options that govern how it is driven.
pub struct ConfiguredReader<'de, N: NestedValue, R: Reader<'de, N>> {
    /// The wrapped [Reader].
    pub reader: R,
    /// Whether annotations are included (`true`) or discarded (`false`) while reading.
    pub read_annotations: bool,
    phantom: PhantomData<&'de N>,
}

impl<'de, N: NestedValue, R: Reader<'de, N>> ConfiguredReader<'de, N, R> {
    /// Wraps the given `reader` in a [ConfiguredReader] with `read_annotations` set to
    /// `true`.
    pub fn new(reader: R) -> Self {
        <R as Reader<'de, N>>::configured(reader, true)
    }

    /// Replaces the `read_annotations` setting of `self`.
    pub fn set_read_annotations(&mut self, read_annotations: bool) {
        self.read_annotations = read_annotations;
    }

    /// Reads the next value, treating end-of-input as an error.
    ///
    /// Delegates directly to [Reader::demand_next], passing the configured
    /// `read_annotations` setting.
    pub fn demand_next(&mut self) -> io::Result<N> {
        let read_annotations = self.read_annotations;
        self.reader.demand_next(read_annotations)
    }
}

/// Iterates over the values produced by the wrapped reader: each item is a successfully
/// read value or an error, and iteration ends once the reader reports end-of-input.
impl<'de, N: NestedValue, R: Reader<'de, N>> std::iter::Iterator for ConfiguredReader<'de, N, R> {
    type Item = io::Result<N>;
    fn next(&mut self) -> Option<Self::Item> {
        // `Ok(None)` (end of input) becomes `None`; values and errors are passed through.
        self.reader.next(self.read_annotations).transpose()
    }
}