Skip to main content

sluice/parser/
mod.rs

1use std::fmt;
2use std::io::Read;
3
4use crate::domain::document::Document;
5use crate::domain::header::IndexHeader;
6use crate::error::ParseError;
7
8pub(crate) mod document;
9pub(crate) mod primitives;
10
11use document::read_document;
12use primitives::{read_i64, read_u8, try_read_field_count};
13
14/// Streaming parser over a Maven Central index binary stream.
15///
16/// The reader owns the underlying byte source and yields one `Document` per
17/// call to `Iterator::next`. A clean EOF on the next document's field-count
18/// read terminates the iterator; a mid-document EOF is reported as a
19/// `TruncatedDocument` error.
20///
21/// `IndexReader` is I/O-neutral: wrap your gzip decoder outside the crate and
22/// pass the resulting `Read` in.
23pub struct IndexReader<R: Read> {
24    inner: R,
25    header: IndexHeader,
26}
27
28impl<R: Read> IndexReader<R> {
29    /// Read and validate the 9-byte header, returning a parser ready to stream
30    /// documents.
31    ///
32    /// # Errors
33    ///
34    /// Returns [`ParseError::UnsupportedVersion`] if the version byte is not
35    /// `0x01`, or [`ParseError::Io`] if the header cannot be read.
36    pub fn new(mut inner: R) -> Result<Self, ParseError> {
37        let version = read_u8(&mut inner)?;
38        if version != 0x01 {
39            return Err(ParseError::UnsupportedVersion(version));
40        }
41        let ts = read_i64(&mut inner)?;
42        let timestamp_millis = if ts == -1 { None } else { Some(ts) };
43        Ok(Self {
44            inner,
45            header: IndexHeader {
46                version,
47                timestamp_millis,
48            },
49        })
50    }
51
52    /// Return the parsed header for this stream.
53    #[must_use]
54    pub fn header(&self) -> &IndexHeader {
55        &self.header
56    }
57}
58
59impl<R: Read> fmt::Debug for IndexReader<R> {
60    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
61        f.debug_struct("IndexReader")
62            .field("header", &self.header)
63            .finish_non_exhaustive()
64    }
65}
66
67impl<R: Read> Iterator for IndexReader<R> {
68    type Item = Result<Document, ParseError>;
69
70    fn next(&mut self) -> Option<Self::Item> {
71        match try_read_field_count(&mut self.inner) {
72            Ok(None) => None,
73            Ok(Some(field_count)) => Some(read_document(&mut self.inner, field_count)),
74            Err(e) => Some(Err(e)),
75        }
76    }
77}