Skip to main content

webc/v3/read/
owned.rs

1use std::{
2    collections::BTreeMap,
3    fs::File,
4    io::{Read, Seek},
5    path::Path,
6    sync::OnceLock,
7};
8
9use bytes::Buf;
10use sha2::Digest;
11use shared_buffer::OwnedBuffer;
12
13use crate::{
14    DetectError, Magic, Version,
15    metadata::Manifest,
16    v3::{
17        Index, Span, Tag,
18        read::{
19            AtomsSection, ManifestSection, Section, SectionError, VolumeSection,
20            dir_entry::DirEntryError, scanner::InvalidSize, sections::SectionConversionError,
21        },
22    },
23};
24
/// A reader for owned data that is already in memory.
///
/// The index, manifest and atoms are decoded once when the reader is
/// constructed; volumes are decoded on demand from the retained buffer.
#[derive(Debug, Clone, PartialEq)]
pub struct OwnedReader {
    /// The bytes of the whole WEBC file that this reader was parsed from.
    buffer: OwnedBuffer,
    /// The index decoded from the section that follows the file header.
    index: Index,
    /// The manifest decoded from the manifest section.
    manifest: Manifest,
    /// The hash reported by the atoms section.
    atoms_hash: [u8; 32],
    /// Every atom in the atoms section, keyed by name, with its hash and
    /// contents.
    atoms: BTreeMap<String, ([u8; 32], OwnedBuffer)>,
    /// Lazily computed SHA-256 digest of `buffer`.
    hash: OnceLock<[u8; 32]>,
}
35
36impl OwnedReader {
37    pub fn parse(webc: impl Into<OwnedBuffer>) -> Result<Self, OwnedReaderError> {
38        let webc: OwnedBuffer = webc.into();
39
40        // Make sure we're actually reading a WEBC file we can support
41        let version = crate::detect(webc.clone().reader())?;
42        if version != Version::V3 {
43            return Err(OwnedReaderError::UnsupportedVersion(version));
44        }
45        let index = read_index(webc.clone())?;
46
47        // We extract the manifest and atoms eagerly because that's what most
48        // people will want.
49        let manifest =
50            parse_section(&webc, index.manifest.span).and_then(|section: ManifestSection| {
51                section.manifest().map_err(OwnedReaderError::Manifest)
52            })?;
53        let atoms_section: AtomsSection = parse_section(&webc, index.atoms.span)?;
54        let atoms = atoms_section
55            .iter()
56            .map(|result| result.map(|(s, h, b)| (s.to_string(), (h, b))))
57            .collect::<Result<BTreeMap<String, ([u8; 32], OwnedBuffer)>, DirEntryError>>()
58            .map_err(OwnedReaderError::Atoms)?;
59
60        Ok(OwnedReader {
61            buffer: webc,
62            index,
63            atoms_hash: *atoms_section.get_hash(),
64            atoms,
65            manifest,
66            hash: OnceLock::new(),
67        })
68    }
69
70    pub fn from_path(path: impl AsRef<Path>) -> Result<Self, OwnedReaderError> {
71        let buffer = OwnedBuffer::mmap(path.as_ref())?;
72        OwnedReader::parse(buffer)
73    }
74
75    /// Try to parse a [`File`] into an [`OwnedReader`].
76    ///
77    /// This will try to memory-map the file if supported by the OS, otherwise
78    /// it will read the entire file into memory.
79    pub fn from_file(mut file: File) -> Result<Self, OwnedReaderError> {
80        if let Ok(buffer) = OwnedBuffer::from_file(&file) {
81            return OwnedReader::parse(buffer);
82        }
83
84        // Fall back to the allocating version
85        file.rewind().map_err(OwnedReaderError::Io)?;
86        let mut contents = Vec::new();
87        file.read_to_end(&mut contents)
88            .map_err(OwnedReaderError::Io)?;
89
90        OwnedReader::parse(contents)
91    }
92
93    pub fn webc_hash(&self) -> Option<[u8; 32]> {
94        Some(
95            *self
96                .hash
97                .get_or_init(|| sha2::Sha256::digest(self.buffer.as_slice()).into()),
98        )
99    }
100
101    pub fn manifest(&self) -> &Manifest {
102        &self.manifest
103    }
104
105    pub fn index(&self) -> &Index {
106        &self.index
107    }
108
109    pub fn atoms_hash(&self) -> [u8; 32] {
110        self.atoms_hash
111    }
112
113    pub fn atom_names(&self) -> impl Iterator<Item = &str> + '_ {
114        self.atoms.keys().map(|s| s.as_str())
115    }
116
117    pub fn iter_atoms(&self) -> impl Iterator<Item = (&str, [u8; 32], &OwnedBuffer)> + '_ {
118        self.atoms.iter().map(|(s, (h, b))| (s.as_str(), *h, b))
119    }
120
121    pub fn get_atom(&self, name: &str) -> Option<&([u8; 32], OwnedBuffer)> {
122        self.atoms.get(name)
123    }
124
125    pub fn volume_names(&self) -> impl Iterator<Item = &str> + '_ {
126        self.index.volumes.keys().map(|s| s.as_str())
127    }
128
129    pub fn iter_volumes(
130        &self,
131    ) -> impl Iterator<Item = Result<(&str, VolumeSection), OwnedReaderError>> {
132        self.index.volumes.iter().map(|(name, entry)| {
133            let volume: VolumeSection = parse_section(&self.buffer, entry.span)?;
134            Ok((name.as_str(), volume))
135        })
136    }
137
138    pub fn get_volume(&self, name: &str) -> Result<VolumeSection, OwnedReaderError> {
139        let entry = self
140            .index
141            .volumes
142            .get(name)
143            .ok_or_else(|| OwnedReaderError::NoSuchVolume {
144                name: name.to_string(),
145            })?;
146
147        parse_section(&self.buffer, entry.span)
148    }
149}
150
151fn parse_section<T>(buffer: &OwnedBuffer, span: Span) -> Result<T, OwnedReaderError>
152where
153    T: TryFrom<Section, Error = SectionConversionError>,
154{
155    let (tag, hash, data) = get_section(buffer, span)?;
156
157    let section = Section::parse(tag, Some(hash), data.clone())
158        .map_err(|error| OwnedReaderError::Section { error, tag, data })?;
159
160    T::try_from(section).map_err(OwnedReaderError::from)
161}
162
163fn get_section(
164    buffer: &OwnedBuffer,
165    span: Span,
166) -> Result<(u8, [u8; 32], OwnedBuffer), OwnedReaderError> {
167    get(buffer, span).and_then(read_raw_section)
168}
169
170fn get(buffer: &OwnedBuffer, span: Span) -> Result<OwnedBuffer, OwnedReaderError> {
171    if buffer.len() < span.end() {
172        Err(OwnedReaderError::IndexOutOfBounds {
173            offset: span.end(),
174            bytes_available: buffer.len(),
175        })
176    } else {
177        Ok(buffer.slice(span.start..span.end()))
178    }
179}
180
181fn read_raw_index_section(mut buffer: OwnedBuffer) -> Result<(u8, OwnedBuffer), OwnedReaderError> {
182    const TAG_AND_LEN: usize = std::mem::size_of::<u8>() + std::mem::size_of::<u64>();
183
184    if buffer.len() < TAG_AND_LEN {
185        return Err(OwnedReaderError::Io(std::io::Error::from(
186            std::io::ErrorKind::UnexpectedEof,
187        )));
188    }
189
190    let tag = buffer.get_u8();
191    let length: usize = buffer.get_u64_le().try_into()?;
192
193    if buffer.len() < length {
194        return Err(OwnedReaderError::Io(std::io::Error::from(
195            std::io::ErrorKind::UnexpectedEof,
196        )));
197    }
198
199    let data = buffer.slice(..length);
200    buffer.advance(length);
201
202    Ok((tag, data))
203}
204
205fn read_raw_section(
206    mut buffer: OwnedBuffer,
207) -> Result<(u8, [u8; 32], OwnedBuffer), OwnedReaderError> {
208    const TAG_AND_LEN: usize = std::mem::size_of::<u8>() + 32 + std::mem::size_of::<u64>();
209
210    if buffer.len() < TAG_AND_LEN {
211        return Err(OwnedReaderError::Io(std::io::Error::from(
212            std::io::ErrorKind::UnexpectedEof,
213        )));
214    }
215
216    let tag = buffer.get_u8();
217    let mut hash = [0u8; 32];
218    buffer.copy_to_slice(&mut hash);
219    let length: usize = buffer.get_u64_le().try_into()?;
220
221    if buffer.len() < length {
222        return Err(OwnedReaderError::Io(std::io::Error::from(
223            std::io::ErrorKind::UnexpectedEof,
224        )));
225    }
226
227    let data = buffer.slice(..length);
228    buffer.advance(length);
229
230    Ok((tag, hash, data))
231}
232
233fn read_index(mut webc: OwnedBuffer) -> Result<Index, OwnedReaderError> {
234    // Skip the magic bytes and version number
235    const HEADER_LENGTH: usize = std::mem::size_of::<Magic>() + std::mem::size_of::<Version>();
236    webc.advance(HEADER_LENGTH);
237
238    let (tag, data) = read_raw_index_section(webc)?;
239
240    match Section::parse(tag, None, data.clone()) {
241        Ok(Section::Index(index_reader)) => {
242            let index = index_reader.index().map_err(OwnedReaderError::Index)?;
243            Ok(index)
244        }
245        Ok(_) => Err(OwnedReaderError::UnexpectedSection {
246            expected_tag: Tag::Index,
247            actual_tag: tag,
248            offset: HEADER_LENGTH,
249        }),
250        Err(error) => Err(OwnedReaderError::Section { error, tag, data }),
251    }
252}
253
254/// Errors that may be emitted by [`OwnedReader`].
255#[derive(Debug, thiserror::Error)]
256#[non_exhaustive]
257pub enum OwnedReaderError {
258    #[error(transparent)]
259    Io(#[from] std::io::Error),
260    #[error("Invalid magic bytes, {}", _0.escape_ascii())]
261    InvalidMagic(Magic),
262    #[error("The version, {_0}, isn't supported")]
263    UnsupportedVersion(Version),
264    #[error("Expected to find a {expected_tag} at offset {offset:#x}, but found a \"{}\"", Tag::display(*actual_tag))]
265    UnexpectedSection {
266        expected_tag: Tag,
267        actual_tag: u8,
268        offset: usize,
269    },
270    #[error(
271        "Tried to access memory at offset {offset}, but only {bytes_available} bytes are available"
272    )]
273    IndexOutOfBounds {
274        offset: usize,
275        bytes_available: usize,
276    },
277    #[error("Unable to parse the index as CBOR")]
278    Index(ciborium::de::Error<std::io::Error>),
279    #[error("Unable to parse the manifest as CBOR")]
280    Manifest(ciborium::de::Error<std::io::Error>),
281    #[error("Unable to decode a section")]
282    Section {
283        #[source]
284        error: SectionError,
285        tag: u8,
286        data: OwnedBuffer,
287    },
288    #[error("Found the wrong section")]
289    IncorrectSection(#[from] SectionConversionError),
290    #[error("Volume not found: \"{name}\"")]
291    NoSuchVolume { name: String },
292    #[error("Unable to determine the atoms")]
293    Atoms(DirEntryError),
294    #[error("Unable to detect the WEBC file's version number")]
295    Detect(#[from] DetectError),
296    #[error(transparent)]
297    Mmap(#[from] shared_buffer::MmapError),
298    #[error(transparent)]
299    IntegerConversion(#[from] std::num::TryFromIntError),
300}
301
302impl From<InvalidSize> for OwnedReaderError {
303    fn from(value: InvalidSize) -> Self {
304        let InvalidSize { expected, actual } = value;
305        OwnedReaderError::IndexOutOfBounds {
306            offset: expected,
307            bytes_available: actual,
308        }
309    }
310}