webc/v2/read/
sections.rs

1use std::ops::Range;
2
3use bytes::Buf;
4use shared_buffer::OwnedBuffer;
5
6use crate::{
7    metadata::Manifest,
8    readable_bytes::readable_bytes,
9    v2::{
10        read::{
11            dir_entry::{DirEntryError, FileEntry},
12            volume_header::{FileMetadata, HeaderEntry, VolumeHeader, VolumeHeaderError},
13            Directory,
14        },
15        Index, Span, Tag,
16    },
17    PathSegmentError, PathSegments, ToPathSegments,
18};
19
/// Errors that may occur while parsing a [`Section`].
///
/// The enum is `#[non_exhaustive]`, so downstream code must keep a wildcard
/// arm when matching on it.
#[derive(Debug, thiserror::Error)]
#[non_exhaustive]
pub enum SectionError {
    /// The tag byte is either unknown or names something that
    /// [`Section::parse()`] does not treat as a top-level section.
    #[error("The tag doesn't indicate the start of a section")]
    UnsupportedSection,
    /// A CBOR error, converted automatically from
    /// [`ciborium::value::Error`].
    #[error("Unable to parse the section as CBOR")]
    Cbor(#[from] ciborium::value::Error),
    /// The name stored at the start of a volume section isn't valid UTF-8.
    #[error(
        "Unable to parse \"{}\" as a UTF8 volume name",
        name.escape_ascii(),
    )]
    InvalidVolumeName {
        /// The underlying UTF-8 decoding error.
        error: std::str::Utf8Error,
        /// The raw bytes that were supposed to be the volume's name.
        name: OwnedBuffer,
    },
    /// A length-prefixed item didn't fit in the remaining bytes: either the
    /// buffer was too short to hold the length prefix itself, or the prefix
    /// asked for more bytes than were left.
    #[error("Invalid section length, expected at least {expected} bytes but only {available} were available")]
    InvalidSectionLength { expected: usize, available: usize },
    /// A `u64` length prefix couldn't be represented as a `usize` on this
    /// platform.
    #[error(transparent)]
    Overflow(#[from] std::num::TryFromIntError),
}
41
/// The different sections in a webc file.
///
/// Values are normally created with [`Section::parse()`]. Each variant wraps
/// a dedicated section type, and the `as_*()` accessors or the
/// [`TryFrom<Section>`] implementations can be used to get at the inner
/// value.
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub enum Section {
    /// The section holding the file's index.
    Index(IndexSection),
    /// The section holding the manifest.
    Manifest(ManifestSection),
    /// The section holding the atoms volume.
    Atoms(AtomsSection),
    /// A named volume section.
    Volume(VolumeSection),
}
51
52impl Section {
53    pub fn parse(tag: u8, data: OwnedBuffer) -> Result<Section, SectionError> {
54        let tag = Tag::from_u8(tag).ok_or(SectionError::UnsupportedSection)?;
55
56        match tag {
57            Tag::Index => Ok(IndexSection(data).into()),
58            Tag::Manifest => Ok(ManifestSection(data).into()),
59            Tag::Atoms => {
60                let atoms = AtomsSection::parse(data)?;
61                Ok(atoms.into())
62            }
63            Tag::Volume => {
64                let volume = VolumeSection::parse(data)?;
65                Ok(volume.into())
66            }
67            _ => Err(SectionError::UnsupportedSection),
68        }
69    }
70
71    pub fn as_index(&self) -> Option<&IndexSection> {
72        if let Self::Index(v) = self {
73            Some(v)
74        } else {
75            None
76        }
77    }
78
79    pub fn as_manifest(&self) -> Option<&ManifestSection> {
80        if let Self::Manifest(v) = self {
81            Some(v)
82        } else {
83            None
84        }
85    }
86
87    pub fn as_atoms(&self) -> Option<&AtomsSection> {
88        if let Self::Atoms(v) = self {
89            Some(v)
90        } else {
91            None
92        }
93    }
94
95    pub fn as_volume(&self) -> Option<&VolumeSection> {
96        if let Self::Volume(v) = self {
97            Some(v)
98        } else {
99            None
100        }
101    }
102}
103
104impl From<IndexSection> for Section {
105    fn from(value: IndexSection) -> Self {
106        Section::Index(value)
107    }
108}
109
110impl TryFrom<Section> for IndexSection {
111    type Error = SectionConversionError;
112
113    fn try_from(value: Section) -> Result<Self, Self::Error> {
114        match value {
115            Section::Index(section) => Ok(section),
116            _ => Err(SectionConversionError),
117        }
118    }
119}
120
121impl From<ManifestSection> for Section {
122    fn from(value: ManifestSection) -> Self {
123        Section::Manifest(value)
124    }
125}
126
127impl TryFrom<Section> for ManifestSection {
128    type Error = SectionConversionError;
129
130    fn try_from(value: Section) -> Result<Self, Self::Error> {
131        match value {
132            Section::Manifest(section) => Ok(section),
133            _ => Err(SectionConversionError),
134        }
135    }
136}
137
138impl From<AtomsSection> for Section {
139    fn from(value: AtomsSection) -> Self {
140        Section::Atoms(value)
141    }
142}
143
144impl TryFrom<Section> for AtomsSection {
145    type Error = SectionConversionError;
146
147    fn try_from(value: Section) -> Result<Self, Self::Error> {
148        match value {
149            Section::Atoms(section) => Ok(section),
150            _ => Err(SectionConversionError),
151        }
152    }
153}
154
155impl From<VolumeSection> for Section {
156    fn from(value: VolumeSection) -> Self {
157        Section::Volume(value)
158    }
159}
160
161impl TryFrom<Section> for VolumeSection {
162    type Error = SectionConversionError;
163
164    fn try_from(value: Section) -> Result<Self, Self::Error> {
165        match value {
166            Section::Volume(section) => Ok(section),
167            _ => Err(SectionConversionError),
168        }
169    }
170}
171
/// The error type returned when [`TryFrom`] can't convert a [`Section`] to the
/// desired type.
///
/// This is produced by the `TryFrom<Section>` implementations for
/// [`IndexSection`], [`ManifestSection`], [`AtomsSection`], and
/// [`VolumeSection`] whenever the section holds a different variant. It
/// carries no further information.
#[derive(Debug, Copy, Clone, PartialEq, thiserror::Error)]
#[error("Unable to convert the section to the desired type")]
pub struct SectionConversionError;
177
178fn length_delimited_section(
179    mut buffer: OwnedBuffer,
180) -> Result<(OwnedBuffer, OwnedBuffer), SectionError> {
181    if buffer.len() < std::mem::size_of::<u64>() {
182        return Err(SectionError::InvalidSectionLength {
183            expected: std::mem::size_of::<u64>(),
184            available: buffer.len(),
185        });
186    }
187    let length: usize = buffer.get_u64_le().try_into()?;
188
189    if buffer.len() < length {
190        return Err(SectionError::InvalidSectionLength {
191            expected: length,
192            available: buffer.len(),
193        });
194    }
195    let head = buffer.slice(..length);
196    buffer.advance(length);
197
198    Ok((head, buffer))
199}
200
/// A section containing the file's [`Index`].
///
/// The wrapped buffer holds the section's raw bytes; nothing is decoded until
/// [`IndexSection::index()`] is called.
#[derive(Clone, PartialEq)]
pub struct IndexSection(OwnedBuffer);
204
205impl IndexSection {
206    /// Lazily parse the section into an [`Index`].
207    pub fn index(&self) -> Result<Index, ciborium::de::Error<std::io::Error>> {
208        // Note: we need to add some special handling for the index section because
209        // it may or may not contain trailing padding bytes.
210        let index = ciborium::de::from_reader(self.0.as_slice())?;
211        // Note: explicitly don't call the de.end() method.
212        Ok(index)
213    }
214}
215
216impl std::fmt::Debug for IndexSection {
217    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
218        f.debug_tuple("IndexSection")
219            .field(&readable_bytes(&self.0))
220            .finish()
221    }
222}
223
/// A section containing the file's [`Manifest`].
///
/// The wrapped buffer holds the section's raw bytes; they are only decoded
/// when [`ManifestSection::manifest()`] or
/// [`ManifestSection::deserialize()`] is called.
#[derive(Clone, PartialEq)]
pub struct ManifestSection(OwnedBuffer);
227
228impl ManifestSection {
229    /// Get a reference to the bytes this section contains.
230    pub fn bytes(&self) -> &OwnedBuffer {
231        &self.0
232    }
233
234    /// Deserialize into the canonical [`Manifest`] format.
235    ///
236    /// This is just shorthand for calling [`ManifestSection::deserialize()`]
237    /// with the right types.
238    pub fn manifest(&self) -> Result<Manifest, ciborium::de::Error<std::io::Error>> {
239        self.deserialize()
240    }
241
242    /// Deserialize the manifest section into a custom type.
243    pub fn deserialize<T>(&self) -> Result<T, ciborium::de::Error<std::io::Error>>
244    where
245        for<'a> T: serde::Deserialize<'a>,
246    {
247        ciborium::from_reader(self.0.as_slice())
248    }
249}
250
251impl std::fmt::Debug for ManifestSection {
252    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
253        f.debug_tuple("ManifestSection")
254            .field(&readable_bytes(&self.0))
255            .finish()
256    }
257}
258
/// A section containing the atoms volume.
///
/// The section payload is split into a header and a data part when it is
/// parsed; the header is only interpreted lazily, on lookup or iteration.
#[derive(Clone, PartialEq)]
pub struct AtomsSection {
    /// The raw, still-encoded volume header (first length-prefixed item of
    /// the payload).
    header: OwnedBuffer,
    /// The raw file data that offsets in the header index into (second
    /// length-prefixed item of the payload).
    data: OwnedBuffer,
    /// Where `data` starts, counted from the beginning of the section
    /// including its one-byte tag and eight-byte length. It is used as the
    /// base for the spans reported by `iter_with_offsets()`.
    data_offset: usize,
}
266
267impl AtomsSection {
268    fn parse(buffer: OwnedBuffer) -> Result<Self, SectionError> {
269        const OFFSET_INTO_VOLUME: usize = std::mem::size_of::<u8>() + std::mem::size_of::<u64>();
270        let initial_length = buffer.len();
271
272        let (header, rest) = length_delimited_section(buffer)?;
273
274        let (data, rest) = length_delimited_section(rest)?;
275        let data_offset = OFFSET_INTO_VOLUME + initial_length - rest.len() - data.len();
276
277        Ok(AtomsSection {
278            header,
279            data,
280            data_offset,
281        })
282    }
283
284    pub fn get_atom(&self, atom_name: &str) -> Result<OwnedBuffer, LookupError> {
285        lookup_file(self.header(), &self.data, [atom_name])
286    }
287
288    pub fn get_atom_with_offset(&self, atom_name: &str) -> Result<OwnedBuffer, LookupError> {
289        lookup_file(self.header(), &self.data, [atom_name])
290    }
291
292    /// Iterate over all the atoms in this [`AtomsSection`].
293    pub fn iter(&self) -> impl Iterator<Item = Result<(&str, OwnedBuffer), DirEntryError>> {
294        self.iter_with_offsets()
295            .map(|result| result.map(|(name, data, _)| (name, data)))
296    }
297
298    /// Iterate over all the atoms in this [`AtomsSection`], including their
299    /// offsets relative to the start of the volume.
300    pub fn iter_with_offsets(
301        &self,
302    ) -> impl Iterator<Item = Result<(&str, OwnedBuffer, Span), DirEntryError>> {
303        let data_offset = self.data_offset;
304
305        self.iter_entries().map(move |result| {
306            result
307                .map_err(DirEntryError::from)
308                .and_then(|(name, meta)| {
309                    let entry = FileEntry::from_metadata(meta, data_offset, self.data.clone())?;
310                    let data = entry.bytes().clone();
311                    let span = Span::new(
312                        self.data_offset + meta.start_offset,
313                        meta.end_offset - meta.start_offset,
314                    );
315                    Ok((name, data, span))
316                })
317        })
318    }
319
320    fn iter_entries(
321        &self,
322    ) -> impl Iterator<Item = Result<(&str, FileMetadata), VolumeHeaderError>> {
323        let header = self.header();
324        FallibleIterator::new(header.root_directory().map(|dir| dir.entries())).filter_map(
325            |result| match result {
326                Ok((name, HeaderEntry::File(file))) => Some(Ok((name, file))),
327                Ok(_) => None,
328                Err(e) => Some(Err(e)),
329            },
330        )
331    }
332
333    /// The lazily parsed volume header.
334    pub(crate) fn header(&self) -> VolumeHeader<'_> {
335        VolumeHeader::new(&self.header)
336    }
337}
338
339impl std::fmt::Debug for AtomsSection {
340    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
341        let AtomsSection {
342            header,
343            data,
344            data_offset,
345        } = self;
346        f.debug_struct("AtomsSection")
347            .field("header", &readable_bytes(header))
348            .field("data", &readable_bytes(data))
349            .field("data_offset", data_offset)
350            .finish()
351    }
352}
353
/// An iterator built from the result of a fallible iterator constructor.
///
/// This lets a `Result<I, E>` be used wherever an iterator of
/// `Result<T, E>` is expected, without collecting anything first.
pub(crate) enum FallibleIterator<I, T, E>
where
    I: Iterator<Item = Result<T, E>>,
{
    /// The constructor succeeded; items are forwarded from the inner iterator.
    Ok(I),
    /// The constructor failed; the error is yielded once and then replaced
    /// with `None`.
    Err(Option<E>),
}

impl<I, T, E> FallibleIterator<I, T, E>
where
    I: Iterator<Item = Result<T, E>>,
{
    /// Wrap the outcome of creating an iterator.
    pub(crate) fn new(result: Result<I, E>) -> Self {
        result.map_or_else(
            |error| FallibleIterator::Err(Some(error)),
            |inner| FallibleIterator::Ok(inner),
        )
    }
}

impl<I, T, E> Iterator for FallibleIterator<I, T, E>
where
    I: Iterator<Item = Result<T, E>>,
{
    type Item = I::Item;

    /// Forward to the inner iterator, or hand out the stored error exactly
    /// once before reporting exhaustion.
    fn next(&mut self) -> Option<Self::Item> {
        match self {
            FallibleIterator::Ok(inner) => inner.next(),
            FallibleIterator::Err(pending) => {
                let error = pending.take()?;
                Some(Err(error))
            }
        }
    }
}
387
/// A volume section containing a directory tree.
///
/// The section payload is split into a name, a header, and a data part when
/// it is parsed; the header is only interpreted lazily, on lookup.
#[derive(Clone, PartialEq)]
pub struct VolumeSection {
    /// The volume's name, validated as UTF-8 during parsing.
    name: String,
    /// The raw, still-encoded volume header.
    header: OwnedBuffer,
    /// The raw file data that offsets in the header index into.
    data: OwnedBuffer,
    /// Where `data` starts, counted from the beginning of the section
    /// including its one-byte tag and eight-byte length. It is passed to
    /// the `Directory` returned by `root()`.
    data_offset: usize,
}
396
397impl VolumeSection {
398    /// Parse the payload of a volume section, starting after the initial tag
399    /// and length.
400    fn parse(buffer: OwnedBuffer) -> Result<Self, SectionError> {
401        const OFFSET_INTO_VOLUME: usize = std::mem::size_of::<u8>() + std::mem::size_of::<u64>();
402        let initial_length = buffer.len();
403
404        let (name, rest) = length_delimited_section(buffer)?;
405        let name = std::str::from_utf8(&name)
406            .map(|s| s.to_string())
407            .map_err(|error| SectionError::InvalidVolumeName { error, name })?;
408
409        let (header, rest) = length_delimited_section(rest)?;
410
411        let (data, rest) = length_delimited_section(rest)?;
412        let data_offset = OFFSET_INTO_VOLUME + initial_length - rest.len() - data.len();
413
414        Ok(VolumeSection {
415            name,
416            header,
417            data,
418            data_offset,
419        })
420    }
421
422    pub fn name(&self) -> &str {
423        &self.name
424    }
425
426    /// The lazily parsed volume header.
427    pub(crate) fn header(&self) -> VolumeHeader<'_> {
428        VolumeHeader::new(&self.header)
429    }
430
431    pub fn lookup_file(&self, path: impl ToPathSegments) -> Result<OwnedBuffer, LookupError> {
432        lookup_file(self.header(), &self.data, path)
433    }
434
435    pub fn root(&self) -> Result<Directory<'_>, VolumeHeaderError> {
436        self.header()
437            .root_directory()
438            .map(|root| Directory::new(root, self.data_offset, self.data.clone()))
439    }
440}
441
442impl std::fmt::Debug for VolumeSection {
443    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
444        let VolumeSection {
445            name,
446            header,
447            data,
448            data_offset,
449        } = self;
450
451        f.debug_struct("VolumeSection")
452            .field("name", &name)
453            .field("header", &readable_bytes(header))
454            .field("data", &readable_bytes(data))
455            .field("data_offset", data_offset)
456            .finish()
457    }
458}
459
460fn lookup_file(
461    header: VolumeHeader<'_>,
462    data: &OwnedBuffer,
463    path: impl ToPathSegments,
464) -> Result<OwnedBuffer, LookupError> {
465    let path_segments = path.to_path_segments()?;
466
467    match header.find(&path_segments)? {
468        Some(HeaderEntry::File(offsets)) => {
469            let range = offsets.start_offset..offsets.end_offset;
470
471            if range.end > data.len() {
472                return Err(LookupError::InvalidItemLocation {
473                    path: path_segments,
474                    range,
475                });
476            }
477
478            Ok(data.slice(range))
479        }
480        Some(HeaderEntry::Directory(_)) => Err(LookupError::IsADirectory),
481        None => Err(LookupError::NotFound),
482    }
483}
484
/// Errors that may occur while looking up an item in a volume.
///
/// The enum is `#[non_exhaustive]`, so downstream code must keep a wildcard
/// arm when matching on it.
#[derive(Debug, thiserror::Error)]
#[non_exhaustive]
pub enum LookupError {
    /// The path refers to a directory, but a file was requested.
    #[error("Is a directory")]
    IsADirectory,
    /// Nothing exists at the requested path.
    #[error("Not found")]
    NotFound,
    /// The volume header couldn't be parsed while searching for the item.
    #[error("Unable to parse the volume header")]
    Header(#[from] VolumeHeaderError),
    /// The requested path couldn't be converted into path segments.
    #[error("Invalid path")]
    InvalidPath(#[from] PathSegmentError),
    /// The header's offsets for the file don't fit inside the volume's data.
    #[error("The metadata for \"{path}\" says the file is at {range:?}, which is out of bounds")]
    InvalidItemLocation {
        /// The path that was looked up.
        path: PathSegments,
        /// The byte range, relative to the start of the volume's data, that
        /// the header recorded for the file.
        range: Range<usize>,
    },
}
503
504#[cfg(test)]
505mod tests {
506    use std::collections::BTreeMap;
507
508    use crate::{
509        utils::{length_field, sha256},
510        v2::{Checksum, IndexEntry, Signature, Span},
511    };
512
513    use super::*;
514
515    #[test]
516    fn read_an_index_section() {
517        let index = Index {
518            manifest: IndexEntry {
519                span: Span::new(1, 2),
520                checksum: Checksum::none(),
521            },
522            atoms: IndexEntry {
523                span: Span::new(3, 4),
524                checksum: Checksum::sha256([0xaa; 32]),
525            },
526            volumes: BTreeMap::new(),
527            signature: Signature::none(),
528        };
529        let mut bytes = vec![];
530        ciborium::into_writer(&index, &mut bytes).unwrap();
531        let bytes: OwnedBuffer = bytes.into();
532
533        let section = Section::parse(Tag::Index.as_u8(), bytes.clone()).unwrap();
534
535        assert_eq!(section, Section::Index(IndexSection(bytes)));
536        assert_eq!(section.as_index().unwrap().index().unwrap(), index);
537    }
538
    /// Parse a hand-assembled volume section containing nested directories,
    /// an empty directory, and several files, then check that the root
    /// listing and file lookups return the expected contents.
    ///
    /// The `u64` values that open each directory entry are byte offsets of
    /// the referenced header entry, counted from the start of the header.
    /// The numbers below add up on the assumption that `length_field()`
    /// emits an 8-byte length, `sha256()` a 32-byte digest, and each tag is
    /// one byte -- re-check them if those encodings ever change.
    #[test]
    fn read_the_kitchen_sink_volume_section() {
        // File contents; each file uses a distinct fill byte and length so
        // mixed-up lookups are easy to spot.
        let xyz_txt = [0xaa; 10];
        let file1_txt = [0xbb; 5];
        let file2_txt = [0xcc; 8];
        let file3_txt = [0xdd; 2];
        let raw = bytes! {
            // ==== Name ====
            length_field("volume"),
            "volume",

            // ==== Header section ====
            // header length
            407_u64.to_le_bytes(),

            // ---- Root directory ----
            Tag::Directory,
            // length of the entries that follow
            42_u64.to_le_bytes(),
            // first entry
            51_u64.to_le_bytes(),
            length_field("a"),
            "a",
            // second entry
            358_u64.to_le_bytes(),
            length_field("file3.txt"),
            "file3.txt",

            // ---- "/a" ----
            Tag::Directory,
            34_u64.to_le_bytes(),
            // first entry
            94_u64.to_le_bytes(),
            length_field("b"),
            "b",
            // second entry
            249_u64.to_le_bytes(),
            length_field("c"),
            "c",

            // ---- "/a/b/" ----
            Tag::Directory,
            48_u64.to_le_bytes(),
            // first entry
            151_u64.to_le_bytes(),
            length_field("file1.txt"),
            "file1.txt",
            // second entry
            200_u64.to_le_bytes(),
            length_field("xyz.txt"),
            "xyz.txt",

            // ---- "/a/b/file1.txt" ----
            Tag::File,
            // start and end offsets into the data section
            0_u64.to_le_bytes(),
            5_u64.to_le_bytes(),
            sha256(file1_txt),

            // ---- "/a/b/xyz.txt" ----
            Tag::File,
            5_u64.to_le_bytes(),
            15_u64.to_le_bytes(),
            sha256(xyz_txt),

            // ---- "/a/c/" ----
            Tag::Directory,
            42_u64.to_le_bytes(),
            // First entry
            300_u64.to_le_bytes(),
            length_field("d"),
            "d",
            // Second entry
            309_u64.to_le_bytes(),
            length_field("file2.txt"),
            "file2.txt",

            // ---- "/a/c/d" ----
            // an empty directory
            Tag::Directory,
            0_u64.to_le_bytes(),

            // ---- "/a/c/file2.txt" ----
            Tag::File,
            15_u64.to_le_bytes(),
            23_u64.to_le_bytes(),
            sha256(file2_txt),

            // ---- "file3.txt" ----
            Tag::File,
            23_u64.to_le_bytes(),
            25_u64.to_le_bytes(),
            sha256(file3_txt),

            // ==== Data section ====
            // data length
            25_u64.to_le_bytes(),
            // Raw file data, in the order given by the offsets above
            file1_txt,
            xyz_txt,
            file2_txt,
            file3_txt,
        };

        let volume = VolumeSection::parse(raw.into()).unwrap();

        // Listing the root only shows its direct children. Entries that fail
        // to parse are silently dropped here, so the assertion below is what
        // catches them.
        let root_items: Vec<_> = volume
            .root()
            .unwrap()
            .entries()
            .filter_map(|result| result.ok())
            .map(|(name, _)| name)
            .collect();
        assert_eq!(root_items, &["a", "file3.txt"]);
        // Every file should be reachable by its full path.
        assert_eq!(
            volume
                .lookup_file(["a", "b", "file1.txt"])
                .unwrap()
                .as_ref(),
            file1_txt,
        );
        assert_eq!(
            volume
                .lookup_file(["a", "c", "file2.txt"])
                .unwrap()
                .as_ref(),
            file2_txt,
        );
        assert_eq!(
            volume.lookup_file(["file3.txt"]).unwrap().as_ref(),
            file3_txt
        );
        assert_eq!(
            volume.lookup_file(["a", "b", "xyz.txt"]).unwrap().as_ref(),
            xyz_txt
        );
    }
673}