Skip to main content

webc/v3/write/
volumes.rs

1use std::{
2    collections::BTreeMap,
3    fmt,
4    fs::File,
5    io::{BufReader, Read, Write},
6    path::{Path, PathBuf},
7};
8
9use bytes::{BufMut, Bytes, BytesMut};
10use sha2::{Digest, Sha256};
11
12use crate::{
13    DirectoryFromPathError, PathSegment, readable_bytes,
14    v3::{Span, Tag, Timestamps},
15};
16
/// The main parts of a volume.
///
/// Produced by [`VolumeParts::serialize()`] and turned into the final byte
/// stream by [`VolumeParts::volume()`] or [`VolumeParts::atoms()`].
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub(crate) struct VolumeParts {
    /// The header section: the serialized directory and file metadata entries.
    pub(crate) header: Bytes,
    /// The data section: the contents of every file, stored back to back.
    pub(crate) data: Bytes,
}
23
24impl VolumeParts {
25    pub(crate) fn serialize(dir: Directory<'_>) -> Result<Self, std::io::Error> {
26        let serializer = Serializer::default();
27        serializer.serialize(dir)
28    }
29
30    /// Finish serializing this into a named volume.
31    pub(crate) fn volume(&self, name: &str) -> Bytes {
32        let VolumeParts { header, data } = self;
33
34        let mut buffer = BytesMut::with_capacity(
35            header.len() + data.len() + name.len() + 3 * std::mem::size_of::<u64>(),
36        );
37
38        buffer.put_u64_le(name.len().try_into().unwrap());
39        buffer.extend_from_slice(name.as_bytes());
40        buffer.put_u64_le(header.len().try_into().unwrap());
41        buffer.extend_from_slice(header);
42        buffer.put_u64_le(data.len().try_into().unwrap());
43        buffer.extend_from_slice(data);
44
45        buffer.freeze()
46    }
47
48    /// Finish serializing this into the atoms volume.
49    ///
50    /// The atoms section is almost identical to a normal volume
51    /// ([`VolumeParts::volume()`]), except it doesn't have a name.
52    pub(crate) fn atoms(&self) -> Bytes {
53        let VolumeParts { header, data } = self;
54
55        let mut buffer =
56            BytesMut::with_capacity(header.len() + data.len() + 2 * std::mem::size_of::<u64>());
57
58        buffer.put_u64_le(header.len().try_into().unwrap());
59        buffer.extend_from_slice(header);
60        buffer.put_u64_le(data.len().try_into().unwrap());
61        buffer.extend_from_slice(data);
62
63        buffer.freeze()
64    }
65}
66
/// Accumulates the two sections of a volume while a [`Directory`] tree is
/// being serialized.
#[derive(Debug, Default, Clone, PartialEq)]
struct Serializer {
    /// Directory and file metadata entries, in the order they were serialized.
    header: BytesMut,
    /// Raw file contents, appended in the order the files were serialized.
    data: BytesMut,
}
72
73impl Serializer {
74    fn serialize(mut self, dir: Directory<'_>) -> Result<VolumeParts, std::io::Error> {
75        self.serialize_directory(dir)?;
76        let Serializer { header, data } = self;
77
78        Ok(VolumeParts {
79            header: header.freeze(),
80            data: data.freeze(),
81        })
82    }
83
84    fn serialize_dir_entry(
85        &mut self,
86        dir_entry: DirEntry<'_>,
87    ) -> Result<(Span, [u8; 32]), std::io::Error> {
88        match dir_entry {
89            DirEntry::Dir(d) => self.serialize_directory(d),
90            DirEntry::File(f) => self.serialize_file(f),
91        }
92    }
93
94    fn serialize_directory(
95        &mut self,
96        dir: Directory<'_>,
97    ) -> Result<(Span, [u8; 32]), std::io::Error> {
98        const DUMMY_U64: [u8; std::mem::size_of::<u64>()] =
99            [0xde, 0xad, 0xbe, 0xef, 0xba, 0xad, 0xc0, 0xde];
100
101        let overall_start = self.header.len();
102        self.header.put_u8(Tag::Directory.as_u8());
103
104        // We'll fill in the length directory length field at the end
105        let directory_length_ix = self.header.len();
106        self.header.extend(DUMMY_U64);
107
108        // Add the timestamps of the directory
109        let timestamps_start = self.header.len();
110        dir.timestamps.write_to(&mut self.header)?;
111
112        // Add hash of the directory
113        let mut hasher = sha2::Sha256::new();
114        let hash_start = self.header.len();
115        self.header.extend_from_slice(&[0; 32]);
116
117        let mut offset_fields = BTreeMap::new();
118
119        for name in dir.children.keys() {
120            // each entry in a directory is stored as (offset, hash, name_length, name)
121
122            // Note: we don't actually know where the entry will be placed in
123            // the header, so let's write a dummy value and come back later.
124            let ix = self.header.len();
125
126            // offset
127            self.header.extend(DUMMY_U64);
128            // hash
129            self.header.extend_from_slice(&[0; 32]);
130            // name_length
131            self.header
132                .extend(u64::try_from(name.len()).unwrap().to_le_bytes());
133            // name
134            self.header.extend_from_slice(name.as_bytes());
135
136            offset_fields.insert(name.clone(), ix);
137        }
138
139        let end = self.header.len();
140        let span = Span::new(overall_start, end - overall_start);
141
142        // Patch up the directory length
143        let length = u64::try_from(end - timestamps_start).unwrap().to_le_bytes();
144        self.header[directory_length_ix..directory_length_ix + length.len()]
145            .copy_from_slice(&length);
146
147        for (name, entry) in dir.children {
148            let (Span { start, .. }, hash) = self.serialize_dir_entry(entry)?;
149
150            // Now we've serialized the entry, we can fill in its offset
151            let offset_field = offset_fields[&name];
152            let offset = u64::try_from(start).unwrap().to_le_bytes();
153            self.header[offset_field..offset_field + offset.len()].copy_from_slice(&offset);
154
155            let hash_offset = offset_field + offset.len();
156
157            self.header[hash_offset..hash_offset + hash.len()].copy_from_slice(hash.as_slice());
158
159            // hash of a directory is the hash of all of its entries
160            hasher.update(hash);
161        }
162
163        // Patch up the directory hash
164        let hash: [u8; 32] = hasher.finalize().into();
165        self.header[hash_start..hash_start + hash.len()].copy_from_slice(&hash);
166
167        Ok((span, hash))
168    }
169
170    fn serialize_file(&mut self, file: FileEntry<'_>) -> Result<(Span, [u8; 32]), std::io::Error> {
171        // First, we serialize the file's data
172        let data_start = self.data.len();
173        let mut cs = Sha256ChecksumWriter::new(BufMut::writer(&mut self.data));
174        file.content.write_to(&mut cs)?;
175        let checksum = cs.finish();
176        let data_end = self.data.len();
177
178        // Now, we can update the header with its metadata
179        let start = self.header.len();
180
181        // File tag
182        self.header.put_u8(Tag::File.as_u8());
183        // Data range
184        self.header
185            .extend(u64::try_from(data_start).unwrap().to_le_bytes());
186        self.header
187            .extend(u64::try_from(data_end).unwrap().to_le_bytes());
188        self.header.extend(checksum);
189        file.timestamps.write_to(&mut self.header)?;
190        let end = self.header.len();
191
192        Ok((Span::new(start, end - start), checksum))
193    }
194}
195
/// A writer adapter which forwards everything to `writer` while feeding the
/// bytes that were actually written into a running SHA-256 hash.
struct Sha256ChecksumWriter<W> {
    /// The wrapped writer that receives the data.
    writer: W,
    /// The running hash over all bytes written so far.
    state: Sha256,
}
200
201impl<W> Sha256ChecksumWriter<W> {
202    fn new(writer: W) -> Self {
203        Sha256ChecksumWriter {
204            writer,
205            state: Sha256::default(),
206        }
207    }
208
209    fn finish(self) -> [u8; 32] {
210        self.state.finalize().into()
211    }
212}
213
214impl<W: Write> Write for Sha256ChecksumWriter<W> {
215    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
216        let bytes_written = self.writer.write(buf)?;
217        self.state.update(&buf[..bytes_written]);
218        Ok(bytes_written)
219    }
220
221    fn flush(&mut self) -> std::io::Result<()> {
222        Ok(())
223    }
224}
225
/// A directory in a volume.
#[non_exhaustive]
#[derive(Debug, Default)]
pub struct Directory<'a> {
    /// The items in this directory.
    ///
    /// Being a [`BTreeMap`], the children are always iterated (and therefore
    /// serialized) in sorted name order.
    pub children: BTreeMap<PathSegment, DirEntry<'a>>,
    /// The timestamps written into this directory's header entry.
    pub timestamps: Timestamps,
}
234
235impl<'a> Directory<'a> {
236    pub fn new(children: BTreeMap<PathSegment, DirEntry<'a>>, timestamps: Timestamps) -> Self {
237        Directory {
238            children,
239            timestamps,
240        }
241    }
242
243    pub const fn with_timestamps(timestamps: Timestamps) -> Self {
244        Directory {
245            children: BTreeMap::new(),
246            timestamps,
247        }
248    }
249}
250
251impl<'a> Extend<(PathSegment, DirEntry<'a>)> for Directory<'a> {
252    fn extend<T: IntoIterator<Item = (PathSegment, DirEntry<'a>)>>(&mut self, iter: T) {
253        self.children.extend(iter)
254    }
255}
256
257impl Directory<'static> {
258    /// Load a [`Directory`] from a directory on disk.
259    pub fn from_path(directory: impl AsRef<Path>) -> Result<Self, std::io::Error> {
260        let directory = directory.as_ref();
261
262        let mut children: BTreeMap<PathSegment, DirEntry<'_>> = BTreeMap::new();
263
264        for entry in directory.read_dir()? {
265            let entry = entry?;
266            let path = entry.path();
267
268            let name = match path
269                .strip_prefix(directory)
270                .expect("The path was derived from our directory")
271                .to_str()
272            {
273                Some(s) => s.parse().unwrap(),
274                None => continue,
275            };
276
277            let file_type = entry.file_type()?;
278            if file_type.is_dir() {
279                let dir = Directory::from_path(&path)?;
280                children.insert(name, DirEntry::Dir(dir));
281            } else {
282                children.insert(name, DirEntry::File(FileEntry::from_path(path)?));
283            }
284        }
285
286        let meta = directory.metadata()?;
287        let timestamps = Timestamps::from_metadata(&meta)?;
288
289        Ok(Directory {
290            children,
291            timestamps,
292        })
293    }
294
295    pub fn from_path_with_walker(
296        directory: impl AsRef<Path>,
297        walker: ignore::Walk,
298    ) -> Result<Self, DirectoryFromPathError> {
299        crate::from_path_with_walker::from_path_with_walker::<Directory<'_>, DirEntry<'_>>(
300            directory,
301            walker,
302            |dir_path| {
303                let meta = dir_path.metadata()?;
304                let timestamps = Timestamps::from_metadata(&meta)?;
305
306                Ok(Self {
307                    children: BTreeMap::new(),
308                    timestamps,
309                })
310            },
311            DirEntry::Dir,
312            |path| {
313                let file = FileEntry::from_path(path)?;
314                Ok(DirEntry::File(file))
315            },
316            |dir| &mut dir.children,
317            |entry| match entry {
318                DirEntry::Dir(d) => Some(d),
319                DirEntry::File(_) => None,
320            },
321        )
322    }
323}
324
/// A single entry in a directory.
///
/// A [`Directory`], or anything that converts into a [`FileEntry`], can be
/// turned into a `DirEntry` with `From`/`Into`.
#[derive(Debug)]
pub enum DirEntry<'a> {
    /// A [`Directory`].
    Dir(Directory<'a>),
    /// A [`FileEntry`].
    File(FileEntry<'a>),
}
333
334impl<'a> From<Directory<'a>> for DirEntry<'a> {
335    fn from(value: Directory<'a>) -> Self {
336        DirEntry::Dir(value)
337    }
338}
339
340impl<'a, F> From<F> for DirEntry<'a>
341where
342    FileEntry<'a>: From<F>,
343{
344    fn from(value: F) -> Self {
345        DirEntry::File(value.into())
346    }
347}
348
/// A file in a volume: its contents plus the timestamps stored alongside it.
#[derive(Debug)]
pub struct FileEntry<'a> {
    /// The timestamps written into this file's header entry.
    timestamps: Timestamps,
    /// Where the file's bytes come from.
    pub(crate) content: FileContent<'a>,
}
354
355impl<'a> FileEntry<'a> {
356    pub fn borrowed(bytes: &'a [u8], timestamps: Timestamps) -> FileEntry<'a> {
357        FileEntry {
358            timestamps,
359            content: FileContent::Borrowed(bytes),
360        }
361    }
362
363    pub fn owned(bytes: impl Into<Bytes>, timestamps: Timestamps) -> FileEntry<'a> {
364        FileEntry {
365            timestamps,
366            content: FileContent::Owned(bytes.into()),
367        }
368    }
369
370    pub fn reader(reader: Box<dyn Read>, timestamps: Timestamps) -> FileEntry<'a> {
371        FileEntry {
372            timestamps,
373            content: FileContent::Reader(reader),
374        }
375    }
376
377    /// Create a new [`FileEntry`] from a file on disk.
378    ///
379    /// To avoid having too many open file handles at a time, the file will only
380    /// be opened on the first read.
381    ///
382    /// Beware of [time-of-check to time-of-use][toctou] issues. This function
383    /// checks that the file exists when first called, but permissions may
384    /// change or the file may still be deleted/moved/replaced before the first
385    /// read.
386    ///
387    /// [toctou]: https://en.wikipedia.org/wiki/Time-of-check_to_time-of-use
388    pub fn from_path(path: impl Into<PathBuf>) -> Result<Self, std::io::Error> {
389        struct LazyReader {
390            path: PathBuf,
391            reader: Option<BufReader<File>>,
392        }
393        impl Read for LazyReader {
394            fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
395                let r = match &mut self.reader {
396                    Some(r) => r,
397                    None => {
398                        let f = File::open(&self.path)?;
399                        self.reader.insert(BufReader::new(f))
400                    }
401                };
402
403                r.read(buf)
404            }
405        }
406
407        let path = path.into();
408        let meta = path.metadata()?;
409
410        let timestamps = Timestamps::from_metadata(&meta)?;
411        let reader = Box::new(LazyReader { path, reader: None });
412
413        Ok(FileEntry::reader(reader, timestamps))
414    }
415}
416
/// Some file-like object which can be written to a WEBC file.
pub(crate) enum FileContent<'a> {
    /// Bytes borrowed from somewhere else.
    Borrowed(&'a [u8]),
    /// Owned bytes.
    Owned(Bytes),
    /// A readable object, which is read to the end when the content is
    /// written out.
    Reader(Box<dyn Read>),
}
426
427impl<'a> fmt::Debug for FileContent<'a> {
428    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
429        match self {
430            FileContent::Borrowed(b) => f
431                .debug_tuple("Borrowed")
432                .field(&readable_bytes::readable_bytes(b))
433                .finish(),
434            FileContent::Owned(b) => f
435                .debug_tuple("Owned")
436                .field(&readable_bytes::readable_bytes(b))
437                .finish(),
438            FileContent::Reader(_) => f.debug_tuple("Reader").finish(),
439        }
440    }
441}
442
443impl FileContent<'_> {
444    fn write_to(self, mut writer: impl Write) -> Result<(), std::io::Error> {
445        match self {
446            FileContent::Borrowed(slice) => writer.write_all(slice),
447            FileContent::Owned(bytes) => writer.write_all(&bytes),
448            FileContent::Reader(mut reader) => {
449                std::io::copy(&mut reader, &mut writer)?;
450                Ok(())
451            }
452        }
453    }
454}
455
456impl<'a> From<&'a [u8]> for FileEntry<'a> {
457    fn from(value: &'a [u8]) -> Self {
458        FileEntry::borrowed(value, Timestamps::default())
459    }
460}
461
462impl<'a, const N: usize> From<&'a [u8; N]> for FileEntry<'a> {
463    fn from(value: &'a [u8; N]) -> Self {
464        FileEntry::borrowed(value, Timestamps::default())
465    }
466}
467
468impl From<Vec<u8>> for FileEntry<'_> {
469    fn from(value: Vec<u8>) -> Self {
470        FileEntry::owned(value, Timestamps::default())
471    }
472}
473
474impl<const N: usize> From<[u8; N]> for FileEntry<'_> {
475    fn from(value: [u8; N]) -> Self {
476        FileEntry::owned(value.to_vec(), Timestamps::default())
477    }
478}
479
480impl From<Bytes> for FileEntry<'_> {
481    fn from(value: Bytes) -> Self {
482        FileEntry::owned(value, Timestamps::default())
483    }
484}
485
486#[cfg(test)]
487mod tests {
488    use std::time::{Duration, SystemTime};
489
490    use tempfile::tempdir;
491
492    use crate::utils::{length_field, sha256};
493
494    use super::*;
495
496    #[test]
497    fn from_path_with_ignore_ignores_hidden_and_ignored() {
498        let root = tempdir().unwrap();
499
500        let _hidden = std::fs::File::create(root.path().join(".hidden")).unwrap();
501
502        let _git = std::fs::File::create(root.path().join(".git")).unwrap();
503
504        let mut gitignore = std::fs::File::create(root.path().join(".ignore")).unwrap();
505        gitignore.write_all(b"ignore_me").unwrap();
506
507        let mut wasmerignore = std::fs::File::create(root.path().join(".wasmerignore")).unwrap();
508        wasmerignore.write_all(b"ignore_me_too").unwrap();
509
510        std::fs::File::create(root.path().join("ignore_me")).unwrap();
511        std::fs::File::create(root.path().join("ignore_me_too")).unwrap();
512
513        std::fs::File::create(root.path().join("include_me")).unwrap();
514
515        std::fs::create_dir(root.path().join("subdir")).unwrap();
516        std::fs::File::create(root.path().join("subdir/ignore_me")).unwrap();
517        std::fs::File::create(root.path().join("subdir/ignore_me_too")).unwrap();
518        std::fs::File::create(root.path().join("subdir/include_me_too")).unwrap();
519
520        std::fs::create_dir(root.path().join("subdir/othersub")).unwrap();
521        std::fs::File::create(root.path().join("subdir/othersub/include_me_please")).unwrap();
522
523        let walker = ignore::WalkBuilder::new(root.path())
524            .ignore(true)
525            .require_git(true)
526            .git_ignore(true)
527            .git_exclude(true)
528            .parents(true)
529            .hidden(true)
530            .follow_links(false)
531            .add_custom_ignore_filename(".wasmerignore")
532            .build();
533
534        let dir = Directory::from_path_with_walker(root.path(), walker).unwrap();
535
536        assert_eq!(dir.children.len(), 2);
537        assert!(matches!(
538            dir.children.get(&"include_me".try_into().unwrap()),
539            Some(DirEntry::File(_))
540        ));
541
542        let subdir = match dir.children.get(&"subdir".try_into().unwrap()).unwrap() {
543            DirEntry::Dir(d) => d,
544            DirEntry::File(_) => panic!("Expected dir"),
545        };
546        assert_eq!(subdir.children.len(), 2);
547        assert!(matches!(
548            subdir.children.get(&"include_me_too".try_into().unwrap()),
549            Some(DirEntry::File(_))
550        ));
551
552        let othersub = match subdir
553            .children
554            .get(&"othersub".try_into().unwrap())
555            .unwrap()
556        {
557            DirEntry::Dir(d) => d,
558            DirEntry::File(_) => panic!("Expected dir"),
559        };
560        assert_eq!(othersub.children.len(), 1);
561        assert!(matches!(
562            othersub
563                .children
564                .get(&"include_me_please".try_into().unwrap()),
565            Some(DirEntry::File(_))
566        ));
567    }
568
    /// An empty directory serializes to a single directory entry in the
    /// header and an empty data section.
    #[test]
    fn write_empty_volume() {
        let dir = Directory::default();

        // A directory without children hashes "nothing"
        let hash: [u8; 32] = sha2::Sha256::new().finalize().into();

        let VolumeParts { header, data } = VolumeParts::serialize(dir).unwrap();

        assert_bytes_eq!(
            header,
            bytes! {
                // ==== Header section ====
                // ---- root directory ----
                Tag::Directory,
                // length of everything after this field: timestamps + hash
                56_u64.to_le_bytes(),
                Timestamps::default(),
                hash,
            }
        );
        assert_bytes_eq!(
            data,
            bytes! {
                // ==== data section ====
                // (empty)
            }
        );
    }
596
    /// The directory's own timestamps end up in its header entry, and don't
    /// take part in the directory hash.
    #[test]
    fn write_empty_volume_with_non_zero_timestamps() {
        let timestamps = Timestamps {
            modified: SystemTime::UNIX_EPOCH + Duration::from_secs(2_000_000_000),
        };

        // Still the hash of "nothing": there are no children
        let hash: [u8; 32] = sha2::Sha256::new().finalize().into();

        let dir = Directory {
            children: BTreeMap::new(),
            timestamps,
        };

        let VolumeParts { header, data } = VolumeParts::serialize(dir).unwrap();

        assert_bytes_eq!(
            header,
            bytes! {
                // ==== Header section ====
                // ---- root directory ----
                Tag::Directory,
                // 3 * 8 bytes of timestamps + 32 bytes of hash
                56_u64.to_le_bytes(),

                // timestamps
                // accessed
                0_000_000_000_u64.to_le_bytes(),
                // modified
                2_000_000_000_u64.to_le_bytes(),
                // created
                0_000_000_000_u64.to_le_bytes(),

                hash,
            }
        );
        assert_bytes_eq!(
            data,
            bytes! {
                // ==== data section ====
                // (empty)
            }
        );
    }
639
    /// A directory with one file: the directory entry points at the file's
    /// header entry, and the file's bytes land in the data section.
    #[test]
    fn volume_with_single_file() {
        let file3_txt = b"Hello, World!";
        let timestamps = Timestamps {
            modified: SystemTime::UNIX_EPOCH + Duration::from_secs(2_000_000_000),
        };
        let file_entry = FileEntry::borrowed(file3_txt.as_slice(), timestamps);

        let children = BTreeMap::from_iter(Some((
            "file3.txt".parse().unwrap(),
            DirEntry::from(file_entry),
        )));

        let dir = Directory {
            children,
            timestamps: Timestamps::default(),
        };

        // A file's hash is the hash of its contents; a directory's hash is the
        // hash of its children's hashes.
        let file_hash: [u8; 32] = sha2::Sha256::digest(file3_txt).into();
        let dir_hash: [u8; 32] = sha2::Sha256::digest(file_hash).into();

        let VolumeParts { header, data } = VolumeParts::serialize(dir).unwrap();

        assert_bytes_eq!(
            header,
            bytes! {
                // ---- Root directory ----
                Tag::Directory,
                // overall length of this directory section:
                // 24 (timestamps) + 32 (hash) + 57 (one entry with a 9-byte name)
                113_u64.to_le_bytes(),
                // timestamps
                Timestamps::default(),
                // hash
                dir_hash,
                // entries
                // offset of the file's header entry: 1 (tag) + 8 (length) + 113
                122_u64.to_le_bytes(),
                file_hash,
                length_field("file3.txt"),
                "file3.txt",

                // ---- /file3.txt ----
                Tag::File,
                // start of the file in the data section
                0_u64.to_le_bytes(),
                // end of the file in the data section (equal to its length
                // here because it starts at 0)
                length_field(file3_txt),
                sha256(file3_txt),
                timestamps,
            }
        );
        assert_bytes_eq!(data, file3_txt);
    }
690
    /// Several files in one directory are written in sorted name order, both
    /// in the header and in the data section.
    #[test]
    fn volume_that_just_contains_files() {
        let dir = dir_map! {
            "file1.txt" => b"first",
            "xyz.txt" => b"second",
            "file2.txt" => b"third",
        };

        let VolumeParts { header, data } = VolumeParts::serialize(dir).unwrap();

        let file1_hash: [u8; 32] = sha2::Sha256::digest(b"first").into();
        let xyz_hash: [u8; 32] = sha2::Sha256::digest(b"second").into();
        let file2_hash: [u8; 32] = sha2::Sha256::digest(b"third").into();

        // The directory hash covers the children's hashes in sorted name order
        let mut dir_hasher = sha2::Sha256::new();
        dir_hasher.update(file1_hash);
        dir_hasher.update(file2_hash);
        dir_hasher.update(xyz_hash);
        let dir_hash: [u8; 32] = dir_hasher.finalize().into();

        // Note: the initial order was "file1.txt", "xyz.txt", and "file2.txt",
        // but the BTreeMap implicitly sorted them
        assert_bytes_eq!(
            header,
            bytes! {
                // ---- Root directory ----
                Tag::Directory,
                // 24 (timestamps) + 32 (hash) + 57 + 57 + 55 (three entries)
                225_u64.to_le_bytes(),
                // timestamps
                Timestamps::default(),
                // hash
                dir_hash,
                // first entry (offset = 1 + 8 + 225)
                234_u64.to_le_bytes(),
                file1_hash,
                length_field("file1.txt"),
                "file1.txt",
                // second entry (each file entry is 73 bytes long)
                307_u64.to_le_bytes(),
                file2_hash,
                length_field("file2.txt"),
                "file2.txt",
                // third entry
                380_u64.to_le_bytes(),
                xyz_hash,
                length_field("xyz.txt"),
                "xyz.txt",

                // ---- /file1.txt ----
                Tag::File,
                0_u64.to_le_bytes(),
                5_u64.to_le_bytes(),
                sha256("first"),
                Timestamps::default(),
                // --- "file2.txt" ---
                Tag::File,
                5_u64.to_le_bytes(),
                10_u64.to_le_bytes(),
                sha256("third"),
                Timestamps::default(),
                // --- "xyz.txt" ---
                Tag::File,
                10_u64.to_le_bytes(),
                16_u64.to_le_bytes(),
                sha256("second"),
                Timestamps::default(),
            }
        );

        assert_bytes_eq!(data, b"firstthirdsecond");
    }
762
    /// A directory containing one empty sub-directory: the child's header
    /// entry follows directly after the parent's.
    #[test]
    fn header_with_single_directory() {
        let dir = dir_map! {
            "root" => dir_map!(),
        };

        let VolumeParts { header, .. } = VolumeParts::serialize(dir).unwrap();

        // The empty child hashes "nothing"; the parent hashes the child's hash
        let empty_hash: [u8; 32] = sha2::Sha256::new().finalize().into();
        let dir_hash: [u8; 32] = sha2::Sha256::digest(empty_hash).into();

        let expected = bytes! {
            // ---- root directory ----
            Tag::Directory,
            // 24 (timestamps) + 32 (hash) + 52 (one entry with a 4-byte name)
            108_u64.to_le_bytes(),
            // timestamps
            Timestamps::default(),
            // hash
            dir_hash,
            // first entry (offset = 1 + 8 + 108)
            117_u64.to_le_bytes(),
            empty_hash,
            length_field("root"),
            "root",

            // ---- "/root" ----
            Tag::Directory,
            56_u64.to_le_bytes(),
            // timestamps
            Timestamps::default(),
            // hash
            empty_hash,

        };
        assert_bytes_eq!(header, expected);
    }
799
    /// Two levels of nesting: every directory's entry is followed by the
    /// entries of its children, and hashes propagate from the leaves upwards.
    #[test]
    fn volume_with_nested_empty_directories() {
        let dir = dir_map! {
            "root" => dir_map! {
                "nested" => dir_map! { },
            },
        };

        let VolumeParts { header, data } = VolumeParts::serialize(dir).unwrap();

        // "nested" is empty, "root" hashes "nested", the top level hashes "root"
        let empty_hash: [u8; 32] = sha2::Sha256::new().finalize().into();
        let root_hash: [u8; 32] = sha2::Sha256::digest(empty_hash).into();
        let dir_hash: [u8; 32] = sha2::Sha256::digest(root_hash).into();

        assert_bytes_eq!(
            header,
            bytes! {
                // ---- Root directory ----
                Tag::Directory,
                // 24 (timestamps) + 32 (hash) + 52 (one entry with a 4-byte name)
                108_u64.to_le_bytes(),
                // timestamps
                Timestamps::default(),
                // hash
                dir_hash,
                // first entry (offset = 1 + 8 + 108)
                117_u64.to_le_bytes(),
                root_hash,
                length_field("root"),
                "root",

                // ---- "/root" ----
                Tag::Directory,
                // 24 (timestamps) + 32 (hash) + 54 (one entry with a 6-byte name)
                110_u64.to_le_bytes(),
                // timestamps
                Timestamps::default(),
                root_hash,
                // first entry (offset = 117 + 1 + 8 + 110)
                236_u64.to_le_bytes(),
                empty_hash,
                length_field("nested"),
                "nested",

                // ---- "/root/nested" ----
                Tag::Directory,
                56_u64.to_le_bytes(),
                // timestamps
                Timestamps::default(),
                empty_hash,
            }
        );
        assert!(data.is_empty());
    }
852
853    #[test]
854    fn kitchen_sink() {
855        let xyz_txt = [0xaa; 10];
856        let file1_txt = [0xbb; 5];
857        let file2_txt = [0xcc; 8];
858        let file3_txt = [0xdd; 2];
859        let dir = dir_map! {
860            "a" => dir_map! {
861                "b" => dir_map! {
862                    "xyz.txt" => &xyz_txt,
863                    "file1.txt" => &file1_txt,
864                },
865                "c" => dir_map! {
866                    "d" => dir_map!(),
867                    "file2.txt" => &file2_txt,
868                },
869            },
870            "file3.txt" => &file3_txt,
871        };
872
873        let empty_hash: [u8; 32] = sha2::Sha256::new().finalize().into();
874
875        let xyz_hash: [u8; 32] = sha2::Sha256::digest(xyz_txt).into();
876        let file1_hash: [u8; 32] = sha2::Sha256::digest(file1_txt).into();
877        let file2_hash: [u8; 32] = sha2::Sha256::digest(file2_txt).into();
878        let file3_hash: [u8; 32] = sha2::Sha256::digest(file3_txt).into();
879
880        let mut b_hasher = sha2::Sha256::new();
881        b_hasher.update(file1_hash);
882        b_hasher.update(xyz_hash);
883        let b_hash: [u8; 32] = b_hasher.finalize().into();
884
885        let mut c_hasher = sha2::Sha256::new();
886        c_hasher.update(empty_hash);
887        c_hasher.update(file2_hash);
888        let c_hash: [u8; 32] = c_hasher.finalize().into();
889
890        let mut a_hasher = sha2::Sha256::new();
891        a_hasher.update(b_hash);
892        a_hasher.update(c_hash);
893        let a_hash: [u8; 32] = a_hasher.finalize().into();
894
895        let mut dir_hasher = sha2::Sha256::new();
896        dir_hasher.update(a_hash);
897        dir_hasher.update(file3_hash);
898        let dir_hash: [u8; 32] = dir_hasher.finalize().into();
899
900        let VolumeParts { header, data } = VolumeParts::serialize(dir).unwrap();
901
902        assert_bytes_eq!(
903            header,
904            bytes! {
905                    // ---- Root directory ----
906                    Tag::Directory,
907                    162_u64.to_le_bytes(),
908                    // timestamps
909                    Timestamps::default(),
910                    // hash
911                    dir_hash,
912                    // first entry
913                    171_u64.to_le_bytes(),
914                    a_hash,
915                    length_field("a"),
916                    "a",
917                    // second entry
918                    966_u64.to_le_bytes(),
919                    file3_hash,
920                    length_field("file3.txt"),
921                    "file3.txt",
922
923                    // ---- "/a" ----
924                    Tag::Directory,
925                    154_u64.to_le_bytes(),
926                    // timestamps
927                    Timestamps::default(),
928                    // hash
929                    a_hash,
930                    // first entry
931                    334_u64.to_le_bytes(),
932                    b_hash,
933                    length_field("b"),
934                    "b",
935                    // second entry
936                    657_u64.to_le_bytes(),
937                    c_hash,
938                    length_field("c"),
939                    "c",
940
941                    // ---- "/a/b/" ----
942                    Tag::Directory,
943                    168_u64.to_le_bytes(),
944                    // timestamps
945                    Timestamps::default(),
946                    // hash
947                    b_hash,
948                    // first entry
949                    511_u64.to_le_bytes(),
950                    file1_hash,
951                    length_field("file1.txt"),
952                    "file1.txt",
953                    // second entry
954                    584_u64.to_le_bytes(),
955                    xyz_hash,
956                    length_field("xyz.txt"),
957                    "xyz.txt",
958
959                    // ---- "/a/b/file1.txt" ----
960                    Tag::File,
961                    0_u64.to_le_bytes(),
962                    5_u64.to_le_bytes(),
963                    sha256(file1_txt),
964                    // timestamps
965                    Timestamps::default(),
966
967                    // ---- "/a/b/xyz.txt" ----
968                    Tag::File,
969                    5_u64.to_le_bytes(),
970                    15_u64.to_le_bytes(),
971                    sha256(xyz_txt),
972                    // timestamps
973                    Timestamps::default(),
974
975                    // ---- "/a/c/" ----
976                    Tag::Directory,
977                    162_u64.to_le_bytes(),
978                    // timestamps
979                    Timestamps::default(),
980                    // hash
981                    c_hash,
982                    // First entry
983                    828_u64.to_le_bytes(),
984                    empty_hash,
985                    length_field("d"),
986                    "d",
987                    // Second entry
988                    893_u64.to_le_bytes(),
989                    file2_hash,
990                    length_field("file2.txt"),
991                    "file2.txt",
992
993                    // ---- "/a/c/d" ----
994                    Tag::Directory,
995                    56_u64.to_le_bytes(),
996                    // timestamps
997                    Timestamps::default(),
998                    // hash
999                    empty_hash,
1000
1001                    // ---- "/a/c/file2.txt" ----
1002                    Tag::File,
1003                    15_u64.to_le_bytes(),
1004                    23_u64.to_le_bytes(),
1005                    sha256(file2_txt),
1006                    // timestamps
1007                    Timestamps::default(),
1008
1009                    // ---- "file3.txt" ----
1010                    Tag::File,
1011                    23_u64.to_le_bytes(),
1012                    25_u64.to_le_bytes(),
1013                    sha256(file3_txt),
1014                    // timestamps
1015                    Timestamps::default(),
1016            }
1017        );
1018        assert_bytes_eq!(
1019            data,
1020            [file1_txt.as_slice(), &xyz_txt, &file2_txt, &file3_txt].concat()
1021        );
1022    }
1023
1024    #[test]
1025    fn load_files_from_directory() {
1026        let temp = tempfile::tempdir().unwrap();
1027        let to = temp.path().join("path").join("to");
1028        let first = to.join("first.txt");
1029        let second = to.join("second.md");
1030        std::fs::create_dir_all(&to).unwrap();
1031        std::fs::write(first, "first".as_bytes()).unwrap();
1032        std::fs::write(second, "# Second".as_bytes()).unwrap();
1033
1034        let dir = Directory::from_path(temp.path()).unwrap();
1035
1036        let expected = dir_map! {
1037            "path" => dir_map! {
1038                "to" => dir_map! {
1039                    "first.txt" => b"first",
1040                    "second.md" => b"# Second",
1041                }
1042            }
1043        };
1044
1045        assert_directories_match(dir, expected);
1046    }
1047
1048    fn assert_directories_match(mut left: Directory<'_>, mut right: Directory<'_>) {
1049        let left_keys: Vec<_> = left.children.keys().cloned().collect();
1050        let right_keys: Vec<_> = right.children.keys().cloned().collect();
1051        assert_eq!(left_keys, right_keys);
1052
1053        for key in &left_keys {
1054            match (
1055                left.children.remove(key).unwrap(),
1056                right.children.remove(key).unwrap(),
1057            ) {
1058                (DirEntry::Dir(left), DirEntry::Dir(right)) => {
1059                    assert_directories_match(left, right)
1060                }
1061                (DirEntry::File(left), DirEntry::File(right)) => {
1062                    assert_files_match(left, right, key)
1063                }
1064                (DirEntry::Dir(_), DirEntry::File(_)) | (DirEntry::File(_), DirEntry::Dir(_)) => {
1065                    panic!()
1066                }
1067            }
1068        }
1069    }
1070
1071    fn assert_files_match(left: FileEntry<'_>, right: FileEntry<'_>, key: &str) {
1072        let mut left_buffer = Vec::new();
1073        left.content.write_to(&mut left_buffer).unwrap();
1074        let mut right_buffer = Vec::new();
1075        right.content.write_to(&mut right_buffer).unwrap();
1076
1077        assert_bytes_eq!(
1078            left_buffer,
1079            right_buffer,
1080            "Entries for \"{key}\" don't match"
1081        );
1082    }
1083}