webc/v1/
mod.rs

1//! Parsing code for v1 of the WEBC format.
2
3use std::{
4    borrow::Cow,
5    collections::{BTreeMap, BTreeSet},
6    fmt,
7    io::{Read, Seek},
8    ops::Deref,
9    path::{Component, Path, PathBuf},
10    result,
11};
12
13use crate::indexmap::IndexMap;
14use base64::{prelude::BASE64_STANDARD, Engine};
15use bytes::Bytes;
16use serde::{Deserialize, Serialize};
17use sha2::Digest;
18use shared_buffer::OwnedBuffer;
19use url::Url;
20
21#[cfg(feature = "crypto")]
22use sequoia_openpgp::{
23    parse::stream::{DetachedVerifierBuilder, MessageLayer, MessageStructure, VerificationHelper},
24    Cert,
25};
26
27use crate::{
28    metadata::{annotations::Emscripten, Manifest, UrlOrManifest},
29    Version, MAGIC,
30};
31
/// Container file, lazily parsed from a set of `&'data [u8]` bytes
#[derive(Debug, Clone, PartialEq)]
pub struct WebC<'data> {
    /// Version of the file format
    pub version: u64,
    /// Parsed checksum (optional in case of no encoded checksum)
    pub checksum: Option<Checksum>,
    /// Parsed signature (optional if file was not signed)
    pub signature: Option<Signature>,
    /// Manifest of the file, see section `§2.3.1` of the spec
    pub manifest: Manifest,
    /// Executable files, indexed into one volume (`a.wasm` => `a`, `b.wasm` => `b@0.2.1`)
    pub atoms: Volume<'data>,
    /// Filesystem volumes: default volume name is `atom` (containing files of the current package)
    /// and `user/package@version` for external dependencies. Every dependency can be sandboxed to only
    /// access its own filesystem volume, not external ones.
    pub volumes: IndexMap<String, Volume<'data>>,
}
50
/// Memory-mapped version of the WebC file that
/// carries its data along the parsed `WebC<'static>`
#[derive(Debug, Clone)]
pub struct WebCMmap {
    /// SHA-256 hash of the entire `.webc` file, computed during `from_file`.
    pub webc_hash: [u8; 32],
    /// WebC file, referencing the memory-mapped backed data
    pub webc: WebC<'static>,
    /// Note: The `webc` field has references into this shared state, so make
    /// sure we don't drop it prematurely.
    #[allow(dead_code)]
    pub(crate) buffer: OwnedBuffer,
}
63
/// Allows a `WebCMmap` to be used anywhere a `WebC` is expected.
impl Deref for WebCMmap {
    type Target = WebC<'static>;
    fn deref(&self) -> &Self::Target {
        &self.webc
    }
}
70
71impl WebCMmap {
72    /// Same as `WebC::parse`, but uses a memory-mapped file
73    pub fn parse(path: impl AsRef<Path>, options: &ParseOptions) -> ReadResult<Self> {
74        let path = path.as_ref();
75
76        std::fs::File::open(path)
77            .map_err(|e| Error(e.to_string()))
78            .and_then(|f| WebCMmap::from_file(f, options))
79            .map_err(|e| Error(format!("Could not open {}: {e}", path.display())))
80    }
81
82    pub fn from_file(mut file: std::fs::File, options: &ParseOptions) -> ReadResult<Self> {
83        let mut data = Vec::new();
84        file.read_to_end(&mut data)
85            .map_err(|e| Error(format!("Failed to read file: {e}")))?;
86        file.seek(std::io::SeekFrom::Start(0))
87            .map_err(|e| Error(format!("File to seek to the start of the file: {e}")))?;
88
89        let webc_hash: [u8; 32] = sha2::Sha256::digest(data.as_slice()).into();
90
91        let buffer = OwnedBuffer::from_file(&file).map_err(|e| Error(e.to_string()))?;
92
93        let webc = WebC::parse(&buffer, options)?;
94        // Safety: transmute the lifetime away. This is unsound. See the
95        // comments in WebcOwned::parse() for more.
96        let webc: WebC<'static> = unsafe { std::mem::transmute(webc) };
97
98        Ok(Self {
99            webc_hash,
100            webc,
101            buffer,
102        })
103    }
104
105    pub fn webc_hash(&self) -> Option<[u8; 32]> {
106        Some(self.webc_hash)
107    }
108
109    pub fn as_webc_ref(&self) -> WebC<'_> {
110        self.webc.clone()
111    }
112}
113
/// Owned version of the WebC file that carries its data
/// along the parsed `WebC<'static>`
#[derive(Debug, Clone)]
pub struct WebCOwned {
    /// SHA-256 hash of the backing `.webc` bytes, computed at parse time.
    webc_hash: [u8; 32],
    /// Parsed container; its `'static` lifetime is fabricated via transmute
    /// (see the safety comments in `WebCOwned::parse`).
    pub webc: WebC<'static>,
    /// Note: `webc` has references into these bytes, so they must not be
    /// dropped before it.
    #[allow(dead_code)]
    pub(crate) backing_data: Bytes,
}
123
impl WebCOwned {
    /// Same as `WebC::parse`, but keeps the resulting `data` in memory,
    /// instead of referencing it
    pub fn parse(data: impl Into<Bytes>, options: &ParseOptions) -> ReadResult<Self> {
        let data: Bytes = data.into();

        // Hash the raw bytes up front so the hash covers the whole file.
        let webc_hash: [u8; 32] = sha2::Sha256::digest(&data).into();

        let webc = WebC::parse(&data, options)?;
        // Safety: We're transmuting the lifetime away here because WebCOwned is
        // technically a self-referential struct.
        // This is unsound because we implement Deref and make the field public
        // and it is possible to get a reference to something inside the WebC,
        // drop this WebCOwned, then trigger a use-after-free bug... but, fixing
        // it would require reworking a bunch of downstream code and that's not
        // possible at the moment.
        let webc: WebC<'static> = unsafe { std::mem::transmute(webc) };
        Ok(Self {
            webc_hash,
            webc,
            backing_data: data,
        })
    }

    /// Returns the SHA-256 hash of the backing `.webc` bytes.
    pub fn webc_hash(&self) -> Option<[u8; 32]> {
        Some(self.webc_hash)
    }

    /// Returns a clone of the parsed `WebC` with its lifetime tied to `self`.
    pub fn as_webc_ref(&self) -> WebC<'_> {
        self.webc.clone()
    }
}
156
/// Allows a `WebCOwned` to be used anywhere a `WebC` is expected.
impl Deref for WebCOwned {
    type Target = WebC<'static>;
    fn deref(&self) -> &Self::Target {
        &self.webc
    }
}
163
/// The error type used within the read module.
///
/// A thin wrapper around a human-readable message string.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Error(pub String);
167
168impl fmt::Display for Error {
169    #[inline]
170    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
171        f.write_str(self.0.as_str())
172    }
173}
174
// Marker implementation so `Error` can be used as a `dyn std::error::Error`
// (e.g. boxed or propagated with `?` into broader error types).
impl std::error::Error for Error {}

/// The result type used within the read module.
pub type ReadResult<T> = result::Result<T, Error>;
179
/// Calculated checksum of the file
#[derive(Clone, PartialEq, Eq)]
pub struct Checksum {
    /// (crate-internal): how many bytes of the signature
    /// are valid, how many are padding
    pub valid_until: usize,
    /// Type of checksum (16 bytes long, `------------`, `sha256----------`, etc.)
    pub chk_type: String,
    /// Data of the checksum bytes, 256 bytes long
    /// (only the first `valid_until` bytes are meaningful; the rest is padding)
    pub data: Vec<u8>,
    /// Whether the checksum has been validated during `WebC::parse`
    pub valid: bool,
}
193
/// Serialization proxy for [`Checksum`], used only to pretty-print it:
/// the raw checksum bytes are rendered as a base64 string (see the
/// `Debug` impl of `Checksum`).
#[derive(Serialize)]
struct DisplayableChecksum {
    valid: bool,
    chk_type: String,
    data: String,
}

impl fmt::Debug for DisplayableChecksum {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Debug output is the pretty-printed JSON of the struct; a
        // serialization failure degrades to an empty string.
        let json = serde_json::to_string_pretty(self).unwrap_or_default();
        write!(f, "{json}")
    }
}
207
208impl fmt::Debug for Checksum {
209    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
210        let mut clone = self.clone();
211        clone.data.truncate(self.valid_until);
212        let base64 = BASE64_STANDARD.encode(&clone.data);
213        let displayable = DisplayableChecksum {
214            valid: self.valid,
215            chk_type: self.chk_type.clone(),
216            data: base64,
217        };
218        displayable.fmt(f)
219    }
220}
221
/// Signature of the checksum of the file, such that
/// `verify(WebC::get_checksum(), public_key)` is valid
#[derive(Clone, PartialEq, Eq)]
pub struct Signature {
    /// (crate-internal): how many bytes of the signature
    /// are valid, how many are padding
    pub valid_until: usize,
    /// Data of the signature
    /// (only the first `valid_until` bytes are meaningful; the rest is padding)
    pub data: Vec<u8>,
    /// Whether the signature has been checked to be valid
    /// during parsing
    pub valid: bool,
}
235
/// Serialization proxy for [`Signature`], used only to pretty-print it:
/// the raw signature bytes are rendered as a base64 string (see the
/// `Debug` impl of `Signature`).
#[derive(Serialize)]
struct DisplayableSignature {
    valid: bool,
    data: String,
}

impl fmt::Debug for DisplayableSignature {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Debug output is the pretty-printed JSON of the struct; a
        // serialization failure degrades to an empty string.
        let json = serde_json::to_string_pretty(self).unwrap_or_default();
        write!(f, "{json}")
    }
}
248
249impl fmt::Debug for Signature {
250    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
251        let mut clone = self.clone();
252        clone.data.truncate(self.valid_until);
253        let base64 = BASE64_STANDARD.encode(&clone.data);
254        let displayable = DisplayableSignature {
255            valid: self.valid,
256            data: base64,
257        };
258        displayable.fmt(f)
259    }
260}
261
/// Filesystem volume, containing the uncompressed files in an ordered directory structure
#[derive(Default, Clone, PartialEq, Eq)]
pub struct Volume<'data> {
    /// Header, storing all the offsets and file names in order
    pub header: VolumeHeader<'data>,
    /// Volume filesystem: the concatenated file contents, addressed by the
    /// `offset_start..offset_end` ranges stored in the header entries
    pub data: &'data [u8],
}
270
271impl<'data> fmt::Debug for Volume<'data> {
272    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
273        self.header.fmt(f)?;
274        write!(f, "\r\ndata: [ ... ({} bytes) ]", self.data.len())
275    }
276}
277
/// Specifies whether an input path is a directory or a file
/// (since this distinction can't be made from the filename alone)
#[derive(Clone, Debug, PartialEq, PartialOrd, Ord, Eq, Hash)]
pub enum DirOrFile {
    /// The path refers to a directory
    Dir(PathBuf),
    /// The path refers to a file
    File(PathBuf),
}
285
286impl fmt::Display for DirOrFile {
287    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
288        write!(f, "{}", self.get_path_buf().display())
289    }
290}
291
292impl DirOrFile {
293    /// Returns the `PathBuf` of the `DirOrFile`
294    pub fn get_path_buf(&self) -> &PathBuf {
295        match &self {
296            DirOrFile::Dir(d) | DirOrFile::File(d) => d,
297        }
298    }
299
300    /// Returns all the "Normal" components of the PathBuf, note that
301    /// non-normal (such as ".", symlinks, etc. components) are ignored
302    pub fn components(&self) -> Vec<String> {
303        self.get_path_buf()
304            .components()
305            .filter_map(|c| match c {
306                Component::Normal(c) => Some(c.to_str()?.to_string()),
307                _ => None,
308            })
309            .collect()
310    }
311
312    /// Returns whether the `FileOrDir` is a directory
313    #[must_use]
314    pub fn is_dir(&self) -> bool {
315        match self {
316            DirOrFile::Dir(_) => true,
317            DirOrFile::File(_) => false,
318        }
319    }
320}
321
322impl<'a> Volume<'a> {
323    /// Serialize an atom volume.
324    ///
325    /// This is essentially [`Volume::serialize_files()`], but it will modify
326    /// the input files to uphold several atom-specific invariants - namely
327    /// that each atom is addressable by its module name. This means:
328    ///
329    /// - All atoms are hoisted to the top level folder
330    /// - Extensions are removed from filenames
331    pub fn serialize_atoms(files: BTreeMap<DirOrFile, Vec<u8>>) -> Vec<u8> {
332        let mut rewritten_files = BTreeMap::new();
333
334        for (entry, data) in files {
335            // Note: we want to ignore all directories, and strip the dirname
336            // and extension from any files.
337            if let DirOrFile::File(path) = entry {
338                if let Some(filename) = path.file_name() {
339                    rewritten_files.insert(DirOrFile::File(filename.into()), data);
340                }
341            }
342        }
343
344        Volume::serialize_files(rewritten_files)
345    }
346
    /// Create a volume from a set of initial files
    ///
    /// Output layout: a LEB128-encoded header length, followed by the
    /// serialized directory/file header entries (grouped per directory,
    /// each group prefixed by its 8-byte length), followed by the
    /// concatenated file contents.
    pub fn serialize_files(files: BTreeMap<DirOrFile, Vec<u8>>) -> Vec<u8> {
        // Input:
        //
        // /a/c/file.txt: [... text file with 10000 bytes ...], false
        // /b:            [], true (empty directory)

        // strip the "/" prefix from all paths
        let files = files
            .into_iter()
            .map(|(path, file)| {
                let new_path = match path.get_path_buf().strip_prefix("/") {
                    Ok(o) => o.to_path_buf(),
                    Err(_) => path.get_path_buf().clone(),
                };

                (new_path, (file, path.is_dir()))
            })
            .collect::<BTreeMap<_, _>>();

        let mut volume_content = Vec::new();
        let mut file_path_offsets = BTreeMap::new();

        // all files including parent directories
        //
        // [/, /a, /b, /a/c, /a/c/file.txt]
        let mut all_files = BTreeMap::new();
        for (path, (_, is_dir)) in files.iter() {
            all_files.insert(path.clone(), *is_dir);

            let mut components = path
                .components()
                .filter_map(|r| match r {
                    std::path::Component::Normal(n) => Some(n.to_str().unwrap_or("").to_string()),
                    _ => None,
                })
                .collect::<Vec<_>>();

            // For a file, only its ancestor directories are synthesized,
            // not the file itself.
            if !is_dir {
                components.pop();
            }

            // Insert every ancestor path as a directory entry.
            while !components.is_empty() {
                let parent_path = components.clone().join("/");
                let path = Path::new(&parent_path).to_path_buf();
                all_files.insert(path, true);
                components.pop();
            }
        }

        // Concatenate all file contents into the data section, remembering
        // each file's (start, end) byte range.
        for (path, (mut file, is_dir)) in files.into_iter() {
            if !is_dir {
                // path is a file
                let cursor = volume_content.len();
                let file_len = file.len();
                volume_content.append(&mut file);
                file_path_offsets.insert(path.clone(), (cursor, cursor + file_len));
            }
        }

        // Group entries by directory depth:
        //
        // 0: ["/"]
        // 1: ["/a", "/b", "/c"]
        // 2: ["/a/c"]
        // 3: ["/a/c/file.txt"]
        let mut files_grouped_by_level = BTreeMap::new();
        for (path, is_dir) in all_files.iter() {
            let num_parents = path.ancestors().count().saturating_sub(2);
            files_grouped_by_level
                .entry(num_parents)
                .or_insert_with(Vec::new)
                .push((path.clone(), *is_dir));
        }

        // For every level: get how many items in the next directory
        // start with the current path. Pre-sort the files in the subdirectory
        //
        // 1: [("a", ["a/c"]), ("b", [])]
        // 2: [("a/c", ["a/c/file.txt"])]
        // 3: [("a/c/file.txt", [])]
        let mut directories_by_level_with_entrycount = BTreeMap::new();
        for (level, paths) in files_grouped_by_level.iter() {
            for (path, is_dir) in paths {
                let mut files_in_directory =
                    if files_grouped_by_level.get(&(level + 1)).is_none() || !is_dir {
                        Vec::new()
                    } else {
                        files_grouped_by_level[&(level + 1)]
                            .iter()
                            .filter(|(next_level_entry, _next_level_is_dir)| {
                                next_level_entry.starts_with(path)
                            })
                            .cloned()
                            .collect()
                    };

                files_in_directory.sort();

                directories_by_level_with_entrycount
                    .entry(level)
                    .or_insert_with(Vec::new)
                    .push(((path.clone(), is_dir), files_in_directory));
            }
        }

        // Now sort the directories levels internally
        //
        // 1: [("a", ["a/c"]), ("b", [])]
        // 2: [("a/c", ["a/c/file.txt"])]
        // 3: [("a/c/file.txt", [])]
        for (_, paths) in directories_by_level_with_entrycount.iter_mut() {
            paths.sort_by(|a, b| a.0.cmp(&b.0));
        }

        // Calculate offsets for the subdirectories
        //
        // - full file / directory name
        // - file / directory name relative to parent
        // - for each file in subdirectory:
        //     - full file / directory name
        //     - file / directory name relative to subdir
        // - total size of subdirectory in bytes
        //
        // 1: (50 bytes directory level size = (2 * 24 bytes + 2 bytes for directory names), [
        //    ("a", "a", 25),
        //    ("b", "b", 0)
        // ])
        // 2: (25 bytes directory level size = (1 * 24 bytes + 1 byte for directory name), [
        //    ("a/c", "c", 32),
        // ])
        // 3: (32 bytes directory level size = (1 * 24 bytes + 8 bytes for the file name), [
        //    ("a/c/file.txt", "file.txt", 0)
        // ])
        let mut byte_size_of_each_level: BTreeMap<usize, _> = BTreeMap::new();

        for (level, entries) in directories_by_level_with_entrycount.iter() {
            // 8 bytes per distinct parent directory on this level: each
            // directory group is prefixed with a u64 length (see the
            // `to_le_bytes` writes below).
            let mut byte_size_of_level = entries
                .iter()
                .map(|((e, _), _)| get_parent(e))
                .collect::<BTreeSet<_>>()
                .len()
                * 8;

            let mut entries_subdir: Vec<(&PathBuf, String, usize)> = Vec::new();

            for ((entry_name, _is_dir), subdir) in entries.iter() {
                let entry_name_last_component = match get_last_component(entry_name) {
                    Some(s) => s.to_string(),
                    None => continue,
                };

                // 24 bytes of fixed-size fields per serialized entry plus
                // its name — assumed to match `HeaderEntry::write_to`'s
                // layout; TODO confirm.
                byte_size_of_level += entry_name_last_component.as_bytes().len() + 24;

                let mut subdir_size = subdir
                    .iter()
                    .map(|(e, _)| get_parent(e))
                    .collect::<BTreeSet<_>>()
                    .len()
                    * 8;

                for (sub, _sub_is_dir) in subdir.iter() {
                    // /a/c/file.txt => "file.txt"
                    let subdir_last_component = match get_last_component(sub) {
                        Some(s) => s.to_string(),
                        None => continue,
                    };
                    subdir_size += subdir_last_component.as_bytes().len() + 24;
                }

                entries_subdir.push((entry_name, entry_name_last_component, subdir_size));
            }

            byte_size_of_each_level.insert(**level, (byte_size_of_level, entries_subdir));
        }

        // Now construct the directory level [FileEntry] bytes and encode them
        //
        // [
        //    [FsEntry::Dir, "a", start: (8 + 50), end: (8 + 50) + (8 + 25)]
        //    [FsEntry::Dir, "b", start: (8 + 50) + (8 + 25), end: (8 + 50) + (8 + 25)] (= empty directory)
        // ],
        // [
        //    [FsEntry::Dir, "c", start: (8 + 50) + (8 + 25), end: (8 + 50) + (8 + 25) + (8 + 32)]
        // ],
        // [
        //    [Fs::Entry::File, "file.txt", start: 0, end: 10000 ]
        // ]
        let mut levels = Vec::new();
        let mut cursor = 0;
        for (_, (dir_level_bytes, dir_level)) in byte_size_of_each_level.iter() {
            // calculate at which byte offset in the header the next directory level will start
            // 8 bytes reserved for directory level size
            let next_level_start = cursor + dir_level_bytes;

            let mut cur_level = Vec::new();
            let mut next_dir_level_cursor = 0;

            for (full_name, dir_or_file_name, subdir_len_bytes) in dir_level.iter() {
                // A path present in `file_path_offsets` is a file; anything
                // else is a directory.
                match file_path_offsets.get(&**full_name) {
                    Some((start, end)) => {
                        // path is a file, nothing to do
                        cur_level.push((
                            full_name,
                            HeaderEntry {
                                flags: Flags::File,
                                text: dir_or_file_name.parse().unwrap(),
                                offset_start: (*start as u64),
                                offset_end: (*end as u64),
                            },
                        ));
                    }
                    None => {
                        // path is a directory that potentially has subdirectories
                        cur_level.push((
                            full_name,
                            HeaderEntry {
                                flags: Flags::Dir,
                                text: dir_or_file_name.parse().unwrap(),
                                offset_start: next_level_start as u64 + next_dir_level_cursor,
                                offset_end: next_level_start as u64
                                    + next_dir_level_cursor
                                    + (*subdir_len_bytes as u64),
                            },
                        ));
                        next_dir_level_cursor += *subdir_len_bytes as u64;
                    }
                }
            }

            levels.push(cur_level);
            cursor = next_level_start;
        }

        let mut header = Vec::new();

        for fs_entries in levels.iter() {
            let mut current_level = Vec::new();

            let (mut current_dir, mut entries) = match fs_entries.first() {
                Some((full_name, e)) => (get_parent(full_name), vec![e.clone()]),
                None => continue,
            };

            for (full_name, entry) in fs_entries.iter().skip(1) {
                let parent_of_current_entry = get_parent(full_name);

                // each time the `current_dir` changes (for example from "/a/b/c" to "/a/b/d",
                // we have to start a new directory section)
                if parent_of_current_entry != current_dir {
                    let mut buffer = Vec::new();
                    for entry in entries.drain(..) {
                        entry.write_to(&mut buffer);
                    }
                    // Each directory group is prefixed with its byte length
                    // as a little-endian u64.
                    current_level.extend(u64::try_from(buffer.len()).unwrap().to_le_bytes());
                    current_level.extend(buffer);
                    current_dir = parent_of_current_entry;
                }
                entries.push(entry.clone());
            }

            // Flush the trailing group of the level.
            if !entries.is_empty() {
                let mut buffer = Vec::new();
                for entry in entries.drain(..) {
                    entry.write_to(&mut buffer);
                }
                current_level.extend(u64::try_from(buffer.len()).unwrap().to_le_bytes());
                current_level.extend(buffer);
            }

            header.extend(current_level);
        }

        // Final layout: LEB128(header length) + header + file contents.
        let mut total = to_leb(header.len() as u64);
        total.extend_from_slice(&header);
        total.append(&mut volume_content);

        total
    }
624
625    /// Returns all files and directories with the corresponding `FsEntry`
626    pub fn get_all_file_and_dir_entries(
627        &'a self,
628    ) -> Result<BTreeMap<DirOrFile, FsEntry<'a>>, Error> {
629        let mut target = BTreeMap::new();
630        let mut levels = vec![(PathBuf::new(), self.header.top_level.clone())];
631
632        while !levels.is_empty() {
633            let mut next_levels = Vec::new();
634
635            for (parent_path, entries) in levels.iter() {
636                for entry in entries {
637                    let real_path = parent_path.clone().join(&*entry.text);
638                    let offset_start: usize =
639                        entry.offset_start.try_into().unwrap_or(u32::MAX as usize);
640                    let offset_end: usize =
641                        entry.offset_end.try_into().unwrap_or(u32::MAX as usize);
642
643                    match entry.fs_type {
644                        FsEntryType::File => {
645                            target.insert(DirOrFile::File(real_path.clone()), entry.clone());
646                        }
647                        FsEntryType::Dir => {
648                            let next_level_entries =
649                                FsEntry::parse(&self.header.header_data[offset_start..offset_end]);
650                            target.insert(DirOrFile::Dir(real_path.clone()), entry.clone());
651                            next_levels.push((real_path.clone(), next_level_entries));
652                        }
653                    }
654                }
655            }
656
657            levels = next_levels;
658        }
659
660        Ok(target)
661    }
662
663    /// Returns all entries in a "tree" sorted structure, i.e.
664    /// sorted in the same way you'd see the files in a tree explorer
665    pub fn get_all_file_entries_recursivesorted(&'a self) -> RecursiveFsEntryDir<'a> {
666        let mut target = RecursiveFsEntryDir {
667            name: "/".to_string(),
668            contents: Vec::new(),
669        };
670        let dir_entries = Self::specialsort_dir(&self.header.top_level[..]);
671        append_entries_recursive(self.header.header_data, dir_entries, &mut target);
672        target
673    }
674
675    /// Returns all entries in a "tree" sorted structure, i.e.
676    /// sorted in the same way you'd see the files in a tree explorer
677    pub fn get_all_file_entries_directorysorted(&'a self) -> Vec<(DirOrFile, FsEntry<'a>)> {
678        let mut target = Vec::new();
679
680        Self::specialsort_append_to_target(
681            PathBuf::new(),
682            &self.header.top_level,
683            self.header.header_data,
684            &mut target,
685        );
686
687        target
688    }
689
    /// Depth-first walk that appends entries to `target` in "directory
    /// explorer" order: within each directory, all (lexically sorted)
    /// subdirectories are emitted and recursed into before any files.
    ///
    /// `parent_path` is the path of the directory whose `entries` are being
    /// visited; `data` is the raw volume header, used to resolve each
    /// directory entry's child listing via its offsets.
    fn specialsort_append_to_target(
        parent_path: PathBuf,
        entries: &[FsEntry<'a>],
        data: &'a [u8],
        target: &mut Vec<(DirOrFile, FsEntry<'a>)>,
    ) {
        // Directories first, lexically sorted.
        let dir_entries = entries
            .iter()
            .filter(|f| f.fs_type == FsEntryType::Dir)
            .cloned()
            .collect::<Vec<_>>();
        let dir_entries = Self::specialsort_dir(&dir_entries);
        for entry in dir_entries {
            target.push((
                DirOrFile::Dir(parent_path.join(entry.text.as_ref())),
                entry.clone(),
            ));
            // Offsets are clamped to u32::MAX rather than failing on overflow.
            let offset_start: usize = entry.offset_start.try_into().unwrap_or(u32::MAX as usize);
            let offset_end: usize = entry.offset_end.try_into().unwrap_or(u32::MAX as usize);
            let fs_entry_bytes = match get_byte_slice(data, offset_start, offset_end) {
                Some(s) => s,
                None => {
                    // NOTE(review): entries with out-of-range offsets are
                    // skipped, but this prints to stdout from library code —
                    // consider eprintln!/logging instead.
                    println!("cannot get byte slice");
                    continue;
                }
            };
            // Recurse into the directory's own child listing.
            let dir_entries = FsEntry::parse(fs_entry_bytes);
            Self::specialsort_append_to_target(
                parent_path.join(entry.text.as_ref()),
                &dir_entries,
                data,
                target,
            );
        }

        // Then this directory's files, also lexically sorted.
        let file_entries = entries
            .iter()
            .filter(|f| f.fs_type == FsEntryType::File)
            .cloned()
            .collect::<Vec<_>>();
        let file_entries = Self::specialsort_dir(&file_entries);

        for entry in file_entries {
            target.push((
                DirOrFile::File(parent_path.join(entry.text.as_ref())),
                entry.clone(),
            ));
        }
    }
739
740    fn specialsort_dir(entries: &[FsEntry<'a>]) -> Vec<FsEntry<'a>> {
741        use lexical_sort::lexical_cmp;
742
743        let mut dirs = entries
744            .iter()
745            .filter(|e| e.fs_type == FsEntryType::Dir)
746            .cloned()
747            .collect::<Vec<_>>();
748        dirs.sort_by(|a, b| lexical_cmp(a.text.as_ref(), b.text.as_ref()));
749
750        let mut files = entries
751            .iter()
752            .filter(|e| e.fs_type == FsEntryType::File)
753            .cloned()
754            .collect::<Vec<_>>();
755        files.sort_by(|a, b| lexical_cmp(a.text.as_ref(), b.text.as_ref()));
756
757        dirs.append(&mut files);
758        dirs
759    }
760
761    /// Generic walk function that walks recursively over the files and
762    /// calls a callback function with `self.data` on every entry.
763    pub fn walk<'b>(&'b self) -> VolumeIterator<'a, 'b> {
764        let parent = PathBuf::new();
765        VolumeIterator {
766            volume: self,
767            entries: Self::specialsort_dir(&self.header.top_level)
768                .iter()
769                .map(|v| match v.fs_type {
770                    FsEntryType::File => DirOrFile::File(parent.join(v.text.as_ref())),
771                    FsEntryType::Dir => DirOrFile::Dir(parent.join(v.text.as_ref())),
772                })
773                .collect(),
774        }
775    }
776
777    /// Returns all the files in this volume, indexed by the full path
778    /// (in unix fashion, i.e. "/", "/a", "/b/file.txt")
779    pub fn get_all_files_and_directories_with_bytes(
780        &self,
781    ) -> Result<BTreeSet<DirOrFileWithBytes<'_>>, Error> {
782        self.get_all_file_and_dir_entries()?
783            .into_iter()
784            .map(|(path, entry)| {
785                if entry.fs_type == FsEntryType::File {
786                    let offset_start: usize = entry
787                        .offset_start
788                        .try_into()
789                        .map_err(|e| Error(format!("{e}: {path}")))?;
790                    let offset_end: usize = entry
791                        .offset_end
792                        .try_into()
793                        .map_err(|e| Error(format!("{e}: {path}")))?;
794                    let data = self.data.get(offset_start..offset_end).ok_or_else(|| {
795                        Error(format!(
796                            "could not get data {offset_start}..{offset_end}: {path}"
797                        ))
798                    })?;
799                    Ok(DirOrFileWithBytes::File {
800                        path: path.get_path_buf().clone(),
801                        bytes: data,
802                    })
803                } else {
804                    Ok(DirOrFileWithBytes::Dir {
805                        path: path.get_path_buf().clone(),
806                    })
807                }
808            })
809            .collect()
810    }
811
812    /// Returns the number of files in this volume
813    pub fn count_files(&self) -> u64 {
814        let mut cursor = 0;
815        let mut num_files = 0;
816        while cursor < self.header.header_data.len() {
817            let next_directory_level = FsEntry::parse(&self.header.header_data[cursor..]);
818            num_files += next_directory_level
819                .iter()
820                .filter(|f| f.fs_type == FsEntryType::File)
821                .count() as u64;
822            cursor += FsEntry::calculate_byte_length(&next_directory_level);
823        }
824        num_files
825    }
826
827    /// Returns the number of directories in this volume
828    pub fn count_directories(&self) -> u64 {
829        let mut cursor = 0;
830        let mut num_files = 0;
831        while cursor < self.header.header_data.len() {
832            let next_directory_level = FsEntry::parse(&self.header.header_data[cursor..]);
833            num_files += next_directory_level
834                .iter()
835                .filter(|f| f.fs_type == FsEntryType::Dir)
836                .count() as u64;
837            cursor += FsEntry::calculate_byte_length(&next_directory_level);
838        }
839        num_files
840    }
841
842    pub fn list_directories(&self) -> Vec<String> {
843        self.get_all_file_and_dir_entries()
844            .unwrap_or_default()
845            .iter()
846            .filter_map(|(path, _)| match path {
847                DirOrFile::Dir(d) => Some(format!("{}", d.display())),
848                DirOrFile::File(_) => None,
849            })
850            .collect()
851    }
852
853    /// Parses a filesystem volume from a buffer of bytes
854    pub fn parse(data: &'a [u8]) -> Result<Self, Error> {
855        let leb_size = get_leb_size(data).ok_or(Error(
856            "Error parsing volume: could not read header size LEB128".to_string(),
857        ))?;
858
859        if data.len() < leb_size {
860            return Err(Error(format!(
861                "Error parsing volume: expected at least {leb_size} bytes, got {}",
862                data.len()
863            )));
864        }
865
866        let header_len: usize = from_leb(data)
867            .ok_or(Error(format!(
868                "Could not read header length from data (first {leb_size} bytes)"
869            )))?
870            .try_into()
871            .unwrap_or(usize::MAX);
872
873        if data.len() < header_len + leb_size {
874            return Err(Error(format!(
875                "Error parsing volume: expected at least {} bytes, got only {}",
876                header_len + leb_size,
877                data.len()
878            )));
879        }
880
881        let (header, data) = data[leb_size..].split_at(header_len);
882
883        let header = VolumeHeader::from_slice(header);
884
885        Ok(Self { header, data })
886    }
887
    /// Returns file entries for `$path`
    ///
    /// Walks the parsed header one directory level at a time: each path
    /// component is binary-searched in the current level, and directory
    /// entries point (via byte offsets into `header_data`) to the serialized
    /// next level.
    ///
    /// # Errors
    ///
    /// Returns an error if a component is missing, resolves to a file, or if
    /// its directory level cannot be decoded from the header bytes.
    pub fn read_dir(&self, path: &str) -> Result<Vec<FsEntry<'a>>, Error> {
        // removes redundant ".", "..", etc
        let clean = path_clean::clean(path);

        // Keep only plain name components; root/prefix markers are dropped.
        let mut components = Path::new(&clean)
            .components()
            .filter_map(|s| match s {
                Component::Normal(s) => s.to_str(),
                _ => None,
            })
            .collect::<Vec<_>>();

        // Reversed so `pop()` below yields components in root-to-leaf order.
        components.reverse();

        let mut directory_to_search = self.header.top_level.clone();

        while let Some(searched_directory_name) = components.pop() {
            // Binary search assumes each level's entries are sorted by name.
            let found = match directory_to_search
                .binary_search_by(|probe| (*probe.text).cmp(searched_directory_name))
            {
                Ok(i) => directory_to_search[i].clone(),
                Err(_) => {
                    return Err(Error(format!("Could not find directory {clean:?}: could not find  directory {searched_directory_name:?} (os error 2)")));
                }
            };

            // Offsets are u64 on disk; clamp instead of panicking on conversion.
            let offset_start: usize = found.offset_start.try_into().unwrap_or(u32::MAX as usize);
            let offset_end: usize = found.offset_end.try_into().unwrap_or(u32::MAX as usize);

            match found.fs_type {
                FsEntryType::File => {
                    // A path component resolved to a file — cannot descend further.
                    return Err(Error(format!(
                        "Could not find directory {clean:?} (os error 2)"
                    )));
                }
                FsEntryType::Dir => {
                    if offset_start == offset_end {
                        // Empty directory: no serialized child level to decode.
                        directory_to_search = Vec::new();
                    } else {
                        let next_dir_level_to_decode = get_byte_slice(self.header.header_data, offset_start, offset_end)
                        .ok_or(Error(format!("Could not find directory {clean:?}: could not decode directory {searched_directory_name:?} at byte offset {offset_start}..{offset_end} (os error -2)")))?;

                        directory_to_search = FsEntry::parse(next_dir_level_to_decode);
                    }
                }
            }
        }

        Ok(directory_to_search)
    }
939
    /// Returns the file entry for `$path`. Note that this does not
    /// return the file contents directly, use `volume.get_file(path)` instead.
    ///
    /// Traversal mirrors `read_dir`: each path component is binary-searched
    /// in the current directory level until a file entry is reached.
    ///
    /// # Errors
    ///
    /// Returns an error if the file is a directory.
    pub fn get_file_entry(&self, path: &str) -> Result<OwnedFsEntryFile, Error> {
        let clean = path_clean::clean(path); // removes redundant ".", "..", etc

        // Keep only plain name components; root/prefix markers are dropped.
        let mut components = Path::new(&clean)
            .components()
            .filter_map(|s| match s {
                Component::Normal(s) => s.to_str(),
                _ => None,
            })
            .collect::<Vec<_>>();

        // Reversed so `pop()` below yields components in root-to-leaf order.
        components.reverse();

        let mut directory_to_search = self.header.top_level.clone();

        while let Some(searched_directory_name) = components.pop() {
            // Binary search assumes each level's entries are sorted by name.
            let found = match directory_to_search
                .binary_search_by(|probe| (*probe.text).cmp(searched_directory_name))
            {
                Ok(i) => directory_to_search[i].clone(),
                Err(_) => {
                    return Err(Error(format!("Could not find file {clean:?}: could not find file or directory {searched_directory_name:?} (os error 2)")));
                }
            };

            // Offsets are u64 on disk; clamp instead of panicking on conversion.
            let offset_start: usize = found.offset_start.try_into().unwrap_or(u32::MAX as usize);
            let offset_end: usize = found.offset_end.try_into().unwrap_or(u32::MAX as usize);

            match found.fs_type {
                FsEntryType::File => {
                    // A file matched before all components were consumed:
                    // the requested path goes *through* a file — not found.
                    if !components.is_empty() {
                        return Err(Error(format!("Could not find file {clean:?} (os error 2)")));
                    }

                    // NOTE: returns the caller-supplied `path`, not `clean`.
                    return Ok(OwnedFsEntryFile {
                        text: path.to_string(),
                        offset_start: offset_start as u64,
                        offset_end: offset_end as u64,
                    });
                }
                FsEntryType::Dir => {
                    if offset_start == offset_end {
                        // Empty directory: no serialized child level to decode.
                        directory_to_search = Vec::new();
                    } else {
                        let next_dir_level_to_decode = get_byte_slice(self.header.header_data, offset_start, offset_end)
                        .ok_or(Error(format!("Could not find file {clean:?}: could not decode directory {searched_directory_name:?} at byte offset {offset_start}..{offset_end} (os error -2)")))?;

                        directory_to_search = FsEntry::parse(next_dir_level_to_decode);
                    }
                }
            }
        }

        // All components consumed without hitting a file entry (e.g. the
        // path names a directory or was empty after cleaning).
        Err(Error(format!("Could not find file {clean:?} (os error 2)")))
    }
1001
1002    /// Given an already-existing `OwnedFsEntryFile`, returns the byte slice for this
1003    /// file entry.
1004    ///
1005    /// # Errors
1006    ///
1007    /// The function returns an error if the file entry is out of bounds of the
1008    /// underlying data slice (should never happen)
1009    pub fn get_file_bytes(&self, entry: &OwnedFsEntryFile) -> Result<&'a [u8], Error> {
1010        static EMPTY_SLICE: &[u8] = &[];
1011
1012        let offset_start = entry.offset_start.try_into().unwrap_or(u32::MAX as usize);
1013        let offset_end = entry.offset_end.try_into().unwrap_or(u32::MAX as usize);
1014
1015        // empty file
1016        if offset_start == offset_end {
1017            return Ok(EMPTY_SLICE);
1018        }
1019
1020        get_byte_slice(self.data, offset_start, offset_end).ok_or(Error(format!(
1021            "Could not file file {:?} - filesystem corrupt at {}..{} (os error -1)",
1022            entry.text, entry.offset_start, entry.offset_end
1023        )))
1024    }
1025
1026    /// Returns the file contents (shorthand for
1027    /// `volume.get_file_bytes(volume.get_file_entry(path))`)
1028    pub fn get_file(&'a self, path: &str) -> Result<&'a [u8], Error> {
1029        let owned_file_entry = self.get_file_entry(path)?;
1030        self.get_file_bytes(&owned_file_entry)
1031    }
1032
1033    /// Serializes the volume into writable bytes (including
1034    /// the header and header length)
1035    pub fn into_bytes(&self) -> Vec<u8> {
1036        // TODO(felix): avoid extra allocation?
1037        let mut out = Vec::new();
1038        out.extend_from_slice(&to_leb(self.header.header_data.len() as u64));
1039        out.extend_from_slice(self.header.header_data);
1040        out.extend_from_slice(self.data);
1041        out
1042    }
1043}
1044
/// A single directory/file record as written into a volume header.
#[derive(Debug, Clone, PartialEq, Eq)]
struct HeaderEntry {
    // Whether this entry is a directory or a file (see `Flags`).
    flags: Flags,
    // Start byte offset of the entry's payload — presumably mirrors
    // `FsEntry::offset_start` semantics (header-relative for dirs,
    // data-relative for files); confirm against the writer.
    offset_start: u64,
    // End byte offset of the entry's payload.
    offset_end: u64,
    // Directory or file name (a single path component).
    text: String,
}
1052
impl HeaderEntry {
    /// Serializes this entry in the on-disk order: 7-byte text length,
    /// 1 flag byte, start/end offsets (little-endian), then the name bytes.
    fn write_to(&self, buffer: &mut Vec<u8>) {
        // Note: The reference implementation diverges from the spec
        // here - the flag should actually go first.
        buffer.extend(self.text_length());
        buffer.extend(self.flags.as_bytes());

        buffer.extend(self.offset_start.to_le_bytes());
        buffer.extend(self.offset_end.to_le_bytes());
        buffer.extend(self.text.as_bytes());
    }

    /// The name length encoded as 7 little-endian bytes (see `text_length`).
    fn text_length(&self) -> [u8; 7] {
        text_length(&self.text)
    }
}
1069
/// Encodes `text.len()` as the 7-byte little-endian length prefix used by the
/// header format (the 8th length byte is reserved for the entry-type flag).
///
/// # Panics
///
/// Panics if the text is 2^56 bytes or longer, i.e. the length no longer fits
/// in 7 bytes.
fn text_length(text: &str) -> [u8; 7] {
    let length = u64::try_from(text.len()).unwrap();
    let [head @ .., last] = length.to_le_bytes();
    // Truncate the panic-message preview at a char boundary: slicing
    // `&text[..250]` directly could itself panic mid-codepoint (or when
    // the text is shorter than 250 bytes).
    let preview_end = (0..=250.min(text.len()))
        .rev()
        .find(|&i| text.is_char_boundary(i))
        .unwrap_or(0);
    assert_eq!(
        last,
        0,
        "Text length of {} is out of bounds (max = 2^56 = 72,057,594,037,927,936) for text {:?}",
        text.len(),
        &text[..preview_end],
    );
    head
}
1082
/// On-disk flag byte distinguishing directory entries from file entries.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub(crate) enum Flags {
    Dir = 0b00,
    File = 0b01,
}

impl Flags {
    /// Returns the single-byte on-disk encoding of this flag.
    pub(crate) fn as_bytes(self) -> [u8; 1] {
        match self {
            Flags::Dir => [0b00],
            Flags::File => [0b01],
        }
    }
}
1094
/// Iterator over the paths in the file, yields PathBufs
/// until all files in the volume have been listed.
#[derive(Debug)]
pub struct VolumeIterator<'b, 'a: 'b> {
    /// Volume whose header is walked to expand directories on the fly.
    pub volume: &'b Volume<'a>,
    /// Work stack of entries still to yield; popped entries that are
    /// directories push their children onto this stack.
    pub entries: Vec<DirOrFile>,
}
1102
// NOTE(review): the lifetime parameter names here are swapped relative to the
// struct declaration (`VolumeIterator<'b, 'a: 'b>`); harmless but confusing.
impl<'a, 'b> Iterator for VolumeIterator<'a, 'b> {
    type Item = DirOrFile;

    /// Depth-first traversal: yields the popped entry; if it is a directory,
    /// its children are first pushed onto the stack to be yielded later.
    fn next(&mut self) -> Option<Self::Item> {
        let next = self.entries.pop();

        if let Some(DirOrFile::Dir(d)) = next.as_ref() {
            // Expand the directory by reading its children from the volume.
            // NOTE(review): `read_dir` errors are silently treated as an
            // empty directory here.
            self.entries.extend(
                Volume::specialsort_dir(
                    &self
                        .volume
                        .read_dir(&format!("/{}", d.display()))
                        .unwrap_or_default(),
                )
                .iter()
                .map(|v| match v.fs_type {
                    FsEntryType::File => DirOrFile::File(d.join(v.text.as_ref())),
                    FsEntryType::Dir => DirOrFile::Dir(d.join(v.text.as_ref())),
                }),
            );
        }

        next
    }
}
1128
/// A directory path, or a file path together with its raw contents.
#[derive(Debug, Clone, Hash, PartialEq, PartialOrd, Ord, Eq)]
pub enum DirOrFileWithBytes<'a> {
    Dir { path: PathBuf },
    File { path: PathBuf, bytes: &'a [u8] },
}

impl<'a> DirOrFileWithBytes<'a> {
    /// Returns the path of this entry — works for both variants.
    pub fn get_path(&self) -> &PathBuf {
        let (DirOrFileWithBytes::Dir { path } | DirOrFileWithBytes::File { path, .. }) = self;
        path
    }

    /// Returns the file contents, or `None` for directories.
    pub fn get_bytes(&self) -> Option<&'a [u8]> {
        match self {
            DirOrFileWithBytes::File { bytes, .. } => Some(bytes),
            DirOrFileWithBytes::Dir { .. } => None,
        }
    }
}
1150
/// Recursively expands one directory level into `parent`: files are appended
/// as leaves, directories are decoded from `header` (via their byte offsets)
/// and descended into.
fn append_entries_recursive<'b>(
    header: &'b [u8],
    entries: Vec<FsEntry<'b>>,
    parent: &mut RecursiveFsEntryDir<'b>,
) {
    for entry in entries.iter() {
        match entry.fs_type {
            FsEntryType::Dir => {
                let mut subdir = RecursiveFsEntryDir {
                    name: entry.text.as_ref().to_string(),
                    contents: Vec::new(),
                };
                // Offsets are u64 on disk; clamp instead of panicking.
                let offset_start: usize =
                    entry.offset_start.try_into().unwrap_or(u32::MAX as usize);
                let offset_end: usize = entry.offset_end.try_into().unwrap_or(u32::MAX as usize);
                // Directories whose level can't be sliced out of the header
                // are silently skipped.
                let fs_entry_bytes = match get_byte_slice(header, offset_start, offset_end) {
                    Some(s) => s,
                    None => continue,
                };
                let new_entries = Volume::specialsort_dir(FsEntry::parse(fs_entry_bytes).as_ref());
                append_entries_recursive(header, new_entries, &mut subdir);
                parent.contents.push(RecursiveFsEntry::Dir { dir: subdir });
            }
            FsEntryType::File => {
                parent.contents.push(RecursiveFsEntry::File {
                    file: entry.clone(),
                });
            }
        }
    }
}
1182
/// Since `env::temp_dir()` panics on wasm32-wasi, this
/// function provides a non-panicking replacement
pub fn webc_temp_dir() -> PathBuf {
    #[cfg(not(target_arch = "wasm32"))]
    {
        std::env::temp_dir()
    }
    #[cfg(target_arch = "wasm32")]
    {
        // Create a uniquely-named directory next to the current executable
        // instead of relying on a system temp dir.
        let suffix = format!("temp-{}", rand::random::<u64>());

        let dir = std::env::current_exe()
            .unwrap_or_else(|_| PathBuf::from(""))
            .join(suffix);

        std::fs::create_dir_all(&dir).unwrap();

        dir
    }
}
1203
1204fn to_leb(num: u64) -> Vec<u8> {
1205    let mut buf = Vec::new();
1206    match leb128::write::unsigned(&mut buf, num) {
1207        Ok(_) => buf,
1208        Err(_) => Vec::new(),
1209    }
1210}
1211
/// Renders the parent of `path` as a string, or `""` when it has no parent.
fn get_parent<P: AsRef<Path>>(path: P) -> String {
    path.as_ref()
        .parent()
        .map(|p| p.display().to_string())
        .unwrap_or_default()
}
1218
1219// Returns how many bytes the LEB128 would take up if it was read
1220fn get_leb_size(bytes: &[u8]) -> Option<usize> {
1221    use std::io::Cursor;
1222    let mut cursor = Cursor::new(bytes);
1223    let initial_pos = cursor.position(); // usually 0
1224    let _ = leb128::read::unsigned(&mut cursor).ok()?;
1225    Some((cursor.position() - initial_pos).min(u32::MAX as u64) as usize)
1226}
1227
1228fn from_leb(mut bytes: &[u8]) -> Option<u64> {
1229    leb128::read::unsigned(&mut bytes).ok()
1230}
1231
// /a/b/c => "c"
// /a/b/c/file.txt => "file.txt"
fn get_last_component(path: &Path) -> Option<&str> {
    let last = path.components().last()?;
    if let Component::Normal(name) = last {
        name.to_str()
    } else {
        None
    }
}
1240
/// Whether the file is a directory or a file
// Variant order is significant: the `PartialOrd`/`Ord` derives depend on it.
#[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
pub enum FsEntryType {
    /// The entry is a regular file
    File,
    /// The entry is a directory
    Dir,
}

impl FsEntryType {
    /// 8-Bit ID of the file entry type (`0` = directory, `1` = file)
    pub fn get_id(&self) -> u8 {
        if matches!(self, FsEntryType::File) {
            1
        } else {
            0
        }
    }

    /// Reverse function of `self.get_id()`
    pub fn from_id(id: u8) -> Option<Self> {
        match id {
            0 => Some(Self::Dir),
            1 => Some(Self::File),
            _ => None,
        }
    }
}
1268
/// A directory together with its recursively expanded contents.
#[derive(Debug, PartialEq)]
pub struct RecursiveFsEntryDir<'a> {
    /// Name of this directory (a single path component).
    pub name: String,
    /// Files and subdirectories contained in this directory.
    pub contents: Vec<RecursiveFsEntry<'a>>,
}

/// A node in a recursively expanded directory tree.
#[derive(Debug, PartialEq)]
pub enum RecursiveFsEntry<'a> {
    /// Leaf node: a single file entry.
    File { file: FsEntry<'a> },
    /// Inner node: a directory with its contents.
    Dir { dir: RecursiveFsEntryDir<'a> },
}
1280
/// Same as `FsEntry` but with an owned `text: String`,
/// instead of a `&str`
#[derive(Debug, Clone, PartialEq)]
pub enum OwnedFsEntry {
    /// File entry
    File(OwnedFsEntryFile),
    /// Directory entry
    Dir(OwnedFsEntryDir),
}

impl OwnedFsEntry {
    /// Returns the text component of the path, i.e. `"file.txt"` for `/a/b/file.txt`
    // NOTE(review): simply forwards the stored `text`; whether that holds the
    // final component or a longer path depends on how the entry was built
    // (`Volume::get_file_entry` stores the full caller-supplied path).
    pub fn get_name(&self) -> &str {
        match self {
            OwnedFsEntry::File(f) => f.text.as_str(),
            OwnedFsEntry::Dir(d) => d.text.as_str(),
        }
    }
}
1300
/// Owned version of the `FsEntry` with `fs_type = FsEntryType::File`
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct OwnedFsEntryFile {
    /// Same as `FsEntry::text`, but owned as a `String`
    pub text: String,
    /// Starting offset in bytes into the `volume.data` field
    pub offset_start: u64,
    /// Ending offset in bytes into the `volume.data` field
    pub offset_end: u64,
}

impl OwnedFsEntryFile {
    /// Returns the file length in bytes (0 when the offsets are inverted).
    pub fn get_len(&self) -> u64 {
        self.offset_end.checked_sub(self.offset_start).unwrap_or(0)
    }
}
1317
/// Owned version of the `FsEntry` with `fs_type = FsEntryType::Dir`
#[derive(Debug, Clone, PartialEq)]
pub struct OwnedFsEntryDir {
    /// Same as `FsEntry::text`, but owned as a `String`
    pub text: String,
    /// Entries of the directory (files and subdirectories alike)
    pub files: Vec<OwnedFsEntry>,
}
1326
/// Directory or file entry, parsed without any allocation
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FsEntry<'a> {
    /// If the `FsEntryType == Dir`, then `offset_start..offset_end` points
    /// to the start / end bytes of the next directory level, relative to the
    /// file header
    ///
    /// If the `FsEntryType = File`, then `offset_start..offset_end` points
    /// to the actual file contents in the `volume.data` field
    ///
    /// Inside of a directory level, all files are grouped by the name
    /// of the parent directory, at parsing time only the top-level
    /// directories are parsed
    pub fs_type: FsEntryType,
    /// Directory / file name, for example `usr`, `lib` or `var` in `"/usr/lib/var"`
    /// (borrowed from the header bytes when possible, hence the `Cow`)
    pub text: Cow<'a, str>,
    /// See documentation for `fs_type`
    pub offset_start: u64,
    /// See documentation for `fs_type`
    pub offset_end: u64,
}
1348
1349impl<'a> FsEntry<'a> {
1350    /// Returns the length of the file in bytes (0 for directories)
1351    pub fn get_len(&self) -> u64 {
1352        self.offset_end.saturating_sub(self.offset_start)
1353    }
1354
1355    pub fn calculate_byte_length(entries: &[Self]) -> usize {
1356        (entries.len() * 24)
1357            + entries
1358                .iter()
1359                .map(|e| e.text.as_bytes().len())
1360                .sum::<usize>()
1361            + 8
1362    }
1363
1364    /// Serializes a list of `FsEntry` into bytes (usually
1365    /// done to encode one directory level)
1366    ///
1367    /// # Binary format
1368    ///
1369    /// ```no_run,ignore
1370    /// [8 bytes]: size of the directory level itself
1371    ///
1372    /// [
1373    ///   [1 byte]:  file entry type (0 = Directory, 1 = File, .. ?)
1374    ///   [7 bytes]: text length N (only 7 bytes long instead of 8, maximum file
1375    ///              name length = 268435456 instead of 4294967296 bytes)
1376    ///   [8 bytes]: offset_start
1377    ///   [8 bytes]: offset_end
1378    ///   [n bytes]: text (directory / file name)
1379    /// ]
1380    /// ```
1381    pub fn into_bytes(entries: &[Self]) -> Option<Vec<u8>> {
1382        let mut out = Vec::new();
1383
1384        for entry in entries {
1385            let self_text_bytes = entry.text.as_bytes();
1386
1387            // insanely long file name
1388            if self_text_bytes.len() > 268435456 {
1389                return None;
1390            }
1391
1392            let mut text_len_bytes = (self_text_bytes.len() as u64).to_le_bytes();
1393            text_len_bytes[7] = entry.fs_type.get_id(); // 0th byte = least important byte
1394            out.extend_from_slice(&text_len_bytes);
1395            out.extend_from_slice(&entry.offset_start.to_le_bytes());
1396            out.extend_from_slice(&entry.offset_end.to_le_bytes());
1397            out.extend_from_slice(self_text_bytes);
1398        }
1399
1400        let mut final_out = Vec::new();
1401        let len = out.len() as u64;
1402        let bytes_len = len.to_le_bytes();
1403        final_out.extend_from_slice(&bytes_len);
1404        final_out.append(&mut out);
1405
1406        Some(final_out)
1407    }
1408
1409    /// Reverse function of `Self::into_bytes`, parses one directory level
1410    /// from a set of bytes. One additional feature is that not more than `n`
1411    /// bytes are parsed if `n` is the size of the serialized directory level,
1412    /// even if the input buffer is larger than `n`.
1413    ///
1414    /// If the directory level could not be parsed, the parsing is interrupted
1415    /// and the given file entries are returns as-is (no check for completeness)
1416    pub fn parse(data: &'a [u8]) -> Vec<Self> {
1417        let mut entries = Vec::new();
1418
1419        if data.is_empty() || data.len() < 8 {
1420            return entries;
1421        }
1422
1423        // first 8 bytes = data len
1424        let directory_len_bytes = [
1425            data[0], data[1], data[2], data[3], data[4], data[5], data[6], data[7],
1426        ];
1427
1428        let directory_len = u64::from_le_bytes(directory_len_bytes);
1429        let directory_len: usize = directory_len.try_into().unwrap_or(u32::MAX as usize);
1430
1431        if data.len() < directory_len + 8 {
1432            return entries; // technically an error
1433        }
1434
1435        let data = &data[8..directory_len + 8];
1436
1437        let mut cursor = 0;
1438        while cursor < data.len() {
1439            let fs_type = data[cursor + 7]; // 0 = File, 1 = Directory
1440            if (cursor + 24) > data.len() {
1441                break;
1442            }
1443
1444            let text_size = [
1445                data[cursor],
1446                data[cursor + 1],
1447                data[cursor + 2],
1448                data[cursor + 3],
1449                data[cursor + 4],
1450                data[cursor + 5],
1451                data[cursor + 6],
1452                0,
1453            ];
1454            let text_size = u64::from_le_bytes(text_size);
1455
1456            let text_size: usize = text_size.try_into().unwrap_or(u32::MAX as usize);
1457
1458            let offset_start = [
1459                data[cursor + 8],
1460                data[cursor + 9],
1461                data[cursor + 10],
1462                data[cursor + 11],
1463                data[cursor + 12],
1464                data[cursor + 13],
1465                data[cursor + 14],
1466                data[cursor + 15],
1467            ];
1468            let offset_start = u64::from_le_bytes(offset_start);
1469
1470            let offset_end = [
1471                data[cursor + 16],
1472                data[cursor + 17],
1473                data[cursor + 18],
1474                data[cursor + 19],
1475                data[cursor + 20],
1476                data[cursor + 21],
1477                data[cursor + 22],
1478                data[cursor + 23],
1479            ];
1480            let offset_end = u64::from_le_bytes(offset_end);
1481
1482            if (cursor + 24 + text_size) > data.len() {
1483                break; // directory corrupt?
1484            }
1485
1486            let text_result = std::str::from_utf8(&data[cursor + 24..(cursor + 24 + text_size)]);
1487
1488            cursor += 24 + text_size;
1489
1490            let text = match text_result {
1491                Ok(o) => o,
1492                Err(_) => {
1493                    continue;
1494                }
1495            };
1496
1497            let fs_type = match FsEntryType::from_id(fs_type) {
1498                Some(s) => s,
1499                None => {
1500                    continue;
1501                }
1502            };
1503
1504            entries.push(FsEntry {
1505                fs_type,
1506                offset_start,
1507                offset_end,
1508                text: Cow::Borrowed(text),
1509            });
1510        }
1511
1512        entries
1513    }
1514}
1515
/// Header of a filesystem volume, describing a serialized
/// list of directories and file paths
#[derive(Default, Clone, PartialEq, Eq)]
pub struct VolumeHeader<'a> {
    /// Top-level files / directories already parsed; deeper levels are
    /// decoded lazily from `header_data` when traversed
    pub top_level: Vec<FsEntry<'a>>,
    /// Unserialized header data as raw bytes (directory offsets in
    /// `FsEntry` index into this slice)
    pub header_data: &'a [u8],
}
1525
1526impl<'a> fmt::Debug for VolumeHeader<'a> {
1527    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1528        self.top_level.fmt(f)?;
1529        write!(
1530            f,
1531            "\r\nheader_data: [ ... ({} bytes) ],",
1532            self.header_data.len()
1533        )
1534    }
1535}
1536
1537impl<'a> VolumeHeader<'a> {
1538    /// Parses the top-level directory entries from a slice of bytes,
1539    /// see `FsEntry::into_bytes` for information about the binary format
1540    pub fn from_slice(data: &'a [u8]) -> Self {
1541        Self {
1542            top_level: FsEntry::parse(data),
1543            header_data: data,
1544        }
1545    }
1546
1547    /// Same as `&self.header_data`, API for consistency
1548    pub fn into_vec(&self) -> &'a [u8] {
1549        self.header_data
1550    }
1551}
1552
/// Whether to checksum (and optionally sign) the bytes when
/// serializing the WebC file to bytes
#[derive(Debug, Clone, PartialEq)]
pub enum GenerateChecksum {
    /// Signature bytes get zeroed
    NoChecksum,
    /// Sha256 checksum of the file is calculated and padded
    /// with zeroes, but no signature is generated
    Sha256,
    /// Sha256 checksum is generated and the checksum
    /// is signed with the given key (cert must be able to
    /// sign at least 256 bytes)
    #[cfg(feature = "crypto")]
    SignedSha256 { key: Cert },
}
1568
1569impl GenerateChecksum {
1570    /// Returns the ID for the Checksum type:
1571    ///
1572    /// - no checksum: `----------------`
1573    /// - sha256 checksum: `sha256----------`
1574    /// - sha256 checksum, signed with key: `sha256-signed---`
1575    ///
1576    pub fn get_key(&self) -> Vec<u8> {
1577        match self {
1578            GenerateChecksum::NoChecksum => b"----------------".to_vec(),
1579            GenerateChecksum::Sha256 => b"sha256----------".to_vec(),
1580            #[cfg(feature = "crypto")]
1581            GenerateChecksum::SignedSha256 { .. } => b"sha256-signed---".to_vec(),
1582        }
1583    }
1584}
1585
impl Default for GenerateChecksum {
    /// By default no checksum is generated at all.
    fn default() -> Self {
        Self::NoChecksum
    }
}
1591
/// Options on what to parse from the file
///
/// Skipping sections makes parsing cheaper when only parts of the
/// container are needed.
#[derive(Debug, Clone)]
pub struct ParseOptions {
    /// If set, will verify the file against the given public key
    /// and error out if the key does not match
    #[cfg(feature = "crypto")]
    pub key: Option<Cert>,
    /// If the manifest should be parsed (will be skipped over otherwise)
    pub parse_manifest: bool,
    /// If the filesystem should be parsed (will be empty otherwise)
    pub parse_volumes: bool,
    /// If the atoms should be parsed
    pub parse_atoms: bool,
}
1606
impl Default for ParseOptions {
    /// By default everything is parsed (manifest, volumes, atoms) and no
    /// signature verification key is set.
    fn default() -> Self {
        Self {
            #[cfg(feature = "crypto")]
            key: None,
            parse_manifest: true,
            parse_volumes: true,
            parse_atoms: true,
        }
    }
}
1618
// Checked sub-slicing: `Some(&input[start..end])` when the range is
// well-formed and starts strictly inside `input`, otherwise `None`.
// Note that an empty range at `start == input.len()` yields `None`.
fn get_byte_slice(input: &[u8], start: usize, end: usize) -> Option<&[u8]> {
    if start <= end && start < input.len() && end <= input.len() {
        Some(&input[start..end])
    } else {
        None
    }
}
1629
/// Minimal package identity (name + version) used internally for
/// (de)serializing package metadata.
#[derive(Debug, Serialize, Deserialize)]
struct InternalPackageMeta {
    // Package name — presumably "user/package" form; confirm at call sites.
    name: String,
    // Package version string — presumably semver; confirm at call sites.
    version: String,
}
1635
/// Needed to easily deserialize an `WasiCommandAnnotation`
/// from the free-form `command.annotations`
#[derive(Default, Debug, Clone, Serialize, Deserialize)]
struct WasiCommandAnnotationsDeserializer {
    // `None` when the annotation map has no "wasi" key.
    #[serde(default)]
    wasi: Option<crate::metadata::annotations::Wasi>,
}
1643
1644fn get_wasi_command_annotation(
1645    val: &IndexMap<String, ciborium::value::Value>,
1646) -> Option<crate::metadata::annotations::Wasi> {
1647    let desc: WasiCommandAnnotationsDeserializer = ciborium::Value::serialized(val)
1648        .unwrap()
1649        .deserialized()
1650        .unwrap();
1651
1652    desc.wasi
1653}
1654
/// Needed to easily deserialize an `EmscriptenCommandAnnotation`
/// from the free-form `command.annotations`
#[derive(Default, Debug, Clone, Serialize, Deserialize)]
struct EmscriptenCommandAnnotationsDeserializer {
    // `None` when the annotation map has no "emscripten" key.
    #[serde(default)]
    emscripten: Option<Emscripten>,
}
1662
1663fn get_emscripten_command_annotation(
1664    val: &IndexMap<String, ciborium::Value>,
1665) -> Option<Emscripten> {
1666    let desc: EmscriptenCommandAnnotationsDeserializer = ciborium::Value::serialized(val)
1667        .unwrap()
1668        .deserialized()
1669        .unwrap();
1670    desc.emscripten
1671}
1672
1673impl<'a> WebC<'a> {
1674    pub fn get_main_args_for_command(&self, command: &str) -> Result<Vec<String>, String> {
1675        let command = self
1676            .manifest
1677            .commands
1678            .get(command)
1679            .ok_or(format!("Command {command:?} not found in manifest"))?;
1680
1681        let atom_description =
1682            get_emscripten_command_annotation(&command.annotations).ok_or(format!(
1683                "no \"atom\" or \"wasi.atom\" or \"emscripten.atom\" found in command {command:#?}"
1684            ))?;
1685
1686        let main_args = atom_description.main_args.as_ref().ok_or(format!(
1687            "command {command:?} has no atom to start the command with"
1688        ))?;
1689
1690        Ok(main_args.clone())
1691    }
1692
1693    #[allow(deprecated)]
1694    pub fn get_atom_name_for_command(&self, api: &str, command: &str) -> Result<String, String> {
1695        let command = self
1696            .manifest
1697            .commands
1698            .get(command)
1699            .ok_or(format!("Command {command:?} not found in manifest"))?;
1700
1701        match api {
1702            "emscripten" => {
1703                let atom_description = get_emscripten_command_annotation(&command.annotations).ok_or(format!(
1704                    "no \"atom\" or \"wasi.atom\" or \"emscripten.atom\" found in command {command:#?}"
1705                ))?;
1706
1707                let atom_name = atom_description.atom.as_ref().ok_or(format!(
1708                    "command {command:?} has no atom to start the command with"
1709                ))?;
1710
1711                Ok(atom_name.to_string())
1712            }
1713            "wasi" => {
1714                let wasi = get_wasi_command_annotation(&command.annotations).ok_or(format!(
1715                    "no \"atom\" or \"wasi.atom\" or \"emscripten.atom\" found in command {command:#?}"
1716                ))?;
1717
1718                Ok(wasi.atom)
1719            }
1720            _ => Err(String::new()),
1721        }
1722    }
1723
1724    /// Checks whether the file starts with the header MAGIC
1725    pub fn check_magic_header(data: &[u8]) -> Result<(), Error> {
1726        let magic = get_byte_slice(data, 0, MAGIC.len()).ok_or(Error(
1727            "Invalid WebC file (can't get magic header)".to_string(),
1728        ))?;
1729
1730        if magic != MAGIC {
1731            return Err(Error("Invalid Magic number".into()));
1732        }
1733
1734        Ok(())
1735    }
1736
1737    /// Determines the available volumes for a given package
1738    pub fn get_volumes_for_package(&self, package: &str) -> Vec<String> {
1739        if self.manifest.use_map.is_empty() {
1740            self.volumes.keys().cloned().collect()
1741        } else if package == self.get_package_name() {
1742            self.volumes
1743                .keys()
1744                .filter(|s| s.starts_with("self"))
1745                .cloned()
1746                .collect()
1747        } else {
1748            // TODO: inaccurate!
1749            self.volumes
1750                .keys()
1751                .filter(|s| s.contains(package))
1752                .cloned()
1753                .collect()
1754        }
1755    }
1756
1757    pub fn list_directories(&self, volume: &str) -> Vec<String> {
1758        self.volumes
1759            .get(volume)
1760            .map(|v| v.list_directories())
1761            .unwrap_or_default()
1762    }
1763
1764    /// Returns the directory entries or an error if the directory does not exist
1765    pub fn read_dir(&self, package: &str, path: &str) -> Result<Vec<FsEntry<'a>>, Error> {
1766        for volume in self.get_volumes_for_package(package) {
1767            let v = match self.volumes.get(&volume) {
1768                Some(s) => s,
1769                None => {
1770                    continue;
1771                }
1772            };
1773
1774            match v.read_dir(path) {
1775                Ok(s) => {
1776                    return Ok(s);
1777                }
1778                Err(_) => {
1779                    continue;
1780                }
1781            }
1782        }
1783
1784        Err(Error(format!(
1785            "\"{package}://{path}\" does not exist (os error 2)"
1786        )))
1787    }
1788
1789    /// Looks for the first volume containing "entry", scoped to the given package
1790    pub fn get_file_entry(&self, package: &str, path: &str) -> Option<(String, OwnedFsEntryFile)> {
1791        let mut available_volumes = self.get_volumes_for_package(package);
1792        let mut path = path.to_string();
1793        let mut volume_selected = None;
1794
1795        for v in available_volumes.iter() {
1796            let v_scheme = format!("{v}://");
1797            if path.starts_with(&v_scheme) {
1798                volume_selected = Some(v.clone());
1799                path = path.replacen(&v_scheme, "", 1);
1800                break;
1801            }
1802        }
1803
1804        if let Some(v) = volume_selected.as_ref() {
1805            available_volumes = vec![v.clone()];
1806        }
1807
1808        for volume in available_volumes {
1809            match self
1810                .volumes
1811                .get(&volume)
1812                .and_then(|v| v.get_file_entry(&path).ok())
1813            {
1814                Some(s) => return Some((volume.clone(), s)),
1815                None => continue,
1816            };
1817        }
1818        None
1819    }
1820
1821    /// Checks whether the version of the file is supported by the parsing implementation
1822    pub fn get_check_version(data: &[u8]) -> Result<u64, Error> {
1823        let version = get_byte_slice(data, MAGIC.len(), MAGIC.len() + Version::V1.len()).ok_or(
1824            Error("Invalid WebC version (can't get version)".to_string()),
1825        )?;
1826
1827        if version != Version::V1 {
1828            return Err(Error("Version not supported".into()));
1829        }
1830
1831        let version = std::str::from_utf8(version)
1832            .map_err(|e| Error(format!("Invalid version: {e}")))?
1833            .parse::<u64>()
1834            .map_err(|e| Error(format!("Invalid version: {e}")))?;
1835
1836        Ok(version)
1837    }
1838
1839    /// Returns the bytes of the checksum
1840    pub fn get_checksum_bytes(data: &[u8]) -> Result<&[u8], Error> {
1841        get_byte_slice(
1842            data,
1843            MAGIC.len() + Version::V1.len() + 16,
1844            MAGIC.len() + Version::V1.len() + 16 + 256,
1845        )
1846        .ok_or(Error(
1847            "Invalid WebC checksum (can't get checksum)".to_string(),
1848        ))
1849    }
1850
1851    /// Returns the offset of the manifest start
1852    pub fn get_manifest_offset_size(data: &[u8]) -> ReadResult<(usize, usize)> {
1853        let (signature_offset, _) = Self::get_signature_offset_size(data)?;
1854        let manifest_start = signature_offset + 1024;
1855
1856        if data.get(manifest_start).is_none() {
1857            return Err(Error(format!(
1858                "Could not get manifest: data.len() < {manifest_start}"
1859            )));
1860        }
1861
1862        let manifest_size_len = get_leb_size(&data[manifest_start..]).ok_or(Error(format!(
1863            "could not read LEB128 for manifest length at offset {manifest_start}"
1864        )))?;
1865
1866        // actually parse the bytes
1867        let manifest_len = from_leb(&data[manifest_start..]).ok_or(Error(format!(
1868            "could not read LEB128 for manifest length at offset {manifest_start}"
1869        )))?;
1870
1871        Ok((
1872            manifest_start + manifest_size_len,
1873            manifest_len.try_into().unwrap_or(u32::MAX as usize),
1874        ))
1875    }
1876
1877    pub fn get_manifest(data: &[u8]) -> Result<Manifest, Error> {
1878        let (manifest_len_start, manifest_size) = Self::get_manifest_offset_size(data)?;
1879
1880        let manifest = get_byte_slice(data, manifest_len_start, manifest_len_start + manifest_size)
1881            .ok_or(Error(
1882                "Invalid WebC manifest (can't get manifest bytes)".to_string(),
1883            ))?;
1884
1885        ciborium::from_reader(manifest).map_err(|e| Error(format!("Failed to parse manifest: {e}")))
1886    }
1887
1888    /// Returns the offset of the `.atoms` section of the file
1889    pub fn get_atoms_volume_offset_size(data: &[u8]) -> ReadResult<(usize, usize)> {
1890        let (manifest_offset, manifest_size) = Self::get_manifest_offset_size(data)?;
1891
1892        let atom_start = manifest_offset + manifest_size;
1893        if data.get(atom_start).is_none() {
1894            return Err(Error(format!(
1895                "Could not get atom: data.len() < {atom_start}"
1896            )));
1897        }
1898
1899        let atom_size_len = get_leb_size(&data[atom_start..]).ok_or(Error(format!(
1900            "could not read LEB128 for atom length at offset {atom_start}"
1901        )))?;
1902
1903        let atom_len = from_leb(&data[atom_start..]).ok_or(Error(format!(
1904            "could not read LEB128 for atom length at offset {atom_start}"
1905        )))?;
1906
1907        Ok((
1908            atom_start + atom_size_len,
1909            atom_len.try_into().unwrap_or(u32::MAX as usize),
1910        ))
1911    }
1912
1913    /// Parses the `.atoms` section of the file
1914    pub fn get_atoms_volume(data: &'a [u8]) -> Result<Volume<'a>, Error> {
1915        let (atoms_volume_start, atoms_volume_size) = Self::get_atoms_volume_offset_size(data)?;
1916
1917        let atoms_volume = get_byte_slice(
1918            data,
1919            atoms_volume_start,
1920            atoms_volume_start + atoms_volume_size,
1921        )
1922        .ok_or(Error(
1923            "Invalid WebC atoms (can't get atoms volume bytes)".to_string(),
1924        ))?;
1925
1926        Volume::parse(atoms_volume).map_err(|e| Error(format!("Failed to parse atoms: {e}")))
1927    }
1928
1929    /// Returns the offsets of the "volume"
1930    pub fn get_volume_data_offsets(data: &[u8]) -> Result<BTreeMap<String, (usize, usize)>, Error> {
1931        let mut results = BTreeMap::new();
1932        let (atoms_volume_start, atoms_volume_size) = Self::get_atoms_volume_offset_size(data)?;
1933        let mut cursor = atoms_volume_start + atoms_volume_size;
1934        let mut volume_id = 0;
1935
1936        while get_byte_slice(data, cursor, data.len()).is_some() {
1937            let volume_name_len_len = get_leb_size(&data[cursor..]).ok_or(Error(format!(
1938                "Could not parse volume size length for volume {volume_id}"
1939            )))?;
1940
1941            let volume_name_bytes_len = from_leb(&data[cursor..]).ok_or(Error(format!(
1942                "Could not parse volume size for volume {volume_id}"
1943            )))?;
1944
1945            let volume_name_bytes_len: usize = volume_name_bytes_len
1946                .try_into()
1947                .unwrap_or(u32::MAX as usize);
1948
1949            let start = cursor + volume_name_len_len;
1950            let end = start + volume_name_bytes_len;
1951            let volume_name_bytes = get_byte_slice(data, start, end)
1952                .ok_or(Error(format!("Failed to parse name of volume {volume_id:?}: Expected {volume_name_bytes_len} bytes at offset {start}..{end}")))?;
1953
1954            let volume_name = std::str::from_utf8(volume_name_bytes)
1955            .map_err(|e| Error(format!("Failed to parse name of volume {volume_id:?} at offset {start}..{end}: {e}: {volume_name_bytes:?}")))?;
1956
1957            let volume_size_start = end;
1958            let _ = get_byte_slice(data, volume_size_start, data.len())
1959            .ok_or(Error(format!("Failed to parse size of volume {volume_name:?}: Expected LEB128 at offset {volume_size_start}")))?;
1960
1961            let volume_size_len = get_leb_size(&data[volume_size_start..])
1962            .ok_or(Error(format!("Failed to parse size of volume {volume_name:?}: Expected LEB128 at offset {volume_size_start}")))?;
1963            let volume_size_end = volume_size_start + volume_size_len;
1964            let volume_size = from_leb(&data[volume_size_start..])
1965            .ok_or(Error(format!("Failed to parse size of volume {volume_name:?}: Expected LEB128 at offset {volume_size_start} + {volume_size_len}")))?;
1966
1967            let volume_size: usize = volume_size.try_into().unwrap_or(u32::MAX as usize);
1968            let volume_start = volume_size_end;
1969            let volume_end = volume_start + volume_size;
1970
1971            let leb_size = get_leb_size(&data[volume_start..volume_end]).ok_or(Error(
1972                "Error parsing volume: could not read header size LEB128".to_string(),
1973            ))?;
1974
1975            let header_len: usize = from_leb(&data[volume_start..volume_end])
1976                .ok_or(Error(format!(
1977                    "Could not read header length from data (first {leb_size} bytes)"
1978                )))?
1979                .try_into()
1980                .unwrap_or(usize::MAX);
1981
1982            let volume_start = volume_start + leb_size + header_len;
1983
1984            results.insert(volume_name.to_string(), (volume_start, volume_end));
1985            cursor = volume_end;
1986            volume_id += 1;
1987        }
1988
1989        Ok(results)
1990    }
1991
1992    pub fn parse_volumes_from_fileblock(
1993        data: &'a [u8],
1994    ) -> ReadResult<IndexMap<String, Volume<'a>>> {
1995        let mut map = IndexMap::new();
1996        let mut volume_id = 0;
1997        let mut cursor = 0;
1998
1999        while get_byte_slice(data, cursor, data.len()).is_some() {
2000            let volume_name_len_len = get_leb_size(&data[cursor..]).ok_or(Error(format!(
2001                "Could not parse volume size length for volume {volume_id}"
2002            )))?;
2003
2004            let volume_name_bytes_len = from_leb(&data[cursor..]).ok_or(Error(format!(
2005                "Could not parse volume size for volume {volume_id}"
2006            )))?;
2007
2008            let volume_name_bytes_len: usize = volume_name_bytes_len
2009                .try_into()
2010                .unwrap_or(u32::MAX as usize);
2011
2012            let start = cursor + volume_name_len_len;
2013            let end = start + volume_name_bytes_len;
2014            let volume_name_bytes = get_byte_slice(data, start, end)
2015                .ok_or(Error(format!("Failed to parse name of volume {volume_id:?}: Expected {volume_name_bytes_len} bytes at offset {start}..{end}")))?;
2016
2017            let volume_name = std::str::from_utf8(volume_name_bytes)
2018            .map_err(|e| Error(format!("Failed to parse name of volume {volume_id:?} at offset {start}..{end}: {e}: {volume_name_bytes:?}")))?;
2019
2020            let volume_size_start = end;
2021            let _ = get_byte_slice(data, volume_size_start, data.len())
2022            .ok_or(Error(format!("Failed to parse size of volume {volume_name:?}: Expected LEB128 at offset {volume_size_start}")))?;
2023
2024            let volume_size_len = get_leb_size(&data[volume_size_start..])
2025            .ok_or(Error(format!("Failed to parse size of volume {volume_name:?}: Expected LEB128 at offset {volume_size_start}")))?;
2026            let volume_size_end = volume_size_start + volume_size_len;
2027            let volume_size = from_leb(&data[volume_size_start..])
2028            .ok_or(Error(format!("Failed to parse size of volume {volume_name:?}: Expected LEB128 at offset {volume_size_start} + {volume_size_len}")))?;
2029
2030            let volume_size: usize = volume_size.try_into().unwrap_or(u32::MAX as usize);
2031            let volume_start = volume_size_end;
2032            let volume_end = volume_start + volume_size;
2033            let volume_bytes = get_byte_slice(data, volume_start, volume_end)
2034            .ok_or(Error(format!("Failed to parse size of volume {volume_name:?}: Expected {volume_size} bytes at offset {volume_start}..{volume_end}")))?;
2035
2036            let volume = Volume::parse(volume_bytes).map_err(|e| {
2037                Error(format!(
2038                    "Failed to parse volume {volume_name:?} (size = {volume_size} bytes): {e}"
2039                ))
2040            })?;
2041
2042            map.insert(volume_name.to_string(), volume);
2043
2044            cursor = volume_end;
2045            volume_id += 1;
2046        }
2047
2048        Ok(map)
2049    }
2050
2051    /// Parses the `.volumes` section(s) of the file
2052    pub fn parse_volumes(data: &'a [u8]) -> ReadResult<IndexMap<String, Volume<'a>>> {
2053        let (atoms_volume_start, atoms_volume_size) = Self::get_atoms_volume_offset_size(data)?;
2054        let cursor = atoms_volume_start + atoms_volume_size;
2055        match get_byte_slice(data, cursor, data.len()) {
2056            Some(s) => Self::parse_volumes_from_fileblock(s),
2057            None => Ok(IndexMap::new()),
2058        }
2059    }
2060
2061    /// Computes the checksum of the file without cloning it
2062    pub fn compute_checksum(data: &[u8]) -> ReadResult<Option<Checksum>> {
2063        use sha2::Sha256;
2064
2065        let min_offset = MAGIC.len() + Version::V1.len();
2066        let max_offset = min_offset + 16;
2067        let checksum_type = get_byte_slice(data, min_offset, max_offset).ok_or(Error(format!(
2068            "Failed to get checksum type at offset {min_offset}..{max_offset}"
2069        )))?;
2070
2071        match checksum_type {
2072            b"----------------" => Ok(None),
2073            b"sha256----------" | b"sha256-signed---" => {
2074                let mut hasher = Sha256::new();
2075
2076                hasher.update(MAGIC);
2077                hasher.update(Version::V1);
2078                hasher.update(checksum_type);
2079                hasher.update([0; 256]);
2080                hasher.update([0; 4]);
2081                hasher.update([0; 1024]);
2082
2083                if data.len() > MAGIC.len() + Version::V1.len() + 16 + 256 + 4 + 1024 {
2084                    hasher.update(&data[(MAGIC.len() + Version::V1.len() + 16 + 256 + 4 + 1024)..]);
2085                };
2086
2087                let mut result = hasher.finalize().to_vec();
2088                let valid_until = result.len();
2089
2090                if result.len() < 256 {
2091                    result.resize(256, 0);
2092                }
2093
2094                let chk_type = std::str::from_utf8(checksum_type).unwrap().to_string();
2095
2096                Ok(Some(Checksum {
2097                    valid_until,
2098                    chk_type,
2099                    data: result,
2100                    valid: false,
2101                }))
2102            }
2103            _ => Err(Error(format!(
2104                "Invalid checksum type: {:?}",
2105                std::str::from_utf8(checksum_type)
2106            ))),
2107        }
2108    }
2109
    /// Byte offset where the 4-byte signature-length field starts:
    /// magic + version + 16-byte checksum type + 256-byte checksum field.
    pub const fn get_signature_offset_start() -> usize {
        MAGIC.len() + Version::V1.len() + 16 + 256
    }
2113
2114    /// Returns the offset of the signature
2115    pub fn get_signature_offset_size(data: &[u8]) -> ReadResult<(usize, usize)> {
2116        let signature_offset_start = Self::get_signature_offset_start();
2117        let signature_size_bytes =
2118            get_byte_slice(data, signature_offset_start, signature_offset_start + 4).ok_or(
2119                Error(format!(
2120                    "Failed to get signature length at offset {signature_offset_start}..{}",
2121                    signature_offset_start + 4
2122                )),
2123            )?;
2124
2125        let signature_len_u32 = u32::from_le_bytes([
2126            signature_size_bytes[0],
2127            signature_size_bytes[1],
2128            signature_size_bytes[2],
2129            signature_size_bytes[3],
2130        ]);
2131
2132        let signature_len = signature_len_u32.min(1024) as usize;
2133
2134        Ok((signature_offset_start + 4, signature_len))
2135    }
2136
2137    /// Read the signature bytes
2138    pub fn get_signature_bytes(data: &[u8]) -> ReadResult<&[u8]> {
2139        let (offset, size) = Self::get_signature_offset_size(data)?;
2140
2141        get_byte_slice(data, offset, offset + size).ok_or(Error(format!(
2142            "Could not get signature at offset {}..{}",
2143            offset,
2144            offset + size
2145        )))
2146    }
2147
2148    /// Returns the (unverified) signature from the file
2149    pub fn get_signature(data: &[u8]) -> ReadResult<Option<Signature>> {
2150        let signature = Self::get_signature_bytes(data)?;
2151        let last_bytes = signature.iter().rev().take_while(|i| **i == 0).count();
2152        let valid_until = 1024_usize.saturating_sub(last_bytes);
2153        Ok(Some(Signature {
2154            valid_until,
2155            data: signature.to_vec(),
2156            valid: false,
2157        }))
2158    }
2159
2160    /// Verifies the file against a given key
2161    #[cfg(feature = "crypto")]
2162    pub fn verify_file(
2163        checksum: &Checksum,
2164        signature: &Signature,
2165        public_key: &Cert,
2166    ) -> ReadResult<bool> {
2167        verify_signature(&checksum.data, &signature.data, public_key)
2168            .map_err(|e| Error(format!("Error verifying signature: {e}")))
2169    }
2170
    /// Returns a reference to the manifest
    ///
    /// The manifest was parsed once during [`WebC::parse`] and is borrowed here.
    pub fn get_metadata(&self) -> &Manifest {
        &self.manifest
    }
2175
    /// Returns the current package name in `name@version` form
    /// (empty string if the manifest carries no package metadata).
    pub fn get_package_name(&self) -> String {
        Self::get_package_name_from_manifest(&self.manifest)
    }
2180
2181    fn get_package_name_from_manifest(m: &Manifest) -> String {
2182        m.package
2183            .get("wapm")
2184            .map(|value| {
2185                let meta: InternalPackageMeta = value.deserialized().unwrap();
2186                format!("{}@{}", meta.name, meta.version)
2187            })
2188            .or_else(|| {
2189                let name = m.package.get("name")?;
2190                let name = match name {
2191                    ciborium::Value::Text(t) => t,
2192                    _ => return None,
2193                };
2194                let version = m.package.get("version")?;
2195                let version = match version {
2196                    ciborium::Value::Text(t) => t,
2197                    _ => return None,
2198                };
2199                Some(format!("{name}@{version}"))
2200            })
2201            .unwrap_or_default()
2202    }
2203
2204    /// Returns an atom by name for a given package
2205    pub fn get_atom(&self, package: &str, atom: &str) -> Result<&[u8], Error> {
2206        let full_atom_name = format!("{package}:{atom}");
2207        match self.atoms.get_file(&full_atom_name) {
2208            Ok(o) => Ok(o),
2209            Err(e) => {
2210                // look for the atom without the package name,
2211                // if it's the current package name
2212                if package != self.get_package_name() {
2213                    return Err(e);
2214                }
2215
2216                self.atoms.get_file(atom)
2217            }
2218        }
2219    }
2220
2221    /// Returns a reference to the filesystem volume of the package
2222    pub fn get_volume(&self, package: &str, volume: &str) -> Option<&Volume<'a>> {
2223        match self.volumes.get(&format!("{package}/{volume}")) {
2224            Some(s) => Some(s),
2225            None => {
2226                if package == self.get_package_name() {
2227                    self.volumes.get(volume)
2228                } else {
2229                    None
2230                }
2231            }
2232        }
2233    }
2234
2235    /// Returns a file for a given package - if you want to use a non-default
2236    /// volume, prefix the `file_path` with `volume://`, for example, `metadata://README.md`
2237    pub fn get_file(&self, package: &str, file_path: &str) -> Result<&[u8], Error> {
2238        // if the file path starts with "{volume}://", see if the package has a given volume
2239        let (volume, path) =
2240            Self::get_volume_name_from_path(file_path).unwrap_or(("atom", file_path));
2241        let full_volume_name = format!("{package}/{volume}");
2242        let volume = match self.volumes.get(&full_volume_name) {
2243            Some(o) => o,
2244            None => {
2245                // look for the volume without the package name,
2246                // if it's the current package name
2247                if package != self.get_package_name() {
2248                    return Err(Error(format!("Could not find volume {full_volume_name:?}")));
2249                }
2250
2251                self.volumes
2252                    .get(volume)
2253                    .ok_or(Error(format!("Could not find volume {volume:?}")))?
2254            }
2255        };
2256        volume.get_file(path)
2257    }
2258
2259    fn get_volume_name_from_path(s: &str) -> Option<(&str, &str)> {
2260        let (volume, path) = s.split_once("://")?;
2261        if !s.starts_with(&format!("{volume}://")) {
2262            None
2263        } else {
2264            Some((volume, path))
2265        }
2266    }
2267
2268    /// Returns a list of volumes for this package
2269    pub fn list_volumes(&self, package: &str) -> Vec<String> {
2270        let mut result = Vec::new();
2271        let search = format!("{package}/");
2272        for k in self.volumes.keys() {
2273            if k.starts_with(&search) {
2274                result.push(k.replacen(&search, "", 1));
2275            }
2276        }
2277        result
2278    }
2279
2280    /// Returns a list of bundled "package@version" strings contained in this package
2281    pub fn list_packages(&self) -> Vec<PackageInfo> {
2282        let mut packages = vec![PackageInfo::Internal {
2283            dependency_path: String::new(),
2284            name: self.get_package_name(),
2285        }];
2286        Self::get_packages_recursive("self", &self.manifest.use_map, &mut packages);
2287        packages.sort();
2288        packages.dedup();
2289        packages
2290    }
2291
2292    fn get_packages_recursive(
2293        parent_manifest: &str,
2294        use_map: &IndexMap<String, UrlOrManifest>,
2295        packages: &mut Vec<PackageInfo>,
2296    ) {
2297        for (k, v) in use_map.iter() {
2298            match v {
2299                UrlOrManifest::Url(u) => {
2300                    packages.push(PackageInfo::External {
2301                        name: k.clone(),
2302                        url: u.clone(),
2303                    });
2304                }
2305                UrlOrManifest::RegistryDependentUrl(u) => {
2306                    packages.push(PackageInfo::RegistryExternal {
2307                        name: k.clone(),
2308                        id: u.clone(),
2309                    });
2310                }
2311                UrlOrManifest::Manifest(m) => {
2312                    let name = Self::get_package_name_from_manifest(m);
2313                    packages.push(PackageInfo::Internal {
2314                        dependency_path: parent_manifest.to_string(),
2315                        name: name.clone(),
2316                    });
2317                    let dependency_path = format!("{parent_manifest}::{name}");
2318                    Self::get_packages_recursive(&dependency_path, &m.use_map, packages);
2319                }
2320            }
2321        }
2322    }
2323
    /// Returns the atoms in the root package
    ///
    /// Shorthand for `list_atoms_for_package` with this file's own package name.
    pub fn list_atoms(&self) -> Vec<String> {
        self.list_atoms_for_package(&self.get_package_name())
    }
2328
2329    /// Returns a list of all atoms with bytes
2330    pub fn get_all_atoms(&self) -> IndexMap<String, &'a [u8]> {
2331        self.atoms
2332            .header
2333            .top_level
2334            .iter()
2335            .filter_map(|fs_entry| {
2336                Some((
2337                    fs_entry.text.to_string(),
2338                    self.atoms
2339                        .get_file_bytes(&OwnedFsEntryFile {
2340                            text: fs_entry.text.to_string(),
2341                            offset_start: fs_entry.offset_start,
2342                            offset_end: fs_entry.offset_end,
2343                        })
2344                        .ok()?,
2345                ))
2346            })
2347            .collect()
2348    }
2349
2350    /// List the atoms for a given package
2351    pub fn list_atoms_for_package(&self, package_orig: &str) -> Vec<String> {
2352        let package = format!("{package_orig}:");
2353        self.atoms
2354            .header
2355            .top_level
2356            .iter()
2357            .filter_map(|fs_entry| {
2358                if !fs_entry.text.contains(':') && !fs_entry.text.contains('@') {
2359                    Some(fs_entry.text.to_string())
2360                } else if !fs_entry.text.starts_with(&format!("{package_orig}::"))
2361                    && fs_entry.text.starts_with(&package)
2362                {
2363                    Some(fs_entry.text.replacen(&package, "", 1))
2364                } else if !fs_entry.text.starts_with("self::")
2365                    && fs_entry.text.starts_with("self:")
2366                    && package_orig == self.get_package_name()
2367                {
2368                    Some(fs_entry.text.to_string())
2369                } else {
2370                    None
2371                }
2372            })
2373            .collect()
2374    }
2375
2376    /// List the available commands for the root package
2377    pub fn list_commands(&self) -> Vec<&str> {
2378        self.get_metadata()
2379            .commands
2380            .keys()
2381            .map(|s| s.as_str())
2382            .collect()
2383    }
2384
    /// Parses the entire file, depending on the `ParseOptions`
    ///
    /// Steps: magic/version validation, checksum recomputation and comparison
    /// against the stored checksum, optional signature verification (only with
    /// the `crypto` feature and a key in `options`), then manifest, atoms
    /// volume and filesystem volumes.
    #[allow(unused_variables)]
    pub fn parse(data: &'a [u8], options: &ParseOptions) -> ReadResult<Self> {
        Self::check_magic_header(data)?;
        let version = Self::get_check_version(data)?;
        let mut checksum = Self::compute_checksum(data)?;
        #[allow(unused_mut)]
        let mut signature = Self::get_signature(data)?;
        let checksum_bytes = Self::get_checksum_bytes(data)?;

        // The stored checksum is valid iff it matches the recomputed one.
        if let Some(checksum) = checksum.as_mut() {
            checksum.valid = checksum.data == checksum_bytes;
        }

        // Signature verification only happens when a key was supplied and the
        // checksum itself already checked out.
        #[cfg(feature = "crypto")]
        match (options.key.as_ref(), checksum.as_mut(), signature.as_mut()) {
            (Some(key), Some(checksum), Some(signature)) if checksum.valid => {
                signature.valid = verify_signature(&checksum.data, &signature.data, key).is_ok();
            }
            _ => {}
        }

        let manifest = Self::get_manifest(data)?;
        let atoms_volume = Self::get_atoms_volume(data)?;
        let volumes = Self::parse_volumes(data)?;

        Ok(WebC {
            version,
            checksum,
            signature,
            manifest,
            atoms: atoms_volume,
            volumes,
        })
    }
2420
2421    pub fn get_volumes_as_fileblock(&self) -> Vec<u8> {
2422        let mut file = Vec::new();
2423
2424        for (volume_name, volume) in self.volumes.iter() {
2425            // Serialize volume name
2426            let volume_name_bytes = volume_name.as_bytes();
2427            file.extend_from_slice(&to_leb(volume_name_bytes.len() as u64));
2428            file.extend(volume_name_bytes);
2429
2430            // Serialize volume content
2431            let volume_serialized = volume.into_bytes();
2432            file.extend_from_slice(&to_leb(volume_serialized.len() as u64));
2433            file.extend(&volume_serialized);
2434        }
2435
2436        file
2437    }
2438
2439    /// Serialize the .webc file into bytes
2440    pub fn into_bytes(&self, sign_bytes: GenerateChecksum) -> ReadResult<Vec<u8>> {
2441        use sha2::Sha256;
2442
2443        let mut file: Vec<u8> = vec![];
2444
2445        file.extend(MAGIC);
2446        file.extend(*Version::V1);
2447
2448        // 16 bytes: signature algo
2449        file.extend(sign_bytes.get_key());
2450        // 256 bytes: Reserve space reserved for checksum
2451        file.extend([0; 256]);
2452        // 4 bytes: Length of the signature in bytes
2453        file.extend([0; 4]);
2454        // 1024 bytes: Space reserved for the signature
2455        file.extend([0; 1024]);
2456
2457        // N bytes: length of manifest + manifest
2458        let mut manifest_serialized = vec![];
2459        ciborium::into_writer(&self.manifest, &mut manifest_serialized).unwrap();
2460
2461        file.extend_from_slice(&to_leb(manifest_serialized.len() as u64));
2462        file.extend(manifest_serialized);
2463
2464        // Serialize "atoms" volume
2465        let atoms_volume = self.atoms.into_bytes();
2466        file.extend_from_slice(&to_leb(atoms_volume.len() as u64));
2467        file.extend_from_slice(&atoms_volume);
2468
2469        for (volume_name, volume) in self.volumes.iter() {
2470            // Serialize volume name
2471            let volume_name_bytes = volume_name.as_bytes();
2472            file.extend_from_slice(&to_leb(volume_name_bytes.len() as u64));
2473            file.extend(volume_name_bytes);
2474
2475            // Serialize volume content
2476            let volume_serialized = volume.into_bytes();
2477            file.extend_from_slice(&to_leb(volume_serialized.len() as u64));
2478            file.extend(&volume_serialized);
2479        }
2480
2481        // Generate 256-byte checksum depending on requested algo
2482        let checksum = match sign_bytes {
2483            GenerateChecksum::NoChecksum => vec![0; 256],
2484            _ => {
2485                let mut hasher = Sha256::new();
2486                hasher.update(&file);
2487                let mut result = hasher.finalize().to_vec();
2488                if result.len() > 256 {
2489                    return Err(Error("SHA256 returned >256 byte hash (?)".to_string()));
2490                }
2491                if result.len() < 256 {
2492                    result.resize(256, 0);
2493                }
2494                result
2495            }
2496        };
2497
2498        assert_eq!(checksum.len(), 256);
2499
2500        // update checksum
2501        let idx_start = MAGIC.len() + Version::V1.len() + sign_bytes.get_key().len();
2502        let idx_end = idx_start + checksum.len();
2503        for (i, c) in (idx_start..idx_end).zip(checksum.iter()) {
2504            file[i] = *c;
2505        }
2506
2507        let (sig_len, signature) = match &sign_bytes {
2508            GenerateChecksum::NoChecksum | GenerateChecksum::Sha256 => (0_u32, vec![0; 1024]),
2509            #[cfg(feature = "crypto")]
2510            GenerateChecksum::SignedSha256 { key } => {
2511                let mut sig = create_signature(key, &checksum)
2512                    .map_err(|e| Error(format!("Failed to sign checksum: {e}")))?;
2513
2514                let len = sig.len();
2515
2516                if sig.len() > 1024 {
2517                    // TODO(felix): better error handling
2518                    return Err(Error(format!(
2519                        "Signature length out of bounds: {} bytes, max 1024 bytes",
2520                        sig.len()
2521                    )));
2522                }
2523
2524                if sig.len() < 1024 {
2525                    sig.resize(1024, 0);
2526                }
2527
2528                (len as u32, sig)
2529            }
2530        };
2531
2532        let sig_len_bytes = sig_len.to_le_bytes().to_vec();
2533
2534        assert_eq!(sig_len_bytes.len(), 4);
2535
2536        // update signature length
2537        let idx_start = idx_end;
2538        let idx_end = idx_start + sig_len_bytes.len();
2539        for (i, c) in (idx_start..idx_end).zip(sig_len_bytes.into_iter()) {
2540            file[i] = c;
2541        }
2542
2543        assert_eq!(signature.len(), 1024);
2544
2545        // update signature
2546        let idx_start = idx_end;
2547        let idx_end = idx_start + signature.len();
2548        for (i, c) in (idx_start..idx_end).zip(signature.into_iter()) {
2549            file[i] = c;
2550        }
2551
2552        Ok(file)
2553    }
2554}
2555
/// Describes a package dependency and how its contents are resolved.
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Serialize, Deserialize)]
pub enum PackageInfo {
    /// External dependency addressed by an absolute URL,
    /// ex. `"abc": "https://myhost.io/package/abc@1.2.3"`
    External { name: String, url: Url },
    /// External dependency that depends on a registry for resolving the file
    /// URL, ex. `"abc": "package/abc@1.2.3"`
    RegistryExternal { name: String, id: String },
    /// Internal (vendored) dependency, bundled inside the container itself
    Internal {
        /// Path under which the vendored dependency is stored
        dependency_path: String,
        /// Name of the dependency package
        name: String,
    },
}
2570
/// Create a detached OpenPGP signature over `message` with the first usable
/// signing key found in `cert`.
///
/// A usable key is an unencrypted secret key that is supported, alive, not
/// revoked and marked for signing under the standard policy.
///
/// # Errors
///
/// Returns an [`Error`] if the certificate contains no usable signing key,
/// or if building, writing to, or finalizing the signer fails.
#[cfg(feature = "crypto")]
fn create_signature(cert: &Cert, message: &[u8]) -> Result<Vec<u8>, Error> {
    use sequoia_openpgp::policy::StandardPolicy as P;
    use sequoia_openpgp::serialize::stream::Message;
    use sequoia_openpgp::serialize::stream::Signer;
    use std::io::Write;

    let policy = &P::new();

    // Pick the first key that satisfies all the constraints; fail cleanly
    // (instead of panicking) when the certificate has none.
    let keypair = cert
        .keys()
        .unencrypted_secret()
        .with_policy(policy, None)
        .supported()
        .alive()
        .revoked(false)
        .for_signing()
        .next()
        .ok_or_else(|| Error("no usable signing key found in certificate".to_string()))?
        .key()
        .clone()
        .into_keypair()
        .map_err(|e| Error(format!("{e}")))?;

    let mut target = Vec::new();
    let sink = Message::new(&mut target);

    // Detached signature: only the signature bytes go into `target`,
    // not the message itself.
    let mut signer = Signer::new(sink, keypair)
        .detached()
        .build()
        .map_err(|e| Error(format!("{e}")))?;
    signer
        .write_all(message)
        .map_err(|e| Error(format!("{e}")))?;
    signer.finalize().map_err(|e| Error(format!("{e}")))?;

    Ok(target)
}
2609
// Verifies the signature of a .webc file where checksum = computed checksum of the
// file with zeroed signature + signature, public_key = the public key to verify against
#[cfg(feature = "crypto")]
fn verify_signature(
    checksum: &[u8],
    signature: &[u8],
    public_key: &Cert,
) -> Result<bool, anyhow::Error> {
    use sequoia_openpgp::parse::Parse;
    use sequoia_openpgp::policy::StandardPolicy;

    let standard_policy = StandardPolicy::new();

    // Helper that hands the expected signer's certificate to the verifier.
    let cert_source = CertVerifier { cert: public_key };

    // Build a detached-signature verifier from the raw signature bytes,
    // bound to the standard policy and our single-cert helper.
    let mut detached = DetachedVerifierBuilder::from_bytes(signature)?.with_policy(
        &standard_policy,
        None,
        cert_source,
    )?;

    // Check the signature against the checksum bytes.
    detached.verify_bytes(checksum)?;

    Ok(true)
}
2636
/// [`VerificationHelper`] implementation backed by a single caller-supplied
/// public-key certificate.
#[cfg(feature = "crypto")]
struct CertVerifier<'a> {
    // The only certificate offered to the verifier during key lookup.
    cert: &'a Cert,
}
2641
#[cfg(feature = "crypto")]
impl<'a> VerificationHelper for CertVerifier<'a> {
    /// Supply the single trusted certificate; the requested key handles are
    /// ignored because there is only one candidate cert.
    fn get_certs(
        &mut self,
        _ids: &[sequoia_openpgp::KeyHandle],
    ) -> sequoia_openpgp::Result<Vec<Cert>> {
        Ok(vec![self.cert.clone()])
    }

    /// Accept the message only when its first (and only) layer is a
    /// signature group whose first result verifies successfully.
    fn check(&mut self, structure: MessageStructure<'_>) -> sequoia_openpgp::Result<()> {
        let mut verified = false;

        for (index, layer) in structure.into_iter().enumerate() {
            match layer {
                // Only a signature group at layer 0 is acceptable.
                MessageLayer::SignatureGroup { results } if index == 0 => {
                    match results.into_iter().next() {
                        Some(Ok(_)) => verified = true,
                        Some(Err(e)) => return Err(sequoia_openpgp::Error::from(e).into()),
                        None => return Err(anyhow::anyhow!("No signature")),
                    }
                }
                _ => return Err(anyhow::anyhow!("Unexpected message structure")),
            }
        }

        if verified {
            Ok(())
        } else {
            // Also reached when the structure had no layers at all.
            Err(anyhow::anyhow!("Signature verification failed"))
        }
    }
}
2674
/// Map from directory/file paths to their raw contents; directory entries
/// carry an empty `Vec`.
pub type FileMap = BTreeMap<DirOrFile, Vec<u8>>;
2676
2677pub fn pack_directory(dir: &Path) -> Result<FileMap, String> {
2678    let mut files = BTreeMap::new();
2679
2680    // by default, this builder will ignore:
2681    // - entries in .git/info/exclude
2682    // - entries in .gitignore
2683    // - hidden files
2684    let walker = ignore::WalkBuilder::new(dir).build();
2685
2686    for entry in walker {
2687        let entry = entry.as_ref().map_err(|e| format!("{entry:?}: {e}"))?;
2688
2689        let original_path = entry.path();
2690        let path = original_path.strip_prefix(dir).unwrap_or(original_path);
2691        let file_str = path.display().to_string();
2692        if file_str.is_empty() {
2693            continue;
2694        }
2695
2696        if original_path.is_dir() {
2697            files.insert(DirOrFile::Dir(path.to_path_buf()), Vec::new());
2698        } else {
2699            let file_contents =
2700                std::fs::read(original_path).map_err(|e| format!("{file_str:?}: {e}"))?;
2701            files.insert(DirOrFile::File(path.to_path_buf()), file_contents);
2702        }
2703    }
2704
2705    Ok(files)
2706}
2707
#[cfg(test)]
mod tests {
    //! Round-trip and edge-case tests for the v1 WEBC serialization code.

    use std::io::Write;

    use super::*;
    use tempfile::tempdir;
    use FsEntryType::*;

    /// `pack_directory` must honour gitignore semantics: hidden files,
    /// `.git`, `.gitignore`, and gitignored entries are all excluded.
    #[test]
    fn ignore_hidden_and_git_related() {
        let root = tempdir().unwrap();

        let _hidden = std::fs::File::create(root.path().join(".hidden")).unwrap();

        let _git = std::fs::File::create(root.path().join(".git")).unwrap();

        let mut gitignore = std::fs::File::create(root.path().join(".gitignore")).unwrap();
        gitignore.write_all(b"ignore_me").unwrap();

        let _ignore_me = std::fs::File::create(root.path().join("ignore_me")).unwrap();

        let _include_me = std::fs::File::create(root.path().join("include_me")).unwrap();

        let map = pack_directory(root.path()).unwrap();

        // These files must be excluded:
        // - .git
        // - .gitignore
        // - .hidden
        // - ignore_me
        assert_eq!(map.len(), 1);
        assert!(map.contains_key(&DirOrFile::File("include_me".parse().unwrap())));
    }

    /// A `HeaderEntry` serializes as: text length, flags, start/end offsets
    /// (little-endian u64), then the raw text bytes.
    #[test]
    fn serialize_header_entry() {
        let entry = HeaderEntry {
            flags: Flags::File,
            offset_start: 23,
            offset_end: 1024,
            text: "file.txt".parse().unwrap(),
        };

        let mut buffer = Vec::new();
        entry.write_to(&mut buffer);

        assert_bytes_eq!(
            buffer,
            bytes! {
                text_length("file.txt"),
                Flags::File,
                23_u64.to_le_bytes(),
                1024_u64.to_le_bytes(),
                "file.txt",
            }
        );
    }

    /// After a serialize/parse round trip, entries come back in
    /// directory-sorted order (dirs first, then files) with the expected
    /// content offsets.
    #[test]
    fn test_specialsort_append_to_target() {
        let mut map = BTreeMap::new();

        map.insert(
            DirOrFile::File(Path::new("10.txt").to_path_buf()),
            b"hello".to_vec(),
        );
        map.insert(
            DirOrFile::File(Path::new("104.txt").to_path_buf()),
            b"hello".to_vec(),
        );
        map.insert(DirOrFile::Dir(Path::new("a100").to_path_buf()), Vec::new());
        map.insert(DirOrFile::Dir(Path::new("a101").to_path_buf()), Vec::new());
        map.insert(
            DirOrFile::File(Path::new("a101/test.txt").to_path_buf()),
            b"hello".to_vec(),
        );
        map.insert(
            DirOrFile::File(Path::new("file1.txt").to_path_buf()),
            b"hello".to_vec(),
        );
        map.insert(
            DirOrFile::File(Path::new("file4.txt").to_path_buf()),
            b"hello".to_vec(),
        );
        map.insert(
            DirOrFile::File(Path::new("file2.txt").to_path_buf()),
            b"hello".to_vec(),
        );

        let volume_bytes = Volume::serialize_files(map);
        let volume = Volume::parse(&volume_bytes).unwrap();
        assert_eq!(
            volume.get_all_file_entries_directorysorted(),
            vec![
                (
                    DirOrFile::Dir(Path::new("a100").to_path_buf()),
                    FsEntry {
                        fs_type: Dir,
                        text: Cow::Borrowed("a100"),
                        offset_start: 224,
                        offset_end: 224
                    }
                ),
                (
                    DirOrFile::Dir(Path::new("a101").to_path_buf()),
                    FsEntry {
                        fs_type: Dir,
                        text: Cow::Borrowed("a101"),
                        offset_start: 224,
                        offset_end: 264
                    }
                ),
                (
                    DirOrFile::File(Path::new("a101/test.txt").to_path_buf()),
                    FsEntry {
                        fs_type: File,
                        text: Cow::Borrowed("test.txt"),
                        offset_start: 10,
                        offset_end: 15
                    }
                ),
                (
                    DirOrFile::File(Path::new("10.txt").to_path_buf()),
                    FsEntry {
                        fs_type: File,
                        text: Cow::Borrowed("10.txt"),
                        offset_start: 0,
                        offset_end: 5
                    }
                ),
                (
                    DirOrFile::File(Path::new("104.txt").to_path_buf()),
                    FsEntry {
                        fs_type: File,
                        text: Cow::Borrowed("104.txt"),
                        offset_start: 5,
                        offset_end: 10
                    }
                ),
                (
                    DirOrFile::File(Path::new("file1.txt").to_path_buf()),
                    FsEntry {
                        fs_type: File,
                        text: Cow::Borrowed("file1.txt"),
                        offset_start: 15,
                        offset_end: 20
                    }
                ),
                (
                    DirOrFile::File(Path::new("file2.txt").to_path_buf()),
                    FsEntry {
                        fs_type: File,
                        text: Cow::Borrowed("file2.txt"),
                        offset_start: 20,
                        offset_end: 25
                    }
                ),
                (
                    DirOrFile::File(Path::new("file4.txt").to_path_buf()),
                    FsEntry {
                        fs_type: File,
                        text: Cow::Borrowed("file4.txt"),
                        offset_start: 25,
                        offset_end: 30
                    }
                ),
            ]
        );
    }

    /// Parsing malformed/truncated headers must fail with the exact
    /// human-readable error for each stage (magic, version, checksum type,
    /// signature length).
    #[test]
    fn webc_invalid_data() {
        let content = WebC::parse(b"Nweb", &ParseOptions::default());
        pretty_assertions::assert_eq!(
            content.unwrap_err().0.as_str(),
            "Invalid WebC file (can\'t get magic header)"
        );

        let content = WebC::parse(b"\0webc0x1", &ParseOptions::default());
        pretty_assertions::assert_eq!(content.unwrap_err().0.as_str(), "Version not supported");

        let content = WebC::parse(b"\0webc001", &ParseOptions::default());
        pretty_assertions::assert_eq!(
            content.unwrap_err().0.as_str(),
            "Failed to get checksum type at offset 8..24"
        );

        pretty_assertions::assert_eq!(
            WebC::compute_checksum(b"\0webc001----------------"),
            Ok(None)
        );

        let content = WebC::parse(b"\0webc001----------------", &ParseOptions::default());
        pretty_assertions::assert_eq!(
            content.unwrap_err().0.as_str(),
            "Failed to get signature length at offset 280..284"
        );
    }

    /// `FsEntry` lists survive an encode/decode round trip unchanged.
    #[test]
    fn test_encode_decode_file_entry() {
        use crate::v1::FsEntryType::*;
        use std::borrow::Cow;
        let entries = vec![
            FsEntry {
                fs_type: Dir,
                text: Cow::Borrowed("a"),
                offset_start: 58,
                offset_end: 91,
            },
            FsEntry {
                fs_type: Dir,
                text: Cow::Borrowed("b"),
                offset_start: 91,
                offset_end: 91,
            },
        ];

        pretty_assertions::assert_eq!(
            FsEntry::parse(&FsEntry::into_bytes(&entries).unwrap_or_default()),
            entries
        );
    }

    /// A file stored in a serialized volume can be looked up by its path
    /// after parsing the volume back.
    #[test]
    fn test_volume() {
        let mut files = BTreeMap::new();
        files.insert(
            DirOrFile::File(Path::new("/a/c/file.txt").to_path_buf()),
            b"hello".to_vec(),
        );
        files.insert(DirOrFile::Dir(Path::new("/b").to_path_buf()), Vec::new());
        let volume_bytes = Volume::serialize_files(files);
        let volume = Volume::parse(&volume_bytes).unwrap();
        pretty_assertions::assert_eq!(volume.get_file("/a/c/file.txt"), Ok(&b"hello"[..]));
    }

    /// A full `WebC` container (atoms + one named volume) round-trips
    /// through `into_bytes` / `parse` unchanged.
    #[test]
    fn test_encode_decode_webc() {
        let mut files = BTreeMap::new();
        files.insert(
            DirOrFile::File(Path::new("atom.wasm").to_path_buf()),
            b"atom wasm content".to_vec(),
        );
        let atom_volume = Volume::serialize_atoms(files);
        let atom_volume = Volume::parse(&atom_volume).unwrap();

        let mut files = BTreeMap::new();
        files.insert(
            DirOrFile::File(Path::new("dependency.txt").to_path_buf()),
            b"dependency!".to_vec(),
        );
        let file_volume = Volume::serialize_files(files);
        let file_volume = Volume::parse(&file_volume).unwrap();

        let webc = WebC {
            version: 1,
            checksum: None,
            signature: Some(Signature {
                valid_until: 1024,
                valid: false,
                data: Vec::new(),
            }),
            manifest: Manifest {
                origin: None,
                use_map: IndexMap::default(),
                package: IndexMap::default(),
                atoms: IndexMap::default(),
                commands: IndexMap::default(),
                bindings: Vec::new(),
                entrypoint: None,
            },
            atoms: atom_volume,
            volumes: {
                let mut map = IndexMap::default();
                map.insert("files".to_string(), file_volume);
                map
            },
        };

        let bytes = webc.into_bytes(GenerateChecksum::NoChecksum).unwrap();

        pretty_assertions::assert_eq!(WebC::parse(&bytes, &ParseOptions::default()).unwrap(), webc);
    }

    /// A file inserted with a leading-slash path ends up under a top-level
    /// dir entry and is still retrievable through `WebC::get_file`.
    #[test]
    fn test_insert_wrong_file() {
        let volume_bytes = Volume::serialize_files(
            [(
                DirOrFile::File(Path::new("/a/b/c/test.txt").to_path_buf()),
                b"hello".to_vec(),
            )]
            .iter()
            .map(|(a, b)| (a.clone(), b.clone()))
            .collect(),
        );

        let volume = Volume::parse(&volume_bytes).unwrap();
        assert_eq!(
            volume.header.top_level,
            vec![FsEntry {
                fs_type: FsEntryType::Dir,
                text: Cow::Borrowed("a"),
                offset_start: 33,
                offset_end: 66,
            }]
        );

        let mut volumes = IndexMap::new();
        volumes.insert("atom".to_string(), volume);

        let atom_volume_bytes = Volume::serialize_atoms(
            [(DirOrFile::File("path/to/a".into()), b"".to_vec())]
                .iter()
                .map(|(a, b)| (a.clone(), b.clone()))
                .collect(),
        );

        let file = WebC {
            version: 1,
            checksum: None,
            signature: None,
            manifest: Manifest::default(),
            atoms: Volume::parse(&atom_volume_bytes).unwrap(),
            volumes,
        };

        assert_eq!(
            file.get_file(&file.get_package_name(), "/a/b/c/test.txt"),
            Ok(&b"hello"[..])
        );
    }

    /// `Volume::walk` yields entries in the serialized order: top-level
    /// files/dirs first, then the contents of each dir.
    #[test]
    fn test_walk_volume() {
        let volume = Volume::serialize_files({
            let mut map = BTreeMap::new();
            map.insert(
                DirOrFile::File(Path::new("test.txt").to_path_buf()),
                Vec::new(),
            );
            map.insert(DirOrFile::Dir(Path::new("a").to_path_buf()), Vec::new());
            map.insert(
                DirOrFile::File(Path::new("a/tmp2.txt").to_path_buf()),
                Vec::new(),
            );
            map
        });
        let volume = Volume::parse(&volume).unwrap();
        let files = volume.walk().collect::<Vec<_>>();

        assert_eq!(
            files,
            vec![
                DirOrFile::File(Path::new("test.txt").to_path_buf()),
                DirOrFile::Dir(Path::new("a").to_path_buf()),
                DirOrFile::File(Path::new("a/tmp2.txt").to_path_buf()),
            ]
        )
    }

    /// Multiple named volumes round-trip through the fileblock
    /// serialization (`get_volumes_as_fileblock` / `parse_volumes_from_fileblock`).
    #[test]
    fn test_serialize_deserialize_volumes() {
        let mut volumes = IndexMap::new();

        let volume_a_bytes = Volume::serialize_files(
            [(
                DirOrFile::File(Path::new("test.txt").to_path_buf()),
                b"hello".to_vec(),
            )]
            .iter()
            .map(|(a, b)| (a.clone(), b.clone()))
            .collect(),
        );
        let volume_b_bytes = Volume::serialize_files(
            [(
                DirOrFile::File(Path::new("test2.txt").to_path_buf()),
                b"hello2".to_vec(),
            )]
            .iter()
            .map(|(a, b)| (a.clone(), b.clone()))
            .collect(),
        );

        volumes.insert("a".to_string(), Volume::parse(&volume_a_bytes).unwrap());
        volumes.insert("b".to_string(), Volume::parse(&volume_b_bytes).unwrap());
        let file = WebC {
            version: 1,
            checksum: None,
            signature: None,
            manifest: Manifest::default(),
            atoms: Volume::parse(&volume_b_bytes).unwrap(),
            volumes,
        };

        let volume_serialized = file.get_volumes_as_fileblock();
        let volumes_parsed = WebC::parse_volumes_from_fileblock(&volume_serialized).unwrap();
        assert_eq!(volumes_parsed["a"].get_file("test.txt"), Ok(&b"hello"[..]));
    }
}