Skip to main content

webc/v1/
mod.rs

1//! Parsing code for v1 of the WEBC format.
2
3use std::{
4    borrow::Cow,
5    collections::{BTreeMap, BTreeSet},
6    fmt,
7    io::{Read, Seek},
8    ops::Deref,
9    path::{Component, Path, PathBuf},
10    result,
11};
12
13use crate::indexmap::IndexMap;
14use base64::{Engine, prelude::BASE64_STANDARD};
15use bytes::Bytes;
16use serde::{Deserialize, Serialize};
17use sha2::Digest;
18use shared_buffer::OwnedBuffer;
19use url::Url;
20
21#[cfg(feature = "crypto")]
22use sequoia_openpgp::{
23    Cert,
24    parse::stream::{DetachedVerifierBuilder, MessageLayer, MessageStructure, VerificationHelper},
25};
26
27use crate::{
28    MAGIC, Version,
29    metadata::{Manifest, UrlOrManifest, annotations::Emscripten},
30};
31
/// Container file, lazily parsed from a set of `&'data [u8]` bytes
#[derive(Debug, Clone, PartialEq)]
pub struct WebC<'data> {
    /// Version of the file format
    pub version: u64,
    /// Parsed checksum (optional in case of no encoded checksum)
    pub checksum: Option<Checksum>,
    /// Parsed signature (optional if file was not signed)
    pub signature: Option<Signature>,
    /// Manifest of the file, see section `§2.3.1` of the spec
    pub manifest: Manifest,
    /// Executable files, indexed into one volume (`a.wasm` => `a`, `b.wasm` => `b@0.2.1`)
    pub atoms: Volume<'data>,
    /// Filesystem volumes: default volume name is `atom` (containing files of the current package)
    /// and `user/package@version` for external dependencies. Every dependency can be sandboxed to only
    /// access its own filesystem volume, not external ones.
    pub volumes: IndexMap<String, Volume<'data>>,
}
50
/// Memory-mapped version of the WebC file that
/// carries its data along the parsed `WebC<'static>`
#[derive(Debug, Clone)]
pub struct WebCMmap {
    /// SHA-256 hash of the raw file contents, computed in `from_file`
    pub webc_hash: [u8; 32],
    /// WebC file, referencing the memory-mapped backed data
    pub webc: WebC<'static>,
    /// Note: The `webc` field has references into this shared state, so make
    /// sure we don't drop it prematurely.
    #[allow(dead_code)]
    pub(crate) buffer: OwnedBuffer,
}
63
impl Deref for WebCMmap {
    type Target = WebC<'static>;
    /// Forwards to the parsed [`WebC`] so container accessors can be
    /// called directly on a `WebCMmap`.
    fn deref(&self) -> &Self::Target {
        &self.webc
    }
}
70
71impl WebCMmap {
72    /// Same as `WebC::parse`, but uses a memory-mapped file
73    pub fn parse(path: impl AsRef<Path>, options: &ParseOptions) -> ReadResult<Self> {
74        let path = path.as_ref();
75
76        std::fs::File::open(path)
77            .map_err(|e| Error(e.to_string()))
78            .and_then(|f| WebCMmap::from_file(f, options))
79            .map_err(|e| Error(format!("Could not open {}: {e}", path.display())))
80    }
81
82    pub fn from_file(mut file: std::fs::File, options: &ParseOptions) -> ReadResult<Self> {
83        let mut data = Vec::new();
84        file.read_to_end(&mut data)
85            .map_err(|e| Error(format!("Failed to read file: {e}")))?;
86        file.seek(std::io::SeekFrom::Start(0))
87            .map_err(|e| Error(format!("File to seek to the start of the file: {e}")))?;
88
89        let webc_hash: [u8; 32] = sha2::Sha256::digest(data.as_slice()).into();
90
91        let buffer = OwnedBuffer::from_file(&file).map_err(|e| Error(e.to_string()))?;
92
93        let webc = WebC::parse(&buffer, options)?;
94        // Safety: transmute the lifetime away. This is unsound. See the
95        // comments in WebcOwned::parse() for more.
96        let webc: WebC<'static> = unsafe { std::mem::transmute(webc) };
97
98        Ok(Self {
99            webc_hash,
100            webc,
101            buffer,
102        })
103    }
104
105    pub fn webc_hash(&self) -> Option<[u8; 32]> {
106        Some(self.webc_hash)
107    }
108
109    pub fn as_webc_ref(&self) -> WebC<'_> {
110        self.webc.clone()
111    }
112}
113
/// Owned version of the WebC file that carries its data
/// along the parsed `WebC<'static>`
#[derive(Debug, Clone)]
pub struct WebCOwned {
    /// SHA-256 hash of the raw bytes the container was parsed from
    webc_hash: [u8; 32],
    /// Parsed container; borrows from `backing_data` (see `parse` for the
    /// lifetime-transmute caveat)
    pub webc: WebC<'static>,
    /// Note: `webc` has references into these bytes, so they must not be
    /// dropped while `webc` is alive.
    #[allow(dead_code)]
    pub(crate) backing_data: Bytes,
}
123
impl WebCOwned {
    /// Same as `WebC::parse`, but keeps the resulting `data` in memory,
    /// instead of referencing it
    pub fn parse(data: impl Into<Bytes>, options: &ParseOptions) -> ReadResult<Self> {
        let data: Bytes = data.into();

        // Hash the raw bytes before parsing so `webc_hash()` reflects the
        // exact input representation.
        let webc_hash: [u8; 32] = sha2::Sha256::digest(&data).into();

        let webc = WebC::parse(&data, options)?;
        // Safety: We're transmuting the lifetime away here because WebCOwned is
        // technically a self-referential struct.
        // This is unsound because we implement Deref and make the field public
        // and it is possible to get a reference to something inside the WebC,
        // drop this WebCOwned, then trigger a use-after-free bug... but, fixing
        // it would require reworking a bunch of downstream code and that's not
        // possible at the moment.
        let webc: WebC<'static> = unsafe { std::mem::transmute(webc) };
        Ok(Self {
            webc_hash,
            webc,
            backing_data: data,
        })
    }

    /// SHA-256 hash of the raw bytes this container was parsed from.
    pub fn webc_hash(&self) -> Option<[u8; 32]> {
        Some(self.webc_hash)
    }

    /// Returns a borrowed view of the parsed container (clones the parsed
    /// metadata, not `backing_data`).
    pub fn as_webc_ref(&self) -> WebC<'_> {
        self.webc.clone()
    }
}
156
impl Deref for WebCOwned {
    type Target = WebC<'static>;
    /// Forwards to the parsed [`WebC`] so container accessors can be
    /// called directly on a `WebCOwned`.
    fn deref(&self) -> &Self::Target {
        &self.webc
    }
}
163
/// The error type used within the read module.
///
/// A thin newtype over a human-readable message string.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Error(pub String);

impl fmt::Display for Error {
    /// Writes the wrapped message verbatim.
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{}", self.0)
    }
}

impl std::error::Error for Error {}

/// The result type used within the read module.
pub type ReadResult<T> = result::Result<T, Error>;
179
/// Calculated checksum of the file
///
/// `Debug` is implemented by hand to render only the valid bytes as base64.
#[derive(Clone, PartialEq, Eq)]
pub struct Checksum {
    /// (crate-internal): how many bytes of the signature
    /// are valid, how many are padding
    pub valid_until: usize,
    /// Type of checksum (16 bytes long, `------------`, `sha256----------`, etc.)
    pub chk_type: String,
    /// Data of the checksum bytes, 256 bytes long
    pub data: Vec<u8>,
    /// Whether the checksum has been validated during `WebC::parse`
    pub valid: bool,
}
193
194#[derive(Serialize)]
195struct DisplayableChecksum {
196    valid: bool,
197    chk_type: String,
198    data: String,
199}
200
201impl fmt::Debug for DisplayableChecksum {
202    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
203        let json = serde_json::to_string_pretty(self).unwrap_or_default();
204        write!(f, "{json}")
205    }
206}
207
208impl fmt::Debug for Checksum {
209    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
210        let mut clone = self.clone();
211        clone.data.truncate(self.valid_until);
212        let base64 = BASE64_STANDARD.encode(&clone.data);
213        let displayable = DisplayableChecksum {
214            valid: self.valid,
215            chk_type: self.chk_type.clone(),
216            data: base64,
217        };
218        displayable.fmt(f)
219    }
220}
221
/// Signature of the checksum of the file, such that
/// `verify(WebC::get_checksum(), public_key)` is valid
///
/// `Debug` is implemented by hand to render only the valid bytes as base64.
#[derive(Clone, PartialEq, Eq)]
pub struct Signature {
    /// (crate-internal): how many bytes of the signature
    /// are valid, how many are padding
    pub valid_until: usize,
    /// Data of the signature
    pub data: Vec<u8>,
    /// Whether the signature has been checked to be valid
    /// during parsing
    pub valid: bool,
}
235
236#[derive(Serialize)]
237struct DisplayableSignature {
238    valid: bool,
239    data: String,
240}
241
242impl fmt::Debug for DisplayableSignature {
243    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
244        let json = serde_json::to_string_pretty(self).unwrap_or_default();
245        write!(f, "{json}")
246    }
247}
248
249impl fmt::Debug for Signature {
250    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
251        let mut clone = self.clone();
252        clone.data.truncate(self.valid_until);
253        let base64 = BASE64_STANDARD.encode(&clone.data);
254        let displayable = DisplayableSignature {
255            valid: self.valid,
256            data: base64,
257        };
258        displayable.fmt(f)
259    }
260}
261
/// Filesystem volume, containing the uncompressed files in an ordered directory structure
///
/// `Debug` is implemented by hand so `data` is summarized by size instead
/// of dumping every byte.
#[derive(Default, Clone, PartialEq, Eq)]
pub struct Volume<'data> {
    /// Header, storing all the offsets and file names in order
    pub header: VolumeHeader<'data>,
    /// Volume filesystem
    pub data: &'data [u8],
}
270
271impl<'data> fmt::Debug for Volume<'data> {
272    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
273        self.header.fmt(f)?;
274        write!(f, "\r\ndata: [ ... ({} bytes) ]", self.data.len())
275    }
276}
277
/// Specifies whether an input path is a directory or a file
/// (since this distinction can't be made from the filename alone)
#[derive(Clone, Debug, PartialEq, PartialOrd, Ord, Eq, Hash)]
pub enum DirOrFile {
    Dir(PathBuf),
    File(PathBuf),
}

impl fmt::Display for DirOrFile {
    /// Formats as the inner path, e.g. `/a/b/file.txt`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::Display::fmt(&self.get_path_buf().display(), f)
    }
}

impl DirOrFile {
    /// Borrows the underlying [`PathBuf`], regardless of variant.
    pub fn get_path_buf(&self) -> &PathBuf {
        let (DirOrFile::Dir(path) | DirOrFile::File(path)) = self;
        path
    }

    /// Returns all the "Normal" components of the path as owned strings.
    ///
    /// Non-normal components (root, `.`/`..`, prefixes) and components
    /// that are not valid UTF-8 are skipped.
    pub fn components(&self) -> Vec<String> {
        let mut parts = Vec::new();
        for component in self.get_path_buf().components() {
            if let Component::Normal(os) = component {
                if let Some(text) = os.to_str() {
                    parts.push(text.to_string());
                }
            }
        }
        parts
    }

    /// Returns `true` for the `Dir` variant.
    #[must_use]
    pub fn is_dir(&self) -> bool {
        matches!(self, DirOrFile::Dir(_))
    }
}
321
322impl<'a> Volume<'a> {
323    /// Serialize an atom volume.
324    ///
325    /// This is essentially [`Volume::serialize_files()`], but it will modify
326    /// the input files to uphold several atom-specific invariants - namely
327    /// that each atom is addressable by its module name. This means:
328    ///
329    /// - All atoms are hoisted to the top level folder
330    /// - Extensions are removed from filenames
331    pub fn serialize_atoms(files: BTreeMap<DirOrFile, Vec<u8>>) -> Vec<u8> {
332        let mut rewritten_files = BTreeMap::new();
333
334        for (entry, data) in files {
335            // Note: we want to ignore all directories, and strip the dirname
336            // and extension from any files.
337            if let DirOrFile::File(path) = entry
338                && let Some(filename) = path.file_name()
339            {
340                rewritten_files.insert(DirOrFile::File(filename.into()), data);
341            }
342        }
343
344        Volume::serialize_files(rewritten_files)
345    }
346
    /// Create a volume from a set of initial files
    ///
    /// Output layout: a LEB128-encoded header length, then the header
    /// (per directory level: `u64` section length followed by serialized
    /// entries), then the concatenated file contents.
    pub fn serialize_files(files: BTreeMap<DirOrFile, Vec<u8>>) -> Vec<u8> {
        // Input:
        //
        // /a/c/file.txt: [... text file with 10000 bytes ...], false
        // /b:            [], true (empty directory)

        // strip the "/" prefix from all paths
        let files = files
            .into_iter()
            .map(|(path, file)| {
                let new_path = match path.get_path_buf().strip_prefix("/") {
                    Ok(o) => o.to_path_buf(),
                    Err(_) => path.get_path_buf().clone(),
                };

                (new_path, (file, path.is_dir()))
            })
            .collect::<BTreeMap<_, _>>();

        let mut volume_content = Vec::new();
        let mut file_path_offsets = BTreeMap::new();

        // all files including parent directories
        //
        // [/, /a, /b, /a/c, /a/c/file.txt]
        let mut all_files = BTreeMap::new();
        for (path, (_, is_dir)) in files.iter() {
            all_files.insert(path.clone(), *is_dir);

            let mut components = path
                .components()
                .filter_map(|r| match r {
                    std::path::Component::Normal(n) => Some(n.to_str().unwrap_or("").to_string()),
                    _ => None,
                })
                .collect::<Vec<_>>();

            // For files, the last component is the filename, not a parent
            // directory — drop it before synthesizing ancestor dirs.
            if !is_dir {
                components.pop();
            }

            // Insert every ancestor directory (as `is_dir = true`) so the
            // header describes a complete tree.
            while !components.is_empty() {
                let parent_path = components.clone().join("/");
                let path = Path::new(&parent_path).to_path_buf();
                all_files.insert(path, true);
                components.pop();
            }
        }

        // Concatenate all file bodies into `volume_content`, remembering
        // each file's (start, end) byte range.
        for (path, (mut file, is_dir)) in files.into_iter() {
            if !is_dir {
                // path is a file
                let cursor = volume_content.len();
                let file_len = file.len();
                volume_content.append(&mut file);
                file_path_offsets.insert(path.clone(), (cursor, cursor + file_len));
            }
        }

        // 0: ["/"]
        // 1: ["/a", "/b", "/c"]
        // 2: ["/a/c"]
        // 3: ["/a/c/file.txt"]
        let mut files_grouped_by_level = BTreeMap::new();
        for (path, is_dir) in all_files.iter() {
            // ancestors() yields the path itself plus "" — subtract 2 to
            // get the directory depth.
            let num_parents = path.ancestors().count().saturating_sub(2);
            files_grouped_by_level
                .entry(num_parents)
                .or_insert_with(Vec::new)
                .push((path.clone(), *is_dir));
        }

        // For every level: get how many items in the next directory
        // start with the current path. Pre-sort the files in the subdirectory
        //
        // 1: [("a", ["a/c"]), ("b", [])]
        // 2: [("a/c", ["a/c/file.txt"])]
        // 3: [("a/c/file.txt", [])]
        let mut directories_by_level_with_entrycount = BTreeMap::new();
        for (level, paths) in files_grouped_by_level.iter() {
            for (path, is_dir) in paths {
                let mut files_in_directory =
                    if !files_grouped_by_level.contains_key(&(level + 1)) || !is_dir {
                        Vec::new()
                    } else {
                        files_grouped_by_level[&(level + 1)]
                            .iter()
                            .filter(|(next_level_entry, _next_level_is_dir)| {
                                next_level_entry.starts_with(path)
                            })
                            .cloned()
                            .collect()
                    };

                files_in_directory.sort();

                directories_by_level_with_entrycount
                    .entry(level)
                    .or_insert_with(Vec::new)
                    .push(((path.clone(), is_dir), files_in_directory));
            }
        }

        // Now sort the directories levels internally
        //
        // 1: [("a", ["a/c"]), ("b", [])]
        // 2: [("a/c", ["a/c/file.txt"])]
        // 3: [("a/c/file.txt", [])]
        for (_, paths) in directories_by_level_with_entrycount.iter_mut() {
            paths.sort_by(|a, b| a.0.cmp(&b.0));
        }

        // Calculate offsets for the subdirectories
        //
        // - full file / directory name
        // - file / directory name relative to parent
        // - for each file in subdirectory:
        //     - full file / directory name
        //     - file / directory name relative to subdir
        // - total size of subdirectory in bytes
        //
        // 1: (50 bytes directory level size = (2 * 24 bytes + 2 bytes for directory names), [
        //    ("a", "a", 25),
        //    ("b", "b", 0)
        // ])
        // 2: (25 bytes directory level size = (1 * 24 bytes + 1 byte for directory name), [
        //    ("a/c", "c", 32),
        // ])
        // 3: (32 bytes directory level size = (1 * 24 bytes + 8 bytes for the file name), [
        //    ("a/c/file.txt", "file.txt", 0)
        // ])
        let mut byte_size_of_each_level: BTreeMap<usize, _> = BTreeMap::new();

        for (level, entries) in directories_by_level_with_entrycount.iter() {
            // 8 bytes of section-length prefix per distinct parent
            // directory in this level.
            let mut byte_size_of_level = entries
                .iter()
                .map(|((e, _), _)| get_parent(e))
                .collect::<BTreeSet<_>>()
                .len()
                * 8;

            let mut entries_subdir: Vec<(&PathBuf, String, usize)> = Vec::new();

            for ((entry_name, _is_dir), subdir) in entries.iter() {
                let entry_name_last_component = match get_last_component(entry_name) {
                    Some(s) => s.to_string(),
                    None => continue,
                };

                // 24 bytes of fixed entry data + the entry's name.
                byte_size_of_level += entry_name_last_component.len() + 24;

                let mut subdir_size = subdir
                    .iter()
                    .map(|(e, _)| get_parent(e))
                    .collect::<BTreeSet<_>>()
                    .len()
                    * 8;

                for (sub, _sub_is_dir) in subdir.iter() {
                    // /a/c/file.txt => "file.txt"
                    let subdir_last_component = match get_last_component(sub) {
                        Some(s) => s.to_string(),
                        None => continue,
                    };
                    subdir_size += subdir_last_component.len() + 24;
                }

                entries_subdir.push((entry_name, entry_name_last_component, subdir_size));
            }

            byte_size_of_each_level.insert(**level, (byte_size_of_level, entries_subdir));
        }

        // Now construct the directory level [FileEntry] bytes and encode them
        //
        // [
        //    [FsEntry::Dir, "a", start: (8 + 50), end: (8 + 50) + (8 + 25)]
        //    [FsEntry::Dir, "b", start: (8 + 50) + (8 + 25), end: (8 + 50) + (8 + 25)] (= empty directory)
        // ],
        // [
        //    [FsEntry::Dir, "c", start: (8 + 50) + (8 + 25), end: (8 + 50) + (8 + 25) + (8 + 32)]
        // ],
        // [
        //    [Fs::Entry::File, "file.txt", start: 0, end: 10000 ]
        // ]
        let mut levels = Vec::new();
        let mut cursor = 0;
        for (_, (dir_level_bytes, dir_level)) in byte_size_of_each_level.iter() {
            // calculate at which byte offset in the header the next directory level will start
            // 8 bytes reserved for directory level size
            let next_level_start = cursor + dir_level_bytes;

            let mut cur_level = Vec::new();
            let mut next_dir_level_cursor = 0;

            for (full_name, dir_or_file_name, subdir_len_bytes) in dir_level.iter() {
                // Files have a recorded content range; anything else is a
                // directory whose offsets point into the next header level.
                match file_path_offsets.get(&**full_name) {
                    Some((start, end)) => {
                        // path is a file, nothing to do
                        cur_level.push((
                            full_name,
                            HeaderEntry {
                                flags: Flags::File,
                                text: dir_or_file_name.parse().unwrap(),
                                offset_start: (*start as u64),
                                offset_end: (*end as u64),
                            },
                        ));
                    }
                    None => {
                        // path is a directory that potentially has subdirectories
                        cur_level.push((
                            full_name,
                            HeaderEntry {
                                flags: Flags::Dir,
                                text: dir_or_file_name.parse().unwrap(),
                                offset_start: next_level_start as u64 + next_dir_level_cursor,
                                offset_end: next_level_start as u64
                                    + next_dir_level_cursor
                                    + (*subdir_len_bytes as u64),
                            },
                        ));
                        next_dir_level_cursor += *subdir_len_bytes as u64;
                    }
                }
            }

            levels.push(cur_level);
            cursor = next_level_start;
        }

        let mut header = Vec::new();

        // Serialize each level as runs of sibling entries, each run
        // prefixed with its length as a little-endian u64.
        for fs_entries in levels.iter() {
            let mut current_level = Vec::new();

            let (mut current_dir, mut entries) = match fs_entries.first() {
                Some((full_name, e)) => (get_parent(full_name), vec![e.clone()]),
                None => continue,
            };

            for (full_name, entry) in fs_entries.iter().skip(1) {
                let parent_of_current_entry = get_parent(full_name);

                // each time the `current_dir` changes (for example from "/a/b/c" to "/a/b/d",
                // we have to start a new directory section)
                if parent_of_current_entry != current_dir {
                    let mut buffer = Vec::new();
                    for entry in entries.drain(..) {
                        entry.write_to(&mut buffer);
                    }
                    current_level.extend(u64::try_from(buffer.len()).unwrap().to_le_bytes());
                    current_level.extend(buffer);
                    current_dir = parent_of_current_entry;
                }
                entries.push(entry.clone());
            }

            // Flush the final run of the level.
            if !entries.is_empty() {
                let mut buffer = Vec::new();
                for entry in entries.drain(..) {
                    entry.write_to(&mut buffer);
                }
                current_level.extend(u64::try_from(buffer.len()).unwrap().to_le_bytes());
                current_level.extend(buffer);
            }

            header.extend(current_level);
        }

        // header-length LEB128 + header + file contents
        let mut total = to_leb(header.len() as u64);
        total.extend_from_slice(&header);
        total.append(&mut volume_content);

        total
    }
624
625    /// Returns all files and directories with the corresponding `FsEntry`
626    pub fn get_all_file_and_dir_entries(
627        &'a self,
628    ) -> Result<BTreeMap<DirOrFile, FsEntry<'a>>, Error> {
629        let mut target = BTreeMap::new();
630        let mut levels = vec![(PathBuf::new(), self.header.top_level.clone())];
631
632        while !levels.is_empty() {
633            let mut next_levels = Vec::new();
634
635            for (parent_path, entries) in levels.iter() {
636                for entry in entries {
637                    let real_path = parent_path.clone().join(&*entry.text);
638                    let offset_start: usize =
639                        entry.offset_start.try_into().unwrap_or(u32::MAX as usize);
640                    let offset_end: usize =
641                        entry.offset_end.try_into().unwrap_or(u32::MAX as usize);
642
643                    match entry.fs_type {
644                        FsEntryType::File => {
645                            target.insert(DirOrFile::File(real_path.clone()), entry.clone());
646                        }
647                        FsEntryType::Dir => {
648                            let next_level_entries =
649                                FsEntry::parse(&self.header.header_data[offset_start..offset_end]);
650                            target.insert(DirOrFile::Dir(real_path.clone()), entry.clone());
651                            next_levels.push((real_path.clone(), next_level_entries));
652                        }
653                    }
654                }
655            }
656
657            levels = next_levels;
658        }
659
660        Ok(target)
661    }
662
663    /// Returns all entries in a "tree" sorted structure, i.e.
664    /// sorted in the same way you'd see the files in a tree explorer
665    pub fn get_all_file_entries_recursivesorted(&'a self) -> RecursiveFsEntryDir<'a> {
666        let mut target = RecursiveFsEntryDir {
667            name: "/".to_string(),
668            contents: Vec::new(),
669        };
670        let dir_entries = Self::specialsort_dir(&self.header.top_level[..]);
671        append_entries_recursive(self.header.header_data, dir_entries, &mut target);
672        target
673    }
674
675    /// Returns all entries in a "tree" sorted structure, i.e.
676    /// sorted in the same way you'd see the files in a tree explorer
677    pub fn get_all_file_entries_directorysorted(&'a self) -> Vec<(DirOrFile, FsEntry<'a>)> {
678        let mut target = Vec::new();
679
680        Self::specialsort_append_to_target(
681            PathBuf::new(),
682            &self.header.top_level,
683            self.header.header_data,
684            &mut target,
685        );
686
687        target
688    }
689
    /// Recursively appends `(path, entry)` pairs under `parent_path` to
    /// `target`: sorted directories first (descending into each before
    /// moving on), then sorted files.
    fn specialsort_append_to_target(
        parent_path: PathBuf,
        entries: &[FsEntry<'a>],
        data: &'a [u8],
        target: &mut Vec<(DirOrFile, FsEntry<'a>)>,
    ) {
        // Directories of this level, lexically sorted.
        let dir_entries = entries
            .iter()
            .filter(|f| f.fs_type == FsEntryType::Dir)
            .cloned()
            .collect::<Vec<_>>();
        let dir_entries = Self::specialsort_dir(&dir_entries);
        for entry in dir_entries {
            target.push((
                DirOrFile::Dir(parent_path.join(entry.text.as_ref())),
                entry.clone(),
            ));
            let offset_start: usize = entry.offset_start.try_into().unwrap_or(u32::MAX as usize);
            let offset_end: usize = entry.offset_end.try_into().unwrap_or(u32::MAX as usize);
            let fs_entry_bytes = match get_byte_slice(data, offset_start, offset_end) {
                Some(s) => s,
                None => {
                    // NOTE(review): best-effort skip of corrupt offsets, but
                    // this prints to stdout from library code — consider a
                    // log/tracing call or surfacing an error instead.
                    println!("cannot get byte slice");
                    continue;
                }
            };
            let dir_entries = FsEntry::parse(fs_entry_bytes);
            // Recurse into this directory before handling its siblings.
            Self::specialsort_append_to_target(
                parent_path.join(entry.text.as_ref()),
                &dir_entries,
                data,
                target,
            );
        }

        // Files of this level, lexically sorted, appended after all
        // subdirectories.
        let file_entries = entries
            .iter()
            .filter(|f| f.fs_type == FsEntryType::File)
            .cloned()
            .collect::<Vec<_>>();
        let file_entries = Self::specialsort_dir(&file_entries);

        for entry in file_entries {
            target.push((
                DirOrFile::File(parent_path.join(entry.text.as_ref())),
                entry.clone(),
            ));
        }
    }
739
740    fn specialsort_dir(entries: &[FsEntry<'a>]) -> Vec<FsEntry<'a>> {
741        use lexical_sort::lexical_cmp;
742
743        let mut dirs = entries
744            .iter()
745            .filter(|e| e.fs_type == FsEntryType::Dir)
746            .cloned()
747            .collect::<Vec<_>>();
748        dirs.sort_by(|a, b| lexical_cmp(a.text.as_ref(), b.text.as_ref()));
749
750        let mut files = entries
751            .iter()
752            .filter(|e| e.fs_type == FsEntryType::File)
753            .cloned()
754            .collect::<Vec<_>>();
755        files.sort_by(|a, b| lexical_cmp(a.text.as_ref(), b.text.as_ref()));
756
757        dirs.append(&mut files);
758        dirs
759    }
760
761    /// Generic walk function that walks recursively over the files and
762    /// calls a callback function with `self.data` on every entry.
763    pub fn walk<'b>(&'b self) -> VolumeIterator<'a, 'b> {
764        let parent = PathBuf::new();
765        VolumeIterator {
766            volume: self,
767            entries: Self::specialsort_dir(&self.header.top_level)
768                .iter()
769                .map(|v| match v.fs_type {
770                    FsEntryType::File => DirOrFile::File(parent.join(v.text.as_ref())),
771                    FsEntryType::Dir => DirOrFile::Dir(parent.join(v.text.as_ref())),
772                })
773                .collect(),
774        }
775    }
776
777    /// Returns all the files in this volume, indexed by the full path
778    /// (in unix fashion, i.e. "/", "/a", "/b/file.txt")
779    pub fn get_all_files_and_directories_with_bytes(
780        &self,
781    ) -> Result<BTreeSet<DirOrFileWithBytes<'_>>, Error> {
782        self.get_all_file_and_dir_entries()?
783            .into_iter()
784            .map(|(path, entry)| {
785                if entry.fs_type == FsEntryType::File {
786                    let offset_start: usize = entry
787                        .offset_start
788                        .try_into()
789                        .map_err(|e| Error(format!("{e}: {path}")))?;
790                    let offset_end: usize = entry
791                        .offset_end
792                        .try_into()
793                        .map_err(|e| Error(format!("{e}: {path}")))?;
794                    let data = self.data.get(offset_start..offset_end).ok_or_else(|| {
795                        Error(format!(
796                            "could not get data {offset_start}..{offset_end}: {path}"
797                        ))
798                    })?;
799                    Ok(DirOrFileWithBytes::File {
800                        path: path.get_path_buf().clone(),
801                        bytes: data,
802                    })
803                } else {
804                    Ok(DirOrFileWithBytes::Dir {
805                        path: path.get_path_buf().clone(),
806                    })
807                }
808            })
809            .collect()
810    }
811
812    /// Returns the number of files in this volume
813    pub fn count_files(&self) -> u64 {
814        let mut cursor = 0;
815        let mut num_files = 0;
816        while cursor < self.header.header_data.len() {
817            let next_directory_level = FsEntry::parse(&self.header.header_data[cursor..]);
818            num_files += next_directory_level
819                .iter()
820                .filter(|f| f.fs_type == FsEntryType::File)
821                .count() as u64;
822            cursor += FsEntry::calculate_byte_length(&next_directory_level);
823        }
824        num_files
825    }
826
827    /// Returns the number of directories in this volume
828    pub fn count_directories(&self) -> u64 {
829        let mut cursor = 0;
830        let mut num_files = 0;
831        while cursor < self.header.header_data.len() {
832            let next_directory_level = FsEntry::parse(&self.header.header_data[cursor..]);
833            num_files += next_directory_level
834                .iter()
835                .filter(|f| f.fs_type == FsEntryType::Dir)
836                .count() as u64;
837            cursor += FsEntry::calculate_byte_length(&next_directory_level);
838        }
839        num_files
840    }
841
842    pub fn list_directories(&self) -> Vec<String> {
843        self.get_all_file_and_dir_entries()
844            .unwrap_or_default()
845            .iter()
846            .filter_map(|(path, _)| match path {
847                DirOrFile::Dir(d) => Some(format!("{}", d.display())),
848                DirOrFile::File(_) => None,
849            })
850            .collect()
851    }
852
853    /// Parses a filesystem volume from a buffer of bytes
854    pub fn parse(data: &'a [u8]) -> Result<Self, Error> {
855        let leb_size = get_leb_size(data).ok_or(Error(
856            "Error parsing volume: could not read header size LEB128".to_string(),
857        ))?;
858
859        if data.len() < leb_size {
860            return Err(Error(format!(
861                "Error parsing volume: expected at least {leb_size} bytes, got {}",
862                data.len()
863            )));
864        }
865
866        let header_len: usize = from_leb(data)
867            .ok_or(Error(format!(
868                "Could not read header length from data (first {leb_size} bytes)"
869            )))?
870            .try_into()
871            .unwrap_or(usize::MAX);
872
873        if data.len() < header_len + leb_size {
874            return Err(Error(format!(
875                "Error parsing volume: expected at least {} bytes, got only {}",
876                header_len + leb_size,
877                data.len()
878            )));
879        }
880
881        let (header, data) = data[leb_size..].split_at(header_len);
882
883        let header = VolumeHeader::from_slice(header);
884
885        Ok(Self { header, data })
886    }
887
    /// Returns file entries for `$path`
    ///
    /// Walks the serialized directory tree one path component at a time
    /// and returns the entries of the final directory.
    ///
    /// # Errors
    ///
    /// Returns an error if a component is missing, resolves to a file, or
    /// its directory level cannot be decoded from the header bytes.
    pub fn read_dir(&self, path: &str) -> Result<Vec<FsEntry<'a>>, Error> {
        // removes redundant ".", "..", etc
        let clean = path_clean::clean(path);

        // Keep only the normal components; root / prefix components are dropped.
        let mut components = Path::new(&clean)
            .components()
            .filter_map(|s| match s {
                Component::Normal(s) => s.to_str(),
                _ => None,
            })
            .collect::<Vec<_>>();

        // Reversed so `pop()` yields components in path order.
        components.reverse();

        let mut directory_to_search = self.header.top_level.clone();

        while let Some(searched_directory_name) = components.pop() {
            // NOTE(review): binary_search assumes each directory level is
            // sorted by entry name — confirm the serializer guarantees this.
            let found = match directory_to_search
                .binary_search_by(|probe| (*probe.text).cmp(searched_directory_name))
            {
                Ok(i) => directory_to_search[i].clone(),
                Err(_) => {
                    return Err(Error(format!(
                        "Could not find directory {clean:?}: could not find  directory {searched_directory_name:?} (os error 2)"
                    )));
                }
            };

            // For directories these offsets index into the header bytes of the
            // next serialized level (clamped on narrow platforms).
            let offset_start: usize = found.offset_start.try_into().unwrap_or(u32::MAX as usize);
            let offset_end: usize = found.offset_end.try_into().unwrap_or(u32::MAX as usize);

            match found.fs_type {
                FsEntryType::File => {
                    // A path component resolved to a file, not a directory.
                    return Err(Error(format!(
                        "Could not find directory {clean:?} (os error 2)"
                    )));
                }
                FsEntryType::Dir => {
                    if offset_start == offset_end {
                        // Empty directory: nothing further to decode.
                        directory_to_search = Vec::new();
                    } else {
                        let next_dir_level_to_decode = get_byte_slice(self.header.header_data, offset_start, offset_end)
                        .ok_or(Error(format!("Could not find directory {clean:?}: could not decode directory {searched_directory_name:?} at byte offset {offset_start}..{offset_end} (os error -2)")))?;

                        directory_to_search = FsEntry::parse(next_dir_level_to_decode);
                    }
                }
            }
        }

        Ok(directory_to_search)
    }
941
    /// Returns the file entry for `$path`. Note that this does not
    /// return the file contents directly, use `volume.get_file(path)` instead.
    ///
    /// # Errors
    ///
    /// Returns an error if the file is a directory.
    pub fn get_file_entry(&self, path: &str) -> Result<OwnedFsEntryFile, Error> {
        let clean = path_clean::clean(path); // removes redundant ".", "..", etc

        // Normal components only, reversed so `pop()` walks them in order.
        let mut components = Path::new(&clean)
            .components()
            .filter_map(|s| match s {
                Component::Normal(s) => s.to_str(),
                _ => None,
            })
            .collect::<Vec<_>>();

        components.reverse();

        let mut directory_to_search = self.header.top_level.clone();

        while let Some(searched_directory_name) = components.pop() {
            // NOTE(review): binary_search assumes each directory level is
            // sorted by entry name — confirm the serializer guarantees this.
            let found = match directory_to_search
                .binary_search_by(|probe| (*probe.text).cmp(searched_directory_name))
            {
                Ok(i) => directory_to_search[i].clone(),
                Err(_) => {
                    return Err(Error(format!(
                        "Could not find file {clean:?}: could not find file or directory {searched_directory_name:?} (os error 2)"
                    )));
                }
            };

            let offset_start: usize = found.offset_start.try_into().unwrap_or(u32::MAX as usize);
            let offset_end: usize = found.offset_end.try_into().unwrap_or(u32::MAX as usize);

            match found.fs_type {
                FsEntryType::File => {
                    // A file was hit before the last component, i.e. the path
                    // tries to descend "into" a file.
                    if !components.is_empty() {
                        return Err(Error(format!("Could not find file {clean:?} (os error 2)")));
                    }

                    // For files the offsets index into `volume.data`.
                    return Ok(OwnedFsEntryFile {
                        text: path.to_string(),
                        offset_start: offset_start as u64,
                        offset_end: offset_end as u64,
                    });
                }
                FsEntryType::Dir => {
                    if offset_start == offset_end {
                        // Empty directory level.
                        directory_to_search = Vec::new();
                    } else {
                        let next_dir_level_to_decode = get_byte_slice(self.header.header_data, offset_start, offset_end)
                        .ok_or(Error(format!("Could not find file {clean:?}: could not decode directory {searched_directory_name:?} at byte offset {offset_start}..{offset_end} (os error -2)")))?;

                        directory_to_search = FsEntry::parse(next_dir_level_to_decode);
                    }
                }
            }
        }

        // Path resolved to a directory (or had no components), not a file.
        Err(Error(format!("Could not find file {clean:?} (os error 2)")))
    }
1005
1006    /// Given an already-existing `OwnedFsEntryFile`, returns the byte slice for this
1007    /// file entry.
1008    ///
1009    /// # Errors
1010    ///
1011    /// The function returns an error if the file entry is out of bounds of the
1012    /// underlying data slice (should never happen)
1013    pub fn get_file_bytes(&self, entry: &OwnedFsEntryFile) -> Result<&'a [u8], Error> {
1014        static EMPTY_SLICE: &[u8] = &[];
1015
1016        let offset_start = entry.offset_start.try_into().unwrap_or(u32::MAX as usize);
1017        let offset_end = entry.offset_end.try_into().unwrap_or(u32::MAX as usize);
1018
1019        // empty file
1020        if offset_start == offset_end {
1021            return Ok(EMPTY_SLICE);
1022        }
1023
1024        get_byte_slice(self.data, offset_start, offset_end).ok_or(Error(format!(
1025            "Could not file file {:?} - filesystem corrupt at {}..{} (os error -1)",
1026            entry.text, entry.offset_start, entry.offset_end
1027        )))
1028    }
1029
1030    /// Returns the file contents (shorthand for
1031    /// `volume.get_file_bytes(volume.get_file_entry(path))`)
1032    pub fn get_file(&'a self, path: &str) -> Result<&'a [u8], Error> {
1033        let owned_file_entry = self.get_file_entry(path)?;
1034        self.get_file_bytes(&owned_file_entry)
1035    }
1036
1037    /// Serializes the volume into writable bytes (including
1038    /// the header and header length)
1039    pub fn into_bytes(&self) -> Vec<u8> {
1040        // TODO(felix): avoid extra allocation?
1041        let mut out = Vec::new();
1042        out.extend_from_slice(&to_leb(self.header.header_data.len() as u64));
1043        out.extend_from_slice(self.header.header_data);
1044        out.extend_from_slice(self.data);
1045        out
1046    }
1047}
1048
/// One serialized directory-level record as written by `write_to`.
#[derive(Debug, Clone, PartialEq, Eq)]
struct HeaderEntry {
    /// Whether the entry is a file or a directory.
    flags: Flags,
    /// Start offset of the entry's payload — presumably the same semantics
    /// as `FsEntry::offset_start` (TODO confirm against the serializer).
    offset_start: u64,
    /// End offset of the entry's payload.
    offset_end: u64,
    /// Directory or file name.
    text: String,
}
1056
1057impl HeaderEntry {
1058    fn write_to(&self, buffer: &mut Vec<u8>) {
1059        // Note: The reference implementation diverges from the spec
1060        // here - the flag should actually go first.
1061        buffer.extend(self.text_length());
1062        buffer.extend(self.flags.as_bytes());
1063
1064        buffer.extend(self.offset_start.to_le_bytes());
1065        buffer.extend(self.offset_end.to_le_bytes());
1066        buffer.extend(self.text.as_bytes());
1067    }
1068
1069    fn text_length(&self) -> [u8; 7] {
1070        text_length(&self.text)
1071    }
1072}
1073
/// Encodes `text.len()` as the low 7 bytes of its little-endian u64
/// representation.
///
/// # Panics
///
/// Panics if the length does not fit into 7 bytes (>= 2^56).
fn text_length(text: &str) -> [u8; 7] {
    let length = u64::try_from(text.len()).unwrap();
    let [head @ .., last] = length.to_le_bytes();
    assert_eq!(
        last,
        0,
        "Text length of {} is out of bounds (max = 2^56 = 72,057,594,037,927,936) for text {:?}",
        text.len(),
        // The previous `&text[..250]` could itself panic if byte 250 was not
        // a UTF-8 char boundary; take whole characters instead. Only
        // evaluated when the assertion fails.
        text.chars().take(250).collect::<String>(),
    );
    head
}
1086
/// On-disk flag byte distinguishing directory entries from file entries.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub(crate) enum Flags {
    Dir = 0b00,
    File = 0b01,
}

impl Flags {
    /// Serialized single-byte form of the flag.
    pub(crate) fn as_bytes(self) -> [u8; 1] {
        match self {
            Flags::Dir => [0b00],
            Flags::File => [0b01],
        }
    }
}
1098
/// Iterator over the paths in the file, yields PathBufs
/// until all files in the volume have been listed.
#[derive(Debug)]
pub struct VolumeIterator<'b, 'a: 'b> {
    /// Volume whose directory tree is being traversed.
    pub volume: &'b Volume<'a>,
    /// Stack of entries still to visit (depth-first; see `next`).
    pub entries: Vec<DirOrFile>,
}
1106
impl<'a, 'b> Iterator for VolumeIterator<'a, 'b> {
    type Item = DirOrFile;

    fn next(&mut self) -> Option<Self::Item> {
        // Pop the next entry from the stack (depth-first traversal).
        let next = self.entries.pop();

        // When a directory is popped, push its children so they are visited
        // on subsequent calls; a failed read_dir is treated as an empty
        // directory rather than an error.
        if let Some(DirOrFile::Dir(d)) = next.as_ref() {
            self.entries.extend(
                Volume::specialsort_dir(
                    &self
                        .volume
                        .read_dir(&format!("/{}", d.display()))
                        .unwrap_or_default(),
                )
                .iter()
                .map(|v| match v.fs_type {
                    FsEntryType::File => DirOrFile::File(d.join(v.text.as_ref())),
                    FsEntryType::Dir => DirOrFile::Dir(d.join(v.text.as_ref())),
                }),
            );
        }

        next
    }
}
1132
/// A directory path, or a file path together with a borrowed view of its
/// contents.
#[derive(Debug, Clone, Hash, PartialEq, PartialOrd, Ord, Eq)]
pub enum DirOrFileWithBytes<'a> {
    Dir { path: PathBuf },
    File { path: PathBuf, bytes: &'a [u8] },
}

impl<'a> DirOrFileWithBytes<'a> {
    /// Path of the entry, regardless of its kind.
    pub fn get_path(&self) -> &PathBuf {
        match self {
            DirOrFileWithBytes::Dir { path } | DirOrFileWithBytes::File { path, .. } => path,
        }
    }

    /// File contents, or `None` for directories.
    pub fn get_bytes(&self) -> Option<&'a [u8]> {
        if let DirOrFileWithBytes::File { bytes, .. } = self {
            Some(bytes)
        } else {
            None
        }
    }
}
1154
/// Recursively decodes every directory level reachable from `entries`
/// (directory offsets index into `header`) and appends the resulting tree
/// to `parent.contents`. Levels that cannot be sliced out of `header` are
/// silently skipped.
fn append_entries_recursive<'b>(
    header: &'b [u8],
    entries: Vec<FsEntry<'b>>,
    parent: &mut RecursiveFsEntryDir<'b>,
) {
    for entry in entries.iter() {
        match entry.fs_type {
            FsEntryType::Dir => {
                let mut subdir = RecursiveFsEntryDir {
                    name: entry.text.as_ref().to_string(),
                    contents: Vec::new(),
                };
                // Directory offsets point at the serialized child level
                // inside `header` (clamped on narrow platforms).
                let offset_start: usize =
                    entry.offset_start.try_into().unwrap_or(u32::MAX as usize);
                let offset_end: usize = entry.offset_end.try_into().unwrap_or(u32::MAX as usize);
                let fs_entry_bytes = match get_byte_slice(header, offset_start, offset_end) {
                    Some(s) => s,
                    // Corrupt / out-of-range level: skip this subtree.
                    None => continue,
                };
                let new_entries = Volume::specialsort_dir(FsEntry::parse(fs_entry_bytes).as_ref());
                append_entries_recursive(header, new_entries, &mut subdir);
                parent.contents.push(RecursiveFsEntry::Dir { dir: subdir });
            }
            FsEntryType::File => {
                parent.contents.push(RecursiveFsEntry::File {
                    file: entry.clone(),
                });
            }
        }
    }
}
1186
/// Since `env::temp_dir()` panics on wasm32-wasi, this
/// function provides a non-panicking replacement
pub fn webc_temp_dir() -> PathBuf {
    #[cfg(not(target_arch = "wasm32"))]
    {
        std::env::temp_dir()
    }
    #[cfg(target_arch = "wasm32")]
    {
        // Random suffix so concurrent callers get distinct directories.
        let random = rand::random::<u64>();

        // NOTE(review): `current_exe()` returns the executable's own path
        // (including the binary's file name), so this joins "temp-{random}"
        // onto the binary itself rather than its parent directory — confirm
        // this is intended.
        let dir = std::env::current_exe()
            .unwrap_or(Path::new("").to_path_buf())
            .join(&format!("temp-{random}"));

        // NOTE(review): this will panic (despite the "non-panicking" doc)
        // if the directory cannot be created.
        std::fs::create_dir_all(&dir).unwrap();

        dir
    }
}
1207
1208fn to_leb(num: u64) -> Vec<u8> {
1209    let mut buf = Vec::new();
1210    match leb128::write::unsigned(&mut buf, num) {
1211        Ok(_) => buf,
1212        Err(_) => Vec::new(),
1213    }
1214}
1215
// /a/b/c => "/a/b"; the empty string when the path has no parent
fn get_parent<P: AsRef<Path>>(path: P) -> String {
    path.as_ref()
        .parent()
        .map(|p| p.display().to_string())
        .unwrap_or_default()
}
1222
1223// Returns how many bytes the LEB128 would take up if it was read
1224fn get_leb_size(bytes: &[u8]) -> Option<usize> {
1225    use std::io::Cursor;
1226    let mut cursor = Cursor::new(bytes);
1227    let initial_pos = cursor.position(); // usually 0
1228    let _ = leb128::read::unsigned(&mut cursor).ok()?;
1229    Some((cursor.position() - initial_pos).min(u32::MAX as u64) as usize)
1230}
1231
1232fn from_leb(mut bytes: &[u8]) -> Option<u64> {
1233    leb128::read::unsigned(&mut bytes).ok()
1234}
1235
// /a/b/c => "c"
// /a/b/c/file.txt => "file.txt"
fn get_last_component(path: &Path) -> Option<&str> {
    let last = path.components().next_back()?;
    if let Component::Normal(name) = last {
        name.to_str()
    } else {
        None
    }
}
1244
/// Whether the file is a directory or a file
#[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
pub enum FsEntryType {
    /// File is a file
    File,
    /// File is a directory entry
    Dir,
}

impl FsEntryType {
    /// 8-Bit ID of the file entry type
    pub fn get_id(&self) -> u8 {
        // Dir serializes as 0, File as 1 (mirrors `Flags`).
        if matches!(self, FsEntryType::Dir) { 0 } else { 1 }
    }

    /// Reverse function of `self.get_id()`
    pub fn from_id(id: u8) -> Option<Self> {
        if id == 0 {
            Some(FsEntryType::Dir)
        } else if id == 1 {
            Some(FsEntryType::File)
        } else {
            None
        }
    }
}
1272
/// A decoded directory together with all of its (recursively decoded) children.
#[derive(Debug, PartialEq)]
pub struct RecursiveFsEntryDir<'a> {
    /// Directory name (single component, not a full path).
    pub name: String,
    /// Child files and subdirectories.
    pub contents: Vec<RecursiveFsEntry<'a>>,
}
1278
/// One node of a recursively decoded directory tree: a leaf file entry or a
/// nested directory.
#[derive(Debug, PartialEq)]
pub enum RecursiveFsEntry<'a> {
    File { file: FsEntry<'a> },
    Dir { dir: RecursiveFsEntryDir<'a> },
}
1284
1285/// Same as `FsEntry` but with an owned `text: String`,
1286/// instead of a `&str`
1287#[derive(Debug, Clone, PartialEq)]
1288pub enum OwnedFsEntry {
1289    /// File entry
1290    File(OwnedFsEntryFile),
1291    /// Directory entry
1292    Dir(OwnedFsEntryDir),
1293}
1294
1295impl OwnedFsEntry {
1296    /// Returns the text component of the path, i.e. `"file.txt"` for `/a/b/file.txt`
1297    pub fn get_name(&self) -> &str {
1298        match self {
1299            OwnedFsEntry::File(f) => f.text.as_str(),
1300            OwnedFsEntry::Dir(d) => d.text.as_str(),
1301        }
1302    }
1303}
1304
/// Owned version of the `FsEntry` with `fs_type = FsEntryType::File`
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct OwnedFsEntryFile {
    /// Same as `FsEntry::text`, but owned as a `String`
    pub text: String,
    /// Starting offset in bytes into the `volume.data` field
    pub offset_start: u64,
    /// Ending offset in bytes into the `volume.data` field
    pub offset_end: u64,
}

impl OwnedFsEntryFile {
    /// File length in bytes; 0 when the offsets are inverted.
    pub fn get_len(&self) -> u64 {
        match self.offset_end.checked_sub(self.offset_start) {
            Some(len) => len,
            None => 0,
        }
    }
}
1321
/// Owned version of the `FsEntry` with `fs_type = FsEntryType::Dir`
#[derive(Debug, Clone, PartialEq)]
pub struct OwnedFsEntryDir {
    /// Same as `FsEntry::text`, but owned as a `String`
    pub text: String,
    /// Entries of the directory (files and subdirectories).
    pub files: Vec<OwnedFsEntry>,
}
1330
/// Directory or file entry, parsed without any allocation
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FsEntry<'a> {
    /// If the `FsEntryType == Dir`, then `offset_start..offset_end` points
    /// to the start / end bytes of the next directory level, relative to the
    /// file header
    ///
    /// If the `FsEntryType = File`, then `offset_start..offset_end` points
    /// to the actual file contents in the `volume.data` field
    ///
    /// Inside of a directory level, all files are grouped by the name
    /// of the parent directory, at parsing time only the top-level
    /// directories are parsed
    pub fs_type: FsEntryType,
    /// Directory / file name, for example `usr`, `lib` or `var` in `"/usr/lib/var"`
    /// (borrowed from the header bytes when possible, hence the `Cow`)
    pub text: Cow<'a, str>,
    /// See documentation for `fs_type`
    pub offset_start: u64,
    /// See documentation for `fs_type`
    pub offset_end: u64,
}
1352
1353impl<'a> FsEntry<'a> {
1354    /// Returns the length of the file in bytes (0 for directories)
1355    pub fn get_len(&self) -> u64 {
1356        self.offset_end.saturating_sub(self.offset_start)
1357    }
1358
1359    pub fn calculate_byte_length(entries: &[Self]) -> usize {
1360        (entries.len() * 24) + entries.iter().map(|e| e.text.len()).sum::<usize>() + 8
1361    }
1362
1363    /// Serializes a list of `FsEntry` into bytes (usually
1364    /// done to encode one directory level)
1365    ///
1366    /// # Binary format
1367    ///
1368    /// ```no_run,ignore
1369    /// [8 bytes]: size of the directory level itself
1370    ///
1371    /// [
1372    ///   [1 byte]:  file entry type (0 = Directory, 1 = File, .. ?)
1373    ///   [7 bytes]: text length N (only 7 bytes long instead of 8, maximum file
1374    ///              name length = 268435456 instead of 4294967296 bytes)
1375    ///   [8 bytes]: offset_start
1376    ///   [8 bytes]: offset_end
1377    ///   [n bytes]: text (directory / file name)
1378    /// ]
1379    /// ```
1380    pub fn into_bytes(entries: &[Self]) -> Option<Vec<u8>> {
1381        let mut out = Vec::new();
1382
1383        for entry in entries {
1384            let self_text_bytes = entry.text.as_bytes();
1385
1386            // insanely long file name
1387            if self_text_bytes.len() > 268435456 {
1388                return None;
1389            }
1390
1391            let mut text_len_bytes = (self_text_bytes.len() as u64).to_le_bytes();
1392            text_len_bytes[7] = entry.fs_type.get_id(); // 0th byte = least important byte
1393            out.extend_from_slice(&text_len_bytes);
1394            out.extend_from_slice(&entry.offset_start.to_le_bytes());
1395            out.extend_from_slice(&entry.offset_end.to_le_bytes());
1396            out.extend_from_slice(self_text_bytes);
1397        }
1398
1399        let mut final_out = Vec::new();
1400        let len = out.len() as u64;
1401        let bytes_len = len.to_le_bytes();
1402        final_out.extend_from_slice(&bytes_len);
1403        final_out.append(&mut out);
1404
1405        Some(final_out)
1406    }
1407
1408    /// Reverse function of `Self::into_bytes`, parses one directory level
1409    /// from a set of bytes. One additional feature is that not more than `n`
1410    /// bytes are parsed if `n` is the size of the serialized directory level,
1411    /// even if the input buffer is larger than `n`.
1412    ///
1413    /// If the directory level could not be parsed, the parsing is interrupted
1414    /// and the given file entries are returns as-is (no check for completeness)
1415    pub fn parse(data: &'a [u8]) -> Vec<Self> {
1416        let mut entries = Vec::new();
1417
1418        if data.is_empty() || data.len() < 8 {
1419            return entries;
1420        }
1421
1422        // first 8 bytes = data len
1423        let directory_len_bytes = [
1424            data[0], data[1], data[2], data[3], data[4], data[5], data[6], data[7],
1425        ];
1426
1427        let directory_len = u64::from_le_bytes(directory_len_bytes);
1428        let directory_len: usize = directory_len.try_into().unwrap_or(u32::MAX as usize);
1429
1430        if data.len() < directory_len + 8 {
1431            return entries; // technically an error
1432        }
1433
1434        let data = &data[8..directory_len + 8];
1435
1436        let mut cursor = 0;
1437        while cursor < data.len() {
1438            let fs_type = data[cursor + 7]; // 0 = File, 1 = Directory
1439            if (cursor + 24) > data.len() {
1440                break;
1441            }
1442
1443            let text_size = [
1444                data[cursor],
1445                data[cursor + 1],
1446                data[cursor + 2],
1447                data[cursor + 3],
1448                data[cursor + 4],
1449                data[cursor + 5],
1450                data[cursor + 6],
1451                0,
1452            ];
1453            let text_size = u64::from_le_bytes(text_size);
1454
1455            let text_size: usize = text_size.try_into().unwrap_or(u32::MAX as usize);
1456
1457            let offset_start = [
1458                data[cursor + 8],
1459                data[cursor + 9],
1460                data[cursor + 10],
1461                data[cursor + 11],
1462                data[cursor + 12],
1463                data[cursor + 13],
1464                data[cursor + 14],
1465                data[cursor + 15],
1466            ];
1467            let offset_start = u64::from_le_bytes(offset_start);
1468
1469            let offset_end = [
1470                data[cursor + 16],
1471                data[cursor + 17],
1472                data[cursor + 18],
1473                data[cursor + 19],
1474                data[cursor + 20],
1475                data[cursor + 21],
1476                data[cursor + 22],
1477                data[cursor + 23],
1478            ];
1479            let offset_end = u64::from_le_bytes(offset_end);
1480
1481            if (cursor + 24 + text_size) > data.len() {
1482                break; // directory corrupt?
1483            }
1484
1485            let text_result = std::str::from_utf8(&data[cursor + 24..(cursor + 24 + text_size)]);
1486
1487            cursor += 24 + text_size;
1488
1489            let text = match text_result {
1490                Ok(o) => o,
1491                Err(_) => {
1492                    continue;
1493                }
1494            };
1495
1496            let fs_type = match FsEntryType::from_id(fs_type) {
1497                Some(s) => s,
1498                None => {
1499                    continue;
1500                }
1501            };
1502
1503            entries.push(FsEntry {
1504                fs_type,
1505                offset_start,
1506                offset_end,
1507                text: Cow::Borrowed(text),
1508            });
1509        }
1510
1511        entries
1512    }
1513}
1514
/// Header of a filesystem volume, describing a serialized
/// list of directories and file paths
#[derive(Default, Clone, PartialEq, Eq)]
pub struct VolumeHeader<'a> {
    /// Top-level files / directories already parsed
    pub top_level: Vec<FsEntry<'a>>,
    /// Unserialized header data as raw bytes; directory entry offsets
    /// (see `FsEntry::fs_type`) index into this slice
    pub header_data: &'a [u8],
}
1524
1525impl<'a> fmt::Debug for VolumeHeader<'a> {
1526    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1527        self.top_level.fmt(f)?;
1528        write!(
1529            f,
1530            "\r\nheader_data: [ ... ({} bytes) ],",
1531            self.header_data.len()
1532        )
1533    }
1534}
1535
1536impl<'a> VolumeHeader<'a> {
1537    /// Parses the top-level directory entries from a slice of bytes,
1538    /// see `FsEntry::into_bytes` for information about the binary format
1539    pub fn from_slice(data: &'a [u8]) -> Self {
1540        Self {
1541            top_level: FsEntry::parse(data),
1542            header_data: data,
1543        }
1544    }
1545
1546    /// Same as `&self.header_data`, API for consistency
1547    pub fn into_vec(&self) -> &'a [u8] {
1548        self.header_data
1549    }
1550}
1551
/// Whether to sign the bytes when deserializing
/// the WebC file to bytes
#[derive(Debug, Clone, PartialEq)]
#[allow(clippy::large_enum_variant)]
pub enum GenerateChecksum {
    /// Signature bytes get zeroed
    NoChecksum,
    /// Sha256 checksum of the file is calculated and padded
    /// with zeroes, but no signature is generated
    Sha256,
    /// Sha256 checksum is generated and the checksum
    /// is signed with the given key (cert must be able to
    /// sign at least 256 bytes)
    #[cfg(feature = "crypto")]
    SignedSha256 { key: Cert },
}

impl GenerateChecksum {
    /// Returns the ID for the Checksum type:
    ///
    /// - no checksum: `----------------`
    /// - sha256 checksum: `sha256----------`
    /// - sha256 checksum, signed with key: `sha256-signed---`
    ///
    pub fn get_key(&self) -> Vec<u8> {
        let id: &[u8; 16] = match self {
            GenerateChecksum::NoChecksum => b"----------------",
            GenerateChecksum::Sha256 => b"sha256----------",
            #[cfg(feature = "crypto")]
            GenerateChecksum::SignedSha256 { .. } => b"sha256-signed---",
        };
        id.to_vec()
    }
}

impl Default for GenerateChecksum {
    /// Files are not checksummed unless explicitly requested.
    fn default() -> Self {
        GenerateChecksum::NoChecksum
    }
}
1591
/// Options on what to parse from the file
#[derive(Debug, Clone)]
pub struct ParseOptions {
    /// If set, will verify the file against the given public key
    /// and error out if the key does not match
    #[cfg(feature = "crypto")]
    pub key: Option<Cert>,
    /// If the manifest should be parsed (will be skipped over otherwise)
    pub parse_manifest: bool,
    /// If the filesystem should be parsed (will be empty otherwise)
    pub parse_volumes: bool,
    /// If the atoms should be parsed
    pub parse_atoms: bool,
}

impl Default for ParseOptions {
    /// Everything is parsed by default; no verification key is set.
    fn default() -> Self {
        ParseOptions {
            #[cfg(feature = "crypto")]
            key: None,
            parse_manifest: true,
            parse_volumes: true,
            parse_atoms: true,
        }
    }
}
1618
/// Checked slicing helper: returns `input[start..end]` only when the range
/// is valid, `None` otherwise. An empty range (`start == end`) is accepted
/// only when `start` is itself a real index into `input`.
fn get_byte_slice(input: &[u8], start: usize, end: usize) -> Option<&[u8]> {
    let valid = if start == end {
        start < input.len()
    } else {
        start < end && end <= input.len()
    };
    if valid { Some(&input[start..end]) } else { None }
}
1629
/// Minimal `name`/`version` pair used internally when (de)serializing
/// package metadata.
#[derive(Debug, Serialize, Deserialize)]
struct InternalPackageMeta {
    /// Package name
    name: String,
    /// Package version string
    version: String,
}
1635
/// Needed to easily deserialize an `WasiCommandAnnotation`
/// from the free-form `command.annotations`
#[derive(Default, Debug, Clone, Serialize, Deserialize)]
struct WasiCommandAnnotationsDeserializer {
    /// The `"wasi"` key of the annotation map, if present.
    #[serde(default)]
    wasi: Option<crate::metadata::annotations::Wasi>,
}
1643
1644fn get_wasi_command_annotation(
1645    val: &IndexMap<String, ciborium::value::Value>,
1646) -> Option<crate::metadata::annotations::Wasi> {
1647    let desc: WasiCommandAnnotationsDeserializer = ciborium::Value::serialized(val)
1648        .unwrap()
1649        .deserialized()
1650        .unwrap();
1651
1652    desc.wasi
1653}
1654
/// Needed to easily deserialize an `EmscriptenCommandAnnotation`
/// from the free-form `command.annotations`
#[derive(Default, Debug, Clone, Serialize, Deserialize)]
struct EmscriptenCommandAnnotationsDeserializer {
    /// The `"emscripten"` key of the annotation map, if present.
    #[serde(default)]
    emscripten: Option<Emscripten>,
}
1662
1663fn get_emscripten_command_annotation(
1664    val: &IndexMap<String, ciborium::Value>,
1665) -> Option<Emscripten> {
1666    let desc: EmscriptenCommandAnnotationsDeserializer = ciborium::Value::serialized(val)
1667        .unwrap()
1668        .deserialized()
1669        .unwrap();
1670    desc.emscripten
1671}
1672
1673impl<'a> WebC<'a> {
1674    pub fn get_main_args_for_command(&self, command: &str) -> Result<Vec<String>, String> {
1675        let command = self
1676            .manifest
1677            .commands
1678            .get(command)
1679            .ok_or(format!("Command {command:?} not found in manifest"))?;
1680
1681        let atom_description =
1682            get_emscripten_command_annotation(&command.annotations).ok_or(format!(
1683                "no \"atom\" or \"wasi.atom\" or \"emscripten.atom\" found in command {command:#?}"
1684            ))?;
1685
1686        let main_args = atom_description.main_args.as_ref().ok_or(format!(
1687            "command {command:?} has no atom to start the command with"
1688        ))?;
1689
1690        Ok(main_args.clone())
1691    }
1692
    /// Resolves the atom name that `command` starts with, for the given
    /// runtime `api` (`"emscripten"` or `"wasi"`; any other value errors
    /// with an empty message).
    #[allow(deprecated)]
    pub fn get_atom_name_for_command(&self, api: &str, command: &str) -> Result<String, String> {
        let command = self
            .manifest
            .commands
            .get(command)
            .ok_or(format!("Command {command:?} not found in manifest"))?;

        // Dispatch on the runtime API; each branch reads the atom name from
        // the corresponding annotation namespace.
        match api {
            "emscripten" => {
                let atom_description = get_emscripten_command_annotation(&command.annotations).ok_or(format!(
                    "no \"atom\" or \"wasi.atom\" or \"emscripten.atom\" found in command {command:#?}"
                ))?;

                let atom_name = atom_description.atom.as_ref().ok_or(format!(
                    "command {command:?} has no atom to start the command with"
                ))?;

                Ok(atom_name.to_string())
            }
            "wasi" => {
                let wasi = get_wasi_command_annotation(&command.annotations).ok_or(format!(
                    "no \"atom\" or \"wasi.atom\" or \"emscripten.atom\" found in command {command:#?}"
                ))?;

                Ok(wasi.atom)
            }
            // Unknown API: signalled with an empty error message.
            _ => Err(String::new()),
        }
    }
1723
1724    /// Checks whether the file starts with the header MAGIC
1725    pub fn check_magic_header(data: &[u8]) -> Result<(), Error> {
1726        let magic = get_byte_slice(data, 0, MAGIC.len()).ok_or(Error(
1727            "Invalid WebC file (can't get magic header)".to_string(),
1728        ))?;
1729
1730        if magic != MAGIC {
1731            return Err(Error("Invalid Magic number".into()));
1732        }
1733
1734        Ok(())
1735    }
1736
1737    /// Determines the available volumes for a given package
1738    pub fn get_volumes_for_package(&self, package: &str) -> Vec<String> {
1739        if self.manifest.use_map.is_empty() {
1740            self.volumes.keys().cloned().collect()
1741        } else if package == self.get_package_name() {
1742            self.volumes
1743                .keys()
1744                .filter(|s| s.starts_with("self"))
1745                .cloned()
1746                .collect()
1747        } else {
1748            // TODO: inaccurate!
1749            self.volumes
1750                .keys()
1751                .filter(|s| s.contains(package))
1752                .cloned()
1753                .collect()
1754        }
1755    }
1756
1757    pub fn list_directories(&self, volume: &str) -> Vec<String> {
1758        self.volumes
1759            .get(volume)
1760            .map(|v| v.list_directories())
1761            .unwrap_or_default()
1762    }
1763
1764    /// Returns the directory entries or an error if the directory does not exist
1765    pub fn read_dir(&self, package: &str, path: &str) -> Result<Vec<FsEntry<'a>>, Error> {
1766        for volume in self.get_volumes_for_package(package) {
1767            let v = match self.volumes.get(&volume) {
1768                Some(s) => s,
1769                None => {
1770                    continue;
1771                }
1772            };
1773
1774            match v.read_dir(path) {
1775                Ok(s) => {
1776                    return Ok(s);
1777                }
1778                Err(_) => {
1779                    continue;
1780                }
1781            }
1782        }
1783
1784        Err(Error(format!(
1785            "\"{package}://{path}\" does not exist (os error 2)"
1786        )))
1787    }
1788
1789    /// Looks for the first volume containing "entry", scoped to the given package
1790    pub fn get_file_entry(&self, package: &str, path: &str) -> Option<(String, OwnedFsEntryFile)> {
1791        let mut available_volumes = self.get_volumes_for_package(package);
1792        let mut path = path.to_string();
1793        let mut volume_selected = None;
1794
1795        for v in available_volumes.iter() {
1796            let v_scheme = format!("{v}://");
1797            if path.starts_with(&v_scheme) {
1798                volume_selected = Some(v.clone());
1799                path = path.replacen(&v_scheme, "", 1);
1800                break;
1801            }
1802        }
1803
1804        if let Some(v) = volume_selected.as_ref() {
1805            available_volumes = vec![v.clone()];
1806        }
1807
1808        for volume in available_volumes {
1809            match self
1810                .volumes
1811                .get(&volume)
1812                .and_then(|v| v.get_file_entry(&path).ok())
1813            {
1814                Some(s) => return Some((volume.clone(), s)),
1815                None => continue,
1816            };
1817        }
1818        None
1819    }
1820
1821    /// Checks whether the version of the file is supported by the parsing implementation
1822    pub fn get_check_version(data: &[u8]) -> Result<u64, Error> {
1823        let version = get_byte_slice(data, MAGIC.len(), MAGIC.len() + Version::V1.len()).ok_or(
1824            Error("Invalid WebC version (can't get version)".to_string()),
1825        )?;
1826
1827        if version != Version::V1 {
1828            return Err(Error("Version not supported".into()));
1829        }
1830
1831        let version = std::str::from_utf8(version)
1832            .map_err(|e| Error(format!("Invalid version: {e}")))?
1833            .parse::<u64>()
1834            .map_err(|e| Error(format!("Invalid version: {e}")))?;
1835
1836        Ok(version)
1837    }
1838
1839    /// Returns the bytes of the checksum
1840    pub fn get_checksum_bytes(data: &[u8]) -> Result<&[u8], Error> {
1841        get_byte_slice(
1842            data,
1843            MAGIC.len() + Version::V1.len() + 16,
1844            MAGIC.len() + Version::V1.len() + 16 + 256,
1845        )
1846        .ok_or(Error(
1847            "Invalid WebC checksum (can't get checksum)".to_string(),
1848        ))
1849    }
1850
1851    /// Returns the offset of the manifest start
1852    pub fn get_manifest_offset_size(data: &[u8]) -> ReadResult<(usize, usize)> {
1853        let (signature_offset, _) = Self::get_signature_offset_size(data)?;
1854        let manifest_start = signature_offset + 1024;
1855
1856        if data.get(manifest_start).is_none() {
1857            return Err(Error(format!(
1858                "Could not get manifest: data.len() < {manifest_start}"
1859            )));
1860        }
1861
1862        let manifest_size_len = get_leb_size(&data[manifest_start..]).ok_or(Error(format!(
1863            "could not read LEB128 for manifest length at offset {manifest_start}"
1864        )))?;
1865
1866        // actually parse the bytes
1867        let manifest_len = from_leb(&data[manifest_start..]).ok_or(Error(format!(
1868            "could not read LEB128 for manifest length at offset {manifest_start}"
1869        )))?;
1870
1871        Ok((
1872            manifest_start + manifest_size_len,
1873            manifest_len.try_into().unwrap_or(u32::MAX as usize),
1874        ))
1875    }
1876
1877    pub fn get_manifest(data: &[u8]) -> Result<Manifest, Error> {
1878        let (manifest_len_start, manifest_size) = Self::get_manifest_offset_size(data)?;
1879
1880        let manifest = get_byte_slice(data, manifest_len_start, manifest_len_start + manifest_size)
1881            .ok_or(Error(
1882                "Invalid WebC manifest (can't get manifest bytes)".to_string(),
1883            ))?;
1884
1885        ciborium::from_reader(manifest).map_err(|e| Error(format!("Failed to parse manifest: {e}")))
1886    }
1887
1888    /// Returns the offset of the `.atoms` section of the file
1889    pub fn get_atoms_volume_offset_size(data: &[u8]) -> ReadResult<(usize, usize)> {
1890        let (manifest_offset, manifest_size) = Self::get_manifest_offset_size(data)?;
1891
1892        let atom_start = manifest_offset + manifest_size;
1893        if data.get(atom_start).is_none() {
1894            return Err(Error(format!(
1895                "Could not get atom: data.len() < {atom_start}"
1896            )));
1897        }
1898
1899        let atom_size_len = get_leb_size(&data[atom_start..]).ok_or(Error(format!(
1900            "could not read LEB128 for atom length at offset {atom_start}"
1901        )))?;
1902
1903        let atom_len = from_leb(&data[atom_start..]).ok_or(Error(format!(
1904            "could not read LEB128 for atom length at offset {atom_start}"
1905        )))?;
1906
1907        Ok((
1908            atom_start + atom_size_len,
1909            atom_len.try_into().unwrap_or(u32::MAX as usize),
1910        ))
1911    }
1912
1913    /// Parses the `.atoms` section of the file
1914    pub fn get_atoms_volume(data: &'a [u8]) -> Result<Volume<'a>, Error> {
1915        let (atoms_volume_start, atoms_volume_size) = Self::get_atoms_volume_offset_size(data)?;
1916
1917        let atoms_volume = get_byte_slice(
1918            data,
1919            atoms_volume_start,
1920            atoms_volume_start + atoms_volume_size,
1921        )
1922        .ok_or(Error(
1923            "Invalid WebC atoms (can't get atoms volume bytes)".to_string(),
1924        ))?;
1925
1926        Volume::parse(atoms_volume).map_err(|e| Error(format!("Failed to parse atoms: {e}")))
1927    }
1928
1929    /// Returns the offsets of the "volume"
1930    pub fn get_volume_data_offsets(data: &[u8]) -> Result<BTreeMap<String, (usize, usize)>, Error> {
1931        let mut results = BTreeMap::new();
1932        let (atoms_volume_start, atoms_volume_size) = Self::get_atoms_volume_offset_size(data)?;
1933        let mut cursor = atoms_volume_start + atoms_volume_size;
1934        let mut volume_id = 0;
1935
1936        while get_byte_slice(data, cursor, data.len()).is_some() {
1937            let volume_name_len_len = get_leb_size(&data[cursor..]).ok_or(Error(format!(
1938                "Could not parse volume size length for volume {volume_id}"
1939            )))?;
1940
1941            let volume_name_bytes_len = from_leb(&data[cursor..]).ok_or(Error(format!(
1942                "Could not parse volume size for volume {volume_id}"
1943            )))?;
1944
1945            let volume_name_bytes_len: usize = volume_name_bytes_len
1946                .try_into()
1947                .unwrap_or(u32::MAX as usize);
1948
1949            let start = cursor + volume_name_len_len;
1950            let end = start + volume_name_bytes_len;
1951            let volume_name_bytes = get_byte_slice(data, start, end)
1952                .ok_or(Error(format!("Failed to parse name of volume {volume_id:?}: Expected {volume_name_bytes_len} bytes at offset {start}..{end}")))?;
1953
1954            let volume_name = std::str::from_utf8(volume_name_bytes)
1955            .map_err(|e| Error(format!("Failed to parse name of volume {volume_id:?} at offset {start}..{end}: {e}: {volume_name_bytes:?}")))?;
1956
1957            let volume_size_start = end;
1958            let _ = get_byte_slice(data, volume_size_start, data.len())
1959            .ok_or(Error(format!("Failed to parse size of volume {volume_name:?}: Expected LEB128 at offset {volume_size_start}")))?;
1960
1961            let volume_size_len = get_leb_size(&data[volume_size_start..])
1962            .ok_or(Error(format!("Failed to parse size of volume {volume_name:?}: Expected LEB128 at offset {volume_size_start}")))?;
1963            let volume_size_end = volume_size_start + volume_size_len;
1964            let volume_size = from_leb(&data[volume_size_start..])
1965            .ok_or(Error(format!("Failed to parse size of volume {volume_name:?}: Expected LEB128 at offset {volume_size_start} + {volume_size_len}")))?;
1966
1967            let volume_size: usize = volume_size.try_into().unwrap_or(u32::MAX as usize);
1968            let volume_start = volume_size_end;
1969            let volume_end = volume_start + volume_size;
1970
1971            let leb_size = get_leb_size(&data[volume_start..volume_end]).ok_or(Error(
1972                "Error parsing volume: could not read header size LEB128".to_string(),
1973            ))?;
1974
1975            let header_len: usize = from_leb(&data[volume_start..volume_end])
1976                .ok_or(Error(format!(
1977                    "Could not read header length from data (first {leb_size} bytes)"
1978                )))?
1979                .try_into()
1980                .unwrap_or(usize::MAX);
1981
1982            let volume_start = volume_start + leb_size + header_len;
1983
1984            results.insert(volume_name.to_string(), (volume_start, volume_end));
1985            cursor = volume_end;
1986            volume_id += 1;
1987        }
1988
1989        Ok(results)
1990    }
1991
    /// Parses a sequence of named volumes from a raw "fileblock".
    ///
    /// `data` is a concatenation of records, each laid out as
    /// `LEB128(name_len) | name bytes | LEB128(volume_len) | volume bytes`.
    /// Returns the volumes keyed by name, preserving their order in the file.
    pub fn parse_volumes_from_fileblock(
        data: &'a [u8],
    ) -> ReadResult<IndexMap<String, Volume<'a>>> {
        let mut map = IndexMap::new();
        let mut volume_id = 0;
        let mut cursor = 0;

        // Keep consuming records until the cursor runs off the end of `data`.
        while get_byte_slice(data, cursor, data.len()).is_some() {
            // Width of the name-length LEB128 prefix...
            let volume_name_len_len = get_leb_size(&data[cursor..]).ok_or(Error(format!(
                "Could not parse volume size length for volume {volume_id}"
            )))?;

            // ...and the encoded name length itself.
            let volume_name_bytes_len = from_leb(&data[cursor..]).ok_or(Error(format!(
                "Could not parse volume size for volume {volume_id}"
            )))?;

            // Clamp (rather than fail) if the length does not fit in usize.
            let volume_name_bytes_len: usize = volume_name_bytes_len
                .try_into()
                .unwrap_or(u32::MAX as usize);

            let start = cursor + volume_name_len_len;
            let end = start + volume_name_bytes_len;
            let volume_name_bytes = get_byte_slice(data, start, end)
                .ok_or(Error(format!("Failed to parse name of volume {volume_id:?}: Expected {volume_name_bytes_len} bytes at offset {start}..{end}")))?;

            // Volume names must be valid UTF-8.
            let volume_name = std::str::from_utf8(volume_name_bytes)
            .map_err(|e| Error(format!("Failed to parse name of volume {volume_id:?} at offset {start}..{end}: {e}: {volume_name_bytes:?}")))?;

            // The volume-length LEB128 prefix follows directly after the name.
            let volume_size_start = end;
            let _ = get_byte_slice(data, volume_size_start, data.len())
            .ok_or(Error(format!("Failed to parse size of volume {volume_name:?}: Expected LEB128 at offset {volume_size_start}")))?;

            let volume_size_len = get_leb_size(&data[volume_size_start..])
            .ok_or(Error(format!("Failed to parse size of volume {volume_name:?}: Expected LEB128 at offset {volume_size_start}")))?;
            let volume_size_end = volume_size_start + volume_size_len;
            let volume_size = from_leb(&data[volume_size_start..])
            .ok_or(Error(format!("Failed to parse size of volume {volume_name:?}: Expected LEB128 at offset {volume_size_start} + {volume_size_len}")))?;

            let volume_size: usize = volume_size.try_into().unwrap_or(u32::MAX as usize);
            let volume_start = volume_size_end;
            let volume_end = volume_start + volume_size;
            // Bounds-checked access to the volume payload bytes.
            let volume_bytes = get_byte_slice(data, volume_start, volume_end)
            .ok_or(Error(format!("Failed to parse size of volume {volume_name:?}: Expected {volume_size} bytes at offset {volume_start}..{volume_end}")))?;

            // Parse the volume's own header + file structure.
            let volume = Volume::parse(volume_bytes).map_err(|e| {
                Error(format!(
                    "Failed to parse volume {volume_name:?} (size = {volume_size} bytes): {e}"
                ))
            })?;

            map.insert(volume_name.to_string(), volume);

            // Advance to the next record.
            cursor = volume_end;
            volume_id += 1;
        }

        Ok(map)
    }
2050
2051    /// Parses the `.volumes` section(s) of the file
2052    pub fn parse_volumes(data: &'a [u8]) -> ReadResult<IndexMap<String, Volume<'a>>> {
2053        let (atoms_volume_start, atoms_volume_size) = Self::get_atoms_volume_offset_size(data)?;
2054        let cursor = atoms_volume_start + atoms_volume_size;
2055        match get_byte_slice(data, cursor, data.len()) {
2056            Some(s) => Self::parse_volumes_from_fileblock(s),
2057            None => Ok(IndexMap::new()),
2058        }
2059    }
2060
    /// Computes the checksum of the file without cloning it
    ///
    /// Recomputes the SHA-256 digest over the file as it looked *before* the
    /// checksum and signature fields were filled in (those reserved regions
    /// are hashed as zeros), so the result can be compared against the stored
    /// checksum bytes. `Checksum.valid` is always `false` here; the caller
    /// (see `parse`) compares against the on-disk bytes and sets it.
    pub fn compute_checksum(data: &[u8]) -> ReadResult<Option<Checksum>> {
        use sha2::Sha256;

        // 16-byte checksum-type tag directly after magic + version.
        let min_offset = MAGIC.len() + Version::V1.len();
        let max_offset = min_offset + 16;
        let checksum_type = get_byte_slice(data, min_offset, max_offset).ok_or(Error(format!(
            "Failed to get checksum type at offset {min_offset}..{max_offset}"
        )))?;

        match checksum_type {
            // All dashes: the file was written without a checksum.
            b"----------------" => Ok(None),
            b"sha256----------" | b"sha256-signed---" => {
                let mut hasher = Sha256::new();

                hasher.update(MAGIC);
                hasher.update(Version::V1);
                hasher.update(checksum_type);
                // Zero placeholders for the reserved checksum (256 B),
                // signature-length (4 B) and signature (1024 B) regions —
                // mirroring the layout used at serialization time.
                hasher.update([0; 256]);
                hasher.update([0; 4]);
                hasher.update([0; 1024]);

                // Hash whatever payload follows the fixed-size header, if any.
                if data.len() > MAGIC.len() + Version::V1.len() + 16 + 256 + 4 + 1024 {
                    hasher.update(&data[(MAGIC.len() + Version::V1.len() + 16 + 256 + 4 + 1024)..]);
                };

                let mut result = hasher.finalize().to_vec();
                // Only the first `valid_until` bytes are digest bytes; the
                // rest is zero padding up to the 256-byte field width.
                let valid_until = result.len();

                if result.len() < 256 {
                    result.resize(256, 0);
                }

                // Safe: `checksum_type` matched an ASCII byte-string literal.
                let chk_type = std::str::from_utf8(checksum_type).unwrap().to_string();

                Ok(Some(Checksum {
                    valid_until,
                    chk_type,
                    data: result,
                    // set later, after comparing with the stored bytes
                    valid: false,
                }))
            }
            _ => Err(Error(format!(
                "Invalid checksum type: {:?}",
                std::str::from_utf8(checksum_type)
            ))),
        }
    }
2109
2110    pub const fn get_signature_offset_start() -> usize {
2111        MAGIC.len() + Version::V1.len() + 16 + 256
2112    }
2113
2114    /// Returns the offset of the signature
2115    pub fn get_signature_offset_size(data: &[u8]) -> ReadResult<(usize, usize)> {
2116        let signature_offset_start = Self::get_signature_offset_start();
2117        let signature_size_bytes =
2118            get_byte_slice(data, signature_offset_start, signature_offset_start + 4).ok_or(
2119                Error(format!(
2120                    "Failed to get signature length at offset {signature_offset_start}..{}",
2121                    signature_offset_start + 4
2122                )),
2123            )?;
2124
2125        let signature_len_u32 = u32::from_le_bytes([
2126            signature_size_bytes[0],
2127            signature_size_bytes[1],
2128            signature_size_bytes[2],
2129            signature_size_bytes[3],
2130        ]);
2131
2132        let signature_len = signature_len_u32.min(1024) as usize;
2133
2134        Ok((signature_offset_start + 4, signature_len))
2135    }
2136
2137    /// Read the signature bytes
2138    pub fn get_signature_bytes(data: &[u8]) -> ReadResult<&[u8]> {
2139        let (offset, size) = Self::get_signature_offset_size(data)?;
2140
2141        get_byte_slice(data, offset, offset + size).ok_or(Error(format!(
2142            "Could not get signature at offset {}..{}",
2143            offset,
2144            offset + size
2145        )))
2146    }
2147
2148    /// Returns the (unverified) signature from the file
2149    pub fn get_signature(data: &[u8]) -> ReadResult<Option<Signature>> {
2150        let signature = Self::get_signature_bytes(data)?;
2151        let last_bytes = signature.iter().rev().take_while(|i| **i == 0).count();
2152        let valid_until = 1024_usize.saturating_sub(last_bytes);
2153        Ok(Some(Signature {
2154            valid_until,
2155            data: signature.to_vec(),
2156            valid: false,
2157        }))
2158    }
2159
2160    /// Verifies the file against a given key
2161    #[cfg(feature = "crypto")]
2162    pub fn verify_file(
2163        checksum: &Checksum,
2164        signature: &Signature,
2165        public_key: &Cert,
2166    ) -> ReadResult<bool> {
2167        verify_signature(&checksum.data, &signature.data, public_key)
2168            .map_err(|e| Error(format!("Error verifying signature: {e}")))
2169    }
2170
    /// Returns a reference to the manifest (see section `§2.3.1` of the spec)
    pub fn get_metadata(&self) -> &Manifest {
        &self.manifest
    }
2175
    /// Returns the current package name in `name@version` form
    /// (empty string if the manifest carries no usable package metadata)
    pub fn get_package_name(&self) -> String {
        Self::get_package_name_from_manifest(&self.manifest)
    }
2180
2181    fn get_package_name_from_manifest(m: &Manifest) -> String {
2182        m.package
2183            .get("wapm")
2184            .map(|value| {
2185                let meta: InternalPackageMeta = value.deserialized().unwrap();
2186                format!("{}@{}", meta.name, meta.version)
2187            })
2188            .or_else(|| {
2189                let name = m.package.get("name")?;
2190                let name = match name {
2191                    ciborium::Value::Text(t) => t,
2192                    _ => return None,
2193                };
2194                let version = m.package.get("version")?;
2195                let version = match version {
2196                    ciborium::Value::Text(t) => t,
2197                    _ => return None,
2198                };
2199                Some(format!("{name}@{version}"))
2200            })
2201            .unwrap_or_default()
2202    }
2203
2204    /// Returns an atom by name for a given package
2205    pub fn get_atom(&self, package: &str, atom: &str) -> Result<&[u8], Error> {
2206        let full_atom_name = format!("{package}:{atom}");
2207        match self.atoms.get_file(&full_atom_name) {
2208            Ok(o) => Ok(o),
2209            Err(e) => {
2210                // look for the atom without the package name,
2211                // if it's the current package name
2212                if package != self.get_package_name() {
2213                    return Err(e);
2214                }
2215
2216                self.atoms.get_file(atom)
2217            }
2218        }
2219    }
2220
2221    /// Returns a reference to the filesystem volume of the package
2222    pub fn get_volume(&self, package: &str, volume: &str) -> Option<&Volume<'a>> {
2223        match self.volumes.get(&format!("{package}/{volume}")) {
2224            Some(s) => Some(s),
2225            None => {
2226                if package == self.get_package_name() {
2227                    self.volumes.get(volume)
2228                } else {
2229                    None
2230                }
2231            }
2232        }
2233    }
2234
2235    /// Returns a file for a given package - if you want to use a non-default
2236    /// volume, prefix the `file_path` with `volume://`, for example, `metadata://README.md`
2237    pub fn get_file(&self, package: &str, file_path: &str) -> Result<&[u8], Error> {
2238        // if the file path starts with "{volume}://", see if the package has a given volume
2239        let (volume, path) =
2240            Self::get_volume_name_from_path(file_path).unwrap_or(("atom", file_path));
2241        let full_volume_name = format!("{package}/{volume}");
2242        let volume = match self.volumes.get(&full_volume_name) {
2243            Some(o) => o,
2244            None => {
2245                // look for the volume without the package name,
2246                // if it's the current package name
2247                if package != self.get_package_name() {
2248                    return Err(Error(format!("Could not find volume {full_volume_name:?}")));
2249                }
2250
2251                self.volumes
2252                    .get(volume)
2253                    .ok_or(Error(format!("Could not find volume {volume:?}")))?
2254            }
2255        };
2256        volume.get_file(path)
2257    }
2258
2259    fn get_volume_name_from_path(s: &str) -> Option<(&str, &str)> {
2260        let (volume, path) = s.split_once("://")?;
2261        if !s.starts_with(&format!("{volume}://")) {
2262            None
2263        } else {
2264            Some((volume, path))
2265        }
2266    }
2267
2268    /// Returns a list of volumes for this package
2269    pub fn list_volumes(&self, package: &str) -> Vec<String> {
2270        let mut result = Vec::new();
2271        let search = format!("{package}/");
2272        for k in self.volumes.keys() {
2273            if k.starts_with(&search) {
2274                result.push(k.replacen(&search, "", 1));
2275            }
2276        }
2277        result
2278    }
2279
2280    /// Returns a list of bundled "package@version" strings contained in this package
2281    pub fn list_packages(&self) -> Vec<PackageInfo> {
2282        let mut packages = vec![PackageInfo::Internal {
2283            dependency_path: String::new(),
2284            name: self.get_package_name(),
2285        }];
2286        Self::get_packages_recursive("self", &self.manifest.use_map, &mut packages);
2287        packages.sort();
2288        packages.dedup();
2289        packages
2290    }
2291
2292    fn get_packages_recursive(
2293        parent_manifest: &str,
2294        use_map: &IndexMap<String, UrlOrManifest>,
2295        packages: &mut Vec<PackageInfo>,
2296    ) {
2297        for (k, v) in use_map.iter() {
2298            match v {
2299                UrlOrManifest::Url(u) => {
2300                    packages.push(PackageInfo::External {
2301                        name: k.clone(),
2302                        url: u.clone(),
2303                    });
2304                }
2305                UrlOrManifest::RegistryDependentUrl(u) => {
2306                    packages.push(PackageInfo::RegistryExternal {
2307                        name: k.clone(),
2308                        id: u.clone(),
2309                    });
2310                }
2311                UrlOrManifest::Manifest(m) => {
2312                    let name = Self::get_package_name_from_manifest(m);
2313                    packages.push(PackageInfo::Internal {
2314                        dependency_path: parent_manifest.to_string(),
2315                        name: name.clone(),
2316                    });
2317                    let dependency_path = format!("{parent_manifest}::{name}");
2318                    Self::get_packages_recursive(&dependency_path, &m.use_map, packages);
2319                }
2320            }
2321        }
2322    }
2323
    /// Returns the atoms in the root package
    ///
    /// Shorthand for `list_atoms_for_package` with the current package name.
    pub fn list_atoms(&self) -> Vec<String> {
        self.list_atoms_for_package(&self.get_package_name())
    }
2328
2329    /// Returns a list of all atoms with bytes
2330    pub fn get_all_atoms(&self) -> IndexMap<String, &'a [u8]> {
2331        self.atoms
2332            .header
2333            .top_level
2334            .iter()
2335            .filter_map(|fs_entry| {
2336                Some((
2337                    fs_entry.text.to_string(),
2338                    self.atoms
2339                        .get_file_bytes(&OwnedFsEntryFile {
2340                            text: fs_entry.text.to_string(),
2341                            offset_start: fs_entry.offset_start,
2342                            offset_end: fs_entry.offset_end,
2343                        })
2344                        .ok()?,
2345                ))
2346            })
2347            .collect()
2348    }
2349
2350    /// List the atoms for a given package
2351    pub fn list_atoms_for_package(&self, package_orig: &str) -> Vec<String> {
2352        let package = format!("{package_orig}:");
2353        self.atoms
2354            .header
2355            .top_level
2356            .iter()
2357            .filter_map(|fs_entry| {
2358                if !fs_entry.text.contains(':') && !fs_entry.text.contains('@') {
2359                    Some(fs_entry.text.to_string())
2360                } else if !fs_entry.text.starts_with(&format!("{package_orig}::"))
2361                    && fs_entry.text.starts_with(&package)
2362                {
2363                    Some(fs_entry.text.replacen(&package, "", 1))
2364                } else if !fs_entry.text.starts_with("self::")
2365                    && fs_entry.text.starts_with("self:")
2366                    && package_orig == self.get_package_name()
2367                {
2368                    Some(fs_entry.text.to_string())
2369                } else {
2370                    None
2371                }
2372            })
2373            .collect()
2374    }
2375
2376    /// List the available commands for the root package
2377    pub fn list_commands(&self) -> Vec<&str> {
2378        self.get_metadata()
2379            .commands
2380            .keys()
2381            .map(|s| s.as_str())
2382            .collect()
2383    }
2384
    /// Parses the entire file, depending on the `ParseOptions`
    ///
    /// Steps: validate magic and version, recompute the checksum and compare
    /// it with the stored bytes, read the (unverified) signature, optionally
    /// verify it against `options.key`, then parse the manifest, the atoms
    /// volume and the remaining filesystem volumes.
    #[allow(unused_variables)]
    pub fn parse(data: &'a [u8], options: &ParseOptions) -> ReadResult<Self> {
        Self::check_magic_header(data)?;
        let version = Self::get_check_version(data)?;
        let mut checksum = Self::compute_checksum(data)?;
        #[allow(unused_mut)]
        let mut signature = Self::get_signature(data)?;
        let checksum_bytes = Self::get_checksum_bytes(data)?;

        // The checksum is valid when the recomputed digest matches the bytes
        // stored in the file header.
        if let Some(checksum) = checksum.as_mut() {
            checksum.valid = checksum.data == checksum_bytes;
        }

        // Signature verification runs only when a key was supplied AND the
        // checksum already checked out.
        #[cfg(feature = "crypto")]
        match (options.key.as_ref(), checksum.as_mut(), signature.as_mut()) {
            (Some(key), Some(checksum), Some(signature)) if checksum.valid => {
                signature.valid = verify_signature(&checksum.data, &signature.data, key).is_ok();
            }
            _ => {}
        }

        let manifest = Self::get_manifest(data)?;
        let atoms_volume = Self::get_atoms_volume(data)?;
        let volumes = Self::parse_volumes(data)?;

        Ok(WebC {
            version,
            checksum,
            signature,
            manifest,
            atoms: atoms_volume,
            volumes,
        })
    }
2420
2421    pub fn get_volumes_as_fileblock(&self) -> Vec<u8> {
2422        let mut file = Vec::new();
2423
2424        for (volume_name, volume) in self.volumes.iter() {
2425            // Serialize volume name
2426            let volume_name_bytes = volume_name.as_bytes();
2427            file.extend_from_slice(&to_leb(volume_name_bytes.len() as u64));
2428            file.extend(volume_name_bytes);
2429
2430            // Serialize volume content
2431            let volume_serialized = volume.into_bytes();
2432            file.extend_from_slice(&to_leb(volume_serialized.len() as u64));
2433            file.extend(&volume_serialized);
2434        }
2435
2436        file
2437    }
2438
2439    /// Serialize the .webc file into bytes
2440    pub fn into_bytes(&self, sign_bytes: GenerateChecksum) -> ReadResult<Vec<u8>> {
2441        use sha2::Sha256;
2442
2443        let mut file: Vec<u8> = vec![];
2444
2445        file.extend(MAGIC);
2446        file.extend(*Version::V1);
2447
2448        // 16 bytes: signature algo
2449        file.extend(sign_bytes.get_key());
2450        // 256 bytes: Reserve space reserved for checksum
2451        file.extend([0; 256]);
2452        // 4 bytes: Length of the signature in bytes
2453        file.extend([0; 4]);
2454        // 1024 bytes: Space reserved for the signature
2455        file.extend([0; 1024]);
2456
2457        // N bytes: length of manifest + manifest
2458        let mut manifest_serialized = vec![];
2459        ciborium::into_writer(&self.manifest, &mut manifest_serialized).unwrap();
2460
2461        file.extend_from_slice(&to_leb(manifest_serialized.len() as u64));
2462        file.extend(manifest_serialized);
2463
2464        // Serialize "atoms" volume
2465        let atoms_volume = self.atoms.into_bytes();
2466        file.extend_from_slice(&to_leb(atoms_volume.len() as u64));
2467        file.extend_from_slice(&atoms_volume);
2468
2469        for (volume_name, volume) in self.volumes.iter() {
2470            // Serialize volume name
2471            let volume_name_bytes = volume_name.as_bytes();
2472            file.extend_from_slice(&to_leb(volume_name_bytes.len() as u64));
2473            file.extend(volume_name_bytes);
2474
2475            // Serialize volume content
2476            let volume_serialized = volume.into_bytes();
2477            file.extend_from_slice(&to_leb(volume_serialized.len() as u64));
2478            file.extend(&volume_serialized);
2479        }
2480
2481        // Generate 256-byte checksum depending on requested algo
2482        let checksum = match sign_bytes {
2483            GenerateChecksum::NoChecksum => vec![0; 256],
2484            _ => {
2485                let mut hasher = Sha256::new();
2486                hasher.update(&file);
2487                let mut result = hasher.finalize().to_vec();
2488                if result.len() > 256 {
2489                    return Err(Error("SHA256 returned >256 byte hash (?)".to_string()));
2490                }
2491                if result.len() < 256 {
2492                    result.resize(256, 0);
2493                }
2494                result
2495            }
2496        };
2497
2498        assert_eq!(checksum.len(), 256);
2499
2500        // update checksum
2501        let idx_start = MAGIC.len() + Version::V1.len() + sign_bytes.get_key().len();
2502        let idx_end = idx_start + checksum.len();
2503        for (i, c) in (idx_start..idx_end).zip(checksum.iter()) {
2504            file[i] = *c;
2505        }
2506
2507        let (sig_len, signature) = match &sign_bytes {
2508            GenerateChecksum::NoChecksum | GenerateChecksum::Sha256 => (0_u32, vec![0; 1024]),
2509            #[cfg(feature = "crypto")]
2510            GenerateChecksum::SignedSha256 { key } => {
2511                let mut sig = create_signature(key, &checksum)
2512                    .map_err(|e| Error(format!("Failed to sign checksum: {e}")))?;
2513
2514                let len = sig.len();
2515
2516                if sig.len() > 1024 {
2517                    // TODO(felix): better error handling
2518                    return Err(Error(format!(
2519                        "Signature length out of bounds: {} bytes, max 1024 bytes",
2520                        sig.len()
2521                    )));
2522                }
2523
2524                if sig.len() < 1024 {
2525                    sig.resize(1024, 0);
2526                }
2527
2528                (len as u32, sig)
2529            }
2530        };
2531
2532        let sig_len_bytes = sig_len.to_le_bytes().to_vec();
2533
2534        assert_eq!(sig_len_bytes.len(), 4);
2535
2536        // update signature length
2537        let idx_start = idx_end;
2538        let idx_end = idx_start + sig_len_bytes.len();
2539        for (i, c) in (idx_start..idx_end).zip(sig_len_bytes.into_iter()) {
2540            file[i] = c;
2541        }
2542
2543        assert_eq!(signature.len(), 1024);
2544
2545        // update signature
2546        let idx_start = idx_end;
2547        let idx_end = idx_start + signature.len();
2548        for (i, c) in (idx_start..idx_end).zip(signature.into_iter()) {
2549            file[i] = c;
2550        }
2551
2552        Ok(file)
2553    }
2554}
2555
/// Information about the package name
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Serialize, Deserialize)]
pub enum PackageInfo {
    /// External dependency, ex. `"abc": "https://myhost.io/package/abc@1.2.3"`
    External {
        // Name of the dependency as referenced by the depending package
        // (the `"abc"` part of the example above).
        name: String,
        // Fully-qualified URL the package file is fetched from.
        url: Url,
    },
    /// External dependency that depends on a registry for resolving the file
    /// URL, ex. `"abc": "package/abc@1.2.3"`
    RegistryExternal {
        // Name of the dependency as referenced by the depending package.
        name: String,
        // Registry identifier (e.g. `package/abc@1.2.3`) used to resolve
        // the actual download URL.
        id: String,
    },
    /// Internal (vendored) dependency
    Internal {
        // Path under which the vendored dependency is stored inside this
        // file — NOTE(review): presumably a volume path; confirm at call site.
        dependency_path: String,
        // Name of the dependency.
        name: String,
    },
}
2570
/// Create a detached OpenPGP signature over `message`.
///
/// Uses the first unencrypted, alive, non-revoked, signing-capable secret
/// key found in `cert` under the standard policy. Returns an [`Error`] if no
/// such key exists or if any signing step fails, instead of panicking.
#[cfg(feature = "crypto")]
fn create_signature(cert: &Cert, message: &[u8]) -> Result<Vec<u8>, Error> {
    use sequoia_openpgp::policy::StandardPolicy as P;
    use sequoia_openpgp::serialize::stream::Message;
    use sequoia_openpgp::serialize::stream::Signer;
    use std::io::Write;

    let policy = &P::new();

    // Select the first key allowed to sign under the standard policy.
    // Previously this `unwrap()`ed on `next()`, panicking when the cert
    // carried no usable signing key; surface that case as an error instead.
    let keypair = cert
        .keys()
        .unencrypted_secret()
        .with_policy(policy, None)
        .supported()
        .alive()
        .revoked(false)
        .for_signing()
        .next()
        .ok_or_else(|| Error("no usable signing key found in certificate".to_string()))?
        .key()
        .clone()
        .into_keypair()
        .map_err(|e| Error(format!("{e}")))?;

    let mut target = Vec::new();
    let sink = Message::new(&mut target);

    // Detached signer: the signature goes to `target` without embedding
    // the message itself.
    let mut signer = Signer::new(sink, keypair)
        .map_err(|e| Error(format!("{e}")))?
        .detached()
        .build()
        .map_err(|e| Error(format!("{e}")))?;
    signer
        .write_all(message)
        .map_err(|e| Error(format!("{e}")))?;
    signer.finalize().map_err(|e| Error(format!("{e}")))?;

    Ok(target)
}
2610
/// Verifies the detached signature of a .webc file.
///
/// `checksum` is the computed checksum of the file with the signature bytes
/// zeroed, `signature` is the detached signature read from the header, and
/// `public_key` is the certificate to verify against.
///
/// Returns `Ok(true)` on success; any verification failure surfaces as `Err`.
#[cfg(feature = "crypto")]
fn verify_signature(
    checksum: &[u8],
    signature: &[u8],
    public_key: &Cert,
) -> Result<bool, anyhow::Error> {
    use sequoia_openpgp::parse::Parse;
    use sequoia_openpgp::policy::StandardPolicy as P;

    let policy = &P::new();

    // Make a helper that feeds the sender's
    // public key to the verifier.
    let helper = CertVerifier { cert: public_key };

    // Now, create a verifier with a helper using the given Certs.
    let mut verifier =
        DetachedVerifierBuilder::from_bytes(signature)?.with_policy(policy, None, helper)?;

    // Verify the data.
    verifier.verify_bytes(checksum)?;

    Ok(true)
}
2637
/// [`VerificationHelper`] that checks signatures against a single,
/// caller-supplied certificate.
#[cfg(feature = "crypto")]
struct CertVerifier<'a> {
    // The only certificate ever offered to the verifier.
    cert: &'a Cert,
}
2642
#[cfg(feature = "crypto")]
impl<'a> VerificationHelper for CertVerifier<'a> {
    /// Impl to return the public keys for verification based on the given handle
    fn get_certs(
        &mut self,
        _ids: &[sequoia_openpgp::KeyHandle],
    ) -> sequoia_openpgp::Result<Vec<Cert>> {
        // The key handles are ignored: only the single configured cert is
        // ever offered to the verifier.
        Ok(vec![self.cert.clone()])
    }

    /// Impl to verify the signature with the public key
    fn check(&mut self, structure: MessageStructure<'_>) -> sequoia_openpgp::Result<()> {
        let mut good = false;

        // Accept exactly one layer: a signature group whose first result is
        // good. Any additional layer (i > 0) or unexpected layer kind is
        // rejected outright.
        for (i, layer) in structure.into_iter().enumerate() {
            match (i, layer) {
                (0, MessageLayer::SignatureGroup { results }) => match results.into_iter().next() {
                    Some(Ok(_)) => good = true,
                    Some(Err(e)) => return Err(sequoia_openpgp::Error::from(e).into()),
                    None => return Err(anyhow::anyhow!("No signature")),
                },
                _ => return Err(anyhow::anyhow!("Unexpected message structure")),
            }
        }

        // Also rejects an empty message structure (no layers at all).
        if !good {
            return Err(anyhow::anyhow!("Signature verification failed"));
        }

        Ok(())
    }
}
2675
/// Map from a (relative) directory or file path to its raw contents;
/// directory entries map to an empty `Vec`.
pub type FileMap = BTreeMap<DirOrFile, Vec<u8>>;
2677
2678pub fn pack_directory(dir: &Path) -> Result<FileMap, String> {
2679    let mut files = BTreeMap::new();
2680
2681    // by default, this builder will ignore:
2682    // - entries in .git/info/exclude
2683    // - entries in .gitignore
2684    // - hidden files
2685    let walker = ignore::WalkBuilder::new(dir).build();
2686
2687    for entry in walker {
2688        let entry = entry.as_ref().map_err(|e| format!("{entry:?}: {e}"))?;
2689
2690        let original_path = entry.path();
2691        let path = original_path.strip_prefix(dir).unwrap_or(original_path);
2692        let file_str = path.display().to_string();
2693        if file_str.is_empty() {
2694            continue;
2695        }
2696
2697        if original_path.is_dir() {
2698            files.insert(DirOrFile::Dir(path.to_path_buf()), Vec::new());
2699        } else {
2700            let file_contents =
2701                std::fs::read(original_path).map_err(|e| format!("{file_str:?}: {e}"))?;
2702            files.insert(DirOrFile::File(path.to_path_buf()), file_contents);
2703        }
2704    }
2705
2706    Ok(files)
2707}
2708
/// Unit tests for v1 serialization, parsing, and directory packing.
#[cfg(test)]
mod tests {
    use std::io::Write;

    use super::*;
    use FsEntryType::*;
    use tempfile::tempdir;

    // `pack_directory` must honour the default `ignore` walker filters:
    // hidden files, .git metadata, and .gitignore'd entries are excluded.
    #[test]
    fn ignore_hidden_and_git_related() {
        let root = tempdir().unwrap();

        let _hidden = std::fs::File::create(root.path().join(".hidden")).unwrap();

        let _git = std::fs::File::create(root.path().join(".git")).unwrap();

        let mut gitignore = std::fs::File::create(root.path().join(".gitignore")).unwrap();
        gitignore.write_all(b"ignore_me").unwrap();

        let _ignore_me = std::fs::File::create(root.path().join("ignore_me")).unwrap();

        let _include_me = std::fs::File::create(root.path().join("include_me")).unwrap();

        let map = pack_directory(root.path()).unwrap();

        // These files must be excluded:
        // - .git
        // - .gitignore
        // - .hidden
        // - ignore_me
        assert_eq!(map.len(), 1);
        assert!(map.contains_key(&DirOrFile::File("include_me".parse().unwrap())));
    }

    // A header entry serializes as: text length, flags, start/end offsets
    // (little-endian u64), then the text itself.
    #[test]
    fn serialize_header_entry() {
        let entry = HeaderEntry {
            flags: Flags::File,
            offset_start: 23,
            offset_end: 1024,
            text: "file.txt".parse().unwrap(),
        };

        let mut buffer = Vec::new();
        entry.write_to(&mut buffer);

        assert_bytes_eq!(
            buffer,
            bytes! {
                text_length("file.txt"),
                Flags::File,
                23_u64.to_le_bytes(),
                1024_u64.to_le_bytes(),
                "file.txt",
            }
        );
    }

    // Round-trips a volume and checks the directory-sorted entry listing:
    // directories first, then files, with the expected content offsets.
    #[test]
    fn test_specialsort_append_to_target() {
        let mut map = BTreeMap::new();

        map.insert(
            DirOrFile::File(Path::new("10.txt").to_path_buf()),
            b"hello".to_vec(),
        );
        map.insert(
            DirOrFile::File(Path::new("104.txt").to_path_buf()),
            b"hello".to_vec(),
        );
        map.insert(DirOrFile::Dir(Path::new("a100").to_path_buf()), Vec::new());
        map.insert(DirOrFile::Dir(Path::new("a101").to_path_buf()), Vec::new());
        map.insert(
            DirOrFile::File(Path::new("a101/test.txt").to_path_buf()),
            b"hello".to_vec(),
        );
        map.insert(
            DirOrFile::File(Path::new("file1.txt").to_path_buf()),
            b"hello".to_vec(),
        );
        map.insert(
            DirOrFile::File(Path::new("file4.txt").to_path_buf()),
            b"hello".to_vec(),
        );
        map.insert(
            DirOrFile::File(Path::new("file2.txt").to_path_buf()),
            b"hello".to_vec(),
        );

        let volume_bytes = Volume::serialize_files(map);
        let volume = Volume::parse(&volume_bytes).unwrap();
        assert_eq!(
            volume.get_all_file_entries_directorysorted(),
            vec![
                (
                    DirOrFile::Dir(Path::new("a100").to_path_buf()),
                    FsEntry {
                        fs_type: Dir,
                        text: Cow::Borrowed("a100"),
                        offset_start: 224,
                        offset_end: 224
                    }
                ),
                (
                    DirOrFile::Dir(Path::new("a101").to_path_buf()),
                    FsEntry {
                        fs_type: Dir,
                        text: Cow::Borrowed("a101"),
                        offset_start: 224,
                        offset_end: 264
                    }
                ),
                (
                    DirOrFile::File(Path::new("a101/test.txt").to_path_buf()),
                    FsEntry {
                        fs_type: File,
                        text: Cow::Borrowed("test.txt"),
                        offset_start: 10,
                        offset_end: 15
                    }
                ),
                (
                    DirOrFile::File(Path::new("10.txt").to_path_buf()),
                    FsEntry {
                        fs_type: File,
                        text: Cow::Borrowed("10.txt"),
                        offset_start: 0,
                        offset_end: 5
                    }
                ),
                (
                    DirOrFile::File(Path::new("104.txt").to_path_buf()),
                    FsEntry {
                        fs_type: File,
                        text: Cow::Borrowed("104.txt"),
                        offset_start: 5,
                        offset_end: 10
                    }
                ),
                (
                    DirOrFile::File(Path::new("file1.txt").to_path_buf()),
                    FsEntry {
                        fs_type: File,
                        text: Cow::Borrowed("file1.txt"),
                        offset_start: 15,
                        offset_end: 20
                    }
                ),
                (
                    DirOrFile::File(Path::new("file2.txt").to_path_buf()),
                    FsEntry {
                        fs_type: File,
                        text: Cow::Borrowed("file2.txt"),
                        offset_start: 20,
                        offset_end: 25
                    }
                ),
                (
                    DirOrFile::File(Path::new("file4.txt").to_path_buf()),
                    FsEntry {
                        fs_type: File,
                        text: Cow::Borrowed("file4.txt"),
                        offset_start: 25,
                        offset_end: 30
                    }
                ),
            ]
        );
    }

    // Malformed headers must fail with precise, stable error messages
    // (bad magic, unsupported version, truncated checksum/signature fields).
    #[test]
    fn webc_invalid_data() {
        let content = WebC::parse(b"Nweb", &ParseOptions::default());
        pretty_assertions::assert_eq!(
            content.unwrap_err().0.as_str(),
            "Invalid WebC file (can\'t get magic header)"
        );

        let content = WebC::parse(b"\0webc0x1", &ParseOptions::default());
        pretty_assertions::assert_eq!(content.unwrap_err().0.as_str(), "Version not supported");

        let content = WebC::parse(b"\0webc001", &ParseOptions::default());
        pretty_assertions::assert_eq!(
            content.unwrap_err().0.as_str(),
            "Failed to get checksum type at offset 8..24"
        );

        // An unknown checksum type parses as "no checksum" rather than an error.
        pretty_assertions::assert_eq!(
            WebC::compute_checksum(b"\0webc001----------------"),
            Ok(None)
        );

        let content = WebC::parse(b"\0webc001----------------", &ParseOptions::default());
        pretty_assertions::assert_eq!(
            content.unwrap_err().0.as_str(),
            "Failed to get signature length at offset 280..284"
        );
    }

    // FsEntry lists must survive an encode/decode round trip unchanged.
    #[test]
    fn test_encode_decode_file_entry() {
        use crate::v1::FsEntryType::*;
        use std::borrow::Cow;
        let entries = vec![
            FsEntry {
                fs_type: Dir,
                text: Cow::Borrowed("a"),
                offset_start: 58,
                offset_end: 91,
            },
            FsEntry {
                fs_type: Dir,
                text: Cow::Borrowed("b"),
                offset_start: 91,
                offset_end: 91,
            },
        ];

        pretty_assertions::assert_eq!(
            FsEntry::parse(&FsEntry::into_bytes(&entries).unwrap_or_default()),
            entries
        );
    }

    // A serialized volume must resolve nested file paths back to contents.
    #[test]
    fn test_volume() {
        let mut files = BTreeMap::new();
        files.insert(
            DirOrFile::File(Path::new("/a/c/file.txt").to_path_buf()),
            b"hello".to_vec(),
        );
        files.insert(DirOrFile::Dir(Path::new("/b").to_path_buf()), Vec::new());
        let volume_bytes = Volume::serialize_files(files);
        let volume = Volume::parse(&volume_bytes).unwrap();
        pretty_assertions::assert_eq!(volume.get_file("/a/c/file.txt"), Ok(&b"hello"[..]));
    }

    // Full WebC round trip: into_bytes followed by parse must reproduce the
    // original structure (checksum disabled).
    #[test]
    fn test_encode_decode_webc() {
        let mut files = BTreeMap::new();
        files.insert(
            DirOrFile::File(Path::new("atom.wasm").to_path_buf()),
            b"atom wasm content".to_vec(),
        );
        let atom_volume = Volume::serialize_atoms(files);
        let atom_volume = Volume::parse(&atom_volume).unwrap();

        let mut files = BTreeMap::new();
        files.insert(
            DirOrFile::File(Path::new("dependency.txt").to_path_buf()),
            b"dependency!".to_vec(),
        );
        let file_volume = Volume::serialize_files(files);
        let file_volume = Volume::parse(&file_volume).unwrap();

        let webc = WebC {
            version: 1,
            checksum: None,
            signature: Some(Signature {
                valid_until: 1024,
                valid: false,
                data: Vec::new(),
            }),
            manifest: Manifest {
                origin: None,
                use_map: IndexMap::default(),
                package: IndexMap::default(),
                atoms: IndexMap::default(),
                commands: IndexMap::default(),
                bindings: Vec::new(),
                entrypoint: None,
            },
            atoms: atom_volume,
            volumes: {
                let mut map = IndexMap::default();
                map.insert("files".to_string(), file_volume);
                map
            },
        };

        let bytes = webc.into_bytes(GenerateChecksum::NoChecksum).unwrap();

        pretty_assertions::assert_eq!(WebC::parse(&bytes, &ParseOptions::default()).unwrap(), webc);
    }

    // Checks the top-level header entry of a nested path and that lookups go
    // through the named volume (not the atoms volume).
    #[test]
    fn test_insert_wrong_file() {
        let volume_bytes = Volume::serialize_files(
            [(
                DirOrFile::File(Path::new("/a/b/c/test.txt").to_path_buf()),
                b"hello".to_vec(),
            )]
            .iter()
            .map(|(a, b)| (a.clone(), b.clone()))
            .collect(),
        );

        let volume = Volume::parse(&volume_bytes).unwrap();
        assert_eq!(
            volume.header.top_level,
            vec![FsEntry {
                fs_type: FsEntryType::Dir,
                text: Cow::Borrowed("a"),
                offset_start: 33,
                offset_end: 66,
            }]
        );

        let mut volumes = IndexMap::new();
        volumes.insert("atom".to_string(), volume);

        let atom_volume_bytes = Volume::serialize_atoms(
            [(DirOrFile::File("path/to/a".into()), b"".to_vec())]
                .iter()
                .map(|(a, b)| (a.clone(), b.clone()))
                .collect(),
        );

        let file = WebC {
            version: 1,
            checksum: None,
            signature: None,
            manifest: Manifest::default(),
            atoms: Volume::parse(&atom_volume_bytes).unwrap(),
            volumes,
        };

        assert_eq!(
            file.get_file(&file.get_package_name(), "/a/b/c/test.txt"),
            Ok(&b"hello"[..])
        );
    }

    // `walk()` must yield every entry (files and directories) of the volume.
    #[test]
    fn test_walk_volume() {
        let volume = Volume::serialize_files({
            let mut map = BTreeMap::new();
            map.insert(
                DirOrFile::File(Path::new("test.txt").to_path_buf()),
                Vec::new(),
            );
            map.insert(DirOrFile::Dir(Path::new("a").to_path_buf()), Vec::new());
            map.insert(
                DirOrFile::File(Path::new("a/tmp2.txt").to_path_buf()),
                Vec::new(),
            );
            map
        });
        let volume = Volume::parse(&volume).unwrap();
        let files = volume.walk().collect::<Vec<_>>();

        assert_eq!(
            files,
            vec![
                DirOrFile::File(Path::new("test.txt").to_path_buf()),
                DirOrFile::Dir(Path::new("a").to_path_buf()),
                DirOrFile::File(Path::new("a/tmp2.txt").to_path_buf()),
            ]
        )
    }

    // Multiple named volumes must survive the fileblock round trip and stay
    // individually addressable.
    #[test]
    fn test_serialize_deserialize_volumes() {
        let mut volumes = IndexMap::new();

        let volume_a_bytes = Volume::serialize_files(
            [(
                DirOrFile::File(Path::new("test.txt").to_path_buf()),
                b"hello".to_vec(),
            )]
            .iter()
            .map(|(a, b)| (a.clone(), b.clone()))
            .collect(),
        );
        let volume_b_bytes = Volume::serialize_files(
            [(
                DirOrFile::File(Path::new("test2.txt").to_path_buf()),
                b"hello2".to_vec(),
            )]
            .iter()
            .map(|(a, b)| (a.clone(), b.clone()))
            .collect(),
        );

        volumes.insert("a".to_string(), Volume::parse(&volume_a_bytes).unwrap());
        volumes.insert("b".to_string(), Volume::parse(&volume_b_bytes).unwrap());
        let file = WebC {
            version: 1,
            checksum: None,
            signature: None,
            manifest: Manifest::default(),
            atoms: Volume::parse(&volume_b_bytes).unwrap(),
            volumes,
        };

        let volume_serialized = file.get_volumes_as_fileblock();
        let volumes_parsed = WebC::parse_volumes_from_fileblock(&volume_serialized).unwrap();
        assert_eq!(volumes_parsed["a"].get_file("test.txt"), Ok(&b"hello"[..]));
    }
}