exaf_rs/
writer.rs

1//
2// Copyright (c) 2024 Nathan Fiedler
3//
4
5//!
6//! Create compressed archives to contain files, directories, and symbolic
7//! links.
8//!
9//! To create an archive, use the `Writer` to add files, directories, and
10//! symbolic links. It also can add an entire directory tree in one call.
11//!
12//! ```no_run
13//! # use std::fs::File;
14//! use exaf_rs::writer::Writer;
15//!
16//! let output = File::create("archive.exa").expect("create file");
17//! let mut writer = Writer::new(output).expect("new writer");
18//! writer.add_dir_all("important-docs").expect("add dir all");
19//!
20//! // You must call finish() in order to flush everything to the output.
21//! writer.finish().expect("finish");
22//! ```
23//!
24//! To create an encrypted archive, you will need to have a password and specify
25//! the key derivation function and encryption algorithm.
26//!
27//! ```no_run
28//! # use std::fs::File;
29//! use exaf_rs::writer::Writer;
30//!
31//! let output = File::create("archive.exa").expect("create file");
32//! let mut writer = Writer::new(output).expect("new writer");
33//! writer.enable_encryption(
34//!     exaf_rs::KeyDerivation::Argon2id,
35//!     exaf_rs::Encryption::AES256GCM,
36//!     "correct horse battery staple",
37//! ).expect("enable crypto");
38//!
39//! // add files and directories...
40//! writer.add_dir_all("important-docs").expect("add dir all");
41//!
42//! // You must call finish() in order to flush everything to the output.
43//! writer.finish().expect("finish");
44//! ```
45//!
46
47use super::*;
48use std::collections::HashMap;
49use std::fs;
50use std::io::{self, Read, Seek, SeekFrom, Write};
51use std::path::{Path, PathBuf};
52
53//
54// Represents the content of a file (item) and its position within a content
55// bundle when building an archive. It is possible that a portion of the file is
56// being added and thus the itempos might be non-zero; similarly the size may be
57// less than the actual file length.
58//
59struct IncomingContent {
60    // path of the file being packed
61    path: PathBuf,
62    // kind of item: file or symlink
63    kind: Kind,
64    // corresponding file entry identifier
65    file_id: u32,
66    // offset within the file from which to start, usually zero
67    itempos: u64,
68    // offset within the content bundle where the data will go
69    contentpos: u64,
70    // size of the item content
71    size: u64,
72}
73
///
/// Set of options to be passed to the `Writer` as needed.
///
/// Every option is off (`false`) by default. Start from `Options::new()` (or
/// `Options::default()`) and chain the setters for the options to enable.
///
#[derive(Clone, Debug, Default)]
pub struct Options {
    /// Save the file size to the archive as `LN` row (default `false`).
    file_size: bool,
    /// Save file metadata for files, links, and directories (default `false`).
    metadata: bool,
}

impl Options {
    /// Construct a new set of options with every option at its default.
    pub fn new() -> Self {
        Self::default()
    }

    /// Set the file size option to the given value (`true` to save file size).
    ///
    /// Consumes and returns the options so that calls can be chained.
    pub fn file_size(self, value: bool) -> Self {
        Self {
            file_size: value,
            ..self
        }
    }

    /// Set the metadata option to the given value (`true` to save metadata).
    ///
    /// Consumes and returns the options so that calls can be chained.
    pub fn metadata(self, value: bool) -> Self {
        Self {
            metadata: value,
            ..self
        }
    }
}
112
113///
114/// Creates an archive.
115///
116pub struct Writer<W: Write + Seek> {
117    // output to which archive will be written
118    output: W,
119    // options when building the archive
120    options: Options,
121    // identifier of the most recent directory entry
122    prev_dir_id: u32,
123    // directories to be written in the pending manifest
124    directories: Vec<Entry>,
125    // identifier of the most recent file entry
126    prev_file_id: u32,
127    // files to be written, at least partially, in the pending manifest; the key is
128    // a numeric identifier that is unique to this archive
129    files: HashMap<u32, Entry>,
130    // byte offset within a bundle to which new content is added
131    current_pos: u64,
132    // item content that will reside in the bundle under construction
133    contents: Vec<IncomingContent>,
134    // buffer for compressing content bundle in memory (to get final size)
135    buffer: Option<Vec<u8>>,
136    // buffer for building up the manifest + content in memory
137    manifest: Option<Vec<u8>>,
138    // chosen encryption algorithm, possibly none
139    encryption: Encryption,
140    // secret key for encrypting files, if encryption is enabled
141    secret_key: Option<Vec<u8>>,
142    // number of bytes committed to the output so far
143    bytes_written: u64,
144}
145
146impl<W: Write + Seek> Writer<W> {
147    ///
148    /// Construct a `Writer` with default options.
149    ///
150    pub fn new(output: W) -> Result<Self, Error> {
151        Writer::with_options(output, Default::default())
152    }
153
154    ///
155    /// Construct a new `Writer` with the specified options.
156    ///
157    pub fn with_options(mut output: W, options: Options) -> Result<Self, Error> {
158        write_archive_header(&mut output, None)?;
159        Ok(Self {
160            output,
161            options,
162            prev_dir_id: 0,
163            directories: vec![],
164            prev_file_id: 0,
165            files: HashMap::new(),
166            current_pos: 0,
167            contents: vec![],
168            buffer: None,
169            manifest: None,
170            encryption: Encryption::None,
171            secret_key: None,
172            bytes_written: 0,
173        })
174    }
175
176    ///
177    /// Enable encryption when building this archive, using the given passphrase.
178    ///
179    pub fn enable_encryption(
180        &mut self,
181        kd: KeyDerivation,
182        ea: Encryption,
183        password: &str,
184    ) -> Result<(), Error> {
185        if self.prev_dir_id > 0 || self.prev_file_id > 0 {
186            return Err(Error::InternalError("pack must be empty".into()));
187        }
188        self.encryption = ea.clone();
189        let salt = generate_salt(&kd)?;
190        let params: KeyDerivationParams = Default::default();
191        self.secret_key = Some(derive_key(&kd, password, &salt, &params)?);
192        // reset the output position and write out a new archive header that
193        // includes the encryption information provided
194        self.output.seek(SeekFrom::Start(0))?;
195        let mut header = HeaderBuilder::new();
196        header.add_u8(TAG_ENC_ALGO, ea.into())?;
197        header.add_u8(TAG_KEY_DERIV, kd.into())?;
198        header.add_bytes(TAG_SALT, &salt)?;
199        write_archive_header(&mut self.output, Some(header))?;
200        Ok(())
201    }
202
203    ///
204    /// Visit all of the files and directories within the specified path, adding
205    /// them to the archive.
206    ///
207    /// **Note:** Remember to call `finish()` when done adding content.
208    ///
209    pub fn add_dir_all<P: AsRef<Path>>(&mut self, basepath: P) -> Result<u64, Error> {
210        let mut file_count: u64 = 0;
211        let mut subdirs: Vec<(u32, PathBuf)> = Vec::new();
212        subdirs.push((0, basepath.as_ref().to_path_buf()));
213        while let Some((parent, currdir)) = subdirs.pop() {
214            let dir_id = self.add_directory(&currdir, parent)?;
215            let readdir = fs::read_dir(currdir)?;
216            for entry_result in readdir {
217                let entry = entry_result?;
218                let path = entry.path();
219                // DirEntry.metadata() does not follow symlinks and that is good
220                let metadata = entry.metadata()?;
221                if metadata.is_dir() {
222                    subdirs.push((dir_id, path));
223                } else if metadata.is_file() {
224                    self.add_file(&path, Some(dir_id))?;
225                    file_count += 1;
226                } else if metadata.is_symlink() {
227                    self.add_symlink(&path, Some(dir_id))?;
228                }
229            }
230        }
231        Ok(file_count)
232    }
233
234    ///
235    /// Return the number of bytes written to the output so far.
236    ///
237    /// This value is updated whenever a content block is written to the output,
238    /// and thus serves only as an approximation of the size of the data that
239    /// will eventually be flushed to the output. Thus, it may be off by as much
240    /// as the size of a content block depending on when this is called.
241    ///
242    /// A reasonable strategy would be to call this after each file or file
243    /// slice that is added, and when the value increases then that is likely
244    /// the most accurate value.
245    ///
246    pub fn bytes_written(&self) -> u64 {
247        self.bytes_written
248    }
249
250    ///
251    /// Call `finish()` when all file content has been added to the builder.
252    ///
253    pub fn finish(&mut self) -> Result<(), Error> {
254        if !self.contents.is_empty() {
255            self.process_contents()?;
256        }
257        Ok(())
258    }
259
260    ///
261    /// Process the current bundle of item content, clearing the collection and
262    /// resetting the current content position.
263    ///
264    fn process_contents(&mut self) -> Result<(), Error> {
265        self.insert_content()?;
266        self.contents.clear();
267        self.directories.clear();
268        self.current_pos = 0;
269        Ok(())
270    }
271
272    ///
273    /// Add directory entry to the manifest, returning the new directory identifier.
274    ///
275    pub fn add_directory<P: AsRef<Path>>(&mut self, path: P, parent: u32) -> Result<u32, Error> {
276        self.prev_dir_id += 1;
277        let mut dir_entry = Entry::new(path);
278        dir_entry.dir_id = Some(self.prev_dir_id);
279        // parent might be zero when buildin
280        if parent > 0 {
281            dir_entry.parent = Some(parent);
282        }
283        self.directories.push(dir_entry);
284        Ok(self.prev_dir_id)
285    }
286
287    ///
288    /// Adds a single file to the archive.
289    ///
290    /// Depending on the size of the file and the content bundle so far, this
291    /// may result in writing one or more manifest/content pairs to the output.
292    ///
293    /// **Note:** Remember to call `finish()` when done adding content.
294    ///
295    pub fn add_file<P: AsRef<Path>>(&mut self, path: P, parent: Option<u32>) -> Result<(), Error> {
296        self.prev_file_id += 1;
297        let mut file_entry = Entry::new(path.as_ref());
298        file_entry.parent = parent;
299        let md = fs::metadata(path.as_ref());
300        let file_len = match md.as_ref() {
301            Ok(attr) => attr.len(),
302            Err(_) => 0,
303        };
304        file_entry.size = Some(file_len);
305        self.files.insert(self.prev_file_id, file_entry);
306        // empty files will result in a manifest entry whose size is zero,
307        // allowing for the extraction process to know to create an empty file
308        // (otherwise it is difficult to tell from the available data)
309        let mut itempos: u64 = 0;
310        let mut size: u64 = file_len;
311        loop {
312            if self.current_pos + size > BUNDLE_SIZE {
313                let remainder = BUNDLE_SIZE - self.current_pos;
314                // add a portion of the file to fill the bundle
315                let content = IncomingContent {
316                    path: path.as_ref().to_path_buf(),
317                    kind: Kind::File,
318                    file_id: self.prev_file_id,
319                    itempos,
320                    contentpos: self.current_pos,
321                    size: remainder,
322                };
323                self.contents.push(content);
324                // insert the content and itemcontent rows and start a new
325                // bundle, then continue with the current file
326                self.process_contents()?;
327                size -= remainder;
328                itempos += remainder;
329            } else {
330                // the remainder of the file fits within this content bundle
331                let content = IncomingContent {
332                    path: path.as_ref().to_path_buf(),
333                    kind: Kind::File,
334                    file_id: self.prev_file_id,
335                    itempos,
336                    contentpos: self.current_pos,
337                    size,
338                };
339                self.contents.push(content);
340                self.current_pos += size;
341                break;
342            }
343        }
344        Ok(())
345    }
346
347    ///
348    /// Adds a symbolic link to the archive.
349    ///
350    /// **Note:** Remember to call `finish()` when done adding content.
351    ///
352    pub fn add_symlink<P: AsRef<Path>>(
353        &mut self,
354        path: P,
355        parent: Option<u32>,
356    ) -> Result<(), Error> {
357        self.prev_file_id += 1;
358        let mut file_entry = Entry::new(path.as_ref());
359        file_entry.parent = parent;
360        let md = fs::symlink_metadata(path.as_ref());
361        let link_len = match md.as_ref() {
362            Ok(attr) => attr.len(),
363            Err(_) => 0,
364        };
365        file_entry.size = Some(link_len);
366        self.files.insert(self.prev_file_id, file_entry);
367        // assume that the link value is relatively small and simply add it into
368        // the current content bundle in whole
369        let content = IncomingContent {
370            path: path.as_ref().to_path_buf(),
371            kind: Kind::Link,
372            file_id: self.prev_file_id,
373            itempos: 0,
374            contentpos: self.current_pos,
375            size: link_len,
376        };
377        self.contents.push(content);
378        self.current_pos += link_len;
379        Ok(())
380    }
381
382    ///
383    /// Adds a slice of a file to the archive, using the given name.
384    ///
385    /// Depending on the size of the slice and the content bundle so far, this
386    /// may result in writing one or more manifest/content pairs to the output.
387    ///
388    /// Upon extraction, the file will have the name given and will have a
389    /// length of `length` bytes. That is, only the slice is restored upon
390    /// extraction.
391    ///
392    /// **Note:** Remember to call `finish()` when done adding content.
393    ///
394    pub fn add_file_slice<P: AsRef<Path>, S: Into<String>>(
395        &mut self,
396        path: P,
397        name: S,
398        parent: Option<u32>,
399        offset: u64,
400        length: u32,
401    ) -> Result<(), Error> {
402        self.prev_file_id += 1;
403        let mut file_entry = Entry::with_name(name);
404        file_entry.parent = parent;
405        file_entry.size = Some(length as u64);
406        self.files.insert(self.prev_file_id, file_entry);
407
408        let mut itempos: u64 = offset;
409        let mut size: u64 = length as u64;
410        loop {
411            if self.current_pos + size > BUNDLE_SIZE {
412                let remainder = BUNDLE_SIZE - self.current_pos;
413                // add a portion of the file to fill the bundle
414                let content = IncomingContent {
415                    path: path.as_ref().to_path_buf(),
416                    kind: Kind::Slice(offset),
417                    file_id: self.prev_file_id,
418                    itempos,
419                    contentpos: self.current_pos,
420                    size: remainder,
421                };
422                self.contents.push(content);
423                // insert the content and itemcontent rows and start a new
424                // bundle, then continue with the current file
425                self.process_contents()?;
426                size -= remainder;
427                itempos += remainder;
428            } else {
429                // the remainder of the file fits within this content bundle
430                let content = IncomingContent {
431                    path: path.as_ref().to_path_buf(),
432                    kind: Kind::Slice(offset),
433                    file_id: self.prev_file_id,
434                    itempos,
435                    contentpos: self.current_pos,
436                    size,
437                };
438                self.contents.push(content);
439                self.current_pos += size;
440                break;
441            }
442        }
443        Ok(())
444    }
445
446    //
447    // Creates a content bundle based on the data collected so far, then
448    // compresses it, produces a manifest to describe the entries in this
449    // bundle, optionally encrypts everything, and finally writes to the output.
450    //
451    fn insert_content(&mut self) -> Result<(), Error> {
452        // Allocate a buffer for the compressed data, reusing it each time. For
453        // small data sets this makes no observable difference, but for large
454        // data sets (e.g. Linux kernel), it makes a huge difference.
455        let mut content: Vec<u8> = if let Some(mut buf) = self.buffer.take() {
456            buf.clear();
457            buf
458        } else {
459            Vec::with_capacity(BUNDLE_SIZE as usize)
460        };
461
462        // iterate through the file contents, compressing to the buffer
463        let mut encoder = zstd::stream::write::Encoder::new(content, 0)?;
464        for item in self.contents.iter() {
465            match item.kind {
466                Kind::Link => {
467                    let value = read_link(&item.path)?;
468                    encoder.write_all(&value)?;
469                }
470                _ => {
471                    // files and slices of files are handled the same
472                    let mut input = fs::File::open(&item.path)?;
473                    input.seek(SeekFrom::Start(item.itempos))?;
474                    let mut chunk = input.take(item.size);
475                    io::copy(&mut chunk, &mut encoder)?;
476                }
477            }
478        }
479        content = encoder.finish()?;
480
481        // serialize everything to an in-memory buffer to allow for easier
482        // encryption, in which we need to know the block size before writing
483        // the encryption header to the file _before_ the encrypted block
484        // (reader must know how many bytes to read)
485        let mut output: Vec<u8> = if let Some(mut buf) = self.manifest.take() {
486            buf.clear();
487            buf
488        } else {
489            // add some capacity for the manifest entries
490            Vec::with_capacity(content.len() / 2 * 3)
491        };
492
493        // create the manifest header
494        let num_entries = (self.directories.len() + self.contents.len()) as u32;
495        let header = make_manifest_header(num_entries, content.len())?;
496        header.write_header(&mut output)?;
497
498        // write all of the directory entries to the output
499        for dir_entry in self.directories.iter() {
500            let mut header = HeaderBuilder::new();
501            add_directory_rows(&dir_entry, &mut header)?;
502            if self.options.metadata {
503                add_metadata_rows(&dir_entry, &mut header)?;
504            }
505            header.write_header(&mut output)?;
506        }
507
508        // serialize item contents, and their corresponding file entries, as a
509        // single unit (files and symbolic links are handled the same)
510        for content in self.contents.iter() {
511            let file_entry = self
512                .files
513                .get(&content.file_id)
514                .expect("internal error, missing file entry for item content");
515            let mut header = HeaderBuilder::new();
516            add_file_rows(&file_entry, &mut header)?;
517            if content.itempos == 0 {
518                // optionally write the file size and metadata to the header,
519                // but only if this is the first time this entry is being
520                // written to a manifest (i.e. do not repeat later)
521                if self.options.file_size {
522                    add_size_row(&file_entry, &mut header)?;
523                }
524                if self.options.metadata {
525                    add_metadata_rows(&file_entry, &mut header)?;
526                }
527            } else if content.kind.is_slice() {
528                // slices may have a non-zero item position but we always need
529                // to have the slice length written to the header
530                add_size_row(&file_entry, &mut header)?;
531            }
532            add_content_rows(&content, &mut header)?;
533            header.write_header(&mut output)?;
534        }
535
536        // write the compressed buffer to the output
537        output.write_all(&mut content)?;
538
539        // if encryption is enabled, write an additional header and then the
540        // encrypted block of manifest + content; the result of this will likely
541        // be a larger (new) buffer, which will take the place of the working
542        // buffer named `output`
543        if let Some(ref secret) = self.secret_key {
544            let (cipher, nonce) = encrypt_data(&self.encryption, secret, output.as_slice())?;
545            let mut header = HeaderBuilder::new();
546            header.add_bytes(TAG_INIT_VECTOR, &nonce)?;
547            header.add_u32(TAG_ENCRYPTED_SIZE, cipher.len() as u32)?;
548            header.write_header(&mut self.output)?;
549            output = cipher;
550        }
551        let mut cursor = std::io::Cursor::new(&output);
552        io::copy(&mut cursor, &mut self.output)?;
553
554        self.bytes_written += output.len() as u64;
555        self.buffer = Some(content);
556        self.manifest = Some(output);
557        Ok(())
558    }
559}
560
561fn write_archive_header<W: Write>(mut output: W, rows: Option<HeaderBuilder>) -> Result<(), Error> {
562    let version = [b'E', b'X', b'A', b'F', 1, 1];
563    output.write_all(&version)?;
564    if let Some(header) = rows {
565        header.write_header(output)?;
566    } else {
567        // an empty header is a header with zero entries
568        output.write_all(&[0, 0])?;
569    }
570    Ok(())
571}
572
573///
574/// Builds a header consisting of 3-tuples, one value at a time.
575///
576struct HeaderBuilder {
577    buffer: Vec<u8>,
578    row_count: u16,
579}
580
581impl HeaderBuilder {
582    /// Create an empty header builder.
583    fn new() -> Self {
584        Self {
585            buffer: vec![],
586            row_count: 0,
587        }
588    }
589
590    /// Add a single u8 value to the header.
591    fn add_u8(&mut self, tag: u16, value: u8) -> Result<(), Error> {
592        let tag_bytes = u16::to_be_bytes(tag);
593        self.buffer.write_all(&tag_bytes)?;
594        self.buffer.write_all(&[0, 1, value])?;
595        self.row_count += 1;
596        Ok(())
597    }
598
599    /// Add a single u16 value to the header.
600    fn add_u16(&mut self, tag: u16, value: u16) -> Result<(), Error> {
601        if value < 256 {
602            self.add_u8(tag, value as u8)
603        } else {
604            let tag_bytes = u16::to_be_bytes(tag);
605            self.buffer.write_all(&tag_bytes)?;
606            self.buffer.write_all(&[0, 2])?;
607            let value_bytes = u16::to_be_bytes(value);
608            self.buffer.write_all(&value_bytes)?;
609            self.row_count += 1;
610            Ok(())
611        }
612    }
613
614    /// Add a single u32 value to the header.
615    fn add_u32(&mut self, tag: u16, value: u32) -> Result<(), Error> {
616        if value < 65_536 {
617            self.add_u16(tag, value as u16)
618        } else {
619            let tag_bytes = u16::to_be_bytes(tag);
620            self.buffer.write_all(&tag_bytes)?;
621            self.buffer.write_all(&[0, 4])?;
622            let value_bytes = u32::to_be_bytes(value);
623            self.buffer.write_all(&value_bytes)?;
624            self.row_count += 1;
625            Ok(())
626        }
627    }
628
629    /// Add a single u64 value to the header.
630    fn add_u64(&mut self, tag: u16, value: u64) -> Result<(), Error> {
631        if value < 4_294_967_296 {
632            self.add_u32(tag, value as u32)
633        } else {
634            let tag_bytes = u16::to_be_bytes(tag);
635            self.buffer.write_all(&tag_bytes)?;
636            self.buffer.write_all(&[0, 8])?;
637            let value_bytes = u64::to_be_bytes(value);
638            self.buffer.write_all(&value_bytes)?;
639            self.row_count += 1;
640            Ok(())
641        }
642    }
643
644    /// Add a single i32 value to the header.
645    fn add_i32(&mut self, tag: u16, value: i32) -> Result<(), Error> {
646        let tag_bytes = u16::to_be_bytes(tag);
647        self.buffer.write_all(&tag_bytes)?;
648        self.buffer.write_all(&[0, 4])?;
649        let value_bytes = i32::to_be_bytes(value);
650        self.buffer.write_all(&value_bytes)?;
651        self.row_count += 1;
652        Ok(())
653    }
654
655    /// Add a single i64 value to the header.
656    fn add_i64(&mut self, tag: u16, value: i64) -> Result<(), Error> {
657        if value <= 2_147_483_647 || value >= -2_147_483_648 {
658            self.add_i32(tag, value as i32)
659        } else {
660            let tag_bytes = u16::to_be_bytes(tag);
661            self.buffer.write_all(&tag_bytes)?;
662            self.buffer.write_all(&[0, 8])?;
663            let value_bytes = i64::to_be_bytes(value);
664            self.buffer.write_all(&value_bytes)?;
665            self.row_count += 1;
666            Ok(())
667        }
668    }
669
670    /// Add a variable length string to the header.
671    fn add_str(&mut self, tag: u16, value: &str) -> Result<(), Error> {
672        // the value is almost certainly not going to be this long
673        if value.len() > 65535 {
674            return Err(Error::InternalError("add_str value too long".into()));
675        }
676        let tag_bytes = u16::to_be_bytes(tag);
677        self.buffer.write_all(&tag_bytes)?;
678        let value_bytes = value.as_bytes();
679        let value_len = u16::to_be_bytes(value_bytes.len() as u16);
680        self.buffer.write_all(&value_len)?;
681        self.buffer.write_all(value_bytes)?;
682        self.row_count += 1;
683        Ok(())
684    }
685
686    /// Add a variable length slice of bytes to the header.
687    fn add_bytes(&mut self, tag: u16, value: &[u8]) -> Result<(), Error> {
688        // the value is almost certainly not going to be this long
689        if value.len() > 65535 {
690            return Err(Error::InternalError("add_bytes value too long".into()));
691        }
692        let tag_bytes = u16::to_be_bytes(tag);
693        self.buffer.write_all(&tag_bytes)?;
694        let value_len = u16::to_be_bytes(value.len() as u16);
695        self.buffer.write_all(&value_len)?;
696        self.buffer.write_all(value)?;
697        self.row_count += 1;
698        Ok(())
699    }
700
701    /// Write the header to the given output.
702    fn write_header<W: Write>(&self, mut output: W) -> Result<(), Error> {
703        let row_count = u16::to_be_bytes(self.row_count);
704        output.write_all(&row_count)?;
705        output.write_all(&self.buffer)?;
706        Ok(())
707    }
708}
709
710// Build the manifest header and write the bytes to the output.
711fn make_manifest_header(num_entries: u32, block_size: usize) -> Result<HeaderBuilder, Error> {
712    let mut header = HeaderBuilder::new();
713    header.add_u32(TAG_NUM_ENTRIES, num_entries)?;
714    // compression algorithm is always Zstandard, for now
715    header.add_u8(TAG_COMP_ALGO, Compression::ZStandard.into())?;
716    // block size will never larger than 2^32 bytes
717    header.add_u32(TAG_BLOCK_SIZE, block_size as u32)?;
718    Ok(header)
719}
720
721// Inject the metadata rows into the header.
722fn add_metadata_rows(entry: &Entry, header: &mut HeaderBuilder) -> Result<(), Error> {
723    if let Some(mode) = entry.mode {
724        header.add_u32(TAG_UNIX_MODE, mode)?;
725    }
726    if let Some(attrs) = entry.attrs {
727        header.add_u32(TAG_FILE_ATTRS, attrs)?;
728    }
729    if let Some(mt) = entry.mtime {
730        header.add_i64(TAG_MODIFY_TIME, mt.timestamp())?;
731    }
732    if let Some(ct) = entry.ctime {
733        header.add_i64(TAG_CREATE_TIME, ct.timestamp())?;
734    }
735    if let Some(at) = entry.atime {
736        header.add_i64(TAG_ACCESS_TIME, at.timestamp())?;
737    }
738    if let Some(ref username) = entry.user {
739        header.add_str(TAG_USER_NAME, username)?;
740    }
741    if let Some(ref groupname) = entry.group {
742        header.add_str(TAG_GROUP_NAME, groupname)?;
743    }
744    if let Some(uid) = entry.uid {
745        header.add_u32(TAG_USER_ID, uid)?;
746    }
747    if let Some(gid) = entry.gid {
748        header.add_u32(TAG_GROUP_ID, gid)?;
749    }
750    Ok(())
751}
752
753// Build the directory entry header and write the bytes to the output.
754fn add_directory_rows(entry: &Entry, header: &mut HeaderBuilder) -> Result<(), Error> {
755    if let Some(dir_id) = entry.dir_id {
756        header.add_u32(TAG_DIRECTORY_ID, dir_id)?;
757    } else {
758        return Err(Error::InternalError("dir_id was missing".into()));
759    }
760    header.add_str(TAG_NAME, &entry.name)?;
761    if let Some(parent) = entry.parent {
762        header.add_u32(TAG_PARENT, parent)?;
763    }
764    Ok(())
765}
766
767// Add the header rows for the file/link entry to the header builder.
768fn add_file_rows(entry: &Entry, header: &mut HeaderBuilder) -> Result<(), Error> {
769    if entry.is_link {
770        // symbolic links have the SL tag instead of the NM tag
771        header.add_str(TAG_SYM_LINK, &entry.name)?;
772    } else {
773        header.add_str(TAG_NAME, &entry.name)?;
774    }
775    if let Some(parent) = entry.parent {
776        header.add_u32(TAG_PARENT, parent)?;
777    }
778    Ok(())
779}
780
781// Add the header row for the file/link size, if set, to the header builder.
782fn add_size_row(entry: &Entry, header: &mut HeaderBuilder) -> Result<(), Error> {
783    if let Some(size) = entry.size {
784        header.add_u64(TAG_FILE_SIZE, size)?;
785    }
786    Ok(())
787}
788
789// Add the header rows for the item content to the header builder.
790fn add_content_rows(
791    item_content: &IncomingContent,
792    header: &mut HeaderBuilder,
793) -> Result<(), Error> {
794    match item_content.kind {
795        Kind::Slice(offset) => {
796            // slices will be extracted as their own file, so the recorded item
797            // position must be adjusted based on the starting offset
798            header.add_u64(TAG_ITEM_POS, item_content.itempos - offset)?;
799        }
800        _ => {
801            header.add_u64(TAG_ITEM_POS, item_content.itempos)?;
802        }
803    }
804    // content position will never more than 2^32 bytes
805    header.add_u32(TAG_CONTENT_POS, item_content.contentpos as u32)?;
806    // size of content will never more than 2^32 bytes
807    header.add_u32(TAG_ITEM_SIZE, item_content.size as u32)?;
808    Ok(())
809}
810
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    // Write the header held by the builder into a fresh buffer and return the
    // raw bytes so that the individual tests can inspect them.
    fn serialize(builder: HeaderBuilder) -> Result<Vec<u8>, Error> {
        let mut bytes: Vec<u8> = Vec::new();
        builder.write_header(&mut bytes)?;
        Ok(bytes)
    }

    // Compute the SHA-1 digest of the named file as a lowercase hex string.
    fn sha1_from_file(infile: &Path) -> io::Result<String> {
        use sha1::{Digest, Sha1};
        let mut hasher = Sha1::new();
        let mut input = fs::File::open(infile)?;
        io::copy(&mut input, &mut hasher)?;
        Ok(format!("{:x}", hasher.finalize()))
    }

    #[test]
    fn test_header_builder_empty() -> Result<(), Error> {
        // a builder without any rows is expected to yield two zero bytes
        let output = serialize(HeaderBuilder::new())?;
        assert_eq!(output.len(), 2);
        assert_eq!(output[..], [0, 0]);
        Ok(())
    }

    #[test]
    fn test_header_builder_down_i64() -> Result<(), Error> {
        // a small i64 value is expected to be written with a 4-byte payload
        let mut builder = HeaderBuilder::new();
        builder.add_i64(0x1234, 101)?;
        let output = serialize(builder)?;
        assert_eq!(output.len(), 10);
        assert_eq!(output[..], [0, 1, 0x12, 0x34, 0, 4, 0, 0, 0, 101]);
        Ok(())
    }

    #[test]
    fn test_header_builder_down_u64() -> Result<(), Error> {
        // a small u64 value is expected to be written with a 1-byte payload
        let mut builder = HeaderBuilder::new();
        builder.add_u64(0x1234, 101)?;
        let output = serialize(builder)?;
        assert_eq!(output.len(), 7);
        assert_eq!(output[..], [0, 1, 0x12, 0x34, 0, 1, 101]);
        Ok(())
    }

    #[test]
    fn test_header_builder_down_i32() -> Result<(), Error> {
        // a small i32 value is expected to keep its 4-byte payload
        let mut builder = HeaderBuilder::new();
        builder.add_i32(0x1234, 101)?;
        let output = serialize(builder)?;
        assert_eq!(output.len(), 10);
        assert_eq!(output[..], [0, 1, 0x12, 0x34, 0, 4, 0, 0, 0, 101]);
        Ok(())
    }

    #[test]
    fn test_header_builder_u8() -> Result<(), Error> {
        let mut builder = HeaderBuilder::new();
        builder.add_u8(0x1234, 255)?;
        let output = serialize(builder)?;
        assert_eq!(output.len(), 7);
        assert_eq!(output[..], [0, 1, 0x12, 0x34, 0, 1, 255]);
        Ok(())
    }

    #[test]
    fn test_header_builder_u16() -> Result<(), Error> {
        let mut builder = HeaderBuilder::new();
        builder.add_u16(0x1234, 65_535)?;
        let output = serialize(builder)?;
        assert_eq!(output.len(), 8);
        assert_eq!(output[..], [0, 1, 0x12, 0x34, 0, 2, 255, 255]);
        Ok(())
    }

    #[test]
    fn test_header_builder_u32() -> Result<(), Error> {
        let mut builder = HeaderBuilder::new();
        builder.add_u32(0x1234, 4_294_967_295)?;
        let output = serialize(builder)?;
        assert_eq!(output.len(), 10);
        assert_eq!(output[..], [0, 1, 0x12, 0x34, 0, 4, 255, 255, 255, 255]);
        Ok(())
    }

    #[test]
    fn test_header_builder_u64() -> Result<(), Error> {
        // a value beyond the 32-bit range is expected to use all 8 bytes
        let mut builder = HeaderBuilder::new();
        builder.add_u64(0x1234, 4_294_967_297)?;
        let output = serialize(builder)?;
        assert_eq!(output.len(), 14);
        assert_eq!(output[..], [0, 1, 0x12, 0x34, 0, 8, 0, 0, 0, 1, 0, 0, 0, 1]);
        Ok(())
    }

    #[test]
    fn test_header_builder_str() -> Result<(), Error> {
        let mut builder = HeaderBuilder::new();
        builder.add_str(0x1234, "foobar")?;
        let output = serialize(builder)?;
        assert_eq!(output.len(), 12);
        assert_eq!(
            output[..],
            [0, 1, 0x12, 0x34, 0, 6, b'f', b'o', b'o', b'b', b'a', b'r']
        );
        Ok(())
    }

    #[test]
    fn test_header_builder_bytes() -> Result<(), Error> {
        let mut builder = HeaderBuilder::new();
        builder.add_bytes(0x1234, "foobar".as_bytes())?;
        let output = serialize(builder)?;
        assert_eq!(output.len(), 12);
        assert_eq!(
            output[..],
            [0, 1, 0x12, 0x34, 0, 6, b'f', b'o', b'o', b'b', b'a', b'r']
        );
        Ok(())
    }

    #[test]
    fn test_create_archive_big_content() -> Result<(), Error> {
        // The fixture is larger than the BUNDLE_SIZE used when configured for
        // testing (2048 bytes), which forces a different code path through
        // add_file().
        let outdir = tempdir()?;
        let archive = outdir.path().join("archive.exa");

        // build the archive
        let output = fs::File::create(&archive)?;
        let mut writer = super::writer::Writer::new(output)?;
        writer.add_file("test/fixtures/IMG_0385.JPG", None)?;
        assert_eq!(writer.bytes_written(), 19541);
        writer.finish()?;

        // unpack the archive again and compare the digest of the result
        let mut reader = super::reader::from_file(&archive)?;
        reader.extract_all(outdir.path())?;
        let extracted = outdir.path().join("IMG_0385.JPG");
        let actual = sha1_from_file(&extracted)?;
        assert_eq!(actual, "98074ad81e1ddac384cfcd23144109d4d6baa5f2");

        Ok(())
    }

    #[test]
    fn test_create_archive_file_slice() -> Result<(), Error> {
        let outdir = tempdir()?;
        let archive = outdir.path().join("archive.exa");

        // build the archive with the file size option turned on
        let output = fs::File::create(&archive)?;
        let options = Options::new().file_size(true);
        let mut writer = super::writer::Writer::with_options(output, options)?;
        writer.add_file_slice(
            "test/fixtures/IMG_0385.JPG",
            "5ba33678260abc495b6c77003ddab5cc613b9ba7",
            None,
            4096,
            8192,
        )?;
        assert_eq!(writer.bytes_written(), 6431);
        writer.finish()?;

        // the single entry must report the size of the slice; entries that
        // fail to load are skipped and a missing size is treated as zero
        let listing = super::reader::Entries::new(&archive)?;
        let sizes: Vec<u64> = listing
            .filter_map(|item| item.ok().map(|entry| entry.size().unwrap_or(0)))
            .collect();
        assert_eq!(sizes.len(), 1);
        assert_eq!(sizes[0], 8192);

        // unpack the archive again and compare the digest of the result
        let mut reader = super::reader::from_file(&archive)?;
        reader.extract_all(outdir.path())?;
        let extracted = outdir
            .path()
            .join("5ba33678260abc495b6c77003ddab5cc613b9ba7");
        let actual = sha1_from_file(&extracted)?;
        assert_eq!(actual, "5ba33678260abc495b6c77003ddab5cc613b9ba7");

        Ok(())
    }
}
1003}