Skip to main content

exaf_rs/
lib.rs

1//
2// Copyright (c) 2024 Nathan Fiedler
3//
4
5//!
6//! The `exaf_rs` crate implements the EXtensible Archiver Format and is
7//! intended to be used in compressing and archiving files. It offers an
8//! alternative to the well-known zip and 7-zip formats, with extensibility in
9//! mind. The running time of this reference implementation is similar to that
10//! of GNU tar with Zstandard compression, and the resulting file size is very
11//! similar.
12//!
13//! ## Examples
14//!
15//! To create an archive, use the `Writer` to add files, directories, and
16//! symbolic links. It also can add an entire directory tree in one call.
17//!
18//! ```no_run
19//! # use std::fs::File;
20//! use exaf_rs::writer::Writer;
21//!
22//! let output = File::create("archive.exa").expect("create file");
23//! let mut writer = Writer::new(output).expect("new writer");
24//! writer.add_dir_all("important-docs").expect("add dir all");
25//!
26//! // You must call finish() in order to flush everything to the output.
27//! writer.finish().expect("finish");
28//! ```
29//!
30//! The code to list the entries of an archive might look like this (this
31//! example assumes that the archive is not encrypted):
32//!
33//! ```no_run
34//! let mut reader = exaf_rs::reader::Entries::new("archive.exa").expect("new entries");
35//! for result in reader {
36//!     match result {
37//!         Ok(entry) => println!("{}", entry.name()),
38//!         Err(err) => println!("error: {}", err),
39//!     }
40//! }
41//! ```
42//!
43//! To extract everything within the archive, which may be encrypted, use the
44//! `Reader` like so:
45//!
46//! ```no_run
47//! # let passwd: Option<&str> = None;
48//! let mut reader = exaf_rs::reader::from_file("archive.exa").expect("from file");
49//! if reader.is_encrypted() && passwd.is_none() {
50//!     println!("Archive is encrypted, please provide a password.");
51//! } else {
52//!     if let Some(password) = passwd {
53//!         reader.enable_encryption(password).expect("enable crypto");
54//!     }
55//!     let path = std::env::current_dir().expect("no env?");
56//!     reader.extract_all(&path).expect("extract all");
57//! }
58//! ```
59//!
60//! A more complete example of creating and extracting archives can be found in
61//! the `src/main.rs` file in the source repository.
62//!
63
64use chrono::prelude::*;
65use std::fmt;
66use std::fs;
67use std::path::{Component, Path, PathBuf};
68use std::time::SystemTime;
69
70/// This type represents all possible errors that can occur within this crate.
71#[derive(thiserror::Error, Debug)]
72pub enum Error {
73    /// Error occurred during IO related operation.
74    #[error("I/O error: {0}")]
75    IOError(#[from] std::io::Error),
76    /// Error occurred decoding a UTF-8 string from bytes.
77    #[error("UTF-8 error: {0}")]
78    FromUtf8Error(#[from] std::string::FromUtf8Error),
79    /// Error occurred attempting to manipulate a slice.
80    #[error("Slice error: {0}")]
81    SliceError(#[from] std::array::TryFromSliceError),
82    /// The symbolic link bytes were not decipherable.
83    #[error("symbolic link encoding was not recognized")]
84    LinkTextEncoding,
85    /// File header lacks the initial `E,X,A,F` bytes.
86    #[error("missing magic 'EXAF' number")]
87    MissingMagic,
88    /// File/link entry referred to an unknown parent.
89    #[error("unknown parent identifier {0}")]
90    MissingParent(u32),
91    /// Reached the end of file before reading all of the content.
92    #[error("unexpectedly reached end of file")]
93    UnexpectedEof,
94    /// Version of EXAF not currently supported by this crate.
95    #[error("unsupported EXAF version")]
96    UnsupportedVersion,
97    /// Encountered and entry header that was not recognized.
98    #[error("unsupported header format")]
99    UnsupportedHeader,
100    /// Compression algorithm in archive is not supported.
101    #[error("unsupported compression algorithm {0}")]
102    UnsupportedCompAlgo(u8),
103    /// Encryption algorithm in archive is not supported.
104    #[error("unsupported encryption algorithm {0}")]
105    UnsupportedEncAlgo(u8),
106    /// Key derivation function in archive is not supported.
107    #[error("unsupported key derivation function {0}")]
108    UnsupportedKeyAlgo(u8),
109    /// A header was missing a required tag row.
110    #[error("missing required tag from header: {0}")]
111    MissingTag(String),
112    /// A usage error
113    #[error("error: {0}")]
114    Usage(String),
115    /// An unexpected error occurred that would otherwise have been a panic.
116    #[error("something bad happened: {0}")]
117    InternalError(String),
118}
119
///
/// Produce the final component of the path as an owned `String`.
///
/// When the path has no final component (for instance it ends in `..`), or
/// that component is not valid UTF-8, the entire path is returned instead
/// after a lossy conversion.
///
fn get_file_name<P: AsRef<Path>>(path: P) -> String {
    let path = path.as_ref();
    path.file_name()
        .and_then(|name| name.to_str())
        .map(str::to_owned)
        // no usable final component, fall back to whatever the path holds
        .unwrap_or_else(|| path.to_string_lossy().into_owned())
}
134
135///
136/// Read the symbolic link value and convert to raw bytes.
137///
138fn read_link(path: &Path) -> Result<Vec<u8>, Error> {
139    // convert whatever value returned by the OS into raw bytes without string conversion
140    use os_str_bytes::OsStringBytes;
141    let value = fs::read_link(path)?;
142    Ok(value.into_os_string().into_raw_vec())
143}
144
145///
146/// Create a symbolic link using the given raw bytes.
147///
148fn write_link(contents: &[u8], filepath: &Path) -> Result<(), Error> {
149    use os_str_bytes::OsStringBytes;
150    // this may panic if the bytes are not valid for this platform
151    let target =
152        std::ffi::OsString::from_io_vec(contents.to_owned()).ok_or(Error::LinkTextEncoding)?;
153    // cfg! macro will not work in this OS-specific import case
154    {
155        #[cfg(target_family = "unix")]
156        use std::os::unix::fs;
157        #[cfg(target_family = "windows")]
158        use std::os::windows::fs;
159        #[cfg(target_family = "unix")]
160        fs::symlink(target, filepath)?;
161        #[cfg(target_family = "windows")]
162        fs::symlink_file(target, filepath)?;
163    }
164    Ok(())
165}
166
///
/// Look up the Unix file mode of the given path, without following a symbolic
/// link. Yields `None` if the metadata cannot be read.
///
#[cfg(target_family = "unix")]
fn unix_mode<P: AsRef<Path>>(path: P) -> Option<u32> {
    use std::os::unix::fs::MetadataExt;
    fs::symlink_metadata(path).ok().map(|meta| meta.mode())
}

///
/// There is no Unix file mode on Windows, so this always yields `None`.
///
#[cfg(target_family = "windows")]
fn unix_mode<P: AsRef<Path>>(_path: P) -> Option<u32> {
    None
}
184
///
/// There are no Windows file attributes on Unix, so this always yields
/// `None`.
///
#[cfg(target_family = "unix")]
fn file_attrs<P: AsRef<Path>>(_path: P) -> Option<u32> {
    None
}

///
/// Look up the Windows file attributes of the given path, without following a
/// symbolic link. Yields `None` if the metadata cannot be read.
///
#[cfg(target_family = "windows")]
fn file_attrs<P: AsRef<Path>>(path: P) -> Option<u32> {
    use std::os::windows::prelude::*;
    fs::symlink_metadata(path)
        .ok()
        .map(|meta| meta.file_attributes())
}
202
///
/// Build a cleaned copy of the path that consists solely of its normal
/// components. Roots and prefixes cause trouble when extracting an archive, so
/// they are always dropped. Components that refer to the current or parent
/// directory are dropped as well, and are _not_ resolved: "foo/../bar" turns
/// into "foo/bar".
///
fn sanitize_path<P: AsRef<Path>>(dirty: P) -> PathBuf {
    dirty
        .as_ref()
        .components()
        .filter(|part| matches!(part, Component::Normal(_)))
        .collect()
}
219
220///
221/// Generate a salt appropriate for the given key derivation function.
222///
223fn generate_salt(kd: &KeyDerivation) -> Result<Vec<u8>, Error> {
224    match kd {
225        KeyDerivation::Argon2id => {
226            use argon2::password_hash::{SaltString, rand_core::OsRng};
227            let salt = SaltString::generate(&mut OsRng);
228            let mut buf: Vec<u8> = vec![0; salt.len()];
229            let bytes = salt
230                .decode_b64(&mut buf)
231                .map_err(|e| Error::InternalError(format!("argon2 failed: {}", e)))?;
232            Ok(bytes.to_vec())
233        }
234        KeyDerivation::None => Err(Error::UnsupportedKeyAlgo(255)),
235    }
236}
237
238///
239/// Produce a secret key from a passphrase and random salt.
240///
241fn derive_key(
242    kd: &KeyDerivation,
243    password: &str,
244    salt: &[u8],
245    params: &KeyDerivationParams,
246) -> Result<Vec<u8>, Error> {
247    match kd {
248        KeyDerivation::Argon2id => {
249            use argon2::{Algorithm, ParamsBuilder, Version};
250            let mut output: Vec<u8> = vec![0; params.tag_length as usize];
251            let mut builder: ParamsBuilder = ParamsBuilder::new();
252            builder.t_cost(params.time_cost);
253            builder.m_cost(params.mem_cost);
254            builder.p_cost(params.para_cost);
255            builder.output_len(params.tag_length as usize);
256            let kdf = builder
257                .context(Algorithm::Argon2id, Version::V0x13)
258                .map_err(|e| Error::InternalError(format!("argon2 failed: {}", e)))?;
259            kdf.hash_password_into(password.as_bytes(), salt, output.as_mut_slice())
260                .map_err(|e| Error::InternalError(format!("argon2 failed: {}", e)))?;
261            Ok(output)
262        }
263        KeyDerivation::None => Err(Error::UnsupportedKeyAlgo(255)),
264    }
265}
266
267///
268/// Encrypt the given data, returning a newly allocated vector of bytes
269/// containing the cipher text, and the nonce that was generated.
270///
271fn encrypt_data(ea: &Encryption, key: &[u8], data: &[u8]) -> Result<(Vec<u8>, Vec<u8>), Error> {
272    match ea {
273        Encryption::AES256GCM => {
274            use aes_gcm::{
275                Aes256Gcm, Key,
276                aead::{Aead, AeadCore, KeyInit, OsRng},
277            };
278            let key: &Key<Aes256Gcm> = key.into();
279            let cipher = Aes256Gcm::new(key);
280            let nonce = Aes256Gcm::generate_nonce(&mut OsRng);
281            let ciphertext = cipher
282                .encrypt(&nonce, data)
283                .map_err(|e| Error::InternalError(format!("aes_gcm failed: {}", e)))?;
284            Ok((ciphertext, nonce.to_vec()))
285        }
286        Encryption::None => Err(Error::UnsupportedEncAlgo(255)),
287    }
288}
289
290///
291/// Decrypt the given data, returning a newly allocated vector of bytes
292/// containing the plain text.
293///
294fn decrypt_data(ea: &Encryption, key: &[u8], data: &[u8], nonce: &[u8]) -> Result<Vec<u8>, Error> {
295    match ea {
296        Encryption::AES256GCM => {
297            use aes_gcm::{
298                Aes256Gcm, Key,
299                aead::{Aead, AeadCore, KeyInit, generic_array::GenericArray},
300            };
301            let key: &Key<Aes256Gcm> = key.into();
302            let cipher = Aes256Gcm::new(key);
303            let nonce: &GenericArray<u8, <Aes256Gcm as AeadCore>::NonceSize> = nonce.into();
304            let plaintext = cipher
305                .decrypt(nonce, data)
306                .map_err(|e| Error::InternalError(format!("aes_gcm failed: {}", e)))?;
307            Ok(plaintext)
308        }
309        Encryption::None => Err(Error::UnsupportedEncAlgo(255)),
310    }
311}
312
///
/// The kind of compression applied to an individual content block.
///
#[derive(Debug, Clone, PartialEq)]
enum Compression {
    None,
    ZStandard,
}

impl fmt::Display for Compression {
    /// Write the short, human-readable name of the algorithm.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let label = match self {
            Compression::None => "none",
            Compression::ZStandard => "zstd",
        };
        f.write_str(label)
    }
}

impl From<Compression> for u8 {
    /// Map the algorithm to its single-byte numeric identifier.
    fn from(algo: Compression) -> Self {
        match algo {
            Compression::None => 0,
            Compression::ZStandard => 1,
        }
    }
}
339
340impl TryFrom<u8> for Compression {
341    type Error = self::Error;
342
343    fn try_from(value: u8) -> Result<Self, Self::Error> {
344        match value {
345            0 => Ok(Compression::None),
346            1 => Ok(Compression::ZStandard),
347            v => Err(self::Error::UnsupportedCompAlgo(v)),
348        }
349    }
350}
351
///
/// The algorithm used to encrypt the data in the archive.
///
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum Encryption {
    /// No encryption; _default_
    None,
    /// The AEAD cipher AES-GCM with a 256 bit key.
    AES256GCM,
}

impl fmt::Display for Encryption {
    /// Write the human-readable name of the algorithm.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let label = match self {
            Encryption::None => "none",
            Encryption::AES256GCM => "AES256GCM",
        };
        f.write_str(label)
    }
}

impl From<Encryption> for u8 {
    /// Map the algorithm to its single-byte numeric identifier.
    fn from(algo: Encryption) -> Self {
        match algo {
            Encryption::None => 0,
            Encryption::AES256GCM => 1,
        }
    }
}
380
381impl TryFrom<u8> for Encryption {
382    type Error = self::Error;
383
384    fn try_from(value: u8) -> Result<Self, Self::Error> {
385        match value {
386            0 => Ok(Encryption::None),
387            1 => Ok(Encryption::AES256GCM),
388            v => Err(self::Error::UnsupportedEncAlgo(v)),
389        }
390    }
391}
392
///
/// The algorithm used to turn a passphrase into a key.
///
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum KeyDerivation {
    /// No derivation function, _default_
    None,
    /// The Argon2id key derivation function.
    Argon2id,
}

impl fmt::Display for KeyDerivation {
    /// Write the human-readable name of the function.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let label = match self {
            KeyDerivation::None => "none",
            KeyDerivation::Argon2id => "Argon2id",
        };
        f.write_str(label)
    }
}

impl From<KeyDerivation> for u8 {
    /// Map the function to its single-byte numeric identifier.
    fn from(algo: KeyDerivation) -> Self {
        match algo {
            KeyDerivation::None => 0,
            KeyDerivation::Argon2id => 1,
        }
    }
}
421
422impl TryFrom<u8> for KeyDerivation {
423    type Error = self::Error;
424
425    fn try_from(value: u8) -> Result<Self, Self::Error> {
426        match value {
427            0 => Ok(KeyDerivation::None),
428            1 => Ok(KeyDerivation::Argon2id),
429            v => Err(self::Error::UnsupportedKeyAlgo(v)),
430        }
431    }
432}
433
///
/// The tunable inputs of a key derivation function. Most such functions accept
/// a similar set of values.
///
/// Start from `Default::default()` and chain the setters; each setter leaves
/// the current value in place when it is handed `None`.
///
#[derive(Debug, Clone)]
pub struct KeyDerivationParams {
    /// How many iterations the key derivation function performs
    time_cost: u32,
    /// How many 1 kb memory blocks the key derivation function uses
    mem_cost: u32,
    /// The degree of parallelism of the key derivation function
    para_cost: u32,
    /// The length of the output of the key derivation function
    tag_length: u32,
}

impl KeyDerivationParams {
    ///
    /// Apply the time cost read from the archive, if there was one.
    ///
    pub fn time_cost(mut self, time_cost: Option<u32>) -> Self {
        self.time_cost = time_cost.unwrap_or(self.time_cost);
        self
    }

    ///
    /// Apply the memory cost read from the archive, if there was one.
    ///
    pub fn mem_cost(mut self, mem_cost: Option<u32>) -> Self {
        self.mem_cost = mem_cost.unwrap_or(self.mem_cost);
        self
    }

    ///
    /// Apply the degree of parallelism read from the archive, if there was
    /// one.
    ///
    pub fn para_cost(mut self, para_cost: Option<u32>) -> Self {
        self.para_cost = para_cost.unwrap_or(self.para_cost);
        self
    }

    ///
    /// Apply the output length read from the archive, if there was one.
    ///
    pub fn tag_length(mut self, tag_length: Option<u32>) -> Self {
        self.tag_length = tag_length.unwrap_or(self.tag_length);
        self
    }
}

impl Default for KeyDerivationParams {
    /// Two iterations, 19 * 1024 memory blocks, no parallelism, and a 32 byte
    /// output.
    fn default() -> Self {
        Self {
            time_cost: 2,
            mem_cost: 19 * 1024,
            para_cost: 1,
            tag_length: 32,
        }
    }
}
503
504///
505/// Represents a file, directory, or symbolic link within an archive.
506///
507#[derive(Clone, Debug)]
508pub struct Entry {
509    // name of the file, directory, or symbolic link
510    name: String,
511    // true if this entry is a symbolic link
512    is_link: bool,
513    // identifier for directory entries
514    dir_id: Option<u32>,
515    // identifier of the parent directory
516    parent: Option<u32>,
517    // size of the content for files and links
518    size: Option<u64>,
519    // Unix file mode
520    mode: Option<u32>,
521    // Windows file attributes
522    attrs: Option<u32>,
523    // Unix user identifier
524    uid: Option<u32>,
525    // name of the owning user
526    user: Option<String>,
527    // Unix group identifier
528    gid: Option<u32>,
529    // name of the owning group
530    group: Option<String>,
531    // created time
532    ctime: Option<DateTime<Utc>>,
533    // modification time
534    mtime: Option<DateTime<Utc>>,
535    // last accessed time
536    atime: Option<DateTime<Utc>>,
537}
538
539impl Entry {
540    ///
541    /// Create an instance of `Entry` based on the given path.
542    ///
543    pub fn new<P: AsRef<Path>>(path: P) -> Self {
544        let name = get_file_name(path.as_ref());
545        let metadata = fs::symlink_metadata(path.as_ref());
546        let mtime = match metadata.as_ref() {
547            Ok(attr) => {
548                let mt = attr.modified().unwrap_or(SystemTime::UNIX_EPOCH);
549                Some(DateTime::<Utc>::from(mt))
550            }
551            Err(_) => None,
552        };
553        let ctime = match metadata.as_ref() {
554            Ok(attr) => {
555                let ct = attr.created().unwrap_or(SystemTime::UNIX_EPOCH);
556                Some(DateTime::<Utc>::from(ct))
557            }
558            Err(_) => None,
559        };
560        let atime = match metadata.as_ref() {
561            Ok(attr) => {
562                let at = attr.accessed().unwrap_or(SystemTime::UNIX_EPOCH);
563                Some(DateTime::<Utc>::from(at))
564            }
565            Err(_) => None,
566        };
567        let is_link = match metadata.as_ref() {
568            Ok(attr) => attr.is_symlink(),
569            Err(_) => false,
570        };
571        let mode = unix_mode(path.as_ref());
572        let attrs = file_attrs(path.as_ref());
573        let em = Self {
574            name,
575            is_link,
576            dir_id: None,
577            parent: None,
578            size: None,
579            mode,
580            attrs,
581            uid: None,
582            gid: None,
583            user: None,
584            group: None,
585            ctime,
586            mtime,
587            atime,
588        };
589        em.owners(path.as_ref())
590    }
591
592    ///
593    /// Create an instance of `Entry` with just the given name.
594    ///
595    pub fn with_name<S: Into<String>>(name: S) -> Self {
596        Self {
597            name: name.into(),
598            is_link: false,
599            dir_id: None,
600            parent: None,
601            size: None,
602            mode: None,
603            attrs: None,
604            uid: None,
605            gid: None,
606            user: None,
607            group: None,
608            ctime: None,
609            mtime: None,
610            atime: None,
611        }
612    }
613
614    ///
615    /// Set the user and group ownership of the given path.
616    ///
617    #[cfg(target_family = "unix")]
618    fn owners<P: AsRef<Path>>(mut self, path: P) -> Self {
619        use std::ffi::CStr;
620        use std::os::unix::fs::MetadataExt;
621        if let Ok(meta) = fs::symlink_metadata(path) {
622            self.uid = Some(meta.uid());
623            self.gid = Some(meta.gid());
624            // get the user name
625            let username: String = unsafe {
626                let passwd = libc::getpwuid(meta.uid());
627                if passwd.is_null() {
628                    String::new()
629                } else {
630                    let c_buf = (*passwd).pw_name;
631                    if c_buf.is_null() {
632                        String::new()
633                    } else {
634                        CStr::from_ptr(c_buf).to_string_lossy().into_owned()
635                    }
636                }
637            };
638            self.user = Some(username);
639            // get the group name
640            let groupname = unsafe {
641                let group = libc::getgrgid(meta.gid());
642                if group.is_null() {
643                    String::new()
644                } else {
645                    let c_buf = (*group).gr_name;
646                    if c_buf.is_null() {
647                        String::new()
648                    } else {
649                        CStr::from_ptr(c_buf).to_string_lossy().into_owned()
650                    }
651                }
652            };
653            self.group = Some(groupname);
654        }
655        self
656    }
657
658    #[cfg(target_family = "windows")]
659    fn owners(self, _path: &Path) -> Self {
660        self
661    }
662
663    /// Name of the entry, will be the full path when returned from `Entries`.
664    pub fn name(&self) -> &str {
665        self.name.as_str()
666    }
667
668    pub fn size(&self) -> Option<u64> {
669        self.size
670    }
671
672    /// Unix file mode
673    pub fn mode(&self) -> Option<u32> {
674        self.mode
675    }
676
677    /// Windows file attributes
678    pub fn attrs(&self) -> Option<u32> {
679        self.attrs
680    }
681
682    /// Unix user identifier
683    pub fn uid(&self) -> Option<u32> {
684        self.uid
685    }
686
687    /// Name of the owning user
688    pub fn user(&self) -> Option<&str> {
689        self.user.as_deref()
690    }
691
692    /// Unix group identifier
693    pub fn gid(&self) -> Option<u32> {
694        self.gid
695    }
696
697    /// Name of the owning group
698    pub fn group(&self) -> Option<&str> {
699        self.group.as_deref()
700    }
701
702    /// Created time
703    pub fn ctime(&self) -> Option<DateTime<Utc>> {
704        self.ctime
705    }
706
707    /// Modification time
708    pub fn mtime(&self) -> Option<DateTime<Utc>> {
709        self.mtime
710    }
711
712    /// Last accessed time
713    pub fn atime(&self) -> Option<DateTime<Utc>> {
714        self.atime
715    }
716}
717
///
/// What sort of content-bearing entry an item is, such as a file or a
/// symbolic link.
///
#[derive(Debug, Clone, PartialEq)]
pub enum Kind {
    /// Item represents a file in its entirety.
    File,
    /// Item represents a symbolic link.
    Link,
    /// Item represents a portion of a file at the given offset.
    Slice(u64),
}

impl Kind {
    /// Return `true` if this kind represents a file slice.
    pub fn is_slice(&self) -> bool {
        match self {
            Kind::Slice(_) => true,
            Kind::File | Kind::Link => false,
        }
    }
}
737
// The numeric value of every tag is a pair of ASCII letters read as a
// big-endian `u16`, so each constant is spelled here using its two letters.

// tags for archive header rows
const TAG_ENC_ALGO: u16 = u16::from_be_bytes(*b"EA");
const TAG_KEY_DERIV: u16 = u16::from_be_bytes(*b"KD");
const TAG_SALT: u16 = u16::from_be_bytes(*b"SA");
const TAG_TIME_COST: u16 = u16::from_be_bytes(*b"TC");
const TAG_MEM_COST: u16 = u16::from_be_bytes(*b"MC");
const TAG_PARA_COST: u16 = u16::from_be_bytes(*b"PC");
const TAG_TAG_LENGTH: u16 = u16::from_be_bytes(*b"TL");

// tags for manifest header rows
const TAG_NUM_ENTRIES: u16 = u16::from_be_bytes(*b"NE");
const TAG_COMP_ALGO: u16 = u16::from_be_bytes(*b"CA");
const TAG_BLOCK_SIZE: u16 = u16::from_be_bytes(*b"BS");

// tags for entry header rows
const TAG_NAME: u16 = u16::from_be_bytes(*b"NM");
const TAG_PARENT: u16 = u16::from_be_bytes(*b"PA");
const TAG_FILE_SIZE: u16 = u16::from_be_bytes(*b"LN");
const TAG_DIRECTORY_ID: u16 = u16::from_be_bytes(*b"ID");
const TAG_UNIX_MODE: u16 = u16::from_be_bytes(*b"MO");
const TAG_FILE_ATTRS: u16 = u16::from_be_bytes(*b"FA");
const TAG_MODIFY_TIME: u16 = u16::from_be_bytes(*b"MT");
const TAG_CREATE_TIME: u16 = u16::from_be_bytes(*b"CT");
const TAG_ACCESS_TIME: u16 = u16::from_be_bytes(*b"AT");
const TAG_USER_NAME: u16 = u16::from_be_bytes(*b"UN");
const TAG_GROUP_NAME: u16 = u16::from_be_bytes(*b"GN");
const TAG_USER_ID: u16 = u16::from_be_bytes(*b"UI");
const TAG_GROUP_ID: u16 = u16::from_be_bytes(*b"GI");
const TAG_ITEM_POS: u16 = u16::from_be_bytes(*b"IP");
const TAG_CONTENT_POS: u16 = u16::from_be_bytes(*b"CP");
const TAG_ITEM_SIZE: u16 = u16::from_be_bytes(*b"SZ");
const TAG_SYM_LINK: u16 = u16::from_be_bytes(*b"SL");

// tags for encryption header rows
const TAG_INIT_VECTOR: u16 = u16::from_be_bytes(*b"IV");
const TAG_ENCRYPTED_SIZE: u16 = u16::from_be_bytes(*b"ES");
774
// Target size of a compressed bundle of file data.
//
// The unit tests only work with fairly small files, so a test build uses a
// very small bundle size. That makes it easy to exercise the case in which
// adding a file spills over into another manifest/content pair.
#[cfg(test)]
const BUNDLE_SIZE: u64 = 2 * 1024;
#[cfg(not(test))]
const BUNDLE_SIZE: u64 = 16 * 1024 * 1024;

///
/// Report the size, in bytes, that a content block reaches before a `Writer`
/// starts a new manifest and content pair.
///
pub fn content_size() -> u64 {
    BUNDLE_SIZE
}
792
793pub mod reader;
794pub mod writer;
795
796#[cfg(test)]
797mod tests {
798    use super::*;
799    use crate::writer::Options;
800    use tempfile::tempdir;
801
802    #[test]
803    fn test_content_size() {
804        assert_eq!(content_size(), 2048);
805    }
806
807    #[test]
808    fn test_kind_is_slice() {
809        assert!(!Kind::File.is_slice());
810        assert!(!Kind::Link.is_slice());
811        assert!(Kind::Slice(0).is_slice());
812    }
813
814    #[test]
815    fn test_get_file_name() -> Result<(), Error> {
816        assert_eq!(get_file_name(PathBuf::from("")), "");
817        assert_eq!(get_file_name(PathBuf::from("path/to/file")), "file");
818        assert_eq!(get_file_name(PathBuf::from("path/to/..")), "path/to/..");
819        Ok(())
820    }
821
822    #[cfg(target_family = "unix")]
823    #[test]
824    fn test_write_link_read_link() -> Result<(), Error> {
825        let outdir = tempdir()?;
826        let link = outdir.path().join("mylink");
827        let target = "link_target_is_meaningless";
828        write_link(target.as_bytes(), &link)?;
829        let actual = read_link(&link)?;
830        assert_eq!(actual, target.as_bytes());
831        Ok(())
832    }
833
834    #[test]
835    fn test_sanitize_path() -> Result<(), Error> {
836        // need to use real paths for the canonicalize() call
837        #[cfg(target_family = "windows")]
838        {
839            let result = sanitize_path(Path::new("C:\\Windows"));
840            assert_eq!(result, PathBuf::from("Windows"));
841        }
842        #[cfg(target_family = "unix")]
843        {
844            let result = sanitize_path(Path::new("/etc"));
845            assert_eq!(result, PathBuf::from("etc"));
846        }
847        let result = sanitize_path(Path::new("src/lib.rs"));
848        assert_eq!(result, PathBuf::from("src/lib.rs"));
849
850        let result = sanitize_path(Path::new("/usr/../src/./lib.rs"));
851        assert_eq!(result, PathBuf::from("usr/src/lib.rs"));
852        Ok(())
853    }
854
855    #[test]
856    fn test_compression_try_from() {
857        let result = Compression::try_from(0);
858        assert!(result.is_ok());
859        let value = result.unwrap();
860        assert_eq!(value, Compression::None);
861
862        let result = Compression::try_from(1);
863        assert!(result.is_ok());
864        let value = result.unwrap();
865        assert_eq!(value, Compression::ZStandard);
866
867        let result = Compression::try_from(2);
868        assert!(result.is_err());
869        let err_string = result.err().unwrap().to_string();
870        assert_eq!(err_string, "unsupported compression algorithm 2");
871    }
872
873    #[test]
874    fn test_compression_into_u8() {
875        let value: u8 = Compression::None.into();
876        assert_eq!(value, 0);
877
878        let value: u8 = Compression::ZStandard.into();
879        assert_eq!(value, 1);
880    }
881
882    #[test]
883    fn test_encryption_try_from() {
884        let result = Encryption::try_from(0);
885        assert!(result.is_ok());
886        let value = result.unwrap();
887        assert_eq!(value, Encryption::None);
888
889        let result = Encryption::try_from(1);
890        assert!(result.is_ok());
891        let value = result.unwrap();
892        assert_eq!(value, Encryption::AES256GCM);
893
894        let result = Encryption::try_from(2);
895        assert!(result.is_err());
896        let err_string = result.err().unwrap().to_string();
897        assert_eq!(err_string, "unsupported encryption algorithm 2");
898    }
899
900    #[test]
901    fn test_encryption_into_u8() {
902        let value: u8 = Encryption::None.into();
903        assert_eq!(value, 0);
904
905        let value: u8 = Encryption::AES256GCM.into();
906        assert_eq!(value, 1);
907    }
908
909    #[test]
910    fn test_key_derivation_try_from() {
911        let result = KeyDerivation::try_from(0);
912        assert!(result.is_ok());
913        let value = result.unwrap();
914        assert_eq!(value, KeyDerivation::None);
915
916        let result = KeyDerivation::try_from(1);
917        assert!(result.is_ok());
918        let value = result.unwrap();
919        assert_eq!(value, KeyDerivation::Argon2id);
920
921        let result = KeyDerivation::try_from(2);
922        assert!(result.is_err());
923        let err_string = result.err().unwrap().to_string();
924        assert_eq!(err_string, "unsupported key derivation function 2");
925    }
926
927    #[test]
928    fn test_key_derivation_into_u8() {
929        let value: u8 = KeyDerivation::None.into();
930        assert_eq!(value, 0);
931
932        let value: u8 = KeyDerivation::Argon2id.into();
933        assert_eq!(value, 1);
934    }
935
936    #[test]
937    fn test_generate_salt() -> Result<(), Error> {
938        use argon2::password_hash::{SaltString, rand_core::OsRng};
939        let salt = SaltString::generate(&mut OsRng);
940        let mut buf: Vec<u8> = vec![0; salt.len()];
941        let result = salt.decode_b64(&mut buf);
942        assert!(result.is_ok());
943        let bytes = result.unwrap();
944        assert_eq!(bytes.len(), 16);
945        Ok(())
946    }
947
948    #[test]
949    fn test_derive_key_argon2() -> Result<(), Error> {
950        let password = "keyboard cat";
951        let salt = generate_salt(&KeyDerivation::Argon2id)?;
952        let params: KeyDerivationParams = Default::default();
953        let secret = derive_key(&KeyDerivation::Argon2id, password, &salt, &params)?;
954        assert_eq!(secret.len(), 32);
955        assert_ne!(password.as_bytes(), secret.as_slice());
956        Ok(())
957    }
958
#[test]
fn test_encrypt_decrypt() -> Result<(), Error> {
    // derive the secret key used for both directions of the roundtrip
    let kdf = KeyDerivation::Argon2id;
    let passphrase = "keyboard cat";
    let salt = generate_salt(&kdf)?;
    let params = KeyDerivationParams::default();
    let key = derive_key(&kdf, passphrase, &salt, &params)?;

    let input = "mary had a little lamb whose fleece was white as snow";
    let message = input.as_bytes();
    assert_eq!(input.len(), 53);

    let algorithm = Encryption::AES256GCM;
    let (sealed, nonce) = encrypt_data(&algorithm, &key, message)?;
    // the authentication tag (and possibly the algorithm itself) makes the
    // cipher text longer than the plain text
    assert_eq!(sealed.len(), 69);
    // the exact nonce length is not important, it is usually about 12 bytes
    assert_eq!(nonce.len(), 12);

    // what really matters: decrypting gives back the original bytes
    let opened = decrypt_data(&algorithm, &key, &sealed, &nonce)?;
    assert_eq!(opened, message);
    Ok(())
}
978
#[test]
fn test_create_list_extract() -> Result<(), Error> {
    // empty directories are not tracked by git, so create it before archiving
    std::fs::create_dir_all("test/fixtures/version1/tiny_tree/sub/empty-dir")?;

    // write the fixture tree into an archive inside a temporary directory
    let outdir = tempdir()?;
    let archive = outdir.path().join("archive.exa");
    let mut builder = super::writer::Writer::new(std::fs::File::create(&archive)?)?;
    builder.add_dir_all("test/fixtures/version1/tiny_tree")?;
    builder.finish()?;

    // list the archive and collect the entry names in sorted order
    let listing = super::reader::Entries::new(&archive)?;
    assert!(!listing.is_encrypted());
    let mut names: Vec<String> = listing
        .filter_map(|item| item.ok().map(|entry| entry.name().to_owned()))
        .collect();
    names.sort();
    assert_eq!(names.len(), 9);
    #[cfg(target_family = "unix")]
    let literal_names = [
        "tiny_tree",
        "tiny_tree/file-a.txt",
        "tiny_tree/file-b.txt",
        "tiny_tree/file-c.txt",
        "tiny_tree/link-to-c",
        "tiny_tree/sub",
        "tiny_tree/sub/empty-dir",
        "tiny_tree/sub/empty-file",
        "tiny_tree/sub/file-1.txt",
    ];
    #[cfg(target_family = "windows")]
    let literal_names = [
        "tiny_tree",
        "tiny_tree\\file-a.txt",
        "tiny_tree\\file-b.txt",
        "tiny_tree\\file-c.txt",
        "tiny_tree\\link-to-c",
        "tiny_tree\\sub",
        "tiny_tree\\sub\\empty-dir",
        "tiny_tree\\sub\\empty-file",
        "tiny_tree\\sub\\file-1.txt",
    ];
    let wanted_names: Vec<String> = literal_names.iter().map(|name| name.to_string()).collect();
    for (actual, wanted) in names.iter().zip(wanted_names.iter()) {
        assert_eq!(actual, wanted);
    }

    // unpack the archive next to it and inspect the extracted tree
    let mut extractor = super::reader::from_file(&archive)?;
    assert!(!extractor.is_encrypted());
    extractor.extract_all(outdir.path())?;
    let tree = outdir.path().join("tiny_tree");

    // symbolic link: only checked on unix, since reading and writing links
    // on windows has several issues
    if cfg!(target_family = "unix") {
        let link_path = tree.join("link-to-c");
        let target_bytes = read_link(&link_path)?;
        let wanted_target: Vec<u8> = "file-c.txt".as_bytes().to_vec();
        assert_eq!(target_bytes, wanted_target);
    }

    // empty directory: must exist as a directory
    assert!(std::fs::metadata(tree.join("sub").join("empty-dir"))?.is_dir());

    // empty file: must have zero length
    assert_eq!(std::fs::metadata(tree.join("sub").join("empty-file"))?.len(), 0);

    // regular files: contents must match, using the platform's line endings
    #[cfg(target_family = "unix")]
    let wanted_text = [
        "mary had a little lamb\n",
        "whose fleece was white as snow\n",
        "and everywhere that Mary went\n",
        "the lamb was sure to go.\n",
    ];
    #[cfg(target_family = "windows")]
    let wanted_text = [
        "mary had a little lamb\r\n",
        "whose fleece was white as snow\r\n",
        "and everywhere that Mary went\r\n",
        "the lamb was sure to go.\r\n",
    ];
    let text_files = [
        tree.join("file-a.txt"),
        tree.join("file-b.txt"),
        tree.join("file-c.txt"),
        tree.join("sub").join("file-1.txt"),
    ];
    for (path, wanted) in text_files.iter().zip(wanted_text.iter()) {
        let actual = std::fs::read_to_string(path)?;
        assert_eq!(actual, *wanted);
    }
    Ok(())
}
1090
#[test]
fn test_create_list_file_size() -> Result<(), Error> {
    // empty directories are not tracked by git, so create it before archiving
    std::fs::create_dir_all("test/fixtures/version1/tiny_tree/sub/empty-dir")?;

    // write the fixture tree into an archive with the file_size option set
    let outdir = tempdir()?;
    let archive = outdir.path().join("archive.exa");
    let output = std::fs::File::create(&archive)?;
    let mut builder =
        super::writer::Writer::with_options(output, Options::new().file_size(true))?;
    builder.add_dir_all("test/fixtures/version1/tiny_tree")?;
    builder.finish()?;

    // list the archive and collect sorted (name, size) pairs
    let listing = super::reader::Entries::new(&archive)?;
    assert!(!listing.is_encrypted());
    let mut sized: Vec<(String, Option<u64>)> = listing
        .filter_map(|item| item.ok().map(|entry| (entry.name().to_owned(), entry.size())))
        .collect();
    sized.sort();
    assert_eq!(sized.len(), 9);
    // directories are expected to have no size; files and the link have one
    #[cfg(target_family = "unix")]
    let table: [(&str, Option<u64>); 9] = [
        ("tiny_tree", None),
        ("tiny_tree/file-a.txt", Some(23)),
        ("tiny_tree/file-b.txt", Some(31)),
        ("tiny_tree/file-c.txt", Some(30)),
        ("tiny_tree/link-to-c", Some(10)),
        ("tiny_tree/sub", None),
        ("tiny_tree/sub/empty-dir", None),
        ("tiny_tree/sub/empty-file", Some(0)),
        ("tiny_tree/sub/file-1.txt", Some(25)),
    ];
    #[cfg(target_family = "windows")]
    let table: [(&str, Option<u64>); 9] = [
        ("tiny_tree", None),
        ("tiny_tree\\file-a.txt", Some(24)),
        ("tiny_tree\\file-b.txt", Some(32)),
        ("tiny_tree\\file-c.txt", Some(31)),
        ("tiny_tree\\link-to-c", Some(10)),
        ("tiny_tree\\sub", None),
        ("tiny_tree\\sub\\empty-dir", None),
        ("tiny_tree\\sub\\empty-file", Some(0)),
        ("tiny_tree\\sub\\file-1.txt", Some(26)),
    ];
    let wanted: Vec<(String, Option<u64>)> = table
        .iter()
        .map(|(name, size)| (name.to_string(), *size))
        .collect();
    for (actual, expected) in sized.iter().zip(wanted.iter()) {
        assert_eq!(actual, expected);
    }

    Ok(())
}
1143
#[test]
fn test_create_list_metadata() -> Result<(), Error> {
    // create an archive holding a single file, with the metadata option set
    let outdir = tempdir()?;
    let archive = outdir.path().join("archive.exa");
    let output = std::fs::File::create(&archive)?;
    let options = Options::new().metadata(true);
    let mut builder = super::writer::Writer::with_options(output, options)?;
    builder.add_file("test/fixtures/version1/tiny_tree/file-a.txt", None)?;
    builder.finish()?;

    // verify the entries appear with metadata
    let reader = super::reader::Entries::new(&archive)?;
    assert!(!reader.is_encrypted());
    // count the entries so the per-entry check below cannot pass vacuously
    // when the listing turns out to be empty
    let mut seen = 0_usize;
    for result in reader {
        let entry = result?;
        // probably only mtime is reliable across platforms
        assert!(entry.mtime().is_some());
        seen += 1;
    }
    assert!(seen > 0, "archive listing produced no entries");

    Ok(())
}
1166
#[test]
fn test_create_list_extract_encryption() -> Result<(), Error> {
    // empty directories are not tracked by git, so create it before archiving
    std::fs::create_dir_all("test/fixtures/version1/tiny_tree/sub/empty-dir")?;

    // write the fixture tree into a password-protected archive
    let outdir = tempdir()?;
    let archive = outdir.path().join("archive.exa");
    let mut builder = super::writer::Writer::new(std::fs::File::create(&archive)?)?;
    builder.enable_encryption(
        super::KeyDerivation::Argon2id,
        super::Encryption::AES256GCM,
        "Passw0rd!",
    )?;
    builder.add_dir_all("test/fixtures/version1/tiny_tree")?;
    builder.finish()?;

    // list the archive (after supplying the password) and sort the names
    let mut listing = super::reader::Entries::new(&archive)?;
    assert!(listing.is_encrypted());
    listing.enable_encryption("Passw0rd!")?;
    let mut names: Vec<String> = listing
        .filter_map(|item| item.ok().map(|entry| entry.name().to_owned()))
        .collect();
    names.sort();
    assert_eq!(names.len(), 9);
    #[cfg(target_family = "unix")]
    let literal_names = [
        "tiny_tree",
        "tiny_tree/file-a.txt",
        "tiny_tree/file-b.txt",
        "tiny_tree/file-c.txt",
        "tiny_tree/link-to-c",
        "tiny_tree/sub",
        "tiny_tree/sub/empty-dir",
        "tiny_tree/sub/empty-file",
        "tiny_tree/sub/file-1.txt",
    ];
    #[cfg(target_family = "windows")]
    let literal_names = [
        "tiny_tree",
        "tiny_tree\\file-a.txt",
        "tiny_tree\\file-b.txt",
        "tiny_tree\\file-c.txt",
        "tiny_tree\\link-to-c",
        "tiny_tree\\sub",
        "tiny_tree\\sub\\empty-dir",
        "tiny_tree\\sub\\empty-file",
        "tiny_tree\\sub\\file-1.txt",
    ];
    let wanted_names: Vec<String> = literal_names.iter().map(|name| name.to_string()).collect();
    for (actual, wanted) in names.iter().zip(wanted_names.iter()) {
        assert_eq!(actual, wanted);
    }

    // unpack the archive (again with the password) and inspect the tree
    let mut extractor = super::reader::from_file(&archive)?;
    assert!(extractor.is_encrypted());
    extractor.enable_encryption("Passw0rd!")?;
    extractor.extract_all(outdir.path())?;
    let tree = outdir.path().join("tiny_tree");

    // symbolic link: only checked on unix, since reading and writing links
    // on windows has several issues
    if cfg!(target_family = "unix") {
        let link_path = tree.join("link-to-c");
        let target_bytes = read_link(&link_path)?;
        let wanted_target: Vec<u8> = "file-c.txt".as_bytes().to_vec();
        assert_eq!(target_bytes, wanted_target);
    }

    // empty directory: must exist as a directory
    assert!(std::fs::metadata(tree.join("sub").join("empty-dir"))?.is_dir());

    // empty file: must have zero length
    assert_eq!(std::fs::metadata(tree.join("sub").join("empty-file"))?.len(), 0);

    // regular files: contents must match, using the platform's line endings
    #[cfg(target_family = "unix")]
    let wanted_text = [
        "mary had a little lamb\n",
        "whose fleece was white as snow\n",
        "and everywhere that Mary went\n",
        "the lamb was sure to go.\n",
    ];
    #[cfg(target_family = "windows")]
    let wanted_text = [
        "mary had a little lamb\r\n",
        "whose fleece was white as snow\r\n",
        "and everywhere that Mary went\r\n",
        "the lamb was sure to go.\r\n",
    ];
    let text_files = [
        tree.join("file-a.txt"),
        tree.join("file-b.txt"),
        tree.join("file-c.txt"),
        tree.join("sub").join("file-1.txt"),
    ];
    for (path, wanted) in text_files.iter().zip(wanted_text.iter()) {
        let actual = std::fs::read_to_string(path)?;
        assert_eq!(actual, *wanted);
    }
    Ok(())
}
1285}