exaf_rs/
reader.rs

1//
2// Copyright (c) 2024 Nathan Fiedler
3//
4
5//!
6//! Read an archive, decrypting and decompressing as needed.
7//!
8//! The `reader` module provides the functions needed to read an archive which
9//! may have optional encryption enabled. The `Entries` iterator provides a
10//! simple means of examining all of the entries contained in the archive.
11//!
12//! To extract the contents of the archive, use the `extract_all()` function of
13//! the `Reader` implementation.
14//!
15
16use super::*;
17use std::cell::RefCell;
18use std::collections::{HashMap, HashSet};
19use std::fmt;
20use std::fs::File;
21use std::io::{self, ErrorKind, Read, Seek, SeekFrom};
22use std::path::Path;
23
24//
25// Concerning the versioned reader API, it seems like a good idea but it has not
26// been fleshed out yet, so it looks incomplete at the moment.
27//
28
29///
30/// A reader of the EXAF format for one version or another.
31///
32trait VersionedReader {
33    // Read the header starting at the current position.
34    fn read_next_header(&mut self) -> Result<HeaderMap, Error>;
35
36    // Skip some content in the input stream (such as compressed content).
37    #[allow(dead_code)]
38    fn skip_n_bytes(&mut self, skip: u32) -> Result<(), Error>;
39
40    // Read the given number of bytes into a new vector.
41    fn read_n_bytes(&mut self, count: u64) -> Result<Vec<u8>, Error>;
42}
43
//
// Reconstructs the full path of archive entries from the directory
// identifiers and parent references found in the entry headers.
//
struct PathBuilder {
    // every directory seen so far, keyed by its identifier; the value is the
    // pair (PA, NM), where a PA of zero places the directory at the root
    parents: HashMap<u32, (u32, String)>,
    // cache of complete directory paths, keyed by directory identifier and
    // filled in on demand by get_full_path()
    full_paths: HashMap<u32, PathBuf>,
}
55
56impl PathBuilder {
57    fn new() -> Self {
58        Self {
59            parents: HashMap::new(),
60            full_paths: HashMap::new(),
61        }
62    }
63
64    // insert a mapping for the given directory to its parent
65    fn insert<S: Into<String>>(&mut self, dir_id: u32, parent: u32, name: S) {
66        self.parents.insert(dir_id, (parent, name.into()));
67    }
68
69    // follow the parent chain to build up a path
70    fn get_full_path(&mut self, mut parent: u32) -> Result<PathBuf, Error> {
71        let fullpath = if let Some(cached_path) = self.full_paths.get(&parent) {
72            cached_path.to_owned()
73        } else {
74            let mut paths: Vec<String> = vec![];
75            let entry_parent = parent;
76            while parent != 0 {
77                if let Some(pair) = self.parents.get(&parent) {
78                    parent = pair.0;
79                    paths.push(pair.1.clone());
80                } else {
81                    return Err(Error::MissingParent(parent));
82                }
83            }
84            let mut fullpath: PathBuf = PathBuf::new();
85            while let Some(path) = paths.pop() {
86                fullpath = fullpath.join(path);
87            }
88            self.full_paths.insert(entry_parent, fullpath.clone());
89            fullpath
90        };
91        Ok(fullpath)
92    }
93}
94
95// describes a file/link that will be extracted from the content block
96#[derive(Debug)]
97struct OutboundContent {
98    // offset within the content for this chunk of file
99    contentpos: u64,
100    // offset within the file where this chunk belongs
101    itempos: u64,
102    // size of the file chunk
103    size: u64,
104    // content is either for a file or symbolic link
105    kind: Kind,
106}
107
108impl TryFrom<HeaderMap> for OutboundContent {
109    type Error = super::Error;
110
111    fn try_from(value: HeaderMap) -> Result<Self, Self::Error> {
112        let kind: Kind = if get_header_str(&value, &TAG_NAME)?.is_some() {
113            Kind::File
114        } else if get_header_str(&value, &TAG_SYM_LINK)?.is_some() {
115            Kind::Link
116        } else {
117            return Err(Error::MissingTag("NM or SL".into()));
118        };
119        let contentpos = get_header_u32(&value, &TAG_CONTENT_POS)?
120            .ok_or_else(|| Error::MissingTag("CP".into()))?;
121        let itempos =
122            get_header_u64(&value, &TAG_ITEM_POS)?.ok_or_else(|| Error::MissingTag("IP".into()))?;
123        let size = get_header_u32(&value, &TAG_ITEM_SIZE)?
124            .ok_or_else(|| Error::MissingTag("SZ".into()))?;
125        Ok(Self {
126            contentpos: contentpos as u64,
127            itempos,
128            size: size as u64,
129            kind,
130        })
131    }
132}
133
/// The uninterpreted rows of a header: each tag mapped to its raw value bytes.
type HeaderMap = HashMap<u16, Vec<u8>>;
136
137// Read a complete header from the stream.
138fn read_header<R: Read>(mut input: R) -> Result<HeaderMap, Error> {
139    let mut rows: HeaderMap = HashMap::new();
140    // read in the number of rows in this header
141    let mut row_count_bytes = [0; 2];
142    input.read_exact(&mut row_count_bytes)?;
143    let row_count = u16::from_be_bytes(row_count_bytes);
144    // read that many tag/size/value tuples into the map
145    for _ in 0..row_count {
146        // read tag bytes, convert to u16
147        let mut tag_bytes = [0; 2];
148        input.read_exact(&mut tag_bytes)?;
149        let tag = u16::from_be_bytes(tag_bytes);
150        // read size bytes, convert to u16
151        let mut size_bytes = [0; 2];
152        input.read_exact(&mut size_bytes)?;
153        let size = u16::from_be_bytes(size_bytes);
154        // read N bytes into a Vec<u8>
155        let mut chunk = input.take(size as u64);
156        let mut value: Vec<u8> = vec![];
157        chunk.read_to_end(&mut value)?;
158        input = chunk.into_inner();
159        rows.insert(tag, value);
160    }
161    Ok(rows)
162}
163
164fn get_header_str(rows: &HeaderMap, key: &u16) -> Result<Option<String>, Error> {
165    if let Some(row) = rows.get(key) {
166        let s = String::from_utf8(row.to_owned())?;
167        Ok(Some(s))
168    } else {
169        Ok(None)
170    }
171}
172
173fn get_header_u8(rows: &HeaderMap, key: &u16) -> Result<Option<u8>, Error> {
174    if let Some(row) = rows.get(key) {
175        Ok(Some(row[0]))
176    } else {
177        Ok(None)
178    }
179}
180
// Produce the two big-endian bytes of a u16 from a value that may have been
// stored in fewer bytes.
//
// Values shorter than two bytes (including an empty value) are padded with
// leading zeros; only the first two bytes of a longer value are used.
#[allow(dead_code)]
fn pad_to_u16(row: &[u8]) -> [u8; 2] {
    let mut padded = [0u8; 2];
    let used = row.len().min(2);
    // right-align the available bytes so the leading bytes remain zero
    padded[2 - used..].copy_from_slice(&row[..used]);
    padded
}
189
190#[allow(dead_code)]
191fn get_header_u16(rows: &HeaderMap, key: &u16) -> Result<Option<u16>, Error> {
192    if let Some(row) = rows.get(key) {
193        let raw: [u8; 2] = pad_to_u16(row);
194        let v = u16::from_be_bytes(raw);
195        Ok(Some(v))
196    } else {
197        Ok(None)
198    }
199}
200
// Produce the four big-endian bytes of a u32 from a value that may have been
// stored in fewer bytes.
//
// Values shorter than four bytes (including an empty value) are padded with
// leading zeros; only the first four bytes of a longer value are used.
fn pad_to_u32(row: &[u8]) -> [u8; 4] {
    let mut padded = [0u8; 4];
    let used = row.len().min(4);
    // right-align the available bytes so the leading bytes remain zero
    padded[4 - used..].copy_from_slice(&row[..used]);
    padded
}
210
211fn get_header_u32(rows: &HeaderMap, key: &u16) -> Result<Option<u32>, Error> {
212    if let Some(row) = rows.get(key) {
213        let raw: [u8; 4] = pad_to_u32(row);
214        let v = u32::from_be_bytes(raw);
215        Ok(Some(v))
216    } else {
217        Ok(None)
218    }
219}
220
// Produce the eight big-endian bytes of a u64 from a value that may have been
// stored in fewer bytes.
//
// Values shorter than eight bytes (including an empty value) are padded with
// leading zeros; only the first eight bytes of a longer value are used.
fn pad_to_u64(row: &[u8]) -> [u8; 8] {
    let mut padded = [0u8; 8];
    let used = row.len().min(8);
    // right-align the available bytes so the leading bytes remain zero
    padded[8 - used..].copy_from_slice(&row[..used]);
    padded
}
234
235fn get_header_u64(rows: &HeaderMap, key: &u16) -> Result<Option<u64>, Error> {
236    if let Some(row) = rows.get(key) {
237        let raw: [u8; 8] = pad_to_u64(row);
238        let v = u64::from_be_bytes(raw);
239        Ok(Some(v))
240    } else {
241        Ok(None)
242    }
243}
244
245fn get_header_time(rows: &HeaderMap, key: &u16) -> Result<Option<DateTime<Utc>>, Error> {
246    if let Some(row) = rows.get(key) {
247        if row.len() == 4 {
248            let raw: [u8; 4] = row[0..4].try_into()?;
249            let secs = i32::from_be_bytes(raw);
250            Ok(DateTime::from_timestamp(secs as i64, 0))
251        } else {
252            let raw: [u8; 8] = row[0..8].try_into()?;
253            let secs = i64::from_be_bytes(raw);
254            Ok(DateTime::from_timestamp(secs, 0))
255        }
256    } else {
257        Ok(None)
258    }
259}
260
261fn get_header_bytes(rows: &HeaderMap, key: &u16) -> Result<Option<Vec<u8>>, Error> {
262    if let Some(row) = rows.get(key) {
263        Ok(Some(row.to_owned()))
264    } else {
265        Ok(None)
266    }
267}
268
269///
270/// Optional values read from the archive header.
271///
272struct ArchiveHeader {
273    /// Encryption algorithm
274    enc_algo: Encryption,
275    /// Key derivation algorithm
276    key_algo: KeyDerivation,
277    /// Salt for deriving the key from a passphrase
278    salt: Option<Vec<u8>>,
279    /// Number of iterations for key derivation function
280    time_cost: Option<u32>,
281    /// Number of 1 kb memory blocks for key derivation function
282    mem_cost: Option<u32>,
283    /// Degree of parallelism for key derivation function
284    para_cost: Option<u32>,
285    /// Output length for key derivation function
286    tag_length: Option<u32>,
287}
288
289impl TryFrom<HeaderMap> for ArchiveHeader {
290    type Error = super::Error;
291
292    fn try_from(value: HeaderMap) -> Result<Self, Self::Error> {
293        let enc_algo = get_header_u8(&value, &TAG_ENC_ALGO)?
294            .map_or(Ok(Encryption::None), |v| Encryption::try_from(v))?;
295        let key_algo = get_header_u8(&value, &TAG_KEY_DERIV)?
296            .map_or(Ok(KeyDerivation::None), |v| KeyDerivation::try_from(v))?;
297        let salt = get_header_bytes(&value, &TAG_SALT)?;
298        let time_cost = get_header_u32(&value, &TAG_TIME_COST)?;
299        let mem_cost = get_header_u32(&value, &TAG_MEM_COST)?;
300        let para_cost = get_header_u32(&value, &TAG_PARA_COST)?;
301        let tag_length = get_header_u32(&value, &TAG_TAG_LENGTH)?;
302        Ok(Self {
303            enc_algo,
304            key_algo,
305            salt,
306            time_cost,
307            mem_cost,
308            para_cost,
309            tag_length,
310        })
311    }
312}
313
314impl TryFrom<HeaderMap> for Entry {
315    type Error = super::Error;
316
317    fn try_from(value: HeaderMap) -> Result<Self, Self::Error> {
318        let (is_link, name): (bool, String) = if let Some(nm) = get_header_str(&value, &TAG_NAME)? {
319            (false, nm)
320        } else if let Some(sl) = get_header_str(&value, &TAG_SYM_LINK)? {
321            (true, sl)
322        } else {
323            return Err(Error::MissingTag("NM or SL".into()));
324        };
325        let dir_id = get_header_u32(&value, &TAG_DIRECTORY_ID)?;
326        let parent = get_header_u32(&value, &TAG_PARENT)?;
327        let size = get_header_u64(&value, &TAG_FILE_SIZE)?;
328        let mode = get_header_u32(&value, &TAG_UNIX_MODE)?;
329        let attrs = get_header_u32(&value, &TAG_FILE_ATTRS)?;
330        let uid = get_header_u32(&value, &TAG_USER_ID)?;
331        let gid = get_header_u32(&value, &TAG_GROUP_ID)?;
332        let user = get_header_str(&value, &TAG_USER_NAME)?;
333        let group = get_header_str(&value, &TAG_GROUP_NAME)?;
334        let ctime = get_header_time(&value, &TAG_CREATE_TIME)?;
335        let mtime = get_header_time(&value, &TAG_MODIFY_TIME)?;
336        let atime = get_header_time(&value, &TAG_ACCESS_TIME)?;
337        Ok(Self {
338            name,
339            is_link,
340            dir_id,
341            parent,
342            size,
343            mode,
344            attrs,
345            uid,
346            gid,
347            user,
348            group,
349            ctime,
350            mtime,
351            atime,
352        })
353    }
354}
355
356///
357/// Represents the properties related to a content block that holds one or more
358/// files (or parts of files).
359///
360#[derive(Debug)]
361struct Manifest {
362    /// Number of directory, file, or symbolic links in the content block.
363    num_entries: u32,
364    /// Compression algorithm for this content block.
365    comp_algo: Compression,
366    /// Size in bytes of the (compressed) content block.
367    block_size: u32,
368}
369
370impl fmt::Display for Manifest {
371    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
372        write!(
373            f,
374            "num_entries: {}, comp_algo: {}, block_size: {}",
375            self.num_entries, self.comp_algo, self.block_size
376        )
377    }
378}
379
380impl TryFrom<HeaderMap> for Manifest {
381    type Error = super::Error;
382
383    fn try_from(value: HeaderMap) -> Result<Self, Self::Error> {
384        let num_entries = get_header_u32(&value, &TAG_NUM_ENTRIES)?
385            .ok_or_else(|| Error::MissingTag("NE".into()))?;
386        let comp_num =
387            get_header_u8(&value, &TAG_COMP_ALGO)?.ok_or_else(|| Error::MissingTag("CA".into()))?;
388        let block_size = get_header_u32(&value, &TAG_BLOCK_SIZE)?
389            .ok_or_else(|| Error::MissingTag("BS".into()))?;
390        let comp_algo = Compression::try_from(comp_num)?;
391        Ok(Self {
392            num_entries,
393            comp_algo,
394            block_size,
395        })
396    }
397}
398
///
/// The parameters that describe the encrypted data following a header.
///
struct Encrypted {
    // the initialization vector (also known as the nonce)
    init_vector: Vec<u8>,
    // how many bytes of encrypted data follow the header
    block_size: u32,
}
408
409impl TryFrom<HeaderMap> for Encrypted {
410    type Error = super::Error;
411
412    fn try_from(value: HeaderMap) -> Result<Self, Self::Error> {
413        let block_size = get_header_u32(&value, &TAG_ENCRYPTED_SIZE)?
414            .ok_or_else(|| Error::MissingTag("ES".into()))?;
415        let init_vector = get_header_bytes(&value, &TAG_INIT_VECTOR)?
416            .ok_or_else(|| Error::MissingTag("IV".into()))?;
417        Ok(Self {
418            init_vector,
419            block_size,
420        })
421    }
422}
423
//
// The use of a versioned reader seemed like a good idea but it has not been
// fully realized as yet; for now it is simply convenient to structure the
// code in this way (that is, the ref cells and boxes are helpful).
//
struct ReaderV1<R: ?Sized> {
    // the input from which the archive is read
    input: RefCell<R>,
}
432
433impl<R: Read> ReaderV1<R> {
434    fn new(input: R) -> Self {
435        Self {
436            input: RefCell::new(input),
437        }
438    }
439}
440
441impl<R: Read + Seek> VersionedReader for ReaderV1<R> {
442    fn read_next_header(&mut self) -> Result<HeaderMap, Error> {
443        let input = self.input.get_mut();
444        read_header(input)
445    }
446
447    #[allow(dead_code)]
448    fn skip_n_bytes(&mut self, skip: u32) -> Result<(), Error> {
449        let input = self.input.get_mut();
450        input.seek(SeekFrom::Current(skip as i64))?;
451        Ok(())
452    }
453
454    fn read_n_bytes(&mut self, count: u64) -> Result<Vec<u8>, Error> {
455        let input = self.input.get_mut();
456        let mut taker = input.take(count);
457        let mut content: Vec<u8> = vec![];
458        let bytes_read = taker.read_to_end(&mut content)? as u64;
459        if bytes_read != count {
460            return Err(Error::UnexpectedEof);
461        }
462        Ok(content)
463    }
464}
465
466///
467/// Generic archive reader that returns manifest headers, entry headers, and
468/// compressed output.
469///
470/// The caller should check if the archive is encrypted by calling the
471/// `is_encrypted()` function, and if it returns `true`, then call
472/// `enable_encryption()` with a password provided by the user.
473///
474pub struct Reader {
475    // underlying reader for a specific file format
476    reader: Box<dyn VersionedReader>,
477    // archive header read from the input data
478    header: ArchiveHeader,
479    // secret key for encrypting files, if encryption is enabled
480    secret_key: Option<Vec<u8>>,
481    // number of bytes to read for the block after the manifest (if none, the
482    // manifest has not yet been read, or the block has already been read)
483    block_size: Option<u32>,
484    // buffered content, if any (this includes entries and file content)
485    // content: Option<Vec<u8>>,
486    content: Option<std::io::Cursor<Vec<u8>>>,
487}
488
489impl Reader {
490    ///
491    /// Create a new Reader with the given versioned reader.
492    ///
493    fn new(mut input: Box<dyn VersionedReader>) -> Result<Self, Error> {
494        let rows = input.read_next_header()?;
495        let header = ArchiveHeader::try_from(rows)?;
496        Ok(Self {
497            reader: input,
498            header,
499            secret_key: None,
500            block_size: None,
501            content: None,
502        })
503    }
504
505    ///
506    /// Return `true` if the archive appears to have encrypted content.
507    ///
508    pub fn is_encrypted(&self) -> bool {
509        self.header.key_algo != KeyDerivation::None
510    }
511
512    ///
513    /// Enable encryption when reading the archive, using the given passphrase.
514    ///
515    pub fn enable_encryption(&mut self, password: &str) -> Result<(), Error> {
516        let kd = self.header.key_algo.clone();
517        if let Some(ref salt) = self.header.salt {
518            let mut params: KeyDerivationParams = Default::default();
519            params = params.time_cost(self.header.time_cost);
520            params = params.mem_cost(self.header.mem_cost);
521            params = params.para_cost(self.header.para_cost);
522            params = params.tag_length(self.header.tag_length);
523            self.secret_key = Some(derive_key(&kd, password, salt, &params)?);
524            Ok(())
525        } else {
526            Err(Error::InternalError(
527                "called enable_encryption() on plain archive".into(),
528            ))
529        }
530    }
531
532    ///
533    /// Extracts all of the entries in the archive to the current directory.
534    ///
535    pub fn extract_all(&mut self, output_dir: &Path) -> Result<u64, Error> {
536        // allocate a large buffer for decompressing content to save time
537        let mut buffer: Vec<u8> = Vec::with_capacity(BUNDLE_SIZE as usize);
538        let mut path_builder = PathBuilder::new();
539        let mut file_count: u64 = 0;
540        // loop until the end of the file is reached
541        loop {
542            // try to read the next manifest header, if any
543            match self.read_next_manifest()? {
544                Some(manifest) => {
545                    // collect all files/links into a list to process them a bit later
546                    let mut files: Vec<(OutboundContent, PathBuf)> = vec![];
547                    for _ in 0..manifest.num_entries {
548                        let entry_rows = self.read_next_entry()?;
549                        let entry = Entry::try_from(entry_rows.clone())?;
550                        if let Some(dir_id) = entry.dir_id {
551                            let entry_parent = entry.parent.unwrap_or(0);
552                            path_builder.insert(dir_id, entry_parent, entry.name.clone());
553                        }
554                        let path = if let Some(parent) = entry.parent {
555                            path_builder.get_full_path(parent)?.join(entry.name)
556                        } else {
557                            PathBuf::from(entry.name)
558                        };
559                        if entry.dir_id.is_some() {
560                            // ensure directories exist, even the empty ones
561                            let safe_path = super::sanitize_path(path)?;
562                            let fpath = output_dir.to_path_buf().join(safe_path);
563                            fs::create_dir_all(&fpath)?;
564                        } else {
565                            let oc = OutboundContent::try_from(entry_rows.clone())?;
566                            files.push((oc, path));
567                        }
568                    }
569
570                    let mut content = self.read_content()?;
571                    if manifest.comp_algo == Compression::ZStandard {
572                        zstd::stream::copy_decode(content.as_slice(), &mut buffer)?;
573                    } else {
574                        // the only remaining option is copy (keep the larger buffer
575                        // to optimize memory management)
576                        if buffer.len() > content.len() {
577                            buffer.extend(content.drain(..));
578                        } else {
579                            buffer = content;
580                        }
581                    }
582
583                    // process each of the outbound content elements
584                    for (entry, path) in files.iter() {
585                        // perform basic sanitization of the path to prevent abuse
586                        let safe_path = super::sanitize_path(path)?;
587                        let fpath = output_dir.to_path_buf().join(safe_path);
588                        if entry.kind == Kind::File {
589                            // make sure the file exists and is writable
590                            let mut output = fs::OpenOptions::new()
591                                .write(true)
592                                .create(true)
593                                .open(&fpath)?;
594                            let file_len = fs::metadata(fpath)?.len();
595                            if file_len == 0 {
596                                // just created a new file, count it
597                                file_count += 1;
598                            }
599                            // if the file was an empty file, then we are already done
600                            if entry.size > 0 {
601                                // ensure the file has the appropriate length for writing this
602                                // content chunk into the file, extending it if necessary
603                                if file_len < entry.itempos {
604                                    output.set_len(entry.itempos)?;
605                                }
606                                // seek to the correct position within the file for this chunk
607                                if entry.itempos > 0 {
608                                    output.seek(SeekFrom::Start(entry.itempos))?;
609                                }
610                                let mut cursor = std::io::Cursor::new(&buffer);
611                                cursor.seek(SeekFrom::Start(entry.contentpos))?;
612                                let mut chunk = cursor.take(entry.size);
613                                io::copy(&mut chunk, &mut output)?;
614                            }
615                        } else if entry.kind == Kind::Link {
616                            // links are always captured in whole, never chunks
617                            let mut cursor = std::io::Cursor::new(&buffer);
618                            cursor.seek(SeekFrom::Start(entry.contentpos))?;
619                            let mut chunk = cursor.take(entry.size);
620                            let mut raw_bytes: Vec<u8> = vec![];
621                            chunk.read_to_end(&mut raw_bytes)?;
622                            write_link(&raw_bytes, &fpath)?;
623                        }
624                    }
625                    buffer.clear();
626                }
627                None => return Ok(file_count),
628            }
629        }
630    }
631
632    ///
633    /// Attempt to read the next manifest header from the archive.
634    ///
635    /// Returns `None` if the end of the file has been reached.
636    ///
637    /// If the archive is encrypted and `enable_encryption()` has been called,
638    /// then the encrypted block of entries and file content will be decrypted.
639    ///
640    /// Call `read_next_entry()` to get the next entry in the manifest, doing so
641    /// `manifest.num_entries` times. Then call `read_content()` to get the
642    /// compressed file data. Failing to do so will result in strange errors.
643    ///
644    fn read_next_manifest(&mut self) -> Result<Option<Manifest>, Error> {
645        if self.content.is_some() || self.block_size.is_some() {
646            return Err(Error::InternalError(
647                "you forgot to call read_content()".into(),
648            ));
649        }
650        match self.reader.read_next_header() {
651            Ok(rows) => {
652                if rows.contains_key(&TAG_ENCRYPTED_SIZE) {
653                    // an encrypted block of entries and file data, must be
654                    // decrypted and the plain text content cached for later
655                    let encrypted = Encrypted::try_from(rows)?;
656                    let cipher = self.reader.read_n_bytes(encrypted.block_size as u64)?;
657                    if let Some(ref secret) = self.secret_key {
658                        let plain = decrypt_data(
659                            &self.header.enc_algo,
660                            secret,
661                            &cipher,
662                            &encrypted.init_vector,
663                        )?;
664                        let mut cursor = std::io::Cursor::new(&plain);
665                        let rows = read_header(&mut cursor)?;
666                        let manifest = Manifest::try_from(rows)?;
667                        let mut buffer: Vec<u8> = vec![];
668                        cursor.read_to_end(&mut buffer)?;
669                        self.content = Some(std::io::Cursor::new(buffer));
670                        self.block_size = None;
671                        return Ok(Some(manifest));
672                    } else {
673                        return Err(Error::InternalError(
674                            "encrypted archive, call enable_encryption()".into(),
675                        ));
676                    }
677                } else {
678                    let manifest = Manifest::try_from(rows)?;
679                    // discard any previously cached content and set the next
680                    // block size so that we know how to read the content
681                    self.block_size = Some(manifest.block_size);
682                    self.content = None;
683                    return Ok(Some(manifest));
684                }
685            }
686            Err(err) => {
687                return match err {
688                    Error::UnexpectedEof => Ok(None),
689                    Error::IOError(ioerr) => {
690                        if ioerr.kind() == ErrorKind::UnexpectedEof {
691                            Ok(None)
692                        } else {
693                            Err(Error::from(ioerr))
694                        }
695                    }
696                    _ => Err(Error::from(err)),
697                }
698            }
699        }
700    }
701
702    ///
703    /// Read the next set of header rows from the archive.
704    ///
705    /// This can be called at most `num_entries` times before it starts to
706    /// return garbage, or an error.
707    ///
708    fn read_next_entry(&mut self) -> Result<HeaderMap, Error> {
709        if let Some(reader) = self.content.as_mut() {
710            read_header(reader)
711        } else {
712            self.reader.read_next_header()
713        }
714    }
715
716    ///
717    /// Read the upcoming block of compressed file content from the archive.
718    ///
719    /// This only yields expected results if `read_next_entry()` has been called
720    /// the appropriate number of times.
721    ///
722    fn read_content(&mut self) -> Result<Vec<u8>, Error> {
723        if let Some(mut reader) = self.content.take() {
724            let pos = reader.stream_position()? as usize;
725            let mut buffer = reader.into_inner();
726            buffer.drain(..pos);
727            Ok(buffer)
728        } else if let Some(count) = self.block_size.take() {
729            self.reader.read_n_bytes(count as u64)
730        } else {
731            Err(Error::InternalError(
732                "should call read_next_manifest() first".into(),
733            ))
734        }
735    }
736}
737
// Compute the hash of the given string using the standard default hasher.
fn calculate_hash(name: &str) -> u64 {
    use std::hash::{DefaultHasher, Hash, Hasher};
    let mut hasher = DefaultHasher::new();
    Hash::hash(name, &mut hasher);
    hasher.finish()
}
745
746///
747/// An iterator over the entries within an archive.
748///
749/// The caller should check if the archive is encrypted by calling the
750/// `is_encrypted()` function, and if it returns `true`, then call
751/// `enable_encryption()` with a password provided by the user.
752///
753pub struct Entries {
754    reader: Reader,
755    path_builder: PathBuilder,
756    entries_remaining: Option<u32>,
757    visited: HashSet<u64>,
758}
759
760impl Entries {
761    ///
762    /// Create an `Entries` iterator for the given file.
763    ///
764    pub fn new<P: AsRef<Path>>(infile: P) -> Result<Entries, Error> {
765        let reader = from_file(infile)?;
766        let path_builder = PathBuilder::new();
767        Ok(Self {
768            reader,
769            path_builder,
770            entries_remaining: None,
771            visited: HashSet::new(),
772        })
773    }
774
775    ///
776    /// Return `true` if the archive appears to have encrypted content.
777    ///
778    pub fn is_encrypted(&self) -> bool {
779        self.reader.is_encrypted()
780    }
781
782    ///
783    /// Enable encryption when reading the archive, using the given passphrase.
784    ///
785    pub fn enable_encryption(&mut self, password: &str) -> Result<(), Error> {
786        self.reader.enable_encryption(password)
787    }
788
789    // Retrieve the next entry from the manifest, reading the next manifest if
790    // needed, and skipping content once the end of the manifest is reached.
791    // Once the end of the input is reached, returns `Ok(None)` indefinitely.
792    fn get_next_entry(&mut self) -> Result<Option<Entry>, Error> {
793        loop {
794            match self.entries_remaining.take() {
795                Some(0) => {
796                    // throw away the compressed content since we are only listing
797                    self.reader.read_content()?;
798                }
799                Some(remaining) => {
800                    // get the next entry from the manifest
801                    let entry_rows = self.reader.read_next_entry()?;
802                    let mut entry = Entry::try_from(entry_rows)?;
803                    if let Some(dir_id) = entry.dir_id {
804                        let entry_parent = entry.parent.unwrap_or(0);
805                        self.path_builder
806                            .insert(dir_id, entry_parent, entry.name.clone());
807                    }
808                    if let Some(parent) = entry.parent {
809                        let mut fullpath = self.path_builder.get_full_path(parent)?;
810                        fullpath = fullpath.join(entry.name);
811                        entry.name = fullpath.to_string_lossy().to_string();
812                    }
813                    self.entries_remaining = Some(remaining - 1);
814
815                    // return unique paths, since a file may be split across
816                    // more than one content, we will see its entry again
817                    let hash = calculate_hash(&entry.name);
818                    if !self.visited.contains(&hash) {
819                        self.visited.insert(hash);
820                        return Ok(Some(entry));
821                    }
822                }
823                None => {
824                    // try to read the next manifest
825                    if let Some(manifest) = self.reader.read_next_manifest()? {
826                        self.entries_remaining = Some(manifest.num_entries);
827                    } else {
828                        // reached the end of the file
829                        return Ok(None);
830                    }
831                }
832            }
833        }
834    }
835}
836
837impl Iterator for Entries {
838    type Item = Result<Entry, Error>;
839
840    fn next(&mut self) -> Option<Self::Item> {
841        match self.get_next_entry() {
842            Ok(None) => None,
843            Ok(some) => some.map(|e| Ok(e)),
844            Err(err) => Some(Err(err)),
845        }
846    }
847}
848
849///
850/// Create a `Reader` from the given file.
851///
852/// ```no_run
853/// # let passwd: Option<&str> = None;
854/// let mut reader = exaf_rs::reader::from_file("archive.exa").expect("from file");
855/// if reader.is_encrypted() && passwd.is_none() {
856///     println!("Archive is encrypted, please provide a password.");
857/// } else {
858///     if let Some(password) = passwd {
859///         reader.enable_encryption(password).expect("enable crypto");
860///     }
861///     let path = std::env::current_dir().expect("no env?");
862///     reader.extract_all(&path).expect("extract all");
863/// }
864/// ```
865///
866pub fn from_file<P: AsRef<Path>>(infile: P) -> Result<Reader, Error> {
867    let mut input = File::open(infile)?;
868    let mut archive_start = [0; 6];
869    input.read_exact(&mut archive_start)?;
870    if archive_start[0..4] != [b'E', b'X', b'A', b'F'] {
871        return Err(Error::MissingMagic);
872    }
873    // for now, only know how to build version 1 readers
874    if archive_start[4] != 1 {
875        return Err(Error::UnsupportedVersion);
876    }
877    Reader::new(Box::new(ReaderV1::new(input)))
878}
879
#[cfg(test)]
mod tests {
    use super::*;

    // The single-row headers used below all follow the same layout: a row
    // count of one, the tag 0x1234, the length of the value, then the value.

    #[test]
    fn test_get_header_u16() -> Result<(), Error> {
        let raw: Vec<u8> = vec![0, 1, 0x12, 0x34, 0, 2, 255, 255];
        let header = read_header(&raw[..])?;
        assert_eq!(header.len(), 1);
        let value = get_header_u16(&header, &0x1234)?.unwrap();
        assert_eq!(value, 65_535);
        Ok(())
    }

    #[test]
    fn test_get_header_u32_up() -> Result<(), Error> {
        // a two byte value requested as a u32
        let raw: Vec<u8> = vec![0, 1, 0x12, 0x34, 0, 2, 255, 255];
        let header = read_header(&raw[..])?;
        assert_eq!(header.len(), 1);
        let value = get_header_u32(&header, &0x1234)?.unwrap();
        assert_eq!(value, 65_535);
        Ok(())
    }

    #[test]
    fn test_get_header_u64_up() -> Result<(), Error> {
        // a four byte value requested as a u64
        let raw: Vec<u8> = vec![0, 1, 0x12, 0x34, 0, 4, 255, 255, 255, 255];
        let header = read_header(&raw[..])?;
        assert_eq!(header.len(), 1);
        let value = get_header_u64(&header, &0x1234)?.unwrap();
        assert_eq!(value, 4_294_967_295);
        Ok(())
    }

    #[test]
    fn test_get_header_time_i32() -> Result<(), Error> {
        // timestamp stored in four bytes
        let raw: Vec<u8> = vec![0, 1, 0x12, 0x34, 0, 4, 0x66, 0x38, 0x17, 0x80];
        let header = read_header(&raw[..])?;
        assert_eq!(header.len(), 1);
        let when = get_header_time(&header, &0x1234)?.unwrap();
        assert_eq!(when.year(), 2024);
        assert_eq!(when.month(), 5);
        assert_eq!(when.day(), 5);
        Ok(())
    }

    #[test]
    fn test_get_header_time_i64() -> Result<(), Error> {
        // the value may fit in 4 bytes but its signed value is outside of the
        // range supported by an i32 so we must use i64
        let raw: Vec<u8> = vec![0, 1, 0x12, 0x34, 0, 8, 0, 0, 0, 0, 0x93, 0xf7, 0x14, 0x00];
        let header = read_header(&raw[..])?;
        assert_eq!(header.len(), 1);
        let when = get_header_time(&header, &0x1234)?.unwrap();
        assert_eq!(when.year(), 2048);
        assert_eq!(when.month(), 8);
        assert_eq!(when.day(), 30);
        Ok(())
    }

    #[test]
    fn test_get_header_str() -> Result<(), Error> {
        // six byte value holding the text "foobar"
        let mut raw: Vec<u8> = vec![0, 1, 0x12, 0x34, 0, 6];
        raw.extend_from_slice(b"foobar");
        let header = read_header(&raw[..])?;
        assert_eq!(header.len(), 1);
        let value = get_header_str(&header, &0x1234)?.unwrap();
        assert_eq!(value, "foobar");
        Ok(())
    }

    #[test]
    fn test_get_header_bytes() -> Result<(), Error> {
        // same payload as the string test, retrieved as raw bytes
        let mut raw: Vec<u8> = vec![0, 1, 0x12, 0x34, 0, 6];
        raw.extend_from_slice(b"foobar");
        let header = read_header(&raw[..])?;
        assert_eq!(header.len(), 1);
        let value = get_header_bytes(&header, &0x1234)?.unwrap();
        assert_eq!(value, &b"foobar"[..]);
        Ok(())
    }

    #[test]
    fn test_version1_reader_one_tiny_file() -> Result<(), Error> {
        let mut reader = from_file("test/fixtures/version1/one_tiny_file.exa")?;
        let manifest = reader.read_next_manifest()?.unwrap();
        assert_eq!(manifest.num_entries, 1);
        assert_eq!(manifest.comp_algo, Compression::ZStandard);
        assert_eq!(manifest.block_size, 32);
        Ok(())
    }

    #[test]
    fn test_read_header() -> Result<(), Error> {
        // a header of ten rows, followed by two bytes that are not part of it
        let raw: Vec<u8> = vec![
            0x00, 0x0a, 0x49, 0x44, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x4e, 0x4d, 0x00, 0x03,
            0x74, 0x6d, 0x70, 0x4d, 0x4f, 0x00, 0x04, 0x00, 0x00, 0x41, 0xed, 0x4d, 0x54, 0x00,
            0x08, 0x00, 0x00, 0x00, 0x00, 0x66, 0x26, 0xef, 0xd3, 0x43, 0x54, 0x00, 0x08, 0x00,
            0x00, 0x00, 0x00, 0x66, 0x11, 0xb6, 0xb8, 0x41, 0x54, 0x00, 0x08, 0x00, 0x00, 0x00,
            0x00, 0x66, 0x26, 0xef, 0xd4, 0x55, 0x4e, 0x00, 0x08, 0x6e, 0x66, 0x69, 0x65, 0x64,
            0x6c, 0x65, 0x72, 0x47, 0x4e, 0x00, 0x05, 0x73, 0x74, 0x61, 0x66, 0x66, 0x55, 0x49,
            0x00, 0x04, 0x00, 0x00, 0x01, 0xf5, 0x47, 0x49, 0x00, 0x04, 0x00, 0x00, 0x00, 0x14,
            0x00, 0x0b,
        ];
        let header = read_header(&raw[..])?;
        assert_eq!(header.len(), 10);
        // no use trying to check all of the values as some of them are timestamps
        let expected_id: Vec<u8> = vec![0, 0, 0, 1];
        assert_eq!(header.get(&TAG_DIRECTORY_ID), Some(&expected_id));
        let expected_name: Vec<u8> = b"tmp".to_vec();
        assert_eq!(header.get(&TAG_NAME), Some(&expected_name));
        Ok(())
    }
}