1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
use std::{
    collections::BTreeMap,
    error::Error,
    fmt, io,
    path::{Path, PathBuf},
};
use tar;

/// Provides access to files in a tarball stored behind a Read impl.
///
/// The index is built once by `Tardex::new`, which records an `Entry` for each regular file in
/// the archive, keyed by the file's path inside the archive.
#[derive(Debug)]
pub struct Tardex<R> {
    // Path -> entry index. A `BTreeMap` keeps the keys sorted, which is what `paths()` relies on
    // to hand the paths back in order.
    dex: BTreeMap<PathBuf, Entry<R>>,
}

impl<R> Tardex<R>
where
    R: io::Read + io::Seek + Clone,
{
    /// Construct a new Tardex from a seekable, cloneable reader.  Note that this excludes
    /// `std::fs::File`, which does not implement `Clone`.
    ///
    /// The archive is walked once from a clone of `reader`. Only regular-file entries are
    /// indexed; every other entry type is skipped. If the same path occurs more than once, the
    /// last occurrence replaces the earlier ones in the index.
    ///
    /// # Errors
    ///
    /// Returns `TardexError::IoError` if the archive cannot be iterated, if an indexed entry's
    /// path, modification time or size cannot be read, or if seeking a cloned reader to the
    /// entry's data fails.
    pub fn new(reader: R) -> Result<Self> {
        let mut archive = tar::Archive::new(reader.clone());
        let mut dex = BTreeMap::new();
        for tar_entry in archive.entries()? {
            let tar_entry = tar_entry?;
            let header = tar_entry.header();
            // Decide whether the entry is wanted before touching anything fallible or
            // allocating: a skipped entry (directory, link, ...) must never be able to abort
            // the whole indexing run because of its path.
            if !header.entry_type().is_file() {
                continue;
            }
            let path = tar_entry.path()?.into_owned();
            let offset = tar_entry.raw_file_position();
            let meta = Metadata::from_header(header)?;
            // Each indexed entry gets its own reader clone, positioned at the file's data.
            let entry = Entry::in_tarball(reader.clone(), offset, meta)?;
            dex.insert(path, entry);
        }
        Ok(Tardex { dex })
    }

    /// Access the entry at a path.
    ///
    /// Returns a clone of the indexed entry, or `None` if `k` is not a path in the index. The
    /// index itself is not modified, so repeated calls each return a fresh clone.
    pub fn entry<P>(&self, k: P) -> Option<Entry<R>>
    where
        P: AsRef<Path>,
    {
        self.dex.get(k.as_ref()).cloned()
    }
}

impl<R> Tardex<R> {
    /// Iterates over every indexed path, borrowed from the index, in the sorted key order of
    /// the underlying `BTreeMap` (i.e. lexical order).
    pub fn paths(&self) -> impl Iterator<Item = &Path> {
        let keys = self.dex.keys();
        keys.map(PathBuf::as_path)
    }
}

/// Errors produced while indexing a tarball or preparing its entries.
#[derive(Debug)]
pub enum TardexError {
    /// An I/O failure; wraps the originating `std::io::Error`.
    IoError(io::Error),
}

impl fmt::Display for TardexError {
    /// Formats the error as `i/o error <inner error>`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            TardexError::IoError(err) => write!(f, "i/o error {}", err),
        }
    }
}

impl From<io::Error> for TardexError {
    /// Wraps an `io::Error`, which lets `?` convert I/O failures automatically.
    fn from(err: io::Error) -> TardexError {
        TardexError::IoError(err)
    }
}

impl Error for TardexError {
    /// Exposes the wrapped `io::Error` so callers walking the error chain can reach the
    /// underlying cause (and, for example, inspect its `ErrorKind`).
    fn source(&self) -> Option<&(dyn Error + 'static)> {
        match self {
            TardexError::IoError(err) => Some(err),
        }
    }
}

/// Result alias used throughout this crate, with `TardexError` as the error type.
pub type Result<T> = std::result::Result<T, TardexError>;

/// An entry corresponds to a file in the tarball.
///
/// The file's bytes are obtained through the entry's `io::Read` implementation; its header
/// metadata is available through `metadata()`.
#[derive(Debug)]
pub struct Entry<R> {
    // Reader wrapped in `Take` so reads stop after the file's length. `in_tarball` seeks the
    // reader to the file's data before wrapping it.
    read: std::io::Take<R>,
    // Metadata captured from the entry's tar header when the index was built.
    meta: Metadata,
}

impl<R> Entry<R> {
    /// Returns a copy of this entry's metadata (`Metadata` is `Copy`).
    pub fn metadata(&self) -> Metadata {
        self.meta
    }
}

impl<R> Clone for Entry<R>
where
    R: io::Read + Clone,
{
    /// Builds an independent entry around a clone of the underlying reader.
    ///
    /// `Take` is not `Clone`, so the copy is rebuilt by hand: the inner reader is cloned and
    /// re-wrapped with this entry's current limit, i.e. the number of bytes still unread.
    fn clone(&self) -> Self {
        let remaining = self.read.limit();
        let reader = R::clone(self.read.get_ref());
        Entry {
            meta: self.meta,
            read: io::Read::take(reader, remaining),
        }
    }
}

impl<R> io::Read for Entry<R>
where
    R: io::Read + Clone,
{
    /// Reads from the entry's file data by delegating to the inner `Take` reader, which stops
    /// yielding bytes once its limit is used up.
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        self.read.read(buf)
    }
}

impl<R> Entry<R>
where
    R: io::Read + io::Seek + Clone,
{
    /// Builds an entry for the file whose data starts at byte `file_pos` of the tarball.
    ///
    /// `tarball_reader` is consumed: it is seeked to `file_pos` and then limited to `meta.len`
    /// bytes, so reading the entry yields exactly that file's contents.
    ///
    /// # Errors
    ///
    /// Returns `TardexError::IoError` if the seek fails.
    fn in_tarball(mut tarball_reader: R, file_pos: u64, meta: Metadata) -> Result<Entry<R>> {
        // The reader is already owned by this function, so it is repositioned in place rather
        // than cloned a second time.
        tarball_reader.seek(io::SeekFrom::Start(file_pos))?;
        Ok(Entry {
            meta,
            read: tarball_reader.take(meta.len),
        })
    }
}

/// Per-file metadata captured from an entry's tar header.
#[derive(Debug, Clone, Copy)]
pub struct Metadata {
    // Value of the header's mtime field. NOTE(review): presumably seconds since the Unix epoch
    // (the tests compare it against a Unix timestamp) -- confirm against the tar crate docs.
    mtime: u64,
    // Value of the header's size field; also used as the read limit for the entry's data.
    len: u64,
}

impl Metadata {
    /// Reads the modification time and size out of a tar header.
    ///
    /// # Errors
    ///
    /// Returns `TardexError::IoError` if either field cannot be read from the header.
    pub fn from_header(header: &tar::Header) -> Result<Metadata> {
        let mtime = header.mtime()?;
        let len = header.size()?;
        Ok(Metadata { mtime, len })
    }

    /// The modification time recorded in the tar header.
    pub fn mtime(&self) -> u64 {
        self.mtime
    }

    /// The file size recorded in the tar header.
    pub fn len(&self) -> u64 {
        self.len
    }

    /// Whether the file is zero bytes long. Provided alongside `len()`, as clippy's
    /// `len_without_is_empty` lint asks for.
    pub fn is_empty(&self) -> bool {
        self.len == 0
    }
}

#[cfg(test)]
mod tests {
    use super::Tardex;
    use std::{
        io::{Cursor, Read},
        path::Path,
    };

    // Tarball embedded at compile time; the tests below expect it to contain exactly `a.txt`,
    // `kida/a.txt` and `kida/b.txt` as regular files.
    static TAR_FIXTURE: &[u8] =
        include_bytes!(concat!(env!("CARGO_MANIFEST_DIR"), "/fixture/fixture.tar"));

    /// The index lists exactly the fixture's regular files, in sorted order.
    #[test]
    fn test_paths() {
        let tardex = Tardex::new(Cursor::new(TAR_FIXTURE)).unwrap();
        let mut paths = tardex.paths();
        assert_eq!(Path::new("a.txt"), paths.next().unwrap());
        assert_eq!(Path::new("kida/a.txt"), paths.next().unwrap());
        assert_eq!(Path::new("kida/b.txt"), paths.next().unwrap());
        assert!(paths.next().is_none());
    }

    /// Reading an entry yields that file's contents and nothing beyond them.
    #[test]
    fn test_content() {
        let tardex = Tardex::new(Cursor::new(TAR_FIXTURE)).unwrap();
        let mut entry = tardex.entry("a.txt").unwrap();
        let mut a_contents = String::new();
        entry
            .read_to_string(&mut a_contents)
            .expect("read_to_string failed");
        assert_eq!(a_contents, "A is for Apple\n");

        entry = tardex.entry("kida/b.txt").unwrap();
        let mut kida_b_contents = String::new();
        entry
            .read_to_string(&mut kida_b_contents)
            .expect("read_to_string failed");
        assert_eq!(
            kida_b_contents,
            "Kid A In Alphabet Land Bashes Another Belligerent Beastie - The Bellicose Blot!\n"
        );
    }

    /// Every indexed entry exposes plausible metadata.
    #[test]
    fn test_meta() {
        // These tests aren't exactly great, but the fixture itself is a little loose now (and
        // can't be reliably recreated). This is as good as it'll get for now.
        const JAN_1_2019: u64 = 1546300800;
        let tardex = Tardex::new(Cursor::new(TAR_FIXTURE)).unwrap();
        for path in tardex.paths() {
            // Build the failure message lazily so nothing is formatted on the success path.
            let entry = tardex
                .entry(path)
                .unwrap_or_else(|| panic!("failed to get {}", path.display()));
            let meta = entry.metadata();
            assert!(meta.len() > 0);
            assert!(meta.mtime() > JAN_1_2019);
        }
    }
}