cmprss 0.4.0

A compression multi-tool for the command line.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
extern crate tar;

use anyhow::{anyhow, bail};
use clap::Args;
use indicatif::ProgressBar;
use std::fs::{File, OpenOptions};
use std::io::{self, Read, Seek, SeekFrom, Write};
use std::path::Path;
use tar::{Archive, Builder, EntryType, Header};
use tempfile::tempfile;

use super::containers::total_input_bytes;
use crate::progress::{OutputTarget, ProgressArgs, ProgressReader, create_progress_bar};
use crate::utils::{CmprssInput, CmprssOutput, CommonArgs, Compressor, ExtractedTarget, Result};

// Command-line arguments for the tar compressor, parsed through clap's
// `Args` derive.
//
// NOTE(review): plain `//` comments are used here on purpose. `///` doc
// comments on a clap-derived type may be picked up as help text — confirm
// before converting these.
#[derive(Args, Debug)]
pub struct TarArgs {
    // `CommonArgs` from `crate::utils`, flattened into this argument set.
    #[clap(flatten)]
    pub common_args: CommonArgs,

    // `ProgressArgs` from `crate::progress`, flattened into this argument set.
    // This is the only part that `Tar::new` carries over into `Tar`.
    #[clap(flatten)]
    pub progress_args: ProgressArgs,
}

/// Tar archiver; implements the [`Compressor`] trait in this file.
#[derive(Default, Clone)]
pub struct Tar {
    /// Progress options. The `progress` field is handed to
    /// `create_progress_bar` every time a bar is built.
    pub progress_args: ProgressArgs,
}

impl Tar {
    /// Builds a `Tar` from parsed command-line arguments.
    ///
    /// Only `progress_args` is taken from `args`; `common_args` is not stored.
    pub fn new(args: &TarArgs) -> Tar {
        let progress_args = args.progress_args;
        Tar { progress_args }
    }
}

impl Compressor for Tar {
    /// Full name for tar, also used for extension.
    ///
    /// Always returns the literal `"tar"`.
    fn name(&self) -> &str {
        "tar"
    }

    /// Tar extracts to a directory by default.
    ///
    /// Always returns [`ExtractedTarget::Directory`].
    fn default_extracted_target(&self) -> ExtractedTarget {
        ExtractedTarget::Directory
    }

    /// Archives `input` into a tar stream delivered to `output`.
    ///
    /// * `CmprssOutput::Path` — the archive is written straight into the
    ///   newly created file, with a progress bar.
    /// * `CmprssOutput::Pipe` — the archive is staged in a temporary file and
    ///   then copied to the pipe; no progress bar is created.
    /// * `CmprssOutput::Writer` — the archive is staged in a temporary file
    ///   and then copied into the writer, with a progress bar.
    ///
    /// # Errors
    /// Propagates any failure from creating files, building the archive, or
    /// copying the staged archive to its destination.
    fn compress(&self, input: CmprssInput, output: CmprssOutput) -> Result {
        // Only path inputs have a size that can be summed before archiving.
        let known_total = |source: &CmprssInput| match source {
            CmprssInput::Path(paths) => Some(total_input_bytes(paths)),
            _ => None,
        };

        match output {
            CmprssOutput::Path(destination) => {
                let progress = create_progress_bar(
                    known_total(&input),
                    self.progress_args.progress,
                    OutputTarget::File,
                );
                let archive_file = File::create(destination)?;
                self.compress_internal(input, Builder::new(archive_file), progress.as_ref())?;
                if let Some(done) = progress {
                    done.finish();
                }
                Ok(())
            }
            CmprssOutput::Pipe(mut pipe) => {
                // Build the whole archive in a temporary file first.
                let mut staging = tempfile()?;
                self.compress_internal(input, Builder::new(&mut staging), None)?;

                // Rewind, then stream the finished archive into the pipe.
                staging.seek(SeekFrom::Start(0))?;
                io::copy(&mut staging, &mut pipe)?;
                Ok(())
            }
            CmprssOutput::Writer(mut writer) => {
                // Pipeline-internal case: tar is the innermost stage and feeds
                // the outer codec(s). The bar is owned here because only tar
                // sees the real input bytes; outer stages suppress theirs.
                let progress = create_progress_bar(
                    known_total(&input),
                    self.progress_args.progress,
                    OutputTarget::File,
                );
                let mut staging = tempfile()?;
                self.compress_internal(input, Builder::new(&mut staging), progress.as_ref())?;
                staging.seek(SeekFrom::Start(0))?;
                io::copy(&mut staging, &mut writer)?;
                if let Some(done) = progress {
                    done.finish();
                }
                Ok(())
            }
        }
    }

    fn extract(&self, input: CmprssInput, output: CmprssOutput) -> Result {
        match output {
            CmprssOutput::Path(ref out_dir) => {
                // Create the output directory if it doesn't exist
                if !out_dir.exists() {
                    std::fs::create_dir_all(out_dir)?;
                } else if !out_dir.is_dir() {
                    bail!("tar extraction output must be a directory");
                }

                match input {
                    CmprssInput::Path(paths) => {
                        if paths.len() != 1 {
                            bail!("tar extraction expects exactly one archive file");
                        }
                        let file = File::open(&paths[0])?;
                        let size = file.metadata()?.len();
                        self.unpack_with_progress(file, Some(size), out_dir)
                    }
                    CmprssInput::Pipe(mut pipe) => {
                        // Create a temporary file to store the tar content
                        let mut temp_file = tempfile()?;

                        // Copy from pipe to temporary file
                        io::copy(&mut pipe, &mut temp_file)?;

                        // Reset the file position to the beginning
                        temp_file.seek(SeekFrom::Start(0))?;
                        let size = temp_file.metadata()?.len();
                        self.unpack_with_progress(temp_file, Some(size), out_dir)
                    }
                    CmprssInput::Reader(reader) => {
                        let mut archive = Archive::new(reader.0);
                        archive.unpack(out_dir)?;
                        Ok(())
                    }
                }
            }
            CmprssOutput::Pipe(_) => bail!("tar extraction to stdout is not supported"),
            CmprssOutput::Writer(mut writer) => match input {
                CmprssInput::Path(paths) => {
                    if paths.len() != 1 {
                        bail!("tar extraction expects exactly one archive file");
                    }
                    let mut file = File::open(&paths[0])?;
                    io::copy(&mut file, &mut writer)?;
                    Ok(())
                }
                CmprssInput::Pipe(mut pipe) => {
                    io::copy(&mut pipe, &mut writer)?;
                    Ok(())
                }
                CmprssInput::Reader(mut reader) => {
                    io::copy(&mut reader, &mut writer)?;
                    Ok(())
                }
            },
        }
    }

    fn append(&self, input: CmprssInput, output: CmprssOutput) -> Result {
        let path = match output {
            CmprssOutput::Path(p) => p,
            _ => bail!("tar append requires the archive path as the output target"),
        };
        if !path.is_file() {
            bail!("tar append target must be an existing file: {:?}", path);
        }

        // Locate the offset just past the last entry's data (512-byte padded)
        // so we can truncate off the trailing zero blocks and resume writing
        // entries from there. Using the iterator is cheap: tar entries carry
        // their own position, so we walk headers without reading file data.
        let end_of_entries = {
            let reader = File::open(&path)?;
            let mut archive = Archive::new(reader);
            let mut end: u64 = 0;
            for entry in archive.entries()? {
                let entry = entry?;
                let file_pos = entry.raw_file_position();
                let size = entry.size();
                // Round up to the next 512-byte block boundary.
                let padded = size.div_ceil(512) * 512;
                end = file_pos + padded;
            }
            end
        };

        let mut file = OpenOptions::new().read(true).write(true).open(&path)?;
        // Truncate any trailing end-of-archive zero blocks so the new entries
        // start at `end_of_entries` and Builder::finish writes fresh ones.
        file.set_len(end_of_entries)?;
        file.seek(SeekFrom::Start(end_of_entries))?;

        let total = match &input {
            CmprssInput::Path(paths) => Some(total_input_bytes(paths)),
            _ => None,
        };
        let bar = create_progress_bar(total, self.progress_args.progress, OutputTarget::File);
        self.compress_internal(input, Builder::new(file), bar.as_ref())?;
        if let Some(b) = bar {
            b.finish();
        }
        Ok(())
    }

    fn list(&self, input: CmprssInput) -> Result {
        let reader: Box<dyn Read> = match input {
            CmprssInput::Path(paths) => {
                if paths.len() != 1 {
                    bail!("tar listing expects exactly one archive file");
                }
                Box::new(File::open(&paths[0])?)
            }
            CmprssInput::Pipe(stdin) => Box::new(stdin),
            CmprssInput::Reader(reader) => reader.0,
        };
        let mut archive = Archive::new(reader);
        let stdout = io::stdout();
        let mut out = stdout.lock();
        for entry in archive.entries()? {
            let entry = entry?;
            let path = entry.path()?;
            writeln!(out, "{}", path.display())?;
        }
        Ok(())
    }
}

impl Tar {
    /// Shared archive-writing helper behind `compress` and `append`.
    ///
    /// * Path inputs are added through `append_file_entry` /
    ///   `append_dir_entry`, so file reads go through `ProgressReader` and
    ///   share the single optional `bar`. Each top-level path is stored under
    ///   its final path component.
    /// * Pipe input is spooled to a temporary file and stored as one entry
    ///   named `archive`; `bar` is not used for it.
    /// * Reader input is rejected.
    ///
    /// The builder is finished before returning successfully.
    ///
    /// # Errors
    /// Fails when a path has no file name, when a path is neither a regular
    /// file nor a directory, or on any I/O error.
    fn compress_internal<W: Write>(
        &self,
        input: CmprssInput,
        mut archive: Builder<W>,
        bar: Option<&ProgressBar>,
    ) -> Result {
        match input {
            CmprssInput::Reader(_) => {
                bail!("tar does not accept an in-memory reader input");
            }
            CmprssInput::Pipe(mut pipe) => {
                // Spool the pipe to a temporary file and store it as a single
                // entry named "archive".
                let mut spool = tempfile()?;
                io::copy(&mut pipe, &mut spool)?;
                spool.seek(SeekFrom::Start(0))?;
                archive.append_file("archive", &mut spool)?;
            }
            CmprssInput::Path(paths) => {
                for path in paths {
                    let entry_name = path
                        .file_name()
                        .map(Path::new)
                        .ok_or_else(|| anyhow!("input path has no file name: {:?}", path))?;
                    if path.is_file() {
                        append_file_entry(&mut archive, entry_name, &path, bar)?;
                        continue;
                    }
                    if path.is_dir() {
                        append_dir_entry(&mut archive, entry_name, &path, bar)?;
                        continue;
                    }
                    bail!("tar does not support this file type");
                }
            }
        }
        archive.finish()?;
        Ok(())
    }

    /// Unpacks the archive in `reader` into `out_dir`, wrapping the reader in
    /// a `ProgressReader` tied to a progress bar.
    ///
    /// `size` is passed to `create_progress_bar` as the bar's total. If a bar
    /// was created, it is finished after a successful unpack.
    ///
    /// # Errors
    /// Propagates any error from unpacking.
    fn unpack_with_progress<R: Read>(
        &self,
        reader: R,
        size: Option<u64>,
        out_dir: &Path,
    ) -> Result {
        let progress = create_progress_bar(size, self.progress_args.progress, OutputTarget::File);
        let tracked = ProgressReader::new(reader, progress.clone());
        Archive::new(tracked).unpack(out_dir)?;
        if let Some(done) = progress {
            done.finish();
        }
        Ok(())
    }
}

/// Adds one regular file to the archive under `archive_name`.
///
/// The header is filled from the file's metadata and its size is set to the
/// metadata length. The contents are read through a `ProgressReader` holding
/// a clone of `bar` (or `None`).
///
/// # Errors
/// Fails when the file cannot be opened or inspected, or when writing the
/// entry to the archive fails.
fn append_file_entry<W: Write>(
    archive: &mut Builder<W>,
    archive_name: &Path,
    disk_path: &Path,
    bar: Option<&ProgressBar>,
) -> Result {
    let mut source = File::open(disk_path)?;
    let metadata = source.metadata()?;

    let mut header = Header::new_gnu();
    header.set_metadata(&metadata);
    header.set_size(metadata.len());

    let tracked = ProgressReader::new(&mut source, bar.cloned());
    archive.append_data(&mut header, archive_name, tracked)?;
    Ok(())
}

/// Adds a directory entry for `disk_path` under `archive_name`, then adds its
/// children recursively.
///
/// Children are classified with `Path::is_file` / `Path::is_dir`, which
/// follow symbolic links. A child that is neither a regular file nor a
/// directory after link resolution is silently left out of the archive.
///
/// # Errors
/// Fails when the directory cannot be inspected or listed, or when adding any
/// entry fails.
fn append_dir_entry<W: Write>(
    archive: &mut Builder<W>,
    archive_name: &Path,
    disk_path: &Path,
    bar: Option<&ProgressBar>,
) -> Result {
    // Emit the header for the directory itself; it carries no data.
    let dir_meta = std::fs::metadata(disk_path)?;
    let mut dir_header = Header::new_gnu();
    dir_header.set_metadata(&dir_meta);
    dir_header.set_entry_type(EntryType::Directory);
    dir_header.set_size(0);
    archive.append_data(&mut dir_header, archive_name, io::empty())?;

    for child in std::fs::read_dir(disk_path)? {
        let child = child?;
        let on_disk = child.path();
        let in_archive = archive_name.join(child.file_name());
        if on_disk.is_file() {
            append_file_entry(archive, &in_archive, &on_disk, bar)?;
            continue;
        }
        if on_disk.is_dir() {
            append_dir_entry(archive, &in_archive, &on_disk, bar)?;
        }
        // Anything else falls through and is not archived.
    }
    Ok(())
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::test_utils::*;
    use assert_fs::prelude::*;
    use predicates::prelude::*;
    use std::path::PathBuf;

    /// Test the basic interface of the Tar compressor.
    ///
    /// Delegates to the shared `test_compressor_interface` helper from
    /// `crate::test_utils`, passing `"tar"` and `Some("tar")`.
    #[test]
    fn test_tar_interface() {
        let compressor = Tar::default();
        test_compressor_interface(&compressor, "tar", Some("tar"));
    }

    /// Run the shared `test_compression` helper from `crate::test_utils`
    /// against a default-constructed `Tar`.
    ///
    /// NOTE(review): `Tar` exposes no compression level in this file; the
    /// test name presumably mirrors the other compressors' tests — confirm.
    #[test]
    fn test_tar_default_compression() -> Result {
        let compressor = Tar::default();
        test_compression(&compressor)
    }

    /// Create an archive holding one file, append a second file, and check
    /// that the archive grew and that both files extract with their original
    /// contents.
    #[test]
    fn test_append_adds_entries() -> Result {
        let tar = Tar::default();
        let sandbox = assert_fs::TempDir::new()?;

        let first = sandbox.child("original.txt");
        first.write_str("original contents")?;
        let second = sandbox.child("extra.txt");
        second.write_str("appended contents")?;

        let archive_path = sandbox.child("archive.tar").path().to_path_buf();

        // Initial archive with only the first file.
        tar.compress(
            CmprssInput::Path(vec![first.path().to_path_buf()]),
            CmprssOutput::Path(archive_path.clone()),
        )?;
        let size_before = std::fs::metadata(&archive_path)?.len();

        // Append the second file in place.
        tar.append(
            CmprssInput::Path(vec![second.path().to_path_buf()]),
            CmprssOutput::Path(archive_path.clone()),
        )?;
        let size_after = std::fs::metadata(&archive_path)?.len();
        assert!(
            size_after > size_before,
            "archive did not grow after append: {size_before} -> {size_after}",
        );

        let unpacked = sandbox.child("extracted");
        std::fs::create_dir_all(unpacked.path())?;
        tar.extract(
            CmprssInput::Path(vec![archive_path]),
            CmprssOutput::Path(unpacked.path().to_path_buf()),
        )?;

        // Both the pre-existing and the appended entry must round-trip.
        for (name, source) in [("original.txt", &first), ("extra.txt", &second)] {
            unpacked
                .child(name)
                .assert(predicate::path::eq_file(source.path()));
        }
        Ok(())
    }

    /// `append` must fail when the target archive does not exist, instead of
    /// silently creating a new one.
    #[test]
    fn test_append_missing_target_errors() {
        let tar = Tar::default();
        let sandbox = assert_fs::TempDir::new().unwrap();

        let payload = sandbox.child("extra.txt");
        payload.write_str("x").unwrap();
        // Never created on disk.
        let absent_archive = sandbox.child("nope.tar");

        let outcome = tar.append(
            CmprssInput::Path(vec![payload.path().to_path_buf()]),
            CmprssOutput::Path(absent_archive.path().to_path_buf()),
        );
        let err = outcome.expect_err("append to a missing archive should error");
        assert!(err.to_string().contains("must be an existing file"));
    }

    /// Archive a directory containing one file, extract it, and check that
    /// the file reappears under a folder named after the source directory.
    #[test]
    fn test_directory_handling() -> Result {
        let tar = Tar::default();

        // Source directory holding a single file.
        let source_dir = assert_fs::TempDir::new()?;
        let source_file = source_dir.child("file.txt");
        source_file.write_str("garbage data for testing")?;

        let sandbox = assert_fs::TempDir::new()?;
        let archive = sandbox.child("dir_archive.tar");
        archive.assert(predicate::path::missing());

        tar.compress(
            CmprssInput::Path(vec![source_dir.path().to_path_buf()]),
            CmprssOutput::Path(archive.path().to_path_buf()),
        )?;
        archive.assert(predicate::path::is_file());

        let unpacked = sandbox.child("extracted");
        std::fs::create_dir_all(unpacked.path())?;
        tar.extract(
            CmprssInput::Path(vec![archive.path().to_path_buf()]),
            CmprssOutput::Path(unpacked.path().to_path_buf()),
        )?;

        // The archive stores the directory under its own final path component.
        let top_level = PathBuf::from(source_dir.path().file_name().unwrap());
        let restored = unpacked.child(top_level).child("file.txt");
        restored.assert(predicate::path::eq_file(source_file.path()));
        Ok(())
    }
}