gzippy 0.8.0

The fastest parallel gzip. Drop-in replacement for gzip and pigz, and a Rust library.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
//! gzippy - The fastest parallel gzip
//!
//! A drop-in replacement for gzip that uses multiple processors for compression.
//! Inspired by [pigz](https://zlib.net/pigz/) by Mark Adler.

use std::env;
use std::path::Path;
use std::process;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Mutex;

// ── Core infrastructure ───────────────────────────────────────────────────────
mod analyze;
mod cli;
mod error;
mod format;
mod index_mode;
mod utils;

// ── Compression & decompression stacks ───────────────────────────────────────
mod compress; // engine (mod.rs) + io, parallel, pipelined, optimization, simple
mod decompress; // engine (mod.rs) + io, format, bgzf, SIMD tables, scan_inflate

// ── Hardware backends (ISA-L, libdeflate FFI) ─────────────────────────────────
mod backends; // isal, isal_compress, isal_decompress, libdeflate

// ── Threading infrastructure ──────────────────────────────────────────────────
mod infra; // thread_pool, scheduler, io_thread

// ── Test infrastructure ───────────────────────────────────────────────────────
#[cfg(test)]
mod tests;

use cli::GzippyArgs;
use error::GzippyError;

/// Version banner printed when the version flag is set: the literal `gzippy ` followed by
/// the crate version taken from `CARGO_PKG_VERSION` at compile time.
const VERSION: &str = concat!("gzippy ", env!("CARGO_PKG_VERSION"));

/// Path of the output file currently being written, if any.
///
/// While this holds `Some(path)`, the signal handler treats `path` as an incomplete
/// output file and tries to delete it when the process is interrupted.
static OUTPUT_FILE: Mutex<Option<String>> = Mutex::new(None);

/// Set to `true` by the signal handler once an interrupting signal has been received.
static INTERRUPTED: AtomicBool = AtomicBool::new(false);

/// Record (or clear, with `None`) the output file the signal handler should remove.
///
/// If the mutex is poisoned the update is silently skipped.
pub fn set_output_file(path: Option<String>) {
    let Ok(mut slot) = OUTPUT_FILE.lock() else {
        return;
    };
    *slot = path;
}

/// Register the process-wide signal dispositions.
///
/// SIGINT (Ctrl-C), SIGTERM and SIGHUP are routed to `signal_handler`, which cleans up
/// and exits. SIGPIPE is reset to its default action so the process exits quietly
/// (e.g., when piping to `head`).
fn install_signal_handlers() {
    let cleanup_handler = signal_handler as *const () as libc::sighandler_t;

    for sig in [libc::SIGINT, libc::SIGTERM, libc::SIGHUP] {
        // SAFETY: `cleanup_handler` is the address of an `extern "C" fn(c_int)`, which is
        // the shape `signal` expects for a handler.
        unsafe {
            libc::signal(sig, cleanup_handler);
        }
    }

    // SAFETY: `SIG_DFL` is a disposition constant provided by libc itself.
    unsafe {
        libc::signal(libc::SIGPIPE, libc::SIG_DFL);
    }
}

/// Handler installed for SIGINT, SIGTERM and SIGHUP.
///
/// Sets `INTERRUPTED`, makes a best-effort attempt to delete the file recorded in
/// `OUTPUT_FILE`, then restores the default disposition for `sig` and re-raises it.
///
/// NOTE(review): this runs in signal context. `try_lock` avoids blocking, but
/// `std::fs::remove_file` is not documented as async-signal-safe — confirm this is an
/// accepted trade-off.
extern "C" fn signal_handler(sig: libc::c_int) {
    // Mark as interrupted (atomic, signal-safe)
    INTERRUPTED.store(true, Ordering::SeqCst);

    // Try to clean up the output file.
    // Mutex::lock may not be signal-safe, but try_lock is better.
    // In the worst case we just skip cleanup.
    // `try_lock` fails (and cleanup is skipped) when the lock is currently held or poisoned.
    if let Ok(guard) = OUTPUT_FILE.try_lock() {
        if let Some(ref path) = *guard {
            // Deletion errors are deliberately ignored: nothing useful can be done here.
            let _ = std::fs::remove_file(path);
        }
    }

    // Restore default handler and re-raise so parent sees correct signal
    // SAFETY: both calls take only the signal number this handler was invoked with and
    // the libc-provided `SIG_DFL` constant.
    unsafe {
        libc::signal(sig, libc::SIG_DFL);
        libc::raise(sig);
    }
}

/// Program entry point: install the signal handlers, execute `run`, and exit with its
/// status.
///
/// An error returned by `run` is printed to stderr as `gzippy: <error>` and becomes
/// exit status 1.
fn main() {
    install_signal_handlers();

    let status = run().unwrap_or_else(|err| {
        eprintln!("gzippy: {}", err);
        1
    });

    process::exit(status);
}

/// Parse the command line and carry out the requested operation.
///
/// Order of evaluation:
/// 1. The version, help and license flags print their text and return `Ok(0)`.
/// 2. `analyze::maybe_run` and `index_mode::maybe_run` may claim the invocation and
///    supply the exit code.
/// 3. The mode (compress / decompress / write to stdout) is derived from the flags and
///    from the name the program was invoked as.
/// 4. Terminal guards run, then list mode, then stdin or per-file processing.
///
/// # Returns
///
/// `Ok(code)` with the process exit code. In file mode a failing file is reported on
/// stderr and sets the code to 1 without stopping the remaining files; a non-zero code
/// returned by a file operation replaces the current code.
///
/// # Errors
///
/// Returns `Err` when argument parsing fails, or when the stdin compress, decompress or
/// test path returns an error.
fn run() -> Result<i32, GzippyError> {
    let args = GzippyArgs::parse()?;

    if args.version {
        println!("{}", VERSION);
        return Ok(0);
    }

    if args.help {
        print_help();
        return Ok(0);
    }

    if args.license {
        print_license();
        return Ok(0);
    }

    // --analyze short-circuits the normal compress/decompress flow.
    if let Some(code) = analyze::maybe_run(&args) {
        return Ok(code);
    }

    // --index and --seek short-circuit the normal decompress flow.
    if let Some(code) = index_mode::maybe_run(&args) {
        return Ok(code);
    }

    // Support gunzip/ungzippy/zcat/gzcat symlinks
    // Only the final path component of argv[0] is inspected; fall back to "gzippy" when
    // argv[0] is missing or the file name is not valid UTF-8.
    let program_path = env::args().next().unwrap_or_else(|| "gzippy".to_string());
    let program_name = Path::new(&program_path)
        .file_name()
        .and_then(|name| name.to_str())
        .unwrap_or("gzippy");

    // "ungzippy" and "gunzip" are matched as substrings; "zcat" and "gzcat" must match
    // the program name exactly.
    let decompress = args.decompress
        || program_name.contains("ungzippy")
        || program_name.contains("gunzip")
        || program_name == "zcat"
        || program_name == "gzcat";

    // zcat/gzcat imply decompress-to-stdout
    let stdout_mode = args.stdout || program_name == "zcat" || program_name == "gzcat";

    // Refuse to write compressed binary data to a terminal (unless -f).
    // Applies to: explicit -c/stdout mode, OR no files given (stdin→stdout compress).
    if !decompress && !args.test && !args.force && (stdout_mode || args.files.is_empty()) {
        use std::io::IsTerminal;
        if std::io::stdout().is_terminal() {
            eprintln!(
                "gzippy: compressed data not written to a terminal. Use -f to force compression."
            );
            return Ok(1);
        }
    }
    // Refuse to read compressed data from a terminal (unless -f).
    if (decompress || args.test) && !args.force && args.files.is_empty() {
        use std::io::IsTerminal;
        if std::io::stdin().is_terminal() {
            eprintln!(
                "gzippy: compressed data not read from a terminal. Use -f to force decompression."
            );
            return Ok(1);
        }
    }

    // Apply stdout_mode back to args for downstream use
    let mut args = args;
    if stdout_mode {
        args.stdout = true;
    }

    // --test implies decompress mode
    let decompress = decompress || args.test;

    let mut exit_code = 0;

    // Handle --list mode
    if args.list {
        if args.files.is_empty() {
            eprintln!("gzippy: --list does not support stdin");
            return Ok(1);
        }
        // Column header: the verbose layout adds method, CRC and date/time columns.
        if args.verbose {
            println!(
                "method  crc     date  time    compressed uncompressed  ratio uncompressed_name"
            );
        } else {
            println!("  compressed uncompressed  ratio uncompressed_name");
        }
        let mut total_comp = 0u64;
        let mut total_uncomp = 0u64;
        for file in &args.files {
            match list_file(file, &args) {
                Ok((comp, uncomp)) => {
                    total_comp += comp;
                    total_uncomp += uncomp;
                }
                Err(e) => {
                    // A file that cannot be listed is reported and skipped; it does not
                    // contribute to the totals.
                    eprintln!("gzippy: {}: {}", file, e);
                    exit_code = 1;
                }
            }
        }
        // The totals row is only printed when more than one file was named.
        if args.files.len() > 1 {
            let ratio = if total_uncomp > 0 {
                (1.0 - total_comp as f64 / total_uncomp as f64) * 100.0
            } else {
                0.0
            };
            println!(
                "{:>12} {:>12} {:4.1}% (totals)",
                total_comp, total_uncomp, ratio
            );
        }
        return Ok(exit_code);
    }

    if args.files.is_empty() {
        // Process stdin
        // Errors on this path propagate to the caller via `?` instead of being printed here.
        if decompress {
            if args.test {
                exit_code = test_stdin(&args)?;
            } else {
                exit_code = decompress::io::decompress_stdin(&args)?;
            }
        } else {
            exit_code = compress::io::compress_stdin(&args)?;
        }
    } else {
        // Process files
        for file in &args.files {
            let result = if args.test {
                test_file(file, &args)
            } else if decompress {
                decompress::io::decompress_file(file, &args)
            } else {
                compress::io::compress_file(file, &args)
            };

            match result {
                Ok(code) => {
                    // Keep the most recent non-zero code; a later success does not reset it.
                    if code != 0 {
                        exit_code = code;
                    }
                }
                Err(e) => {
                    // Report the failure and continue with the remaining files.
                    eprintln!("gzippy: {}: {}", file, e);
                    exit_code = 1;
                }
            }
        }
    }

    Ok(exit_code)
}

/// Check the integrity of one compressed file by decompressing it and discarding the output.
///
/// The file is memory-mapped and fed to `decompress::decompress_gzip_to_writer`; the
/// decompressed bytes go into a scratch buffer that is dropped on return.
///
/// # Returns
///
/// `Ok(0)` when decompression succeeds (printing `<filename>: OK` to stderr unless
/// `args.quiet` is set), `Ok(1)` when decompression fails (the error is printed to stderr).
///
/// # Errors
///
/// `GzippyError::FileNotFound` if `filename` does not exist; opening or mapping failures
/// are propagated.
fn test_file(filename: &str, args: &GzippyArgs) -> Result<i32, GzippyError> {
    use memmap2::Mmap;
    use std::fs::File;

    let path = Path::new(filename);
    if !path.exists() {
        return Err(GzippyError::FileNotFound(filename.to_string()));
    }

    let file = File::open(path)?;
    // SAFETY: the mapping is only read and does not outlive this function.
    // NOTE(review): assumes no other process truncates or rewrites the file while mapped.
    let mapped = unsafe { Mmap::map(&file)? };

    // Scratch buffer: the output only matters insofar as producing it succeeds.
    let mut scratch = Vec::new();
    if let Err(err) = decompress::decompress_gzip_to_writer(&mapped, &mut scratch) {
        eprintln!("{}: {}", filename, err);
        return Ok(1);
    }

    if !args.quiet {
        eprintln!("{}: OK", filename);
    }
    Ok(0)
}

/// Check the integrity of compressed data arriving on stdin.
///
/// All of stdin is read into memory, then decompressed into a scratch buffer that is
/// dropped on return.
///
/// # Returns
///
/// `Ok(0)` when decompression succeeds (printing `stdin: OK` to stderr unless
/// `args.quiet` is set), `Ok(1)` when decompression fails (the error is printed to stderr).
///
/// # Errors
///
/// Propagates any error from reading stdin.
fn test_stdin(args: &GzippyArgs) -> Result<i32, GzippyError> {
    use std::io::{BufReader, Read};

    let mut compressed = Vec::new();
    let stdin = std::io::stdin();
    BufReader::new(stdin.lock()).read_to_end(&mut compressed)?;

    let mut scratch = Vec::new();
    if let Err(err) = decompress::decompress_gzip_to_writer(&compressed, &mut scratch) {
        eprintln!("stdin: {}", err);
        return Ok(1);
    }

    if !args.quiet {
        eprintln!("stdin: OK");
    }
    Ok(0)
}

/// List compressed file information (gzip -l format)
fn list_file(filename: &str, args: &GzippyArgs) -> Result<(u64, u64), GzippyError> {
    use std::fs;

    let metadata =
        fs::metadata(filename).map_err(|_| GzippyError::FileNotFound(filename.to_string()))?;
    let compressed_size = metadata.len();

    // Read the gzip file
    let data = fs::read(filename).map_err(GzippyError::Io)?;

    if data.len() < 18 || data[0] != 0x1f || data[1] != 0x8b {
        return Err(GzippyError::invalid_argument(format!(
            "{}: not in gzip format",
            filename
        )));
    }

    // ISIZE is last 4 bytes of the gzip file
    let isize_bytes = &data[data.len() - 4..];
    let uncompressed_size = u32::from_le_bytes([
        isize_bytes[0],
        isize_bytes[1],
        isize_bytes[2],
        isize_bytes[3],
    ]) as u64;

    // CRC32 is 4 bytes before ISIZE
    let crc_bytes = &data[data.len() - 8..data.len() - 4];
    let crc32 = u32::from_le_bytes([crc_bytes[0], crc_bytes[1], crc_bytes[2], crc_bytes[3]]);

    let ratio = if uncompressed_size > 0 {
        ((1.0 - compressed_size as f64 / uncompressed_size as f64) * 100.0).clamp(-99.9, 99.9)
    } else {
        0.0
    };

    // Output name: prefer FNAME from header, else strip suffix from the given path.
    // Show the full path as given (matching gzip -l behavior).
    let fname = extract_list_fname(&data);
    let display_name_owned;
    let display_name: &str = if let Some(ref name) = fname {
        name.as_str()
    } else {
        let output_name = crate::utils::strip_compression_extension(Path::new(filename));
        display_name_owned = output_name.to_str().unwrap_or(filename).to_string();
        &display_name_owned
    };

    if args.verbose {
        let mtime = u32::from_le_bytes([data[4], data[5], data[6], data[7]]);
        let date_str = if mtime > 0 {
            format_unix_timestamp(mtime)
        } else {
            "            ".to_string() // 12 spaces matching "MMM DD HH:MM"
        };
        println!(
            "defla {:08x} {} {:>12} {:>12} {:4.1}% {}",
            crc32, date_str, compressed_size, uncompressed_size, ratio, display_name
        );
    } else {
        println!(
            "{:>12} {:>12} {:4.1}% {}",
            compressed_size, uncompressed_size, ratio, display_name
        );
    }

    Ok((compressed_size, uncompressed_size))
}

/// Pull the original file name (FNAME field) out of a gzip header for list display.
///
/// Returns `None` when the buffer is shorter than the 10-byte fixed header, the magic or
/// method bytes are not `1f 8b 08`, the FNAME flag is clear, the header is truncated
/// before the name's terminating NUL, or the name is not valid UTF-8.
fn extract_list_fname(data: &[u8]) -> Option<String> {
    const FIXED_HEADER_LEN: usize = 10;
    const FLAG_FEXTRA: u8 = 0x04;
    const FLAG_FNAME: u8 = 0x08;

    let header_ok = data.len() >= FIXED_HEADER_LEN && data.starts_with(&[0x1f, 0x8b, 0x08]);
    if !header_ok {
        return None;
    }

    let flags = data[3];
    if flags & FLAG_FNAME == 0 {
        return None;
    }

    // The optional extra field (2-byte little-endian length + payload) precedes the name.
    let mut name_start = FIXED_HEADER_LEN;
    if flags & FLAG_FEXTRA != 0 {
        let xlen_bytes = data.get(name_start..name_start + 2)?;
        let xlen = usize::from(u16::from_le_bytes([xlen_bytes[0], xlen_bytes[1]]));
        name_start += 2 + xlen;
    }

    // The name runs up to (not including) the first NUL; no NUL means a truncated header.
    let rest = data.get(name_start..)?;
    let name_len = rest.iter().position(|&byte| byte == 0)?;
    String::from_utf8(rest[..name_len].to_vec()).ok()
}

/// Render a Unix timestamp as `"Mon DD HH:MM"` (for example `"Jan  1 12:00"`).
///
/// The conversion is done by hand on the raw seconds-since-epoch value with no time-zone
/// adjustment. The year is computed but not printed; the day is right-aligned in two
/// characters and the time is zero-padded.
fn format_unix_timestamp(timestamp: u32) -> String {
    const SECS_PER_DAY: u64 = 86_400;
    const MONTH_NAMES: [&str; 12] = [
        "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
    ];

    // Gregorian leap-year rule.
    let is_leap =
        |y: u32| y.is_multiple_of(4) && (!y.is_multiple_of(100) || y.is_multiple_of(400));
    let year_len = |y: u32| -> u64 {
        if is_leap(y) {
            366
        } else {
            365
        }
    };

    let total_secs = u64::from(timestamp);
    let secs_into_day = total_secs % SECS_PER_DAY;
    let hours = secs_into_day / 3600;
    let minutes = (secs_into_day % 3600) / 60;

    // Peel off whole years; what remains is the zero-based day within `year`.
    let mut year = 1970u32;
    let mut day_index = total_secs / SECS_PER_DAY;
    while day_index >= year_len(year) {
        day_index -= year_len(year);
        year += 1;
    }

    // Peel off whole months; what remains is the zero-based day within the month.
    let february: u64 = if is_leap(year) { 29 } else { 28 };
    let month_lengths: [u64; 12] = [31, february, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31];
    let mut month_idx = 0usize;
    for (idx, &len) in month_lengths.iter().enumerate() {
        if day_index < len {
            month_idx = idx;
            break;
        }
        day_index -= len;
    }
    let day_of_month = day_index + 1;

    // gzip format: "Jan  1 12:00" — no year, day right-aligned in 2 chars
    format!(
        "{} {:2} {:02}:{:02}",
        MONTH_NAMES[month_idx], day_of_month, hours, minutes
    )
}

/// Print the usage summary (options, compression levels, examples) to stdout,
/// one line at a time.
fn print_help() {
    const HELP_LINES: &[&str] = &[
        "Usage: gzippy [OPTION]... [FILE]...",
        "",
        "Compress or decompress FILEs (by default, compress in place).",
        "Uses multiple processors for parallel compression.",
        "",
        "Options:",
        "  -1..-9              Compression level (1=fast, 9=best, default=6)",
        "  --level N           Set compression level 1-12",
        "  --ultra             True zopfli (level 11, single-member; -p tunes intra-block parallelism only)",
        "  --max               Maximum compression (level 12, libdeflate near-zopfli)",
        "  -c, --stdout        Write to stdout, keep original files",
        "  -d, --decompress    Decompress",
        "  -f, --force         Force overwrite / compress links / pass-through",
        "  -k, --keep          Keep original file",
        "  -l, --list          List compressed file info",
        "  -t, --test          Test compressed file integrity",
        "  -n, --no-name       Don't save/restore original name and timestamp",
        "  -N, --name          Save/restore original name and timestamp",
        "  -m, --no-time       Don't save/restore modification time",
        "  -M, --time          Save/restore modification time (pigz)",
        "  -p, --processes N   Number of threads (default: all CPUs)",
        "  -b, --blocksize N   Block size for parallel compression",
        "  -r, --recursive     Recurse into directories",
        "  -R, --rsyncable     Make output rsync-friendly",
        "  -S, --suffix .suf   Use suffix .suf instead of .gz",
        "  -Y, --synchronous   Synchronous output (fsync after write)",
        "  -i, --independent   Force independent blocks (parallel decompress)",
        "  -C, --comment TEXT  Add comment to gzip header",
        "  -H, --huffman       Huffman-only compression",
        "  -U, --rle           Run-length encoding compression",
        "  -q, --quiet         Suppress output",
        "  -v, --verbose       Verbose output",
        "  -h, --help          Show this help",
        "  -V, --version       Show version",
        "  -L, --license       Show license",
        "",
        "Compression levels:",
        "  1-6              Fast (libdeflate, parallel decompress)",
        "  7-9              Balanced (zlib-ng, gzip-compatible)",
        "  10,12            libdeflate ultra (near-zopfli ratio, parallel)",
        "  11               True zopfli (single-member, slowest, best ratio)",
        "",
        "Examples:",
        "  gzippy file.txt          Compress file.txt -> file.txt.gz",
        "  gzippy -d file.txt.gz    Decompress file.txt.gz -> file.txt",
        "  gzippy -p4 -9 file.txt   Compress with 4 threads, best compression",
        "  cat file | gzippy > out  Compress stdin to stdout",
    ];

    for line in HELP_LINES {
        println!("{}", line);
    }
}

/// Print the short license notice to stdout, one line at a time.
fn print_license() {
    const LICENSE_LINES: &[&str] = &[
        "gzippy - The fastest gzip",
        "",
        "Inspired by pigz by Mark Adler, Copyright (C) 2007-2023",
        "",
        "zlib License - see LICENSE file for details.",
    ];

    for line in LICENSE_LINES {
        println!("{}", line);
    }
}