use flate2::read::GzDecoder;
use std::fs::File;
use std::io::{self, stdin, BufRead, BufReader};
use std::path::Path;
/// Capacity, in bytes (128 KiB), passed to `BufReader::with_capacity` by
/// `open` and `from_file` for every reader they build.
const BUFFER_SIZE: usize = 128 * 1024;
pub fn open<P: AsRef<Path>>(path: P) -> io::Result<Box<dyn BufRead + Send>> {
let path = path.as_ref();
if path.to_str() == Some("-") {
return Ok(Box::new(BufReader::with_capacity(BUFFER_SIZE, stdin())));
}
let file = File::open(path)?;
let is_gzip = path
.extension()
.and_then(|ext| ext.to_str())
.map(|ext| ext.eq_ignore_ascii_case("gz"))
.unwrap_or(false);
if is_gzip {
let decoder = GzDecoder::new(file);
Ok(Box::new(BufReader::with_capacity(BUFFER_SIZE, decoder)))
} else {
Ok(Box::new(BufReader::with_capacity(BUFFER_SIZE, file)))
}
}
#[must_use]
pub fn from_file(file: File, is_gzip: bool) -> Box<dyn BufRead + Send> {
if is_gzip {
let decoder = GzDecoder::new(file);
Box::new(BufReader::with_capacity(BUFFER_SIZE, decoder))
} else {
Box::new(BufReader::with_capacity(BUFFER_SIZE, file))
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use flate2::write::GzEncoder;
    use flate2::Compression;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Gzip-compresses `lines`, each terminated by a newline, into memory.
    fn gzip_lines(lines: &[&str]) -> Vec<u8> {
        let mut encoder = GzEncoder::new(Vec::new(), Compression::default());
        for line in lines {
            writeln!(encoder, "{}", line).unwrap();
        }
        encoder.finish().unwrap()
    }

    /// Creates a temporary file whose name ends in `suffix` and that holds
    /// exactly `contents`, flushed to disk.
    fn temp_file_with(suffix: &str, contents: &[u8]) -> NamedTempFile {
        let mut tmp = NamedTempFile::with_suffix(suffix).unwrap();
        tmp.write_all(contents).unwrap();
        tmp.flush().unwrap();
        tmp
    }

    /// Drains `reader` into a vector of lines, panicking on any read error.
    fn read_all_lines(reader: Box<dyn BufRead + Send>) -> Vec<String> {
        reader.lines().map(Result::unwrap).collect()
    }

    #[test]
    fn test_plain_text_file() {
        let mut tmp = NamedTempFile::new().unwrap();
        for text in ["line 1", "line 2", "line 3"] {
            writeln!(tmp, "{}", text).unwrap();
        }
        tmp.flush().unwrap();

        let lines = read_all_lines(open(tmp.path()).unwrap());
        assert_eq!(lines, vec!["line 1", "line 2", "line 3"]);
    }

    #[test]
    fn test_gzip_file() {
        let payload = gzip_lines(&["compressed 1", "compressed 2"]);
        let tmp = temp_file_with(".gz", &payload);

        let lines = read_all_lines(open(tmp.path()).unwrap());
        assert_eq!(lines, vec!["compressed 1", "compressed 2"]);
    }

    #[test]
    fn test_empty_file() {
        let tmp = NamedTempFile::new().unwrap();

        let lines = read_all_lines(open(tmp.path()).unwrap());
        assert!(lines.is_empty());
    }

    #[test]
    fn test_from_file_explicit_gzip() {
        // The `.bin` suffix ensures decoding is driven by the flag, not the name.
        let payload = gzip_lines(&["forced gzip"]);
        let tmp = temp_file_with(".bin", &payload);

        let handle = File::open(tmp.path()).unwrap();
        let lines = read_all_lines(from_file(handle, true));
        assert_eq!(lines, vec!["forced gzip"]);
    }

    #[test]
    fn test_case_insensitive_gz_extension() {
        let payload = gzip_lines(&["upper case gz"]);
        let tmp = temp_file_with(".GZ", &payload);

        let lines = read_all_lines(open(tmp.path()).unwrap());
        assert_eq!(lines, vec!["upper case gz"]);
    }
}