use anyhow::Result;
use csv;
use grep_cli::stdout;
use gzp::{deflate::Gzip, BgzfSyncReader, Compression, ZBuilder};
use std::fs::File;
use std::io::{self, BufReader, BufWriter, Read, Write};
use std::path::Path;
use termcolor::ColorChoice;
pub fn get_reader<P: AsRef<Path>>(
path: &Option<P>,
has_headers: bool,
bgzipped: bool,
) -> Result<csv::Reader<Box<dyn Read>>> {
let raw_reader: Box<dyn Read> = match path {
Some(path) if path.as_ref().to_str().unwrap() != "-" => {
let reader = BufReader::new(File::open(path)?);
if bgzipped {
Box::new(BgzfSyncReader::new(reader))
} else {
Box::new(reader)
}
}
_ => {
let reader = io::stdin();
if bgzipped {
Box::new(BgzfSyncReader::new(reader))
} else {
Box::new(reader)
}
}
};
Ok(csv::ReaderBuilder::new()
.delimiter(b'\t')
.has_headers(has_headers)
.from_reader(raw_reader))
}
pub fn get_writer<P: AsRef<Path>>(
path: &Option<P>,
gzipped: bool,
write_headers: bool,
threads: usize,
compression_level: u32,
) -> Result<csv::Writer<Box<dyn Write>>> {
let raw_writer: Box<dyn Write> = match path {
Some(path) if path.as_ref().to_str().unwrap() != "-" => {
let writer = BufWriter::new(File::create(path)?);
if gzipped {
Box::new(
ZBuilder::<Gzip, _>::new()
.num_threads(threads)
.compression_level(Compression::new(compression_level))
.from_writer(writer),
)
} else {
Box::new(writer)
}
}
_ => {
let writer = stdout(ColorChoice::Never);
if gzipped {
Box::new(
ZBuilder::<Gzip, _>::new()
.num_threads(threads)
.compression_level(Compression::new(compression_level))
.from_writer(writer),
)
} else {
Box::new(writer)
}
}
};
Ok(csv::WriterBuilder::new()
.delimiter(b'\t')
.has_headers(write_headers)
.from_writer(raw_writer))
}
#[cfg(test)]
mod tests {
    use super::{get_reader, get_writer};
    use flate2::read::GzDecoder;
    use std::io::Read;
    use tempfile::tempdir;

    /// Writes a header row plus one data row as plain TSV, then reads the
    /// file back with header handling enabled and expects only the data row.
    #[test]
    fn plain_tsv_roundtrip_works() {
        let scratch = tempdir().unwrap();
        let target = Some(scratch.path().join("plain.tsv"));

        let mut tsv_out = get_writer(&target, false, true, 1, 3).unwrap();
        tsv_out.write_record(["chrom", "pos", "depth"]).unwrap();
        tsv_out.write_record(["chr22", "50783283", "2294"]).unwrap();
        tsv_out.flush().unwrap();
        // Release the writer before the file is reopened for reading.
        drop(tsv_out);

        let mut tsv_in = get_reader(&target, true, false).unwrap();
        let mut parsed: Vec<Vec<String>> = Vec::new();
        for record in tsv_in.records() {
            let record = record.unwrap();
            parsed.push(record.iter().map(String::from).collect());
        }

        assert_eq!(parsed.len(), 1);
        assert_eq!(parsed[0], vec!["chr22", "50783283", "2294"]);
    }

    /// Writes two rows through the gzip-enabled writer and checks that a
    /// `flate2` gzip decoder recovers exactly those two TSV lines.
    #[test]
    fn gzipped_writer_outputs_readable_gzip_stream() {
        let scratch = tempdir().unwrap();
        let gz_path = scratch.path().join("bgzf.tsv.gz");
        let target = Some(gz_path.clone());

        let mut gz_out = get_writer(&target, true, false, 1, 3).unwrap();
        gz_out.write_record(["chr22", "22901237", "100"]).unwrap();
        gz_out.write_record(["chr22", "22901238", "101"]).unwrap();
        gz_out.flush().unwrap();
        // Release the writer before the compressed bytes are read back.
        drop(gz_out);

        let compressed = std::fs::read(gz_path).unwrap();
        let mut text = String::new();
        GzDecoder::new(compressed.as_slice())
            .read_to_string(&mut text)
            .unwrap();

        let decoded: Vec<&str> = text.lines().collect();
        assert_eq!(
            decoded,
            vec!["chr22\t22901237\t100", "chr22\t22901238\t101"]
        );
    }
}