redicat 0.4.2

REDICAT - RNA Editing Cellular Assessment Toolkit: A highly parallelized utility for analyzing RNA editing events in single-cell RNA-seq data
Documentation
use anyhow::Result;
use csv;
use grep_cli::stdout;
use gzp::{deflate::Gzip, BgzfSyncReader, Compression, ZBuilder};
use std::fs::File;
use std::io::{self, BufReader, BufWriter, Read, Write};
use std::path::Path;
use termcolor::ColorChoice;

/// Build a tab-delimited CSV reader over a file or standard input.
///
/// # Arguments
///
/// * `path` - Input location. `None`, or a path that is exactly `-`, selects
///   standard input; any other value is opened as a file.
/// * `has_headers` - Forwarded to `csv::ReaderBuilder::has_headers`.
/// * `bgzipped` - When `true`, the byte stream is wrapped in a
///   `BgzfSyncReader` before being handed to the CSV parser.
///
/// # Errors
///
/// Returns an error if the input file cannot be opened.
pub fn get_reader<P: AsRef<Path>>(
    path: &Option<P>,
    has_headers: bool,
    bgzipped: bool,
) -> Result<csv::Reader<Box<dyn Read>>> {
    let raw_reader: Box<dyn Read> = match path {
        // Compare as `OsStr` rather than via `to_str().unwrap()`: a path that is
        // not valid UTF-8 can never be the "-" sentinel, and must not panic here.
        Some(path) if path.as_ref().as_os_str() != "-" => {
            let reader = BufReader::new(File::open(path)?);
            if bgzipped {
                Box::new(BgzfSyncReader::new(reader))
            } else {
                Box::new(reader)
            }
        }
        // No path, or the "-" sentinel: read from standard input.
        _ => {
            let reader = io::stdin();
            if bgzipped {
                Box::new(BgzfSyncReader::new(reader))
            } else {
                Box::new(reader)
            }
        }
    };

    Ok(csv::ReaderBuilder::new()
        .delimiter(b'\t')
        .has_headers(has_headers)
        .from_reader(raw_reader))
}

/// Build a tab-delimited CSV writer targeting a file or stdout, with optional
/// gzip compression.
///
/// # Arguments
///
/// * `path` - Output location. `None`, or a path that is exactly `-`, selects
///   standard output; any other value is created (or truncated) as a file.
/// * `gzipped` - When `true`, output is compressed through a
///   `ZBuilder::<Gzip, _>` writer; otherwise bytes are written as-is.
/// * `write_headers` - Forwarded to `csv::WriterBuilder::has_headers`.
/// * `threads` - Forwarded to `ZBuilder::num_threads`; only used when
///   `gzipped` is `true`.
/// * `compression_level` - Passed to `Compression::new`; only used when
///   `gzipped` is `true`.
///
/// # Errors
///
/// Returns an error if the output file cannot be created.
///
/// NOTE(review): the compressed output uses the `Gzip` format, whereas
/// `get_reader` decodes with `BgzfSyncReader` — confirm whether files written
/// here are expected to be read back through `get_reader(.., bgzipped = true)`.
pub fn get_writer<P: AsRef<Path>>(
    path: &Option<P>,
    gzipped: bool,
    write_headers: bool,
    threads: usize,
    compression_level: u32,
) -> Result<csv::Writer<Box<dyn Write>>> {
    let raw_writer: Box<dyn Write> = match path {
        // Compare as `OsStr` rather than via `to_str().unwrap()`: a path that is
        // not valid UTF-8 can never be the "-" sentinel, and must not panic here.
        Some(path) if path.as_ref().as_os_str() != "-" => {
            let writer = BufWriter::new(File::create(path)?);
            if gzipped {
                Box::new(
                    ZBuilder::<Gzip, _>::new()
                        .num_threads(threads)
                        .compression_level(Compression::new(compression_level))
                        .from_writer(writer),
                )
            } else {
                Box::new(writer)
            }
        }
        // No path, or the "-" sentinel: write to standard output without colour.
        _ => {
            let writer = stdout(ColorChoice::Never);
            if gzipped {
                Box::new(
                    ZBuilder::<Gzip, _>::new()
                        .num_threads(threads)
                        .compression_level(Compression::new(compression_level))
                        .from_writer(writer),
                )
            } else {
                Box::new(writer)
            }
        }
    };

    Ok(csv::WriterBuilder::new()
        .delimiter(b'\t')
        .has_headers(write_headers)
        .from_writer(raw_writer))
}

#[cfg(test)]
mod tests {
    use super::{get_reader, get_writer};
    use flate2::read::GzDecoder;
    use std::io::Read;
    use tempfile::tempdir;

    /// Rows written uncompressed (with a header row) come back unchanged when
    /// read with header handling enabled.
    #[test]
    fn plain_tsv_roundtrip_works() {
        let workspace = tempdir().unwrap();
        let target = Some(workspace.path().join("plain.tsv"));

        let mut tsv_out = get_writer(&target, false, true, 1, 3).unwrap();
        for row in [["chrom", "pos", "depth"], ["chr22", "50783283", "2294"]] {
            tsv_out.write_record(row).unwrap();
        }
        tsv_out.flush().unwrap();
        // Release the file handle before reading the file back.
        drop(tsv_out);

        let mut tsv_in = get_reader(&target, true, false).unwrap();
        let mut rows: Vec<Vec<String>> = Vec::new();
        for record in tsv_in.records() {
            let record = record.unwrap();
            rows.push(record.iter().map(str::to_owned).collect());
        }

        // The first written row is consumed as the header, leaving one data row.
        assert_eq!(rows.len(), 1);
        assert_eq!(rows[0], vec!["chr22", "50783283", "2294"]);
    }

    /// Output written with compression enabled can be decoded by a plain gzip
    /// decoder and contains the tab-separated rows.
    #[test]
    fn gzipped_writer_outputs_readable_gzip_stream() {
        let workspace = tempdir().unwrap();
        let archive = workspace.path().join("bgzf.tsv.gz");
        let target = Some(archive.clone());

        let mut tsv_out = get_writer(&target, true, false, 1, 3).unwrap();
        for row in [["chr22", "22901237", "100"], ["chr22", "22901238", "101"]] {
            tsv_out.write_record(row).unwrap();
        }
        tsv_out.flush().unwrap();
        // Dropping the writer finalises the compressed stream before it is read.
        drop(tsv_out);

        let compressed = std::fs::read(archive).unwrap();
        let mut text = String::new();
        GzDecoder::new(compressed.as_slice())
            .read_to_string(&mut text)
            .unwrap();

        let observed: Vec<&str> = text.lines().collect();
        assert_eq!(
            observed,
            vec!["chr22\t22901237\t100", "chr22\t22901238\t101"]
        );
    }
}