use anyhow::Result;
use std::{
convert::AsRef,
fs::File,
io::{BufRead, BufReader},
path::Path,
};
use super::{FastaReader, FastqReader, FastxRead, Record};
/// Capacity, in bytes, given to the `BufReader` that wraps caller-supplied
/// readers: 68 blocks of 4 KiB, i.e. 272 KiB.
const BUFFER_SIZE: usize = 68 * 4096;
/// Opens the file at `path` and returns a boxed, buffered reader over it.
///
/// The opened file is handed to `niffler::get_reader`; only the reader half of
/// the tuple it returns is kept, the second element is discarded.
/// NOTE(review): presumably this is what lets compressed inputs be read
/// transparently — confirm against the niffler documentation.
///
/// # Errors
///
/// Returns an error if the file cannot be opened or if `niffler::get_reader`
/// fails.
fn initialize_generic_buffer<P>(path: P) -> Result<Box<BufReader<Box<dyn std::io::Read>>>>
where
    P: AsRef<Path>,
{
    let file = File::open(path)?;
    let (source, _format) = niffler::get_reader(Box::new(file))?;
    Ok(Box::new(BufReader::new(source)))
}
/// Wraps `buffer` in the record reader selected by `is_fasta`.
///
/// A `FastaReader` is built when `is_fasta` is `true`, a `FastqReader`
/// otherwise; either one is returned behind the common `FastxRead` trait
/// object.
fn initialize_generic_reader(
    buffer: Box<dyn BufRead>,
    is_fasta: bool,
) -> Box<dyn FastxRead<Item = Record>> {
    if is_fasta {
        return Box::new(FastaReader::new(buffer));
    }
    Box::new(FastqReader::new(buffer))
}
/// Opens the file at `path` and returns a record reader matching its format.
///
/// The format is chosen from the first byte of the buffered stream: `>`
/// selects the FASTA reader and `@` selects the FASTQ reader.
///
/// # Errors
///
/// Returns an error if the buffer cannot be created or filled, if the stream
/// holds no data, or if the first byte is neither `>` nor `@`.
pub fn initialize_reader<P>(path: P) -> Result<Box<dyn FastxRead<Item = Record>>>
where
    P: AsRef<Path>,
{
    let mut buffer = initialize_generic_buffer(path)?;

    // Peek at the first buffered byte without consuming it, so the chosen
    // reader still sees the record marker.
    let is_fasta = match buffer.fill_buf()?.first().copied() {
        None => return Err(anyhow::anyhow!("No data in input file")),
        Some(b'>') => true,
        Some(b'@') => false,
        Some(_) => return Err(anyhow::anyhow!("Unrecognized file format")),
    };

    Ok(initialize_generic_reader(buffer, is_fasta))
}
/// Builds a record reader over an already-open buffered source such as stdin.
///
/// `reader` is wrapped in a `BufReader` of `BUFFER_SIZE` bytes, and the first
/// buffered byte picks the format: `>` selects the FASTA reader and `@`
/// selects the FASTQ reader.
///
/// # Errors
///
/// Returns an error if the buffer cannot be filled, if the source holds no
/// data, or if the first byte is neither `>` nor `@`.
pub fn initialize_stdin_reader<R: BufRead + 'static>(
    reader: R,
) -> Result<Box<dyn FastxRead<Item = Record>>> {
    let mut buffer = BufReader::with_capacity(BUFFER_SIZE, reader);

    // Peek at the first buffered byte without consuming it, so the chosen
    // reader still sees the record marker.
    let is_fasta = match buffer.fill_buf()?.first().copied() {
        None => return Err(anyhow::anyhow!("No data in stdin")),
        Some(b'>') => true,
        Some(b'@') => false,
        Some(_) => return Err(anyhow::anyhow!("Unrecognized file format")),
    };

    Ok(initialize_generic_reader(Box::new(buffer), is_fasta))
}
#[cfg(test)]
mod test {
    use super::*;
    use std::io::Cursor;

    /// Record count the file-based tests expect from each file under `example/`.
    const EXPECTED_FILE_RECORDS: usize = 10;

    /// Opens `path` with `initialize_reader`, asserts that no record reports
    /// itself as empty, and returns how many records were read.
    ///
    /// The check runs in a plain loop rather than a `map(..).count()` chain,
    /// so the assertion is not hidden in an adaptor used only for its side
    /// effect.
    fn count_nonempty_records(path: &str) -> usize {
        let reader = initialize_reader(path)
            .unwrap_or_else(|err| panic!("failed to initialize reader for {}: {}", path, err));
        let mut num_records = 0;
        for record in reader {
            assert!(!record.empty());
            num_records += 1;
        }
        num_records
    }

    #[test]
    fn assign_fasta() {
        assert_eq!(
            count_nonempty_records("example/sequences.fa"),
            EXPECTED_FILE_RECORDS
        );
    }

    #[test]
    fn assign_gzfasta() {
        assert_eq!(
            count_nonempty_records("example/sequences.fa.gz"),
            EXPECTED_FILE_RECORDS
        );
    }

    #[test]
    fn assign_bz2fasta() {
        assert_eq!(
            count_nonempty_records("example/sequences.fa.bz2"),
            EXPECTED_FILE_RECORDS
        );
    }

    #[test]
    fn assign_xzfasta() {
        assert_eq!(
            count_nonempty_records("example/sequences.fa.xz"),
            EXPECTED_FILE_RECORDS
        );
    }

    #[test]
    fn assign_zstfasta() {
        assert_eq!(
            count_nonempty_records("example/sequences.fa.zst"),
            EXPECTED_FILE_RECORDS
        );
    }

    #[test]
    fn assign_fastq() {
        assert_eq!(
            count_nonempty_records("example/sequences.fq"),
            EXPECTED_FILE_RECORDS
        );
    }

    #[test]
    fn assign_gzfastq() {
        assert_eq!(
            count_nonempty_records("example/sequences.fq.gz"),
            EXPECTED_FILE_RECORDS
        );
    }

    #[test]
    fn assign_bz2fastq() {
        assert_eq!(
            count_nonempty_records("example/sequences.fq.bz2"),
            EXPECTED_FILE_RECORDS
        );
    }

    #[test]
    fn assign_xzfastq() {
        assert_eq!(
            count_nonempty_records("example/sequences.fq.xz"),
            EXPECTED_FILE_RECORDS
        );
    }

    #[test]
    fn assign_zstfastq() {
        assert_eq!(
            count_nonempty_records("example/sequences.fq.zst"),
            EXPECTED_FILE_RECORDS
        );
    }

    #[test]
    fn assign_fa_stdin() {
        let example_fa = ">test\nACGT\n>test2\nACGT\n";
        let cursor = Cursor::new(example_fa);
        let reader =
            initialize_stdin_reader(cursor).expect("in-memory FASTA input should be recognized");
        let num_records = reader.count();
        assert_eq!(num_records, 2);
    }

    #[test]
    fn assign_fq_stdin() {
        let example_fq = "@test\nACGT\n+\n!!!!\n@test2\nACGT\n+\n!!!!\n";
        let cursor = Cursor::new(example_fq);
        let reader =
            initialize_stdin_reader(cursor).expect("in-memory FASTQ input should be recognized");
        let num_records = reader.count();
        assert_eq!(num_records, 2);
    }

    #[test]
    fn assign_malformed_stdin() {
        // The first byte is neither '>' nor '@', so format detection must fail.
        let example_malformed = "test\nACGT\n+\n!!!!\n@test2\nACGT\n+\n!!!!\n";
        let cursor = Cursor::new(example_malformed);
        let reader = initialize_stdin_reader(cursor);
        assert!(reader.is_err());
    }
}