use crate::fasta::{BufferFastaReader, FastaReader};
use crate::fastq::FastqReader;
use crate::reader::{detect_file_format, Reader};
use crate::seq::{Base, SeqFormat};
use crate::utils::OptionPair;
use std::io::Result;
use std::path::Path;
/// Thin wrapper around another [`Reader`].
///
/// The wrapped reader is stored in `inner`; this type's own [`Reader`]
/// implementation forwards every call to it.
pub struct FastxReader<R>
where
    R: Reader,
{
    /// The wrapped reader that actually produces the records.
    inner: R,
}
impl<R: Reader> FastxReader<R> {
pub fn new(inner: R) -> Self {
Self { inner }
}
}
impl<R: Reader> Reader for FastxReader<R> {
fn next(&mut self) -> Result<Option<Vec<Base<Vec<u8>>>>> {
self.inner.next()
}
}
impl FastxReader<Box<dyn Reader + Send>> {
    /// Opens a boxed reader for `paths`, picking the concrete reader from the
    /// format reported by `detect_file_format` for each path.
    ///
    /// * A single FASTA path is opened with [`FastaReader`].
    /// * A single FASTQ path, or a pair of FASTQ paths, is opened with
    ///   [`FastqReader`].
    ///
    /// `file_index` and `quality_score` are passed through to the underlying
    /// reader constructors unchanged (`quality_score` is only used for FASTQ).
    ///
    /// # Errors
    ///
    /// Returns an error if format detection fails, if the underlying reader
    /// cannot be constructed, or — with [`std::io::ErrorKind::InvalidInput`] —
    /// if the detected format combination is not one of those listed above.
    pub fn from_paths<P: AsRef<Path>>(
        paths: OptionPair<P>,
        file_index: usize,
        quality_score: i32,
    ) -> Result<Self> {
        Self::open_with(paths, file_index, quality_score, |path, index| {
            let reader = FastaReader::from_path(path, index)?;
            Ok(Box::new(reader) as Box<dyn Reader + Send>)
        })
    }

    /// Same as [`FastxReader::from_paths`], except that a single FASTA path is
    /// opened with [`BufferFastaReader`] instead of [`FastaReader`].
    ///
    /// FASTQ inputs are handled identically to `from_paths`.
    ///
    /// # Errors
    ///
    /// Returns an error if format detection fails, if the underlying reader
    /// cannot be constructed, or — with [`std::io::ErrorKind::InvalidInput`] —
    /// if the detected format combination is unsupported.
    pub fn from_buffer_reader<P: AsRef<Path>>(
        paths: OptionPair<P>,
        file_index: usize,
        quality_score: i32,
    ) -> Result<Self> {
        Self::open_with(paths, file_index, quality_score, |path, index| {
            let reader = BufferFastaReader::from_path(path, index)?;
            Ok(Box::new(reader) as Box<dyn Reader + Send>)
        })
    }

    /// Shared format dispatch for the public constructors; `open_fasta` builds
    /// the reader used for the single-FASTA case.
    fn open_with<P, F>(
        paths: OptionPair<P>,
        file_index: usize,
        quality_score: i32,
        open_fasta: F,
    ) -> Result<Self>
    where
        P: AsRef<Path>,
        F: FnOnce(&Path, usize) -> Result<Box<dyn Reader + Send>>,
    {
        let file_format = paths.map(|path: &P| detect_file_format(path));
        let inner = match file_format? {
            OptionPair::Single(SeqFormat::Fasta) => {
                // The detected formats mirror the shape of `paths`, so a
                // `Single` format implies a single path is present here.
                open_fasta(paths.single().unwrap().as_ref(), file_index)?
            }
            OptionPair::Single(SeqFormat::Fastq)
            | OptionPair::Pair(SeqFormat::Fastq, SeqFormat::Fastq) => {
                let reader = FastqReader::from_path(paths, file_index, quality_score)?;
                Box::new(reader) as Box<dyn Reader + Send>
            }
            // Unsupported input is a caller/data problem, not a broken
            // invariant: report it through the `Result` instead of panicking.
            _ => {
                return Err(std::io::Error::new(
                    std::io::ErrorKind::InvalidInput,
                    "Unsupported file format combination",
                ))
            }
        };
        Ok(Self::new(inner))
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::Path;

    /// Reads the ONT FASTQ fixture end to end, checking the first record in
    /// detail and the overall record/base counts.
    #[test]
    fn test_read_ont_fastq() -> std::io::Result<()> {
        let fixture = Path::new("tests/data/example_ont_reads.fastq");
        let mut reader = FastxReader::from_paths(OptionPair::Single(fixture), 0, 0)?;

        let mut records_seen = 0;
        let mut bases_seen = 0;

        loop {
            let batch = match reader.next()? {
                Some(batch) => batch,
                None => break,
            };

            for record in batch {
                records_seen += 1;

                // Only single-ended bodies contribute to the base count.
                let bases = match record.body {
                    OptionPair::Single(bases) => bases,
                    _ => continue,
                };
                bases_seen += bases.len();

                // Detailed checks apply to the very first record only.
                if records_seen != 1 {
                    continue;
                }

                assert_eq!(record.header.id, "89a96608-1899-49e1-b077-767a40d5ae27");
                assert_eq!(bases.len(), 3928, "First sequence should be 3928 bases long");

                let head = std::str::from_utf8(&bases[..10]).unwrap();
                let tail = std::str::from_utf8(&bases[bases.len() - 10..]).unwrap();
                assert_eq!(head, "ATGTTTTGTA");
                assert_eq!(tail, "GTGGTGCCAT");

                for base in bases.iter().copied() {
                    assert!(matches!(base, b'A' | b'T' | b'C' | b'G' | b'N'));
                }
            }
        }

        assert_eq!(records_seen, 5, "Should have read 5 sequences");
        assert!(bases_seen > 0, "Should have read some bases");
        assert!(bases_seen > 10000, "Total bases should be substantial");
        Ok(())
    }
}