use std::io::prelude::*;
use traits::*;
use util::*;
use super::re::*;
use super::record::Record;
use super::record_list::RecordList;
/// Iterator yielding the raw text of FASTQ entries pulled from a buffered reader.
///
/// Every call to `next` is forwarded to `text_next_skip_whitespace` with `"@"`
/// as its first argument, together with the reader and the two scratch buffers
/// stored here.
pub struct FastqIter<T: BufRead> {
// Buffered input the entries are read from.
reader: T,
// Scratch byte buffer handed to `text_next_skip_whitespace` on every call.
buf: BufferType,
// Scratch string handed to `text_next_skip_whitespace` on every call.
line: String,
}
impl<T: BufRead> FastqIter<T> {
#[inline]
pub fn new(reader: T) -> Self {
FastqIter {
reader: reader,
buf: Vec::with_capacity(8000),
line: String::with_capacity(8000)
}
}
}
impl<T: BufRead> Iterator for FastqIter<T> {
type Item = ResultType<String>;
fn next(&mut self) -> Option<Self::Item> {
text_next_skip_whitespace("@", &mut self.reader, &mut self.buf, &mut self.line)
}
}
/// Estimates the number of bytes needed to serialize `record` as FASTQ.
///
/// The result is the combined length of the identifier, description, sequence
/// and quality fields plus a fixed allowance of five bytes (presumably for the
/// `@`, `+` and newline characters -- confirm against the writer).
#[inline]
fn estimate_record_size(record: &Record) -> usize {
    const FASTQ_VOCABULARY_SIZE: usize = 5;

    let payload: usize = [
        record.seq_id.len(),
        record.description.len(),
        record.sequence.len(),
        record.quality.len(),
    ]
    .iter()
    .sum();

    FASTQ_VOCABULARY_SIZE + payload
}
#[inline]
fn estimate_list_size(list: &RecordList) -> usize {
list.iter().fold(0, |sum, x| sum + estimate_record_size(x))
}
/// Private adapter that forwards to `record_to_fastq` with the same
/// `(writer, record)` arguments; `export_cb` passes it to `writer.export`.
#[inline(always)]
fn to_fastq<T: Write>(writer: &mut T, record: &Record) -> ResultType<()> {
record_to_fastq(writer, record)
}
/// Serializes one record to `writer` in the four-line FASTQ layout:
///
/// ```text
/// @<seq_id>[ <description>]
/// <sequence>
/// +<seq_id>[ <description>]
/// <quality>
/// ```
///
/// The description (and the space before it) is omitted on both title lines
/// when it is empty. No newline is written after the quality string.
///
/// # Errors
///
/// Returns the first error produced by any of the underlying writes.
pub fn record_to_fastq<T: Write>(writer: &mut T, record: &Record)
    -> ResultType<()>
{
    // Title line: "@<seq_id>" optionally followed by " <description>".
    write_alls!(writer, b"@", record.seq_id.as_bytes())?;
    if !record.description.is_empty() {
        write_alls!(writer, b" ", record.description.as_bytes())?;
    }

    // Sequence line, then the start of the "+" separator line, which repeats
    // the title.
    write_alls!(
        writer,
        b"\n", record.sequence.as_slice(),
        b"\n+", record.seq_id.as_bytes()
    )?;
    if !record.description.is_empty() {
        write_alls!(writer, b" ", record.description.as_bytes())?;
    }

    // The quality string must start on its own line; without this newline it
    // would be glued onto the "+" separator line and the record could not be
    // parsed back.
    write_alls!(writer, b"\n", record.quality.as_slice())?;

    Ok(())
}
/// Builds a `TextWriterState` from `writer` and `delimiter` and returns it
/// wrapped in `Ok`. Passed as `&init_cb` to the `*_iterator_export*` helpers.
#[inline(always)]
fn init_cb<T: Write>(writer: &mut T, delimiter: u8)
-> ResultType<TextWriterState<T>>
{
Ok(TextWriterState::new(writer, delimiter))
}
/// Hands `record` to `writer.export`, supplying `to_fastq` as the function
/// that serializes it. Passed as `&export_cb` to the `*_iterator_export*` helpers.
#[inline(always)]
fn export_cb<'a, T: Write>(writer: &mut TextWriterState<T>, record: &'a Record)
-> ResultType<()>
{
writer.export(record, &to_fastq)
}
/// No-op callback: ignores the writer state and always returns `Ok(())`.
/// Passed as `&dest_cb` to the `*_iterator_export*` helpers.
#[inline(always)]
fn dest_cb<T: Write>(_: &mut TextWriterState<T>)
-> ResultType<()>
{
Ok(())
}
/// Exports the borrowed records yielded by `iter` to `writer` as FASTQ.
///
/// Delegates to `reference_iterator_export` with `b'\n'` as the delimiter and
/// the FASTQ callbacks `init_cb`, `export_cb` and `dest_cb`.
/// NOTE(review): the delimiter is presumably written between consecutive
/// records -- confirm in `TextWriterState`.
#[inline(always)]
pub fn reference_iterator_to_fastq<'a, Iter, T>(writer: &mut T, iter: Iter)
-> ResultType<()>
where T: Write,
Iter: Iterator<Item = &'a Record>
{
reference_iterator_export(writer, iter, b'\n', &init_cb, &export_cb, &dest_cb)
}
/// Exports the `ResultType<Record>` items yielded by `iter` to `writer` as FASTQ.
///
/// Delegates to `value_iterator_export` with `b'\n'` as the delimiter and the
/// FASTQ callbacks `init_cb`, `export_cb` and `dest_cb`. How `Err` items are
/// treated is decided by `value_iterator_export`, which is not visible here.
#[inline(always)]
pub fn value_iterator_to_fastq<Iter, T>(writer: &mut T, iter: Iter)
-> ResultType<()>
where T: Write,
Iter: Iterator<Item = ResultType<Record>>
{
value_iterator_export(writer, iter, b'\n', &init_cb, &export_cb, &dest_cb)
}
/// Strict variant of the borrowed-record FASTQ export.
///
/// Delegates to `reference_iterator_export_strict` with `b'\n'` as the
/// delimiter and the FASTQ callbacks. The exact validation policy lives in
/// that helper (presumably invalid records abort the export -- TODO confirm).
#[inline(always)]
pub fn reference_iterator_to_fastq_strict<'a, Iter, T>(writer: &mut T, iter: Iter)
-> ResultType<()>
where T: Write,
Iter: Iterator<Item = &'a Record>
{
reference_iterator_export_strict(writer, iter, b'\n', &init_cb, &export_cb, &dest_cb)
}
/// Strict variant of the owned-record FASTQ export.
///
/// Delegates to `value_iterator_export_strict` with `b'\n'` as the delimiter
/// and the FASTQ callbacks. The exact validation policy lives in that helper
/// (presumably invalid records abort the export -- TODO confirm).
#[inline(always)]
pub fn value_iterator_to_fastq_strict<Iter, T>(writer: &mut T, iter: Iter)
-> ResultType<()>
where T: Write,
Iter: Iterator<Item = ResultType<Record>>
{
value_iterator_export_strict(writer, iter, b'\n', &init_cb, &export_cb, &dest_cb)
}
/// Lenient variant of the borrowed-record FASTQ export.
///
/// Delegates to `reference_iterator_export_lenient` with `b'\n'` as the
/// delimiter and the FASTQ callbacks. The exact policy lives in that helper
/// (presumably invalid records are skipped -- TODO confirm).
#[inline(always)]
pub fn reference_iterator_to_fastq_lenient<'a, Iter, T>(writer: &mut T, iter: Iter)
-> ResultType<()>
where T: Write,
Iter: Iterator<Item = &'a Record>
{
reference_iterator_export_lenient(writer, iter, b'\n', &init_cb, &export_cb, &dest_cb)
}
/// Lenient variant of the owned-record FASTQ export.
///
/// Delegates to `value_iterator_export_lenient` with `b'\n'` as the delimiter
/// and the FASTQ callbacks. The exact policy lives in that helper (presumably
/// invalid records are skipped -- TODO confirm).
#[inline(always)]
pub fn value_iterator_to_fastq_lenient<Iter, T>(writer: &mut T, iter: Iter)
-> ResultType<()>
where T: Write,
Iter: Iterator<Item = ResultType<Record>>
{
value_iterator_export_lenient(writer, iter, b'\n', &init_cb, &export_cb, &dest_cb)
}
/// Parses one FASTQ record from the next four lines of `reader`.
///
/// The lines are consumed in order: title, sequence, `+` separator, quality.
/// The title is matched against `FastqHeaderRegex` to obtain the identifier
/// and description; `length` is set to the sequence length as `u32`.
///
/// # Errors
///
/// * `InvalidInput` is passed to `none_to_error!` when a line is missing or
///   the title does not match, and to `bool_to_error!` when the third line
///   does not start with `+`.
/// * `InvalidRecord` is passed to `bool_to_error!` when the quality length
///   differs from the sequence length.
/// * Errors from reading a line are propagated with `?`.
#[allow(unused_variables)]
pub fn record_from_fastq<T: BufRead>(reader: &mut T)
    -> ResultType<Record>
{
    let mut rows = reader.lines();

    // Line 1: "@<seq_id>[ <description>]".
    let title = none_to_error!(rows.next(), InvalidInput)?;
    let groups = none_to_error!(FastqHeaderRegex::extract().captures(&title), InvalidInput);
    let seq_id = capture_as_string(&groups, FastqHeaderRegex::SEQID_INDEX);
    let description = capture_as_string(&groups, FastqHeaderRegex::DESCRIPTION_INDEX);

    // Line 2: the sequence, which must fit on a single line.
    let sequence_line = none_to_error!(rows.next(), InvalidInput)?;
    let sequence = sequence_line.into_bytes();
    let length = sequence.len() as u32;

    // Line 3: the separator; only its leading '+' is checked.
    let separator = none_to_error!(rows.next(), InvalidInput)?;
    bool_to_error!(separator.starts_with('+'), InvalidInput);

    // Line 4: one quality byte per sequence byte.
    let quality_line = none_to_error!(rows.next(), InvalidInput)?;
    let quality = quality_line.into_bytes();
    bool_to_error!(quality.len() as u32 == length, InvalidRecord);

    Ok(Record {
        seq_id,
        description,
        length,
        sequence,
        quality,
    })
}
/// Iterator that turns each text entry produced by `FastqIter` into a `Record`.
pub struct FastqRecordIter<T: BufRead> {
// Source of raw FASTQ entry text.
iter: FastqIter<T>
}
impl<T: BufRead> FastqRecordIter<T> {
    /// Creates a record iterator that reads FASTQ entries from `reader`.
    #[inline]
    pub fn new(reader: T) -> Self {
        let iter = FastqIter::new(reader);
        Self { iter }
    }
}
impl<T: BufRead> Iterator for FastqRecordIter<T> {
    type Item = ResultType<Record>;

    /// Fetches the next entry's text and parses it with
    /// `Record::from_fastq_string`.
    ///
    /// Returns `None` when the text iterator is exhausted; an error from the
    /// text iterator is passed through without attempting to parse.
    fn next(&mut self) -> Option<Self::Item> {
        self.iter
            .next()
            .map(|chunk| chunk.and_then(|text| Record::from_fastq_string(&text)))
    }
}
/// Wraps `reader` in a `FastqRecordIter`, which yields one
/// `ResultType<Record>` per FASTQ entry.
#[inline(always)]
pub fn iterator_from_fastq<T: BufRead>(reader: T) -> FastqRecordIter<T> {
FastqRecordIter::new(reader)
}
/// `StrictIter` adapter layered over `FastqRecordIter`; the validation rules
/// are defined by `StrictIter`, which is not visible in this file.
pub type FastqRecordStrictIter<T> = StrictIter<Record, FastqRecordIter<T>>;
/// Builds a `FastqRecordStrictIter` by wrapping `iterator_from_fastq(reader)`.
#[inline(always)]
pub fn iterator_from_fastq_strict<T: BufRead>(reader: T) -> FastqRecordStrictIter<T> {
FastqRecordStrictIter::new(iterator_from_fastq(reader))
}
/// `LenientIter` adapter layered over `FastqRecordIter`; the filtering rules
/// are defined by `LenientIter`, which is not visible in this file.
pub type FastqRecordLenientIter<T> = LenientIter<Record, FastqRecordIter<T>>;
/// Builds a `FastqRecordLenientIter` by wrapping `iterator_from_fastq(reader)`.
#[inline(always)]
pub fn iterator_from_fastq_lenient<T: BufRead>(reader: T) -> FastqRecordLenientIter<T> {
FastqRecordLenientIter::new(iterator_from_fastq(reader))
}
/// FASTQ support for a single `Record`; each method forwards to the matching
/// free function in this module.
impl Fastq for Record {
/// Returns the size estimate computed by `estimate_record_size`.
#[inline]
fn estimate_fastq_size(&self) -> usize {
estimate_record_size(self)
}
/// Writes this record to `writer` via `record_to_fastq`.
#[inline(always)]
fn to_fastq<T: Write>(&self, writer: &mut T) -> ResultType<()> {
record_to_fastq(writer, self)
}
/// Reads one record from `reader` via `record_from_fastq`.
fn from_fastq<T: BufRead>(reader: &mut T) -> ResultType<Self> {
record_from_fastq(reader)
}
}
/// FASTQ support for a whole `RecordList`.
impl Fastq for RecordList {
/// Returns the size estimate computed by `estimate_list_size`.
#[inline]
fn estimate_fastq_size(&self) -> usize {
estimate_list_size(self)
}
/// Writes every record in the list via `reference_iterator_to_fastq`.
#[inline(always)]
fn to_fastq<T: Write>(&self, writer: &mut T) -> ResultType<()> {
reference_iterator_to_fastq(writer, self.iter())
}
/// Collects the items of `iterator_from_fastq(reader)` into a
/// `ResultType<RecordList>`.
#[inline(always)]
fn from_fastq<T: BufRead>(reader: &mut T) -> ResultType<RecordList> {
iterator_from_fastq(reader).collect()
}
}
/// Strict and lenient FASTQ import/export for a whole `RecordList`.
///
/// Each method forwards to the matching iterator-based function in this
/// module; what "strict" and "lenient" mean for individual records is decided
/// by those helpers and by `StrictIter` / `LenientIter`.
impl FastqCollection for RecordList {
    /// Writes the list through `reference_iterator_to_fastq_strict`.
    #[inline(always)]
    fn to_fastq_strict<T: Write>(&self, writer: &mut T) -> ResultType<()> {
        reference_iterator_to_fastq_strict(writer, self.iter())
    }

    /// Writes the list through `reference_iterator_to_fastq_lenient`.
    #[inline(always)]
    fn to_fastq_lenient<T: Write>(&self, writer: &mut T) -> ResultType<()> {
        reference_iterator_to_fastq_lenient(writer, self.iter())
    }

    /// Collects the items of `iterator_from_fastq_strict(reader)`; the first
    /// `Err` item becomes the return value.
    #[inline(always)]
    fn from_fastq_strict<T: BufRead>(reader: &mut T) -> ResultType<RecordList> {
        iterator_from_fastq_strict(reader).collect()
    }

    /// Collects the items of `iterator_from_fastq_lenient(reader)`; the first
    /// `Err` item becomes the return value.
    ///
    /// Leniency is the job of `LenientIter`. Any error it still yields (for
    /// example a failed read) is reported to the caller rather than being
    /// dropped, so a broken input is never mistaken for a shorter valid one.
    #[inline(always)]
    fn from_fastq_lenient<T: BufRead>(reader: &mut T) -> ResultType<RecordList> {
        iterator_from_fastq_lenient(reader).collect()
    }
}
// Unit tests for the FASTQ text iterator.
#[cfg(test)]
mod tests {
use std::io::{Cursor};
use super::*;
// Checks that `FastqIter` splits a two-entry input into one string per entry
// and yields nothing for empty input.
#[test]
fn fastq_iter_test() {
// Two entries; only the first is followed by a newline in the input.
let s = "@tag desc\nCATTAG\n+tag desc\n;;;;;;\n@tag1 desc1\nTAGCAT\n+tag1 desc1\n;;;;;;";
let i = FastqIter::new(Cursor::new(s));
let r: ResultType<Vec<String>> = i.collect();
assert_eq!(r.unwrap(), &["@tag desc\nCATTAG\n+tag desc\n;;;;;;\n", "@tag1 desc1\nTAGCAT\n+tag1 desc1\n;;;;;;"]);
// Empty input produces no items.
let s = "";
let i = FastqIter::new(Cursor::new(s));
let r: ResultType<Vec<String>> = i.collect();
assert_eq!(r.unwrap(), Vec::<String>::new());
}
}