1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
//! This library provides an(other) attempt at high performance FASTA and FASTQ parsing.
//! There are many similarities to the excellent [fastq-rs](https://github.com/aseyboldt/fastq-rs).
//! However, the API provides streaming iterators where possible.
//! Additionally, the sequence length of records in the FASTA/FASTQ files
//! is not limited by the size of the buffer. Instead, the buffer will grow until
//! the record fits, allowing parsers with a minimum amount of copying required.
//! How it grows can be configured (see [`BufStrategy`](trait.BufStrategy.html)).
//!
//! # Example FASTQ parser:
//! This code prints the ID string from each FASTQ record.
//!
//! ```no_run
//! use seq_io::fastq::{Reader,Record};
//!
//! let mut reader = Reader::from_path("seqs.fastq").unwrap();
//!
//! while let Some(record) = reader.next() {
//!     let record = record.expect("Error reading record");
//!     println!("{}", record.id().unwrap());
//! }
//! ```
//!
//! # Example FASTA parser calculating mean sequence length:
//! The FASTA reader works just the same. One challenge with the FASTA
//! format is that the sequence can be broken into multiple lines.
//! Therefore, it is not possible to get a slice to the whole sequence
//! without copying the data. But it is possible to use `seq_lines()`
//! for efficiently iterating over each sequence line:
//!
//! ```no_run
//! use seq_io::fasta::{Reader,Record};
//!
//! let mut reader = Reader::from_path("seqs.fasta").unwrap();
//!
//! let mut n = 0;
//! let mut sum = 0;
//! while let Some(record) = reader.next() {
//!     let record = record.expect("Error reading record");
//!     for s in record.seq_lines() {
//!         sum += s.len();
//!     }
//!     n += 1;
//! }
//! println!("mean sequence length of {} records: {:.1} bp", n, sum as f32 / n as f32);
//! ```
//!
//! # Parallel processing
//! Functions for parallel processing can be found in the [`parallel`](parallel/index.html) module.

extern crate buf_redux;
extern crate memchr;

#[macro_use]
extern crate serde_derive;
extern crate serde;


use std::error;
use std::fmt;
use std::io;

pub use strategy::*;

mod strategy;


/// Unwraps a `Result` inside a function that returns `Option<Result<_, _>>`.
///
/// On `Ok(value)` the macro evaluates to `value`. On `Err(err)` it returns
/// early from the enclosing function with `Some(Err(..))`, converting the
/// error through `From::from`.
macro_rules! try_opt {
    ($result:expr) => {
        match $result {
            Err(err) => return Some(Err(::std::convert::From::from(err))),
            Ok(value) => value,
        }
    };
}


/// Unwraps an `Option`, running a fallback block when it is `None`.
///
/// On `Some(value)` the macro evaluates to `value`. On `None` it evaluates
/// the supplied block, which may either yield a replacement value or diverge
/// (e.g. `return`, `break`, `continue`).
macro_rules! unwrap_or {
    ($option:expr, $fallback:block) => {
        match $option {
            None => $fallback,
            Some(value) => value,
        }
    };
}


pub mod parallel;
pub mod fasta;
pub mod fastq;



/// Read strategy for `buf_redux` readers that asks for a read unconditionally.
///
/// Shared by more than one module.
#[derive(Debug, Default)]
struct ReadAlways;

impl buf_redux::strategy::ReadStrategy for ReadAlways {
    /// Always returns `true`; the buffer argument is not inspected.
    fn should_read(&self, _buffer: &buf_redux::Buffer) -> bool {
        true
    }
}


/// Strips one trailing carriage return (`\r`) from `line`, if present.
///
/// Returns the slice without its last byte when that byte is `b'\r'`;
/// otherwise (including for an empty slice) `line` is returned unchanged.
/// At most one `\r` is removed.
#[inline]
fn trim_cr(line: &[u8]) -> &[u8] {
    match line.split_last() {
        Some((&b'\r', head)) => head,
        _ => line,
    }
}


/// Tries to fill the reader's buffer completely (unless EOF is reached first).
/// The approach is adapted from `io::Read::read_exact`.
///
/// Starting from the length of the currently buffered data, `read_into_buf()`
/// is called repeatedly until the running total reaches `reader.capacity()`
/// or a read yields zero bytes.
///
/// Returns the initial buffered length plus the number of bytes read during
/// this call.
///
/// # Errors
/// Any I/O error other than `ErrorKind::Interrupted` is returned to the
/// caller; interrupted reads are simply retried.
fn fill_buf<R, Rs, Ms>(reader: &mut buf_redux::BufReader<R, Rs, Ms>) -> io::Result<usize>
    where R: io::Read,
          Rs: buf_redux::strategy::ReadStrategy,
          Ms: buf_redux::strategy::MoveStrategy
{
    let mut filled = reader.get_buf().len();
    loop {
        // Stop as soon as the running total has reached the buffer capacity.
        if filled >= reader.capacity() {
            return Ok(filled);
        }
        match reader.read_into_buf() {
            // A zero-length read ends the loop (treated as EOF).
            Ok(0) => return Ok(filled),
            Ok(n) => filled += n,
            Err(e) => {
                // Interrupted reads are retried; everything else is fatal.
                if e.kind() != io::ErrorKind::Interrupted {
                    return Err(e);
                }
            }
        }
    }
}