1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
use std::io;

use memchr::memchr;

use crate::errors::ParseError;
use crate::parser::record::SequenceRecord;

/// Buffer size in bytes: 64 KiB (65,536).
// NOTE(review): presumably the initial capacity of the readers' buffers — confirm against callers.
pub(crate) const BUFSIZE: usize = 64 * 1024;

/// Strips a single trailing carriage return (`\r`) from `line`, if present.
///
/// Only the last byte is inspected, so at most one `\r` is removed. Empty
/// input and input not ending in `\r` are returned unchanged.
#[inline]
pub(crate) fn trim_cr(line: &[u8]) -> &[u8] {
    match line {
        // Last byte is '\r': hand back everything in front of it.
        [head @ .., b'\r'] => head,
        _ => line,
    }
}

/// Standard buffer growth policy.
///
/// Given the current buffer size, returns the next size to grow to:
/// sizes below 8 MiB are doubled, sizes of 8 MiB or more are increased
/// by a fixed 8 MiB step. No upper limit is enforced, so the buffer
/// could theoretically grow indefinitely.
pub(crate) fn grow_to(current_size: usize) -> usize {
    // 8 MiB: both the doubling threshold and the linear growth step.
    const STEP: usize = 1 << 23;
    match current_size {
        small if small < STEP => small * 2,
        large => large + STEP,
    }
}

/// Makes sure the buffer is full after this call (unless EOF reached)
/// code adapted from `io::Read::read_exact`
pub(crate) fn fill_buf<R>(reader: &mut buf_redux::BufReader<R>) -> io::Result<usize>
where
    R: io::Read,
{
    let initial_size = reader.buffer().len();
    let mut num_read = 0;
    while initial_size + num_read < reader.capacity() {
        match reader.read_into_buf() {
            Ok(0) => break,
            Ok(n) => num_read += n,
            Err(ref e) if e.kind() == io::ErrorKind::Interrupted => {}
            Err(e) => return Err(e),
        }
    }
    Ok(num_read)
}

/// Location of a parser within its input: a line number plus a byte offset.
///
/// Comparison is derived, so positions order by `line` first and `byte` second.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub struct Position {
    pub(crate) line: u64,
    pub(crate) byte: u64,
}

impl Position {
    /// Builds a position from a line number and a byte offset.
    pub fn new(line: u64, byte: u64) -> Self {
        Self {
            line: line,
            byte: byte,
        }
    }

    /// Line number (starting with 1)
    pub fn line(&self) -> u64 {
        let Self { line, .. } = *self;
        line
    }

    /// Byte offset within the file
    pub fn byte(&self) -> u64 {
        let Self { byte, .. } = *self;
        byte
    }
}

/// The two supported sequence file formats: FASTA or FASTQ.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum Format {
    Fasta,
    Fastq,
}

impl Format {
    /// Returns the character associated with this format as its start
    /// character: `'>'` for FASTA, `'@'` for FASTQ.
    pub fn start_char(&self) -> char {
        match *self {
            Self::Fastq => '@',
            Self::Fasta => '>',
        }
    }
}

/// Line terminator style: `\r\n` (Windows) or a lone `\n` (Unix).
#[derive(Debug, PartialEq, Eq, Hash, Copy, Clone)]
pub enum LineEnding {
    Windows,
    Unix,
}

impl LineEnding {
    /// Returns the terminator as a freshly allocated byte vector:
    /// `[b'\r', b'\n']` for Windows, `[b'\n']` for Unix.
    pub fn to_bytes(&self) -> Vec<u8> {
        match *self {
            Self::Unix => b"\n".to_vec(),
            Self::Windows => b"\r\n".to_vec(),
        }
    }
}

/// Detects the line-ending style from the first `\n` found in `bytes`.
///
/// Returns `Some(LineEnding::Windows)` when that first `\n` is immediately
/// preceded by `\r`, `Some(LineEnding::Unix)` otherwise, and `None` when
/// `bytes` contains no `\n` at all (which includes empty input).
pub fn find_line_ending(bytes: &[u8]) -> Option<LineEnding> {
    // An empty slice has no '\n', so it falls out here as `None` too.
    let newline_at = memchr(b'\n', bytes)?;
    // The `> 0` guard avoids indexing before the start when '\n' is the first byte.
    let preceded_by_cr = newline_at > 0 && bytes[newline_at - 1] == b'\r';
    Some(if preceded_by_cr {
        LineEnding::Windows
    } else {
        LineEnding::Unix
    })
}
/// The main trait, iterator-like, that the FASTA and FASTQ readers implement.
///
/// Implementors are required to be `Send`.
pub trait FastxReader: Send {
    /// Gets the next record in the stream.
    ///
    /// This imitates the `Iterator` API but does not support any iterator functions.
    /// Returns `None` once the EOF has been reached; otherwise returns `Some` holding
    /// either the record or the `ParseError` that occurred.
    fn next(&mut self) -> Option<Result<SequenceRecord, ParseError>>;
    /// Returns the current line/byte position in the stream we are reading from.
    fn position(&self) -> &Position;
    /// Returns whether the current stream uses Windows or Unix style line endings.
    ///
    /// It is `None` only before `next` has been called; once `next` has been called
    /// it will always return a line ending.
    fn line_ending(&self) -> Option<LineEnding>;
}