umgap/io/
fastq.rs

1//! Allows operations over the [FASTQ format](https://en.wikipedia.org/wiki/FASTQ_format).
2
3use std::fmt;
4use std::io;
5use std::io::BufRead;
6use std::io::Read;
7use std::iter::Peekable;
8
9use crate::errors;
10use crate::errors::Result;
11
/// Line-oriented reader for a FASTQ-formatted source (e.g. a file).
pub struct Reader<R>
where
    R: Read,
{
    /// Buffered, peekable iterator over the lines of the wrapped source.
    lines: Peekable<io::Lines<io::BufReader<R>>>,
}
16
17impl<R: Read> Reader<R> {
18    /// Creates a Reader from the given Read (e.g. a file)
19    pub fn new(readable: R) -> Self {
20        Reader {
21            lines: io::BufReader::new(readable).lines().peekable(),
22        }
23    }
24
25    /// Reads the next record from the FASTQ file.
26    pub fn read_record(&mut self) -> Result<Option<Record>> {
27        // reading the header
28        let mut header = match self.lines.next() {
29            None => return Ok(None),
30            Some(header) => header?,
31        };
32        if !header.starts_with('@') {
33            bail!(errors::ErrorKind::Io(io::Error::new(
34                io::ErrorKind::Other,
35                "Expected @ at beginning of fastq header."
36            )));
37        }
38        let _ = header.remove(0);
39
40        // reading the sequence
41        let mut lines = 0;
42        let mut sequence = String::new();
43        while self
44            .lines
45            .peek()
46            .and_then(|line| line.as_ref().ok())
47            .map(|line| !line.starts_with('+'))
48            .unwrap_or(false)
49        {
50            sequence.push_str(&self.lines.next().unwrap()?);
51            lines += 1;
52        }
53
54        // skipping the separator
55        if self
56            .lines
57            .next()
58            .and_then(|line| line.ok())
59            .map(|line| !line.starts_with('+'))
60            .unwrap_or(false)
61        {
62            bail!(errors::ErrorKind::Io(io::Error::new(
63                io::ErrorKind::Other,
64                "Expected a + as separator."
65            )));
66        }
67
68        // reading the quality
69        let mut quality = String::with_capacity(sequence.len());
70        for _ in 0..lines {
71            if let Some(line) = self.lines.next() {
72                quality.push_str(&line?)
73            } else {
74                bail!(errors::ErrorKind::Io(io::Error::new(
75                    io::ErrorKind::Other,
76                    "Expected as many quality lines as \
77                                                            sequence lines."
78                )));
79            }
80        }
81
82        Ok(Some(Record {
83            header,
84            sequence,
85            quality,
86        }))
87    }
88
89    /// Returns a Records struct with itself as its reader.
90    pub fn records(self) -> Records<R> {
91        Records { reader: self }
92    }
93}
94
/// A single entry of a FASTQ source.
#[derive(Debug)]
pub struct Record {
    /// The FASTQ header line, with the leading '@' stripped.
    pub header: String,

    /// The nucleotide sequence of the read.
    pub sequence: String,

    /// The line describing the quality of the read.
    /// Each character encodes the estimated probability that the corresponding base is
    /// incorrect.
    pub quality: String,
}

impl fmt::Display for Record {
    /// Formats the record as `Record (<header>,<sequence>,<quality>)`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Record {
            header,
            sequence,
            quality,
        } = self;
        write!(f, "Record ({},{},{})", header, sequence, quality)
    }
}
119
120/// Convenience struct which allows for iteration (e.g. using for..in).
121pub struct Records<R: Read> {
122    reader: Reader<R>,
123}
124
125impl<R: Read> Iterator for Records<R> {
126    type Item = Result<Record>;
127
128    fn next(&mut self) -> Option<Result<Record>> {
129        match self.reader.read_record() {
130            Ok(None) => None,
131            Ok(Some(record)) => Some(Ok(record)),
132            Err(err) => Some(Err(err)),
133        }
134    }
135}