1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
use std::fs::File;
use std::io::{stdin, Cursor, Read};
use std::path::Path;
#[cfg(feature = "compression")]
use bzip2::read::BzDecoder;
#[cfg(feature = "compression")]
use flate2::read::MultiGzDecoder;
#[cfg(feature = "compression")]
use xz2::read::XzDecoder;
use crate::errors::ParseError;
pub use crate::parser::fasta::Reader as FastaReader;
pub use crate::parser::fastq::Reader as FastqReader;
mod record;
mod utils;
mod fasta;
mod fastq;
pub use crate::parser::utils::FastxReader;
/// Two-byte prefix that `parse_fastx_reader` matches to route input through
/// `MultiGzDecoder` (gzip).
#[cfg(feature = "compression")]
const GZ_MAGIC: [u8; 2] = [0x1F, 0x8B];
/// Two-byte prefix (ASCII `BZ`) that `parse_fastx_reader` matches to route
/// input through `BzDecoder` (bzip2).
#[cfg(feature = "compression")]
const BZ_MAGIC: [u8; 2] = [0x42, 0x5A];
/// Two-byte prefix that `parse_fastx_reader` matches to route input through
/// `XzDecoder` (xz). Only the first two bytes of the stream are compared.
#[cfg(feature = "compression")]
const XZ_MAGIC: [u8; 2] = [0xFD, 0x37];
/// Picks the concrete parser for `reader` based on the record marker byte.
///
/// * `b'>'` selects the FASTA reader.
/// * `b'@'` selects the FASTQ reader.
///
/// `first_byte` is only inspected, never consumed from `reader`; the callers in
/// this module hand over a reader that still yields that byte first.
///
/// # Errors
///
/// Returns the error built by `ParseError::new_unknown_format` when
/// `first_byte` is neither of the two markers above.
fn get_fastx_reader<'a, R: 'a + io::Read + Send>(
    reader: R,
    first_byte: u8,
) -> Result<Box<dyn FastxReader + 'a>, ParseError> {
    let parser: Box<dyn FastxReader + 'a> = match first_byte {
        b'>' => Box::new(FastaReader::new(reader)),
        b'@' => Box::new(FastqReader::new(reader)),
        unknown => return Err(ParseError::new_unknown_format(unknown)),
    };
    Ok(parser)
}
/// Builds a FASTA/FASTQ parser on top of an arbitrary reader.
///
/// The first two bytes of `reader` are inspected. With the `compression`
/// feature enabled, a gzip, bzip2 or xz magic prefix causes the stream to be
/// wrapped in the corresponding decoder, and the format is then detected from
/// the first *decompressed* byte. Otherwise the format is detected from the
/// first raw byte. Every byte that was peeked is chained back in front of the
/// stream, so the returned parser sees the complete input.
///
/// # Errors
///
/// * An `EmptyFile` error if `reader` ends before two bytes could be read.
/// * Any other I/O error raised while reading the prefix (or the first
///   decompressed byte) is converted into a `ParseError` and propagated
///   instead of being reported as an empty file.
/// * An unknown-format error if the first (decompressed) byte is neither `>`
///   nor `@`.
pub fn parse_fastx_reader<'a, R: 'a + io::Read + Send>(
    mut reader: R,
) -> Result<Box<dyn FastxReader + 'a>, ParseError> {
    let mut first_two_bytes = [0; 2];
    reader.read_exact(&mut first_two_bytes).map_err(|err| {
        // Only a premature end of input means "empty file"; anything else is a
        // genuine read failure that must not be hidden from the caller.
        if err.kind() == io::ErrorKind::UnexpectedEof {
            ParseError::new_empty_file()
        } else {
            ParseError::from(err)
        }
    })?;
    // Put the sniffed bytes back in front of the remaining stream.
    let new_reader = Cursor::new(first_two_bytes).chain(reader);
    match first_two_bytes {
        #[cfg(feature = "compression")]
        GZ_MAGIC => get_fastx_reader_from_decoder(MultiGzDecoder::new(new_reader)),
        #[cfg(feature = "compression")]
        BZ_MAGIC => get_fastx_reader_from_decoder(BzDecoder::new(new_reader)),
        #[cfg(feature = "compression")]
        XZ_MAGIC => get_fastx_reader_from_decoder(XzDecoder::new(new_reader)),
        _ => get_fastx_reader(new_reader, first_two_bytes[0]),
    }
}

/// Peeks the first decompressed byte of `decoder`, chains it back in front of
/// the stream and builds the parser selected by that byte.
#[cfg(feature = "compression")]
fn get_fastx_reader_from_decoder<'a, D: 'a + io::Read + Send>(
    mut decoder: D,
) -> Result<Box<dyn FastxReader + 'a>, ParseError> {
    let mut first = [0; 1];
    decoder.read_exact(&mut first)?;
    get_fastx_reader(Cursor::new(first).chain(decoder), first[0])
}
/// Builds a FASTA/FASTQ parser that reads from the process's standard input.
///
/// This is a thin wrapper that forwards the stdin handle to
/// `parse_fastx_reader`; see that function for format detection and errors.
pub fn parse_fastx_stdin() -> Result<Box<dyn FastxReader>, ParseError> {
    parse_fastx_reader(stdin())
}
/// Opens the file at `path` and builds a FASTA/FASTQ parser over its contents.
///
/// # Errors
///
/// Fails if the file cannot be opened, or with whatever error
/// `parse_fastx_reader` reports for the opened file.
pub fn parse_fastx_file<P: AsRef<Path>>(path: P) -> Result<Box<dyn FastxReader>, ParseError> {
    let file = File::open(path.as_ref())?;
    parse_fastx_reader(file)
}
pub use record::{mask_header_tabs, mask_header_utf8, write_fasta, write_fastq, SequenceRecord};
use std::io;
pub use utils::{Format, LineEnding};
#[cfg(test)]
mod test {
    use crate::errors::ParseErrorKind;
    use crate::parse_fastx_reader;

    /// Input with no bytes at all must be rejected as an empty file.
    #[test]
    fn test_empty_file_raises_parser_error_of_same_kind() {
        let outcome = parse_fastx_reader(&b""[..]);
        // `None` here would mean parsing unexpectedly succeeded.
        let kind = outcome.err().map(|error| error.kind);
        assert_eq!(kind, Some(ParseErrorKind::EmptyFile));
    }

    /// A single byte is not enough for the two-byte prefix sniff, so it is
    /// reported the same way as an empty file.
    #[test]
    fn test_only_one_byte_in_file_raises_empty_file_error() {
        let outcome = parse_fastx_reader(&b"@"[..]);
        // `None` here would mean parsing unexpectedly succeeded.
        let kind = outcome.err().map(|error| error.kind);
        assert_eq!(kind, Some(ParseErrorKind::EmptyFile));
    }
}