1use std::io::{prelude::*, BufReader};
4use std::error::Error;
5use std::fs::File;
6use flate2::read::GzDecoder;
7use flate2::Compression;
8use flate2::write::GzEncoder;
9use std::iter::Iterator;
10extern crate flate2;
11
/// One FASTQ record, stored as the four text lines it was read from.
///
/// `Clone` is derived; it copies the four strings exactly as the previous
/// hand-written implementation did.
#[derive(Debug, Clone)]
pub struct Reads {
    /// First line of the record (the identifier line).
    read_id: String,
    /// Second line of the record (the base calls).
    sequence: String,
    /// Third line of the record (the `+` separator line in standard FASTQ).
    read_name: String,
    /// Fourth line of the record; each byte is decoded as Phred+33 by `lt_qc`.
    quality: String,
}

impl Reads {
    /// Returns the length of the sequence line in bytes.
    pub fn len(&self) -> usize {
        self.sequence.len()
    }

    /// Returns `true` when the sequence line is empty.
    pub fn is_empty(&self) -> bool {
        self.sequence.is_empty()
    }

    /// Counts the quality bytes whose Phred+33 value is at least `score`.
    ///
    /// Note that, despite the "lt" in the name, the comparison is `>= score`.
    /// The count saturates at `i32::MAX` instead of wrapping.
    pub fn lt_qc(&self, score: i32) -> i32 {
        let passing = self
            .quality
            .bytes()
            .filter(|&b| i32::from(b) - 33 >= score)
            .count();
        // Clamp before narrowing so an absurdly long quality line cannot wrap negative.
        passing.min(i32::MAX as usize) as i32
    }

    /// Renders the record as four newline-terminated lines, in file order.
    fn display(&self) -> String {
        format!(
            "{}\n{}\n{}\n{}\n",
            self.read_id, self.sequence, self.read_name, self.quality
        )
    }
}
49
50pub struct Fastq {
51 reads: Vec<Reads>,
52 length: u64,
53}
54
55impl Fastq {
56 pub fn new(reads: Vec<Reads>) -> Self{
57 let length = reads.len() as u64;
58 Fastq {
59 reads,
60 length,
61 }
62 }
63
64 fn push(&mut self, reads: Reads) {
65 self.reads.push(reads);
66 self.length += 1u64;
67 }
68
69 pub fn total_base_num(&self) -> u64 {
70 self.reads.iter().map(|r| r.len()).fold(0u64, |acc, x| acc + x as u64)
71 }
72 pub fn qc_num(&self, score: i32) -> u64 {
73 self.reads.iter().map(|r| r.lt_qc(score)).fold(0u64, |acc, x| acc + x as u64)
74 }
75
76 pub fn from_file(file_path: &str) -> Result<Self, Box<dyn Error>> {
78 let fastq_gz = File::open(file_path).expect(format!("No such file or directory: {}", file_path).as_str());
80 let fastq_content = GzDecoder::new(fastq_gz);
81 let fastq_reader = BufReader::new(fastq_content);
82 let mut line_iter = fastq_reader.lines().map(|l| l.unwrap());
83 let mut fastq = Fastq::new(Vec::new());
84 loop {
85 let read_id: String;
86 let sequence: String;
87 let read_name: String;
88 let quality: String;
89 match line_iter.next(){
90 None => {break;}
91 Some(element) => {
92 read_id = element;
93 }
94 }
95 match line_iter.next(){
96 None => {break}
97 Some(element) => {
98 sequence = element;
99 }
100 }
101 match line_iter.next(){
102 None => {break}
103 Some(element) => {
104 read_name = element;
105 }
106 }
107 match line_iter.next(){
108 None => {break}
109 Some(element) => {
110 quality = element;
111 }
112 }
113 fastq.push(Reads{
114 read_id,
115 sequence,
116 read_name,
117 quality
118 })
119 }
120 Ok(fastq)
121 }
122
123 pub fn extent(&mut self, other_fastq: &Fastq) {
124 for reads in other_fastq.reads.iter() {
125 self.push(reads.clone())
126 }
127 }
128 pub fn merge_fastq(fastq_vec: Vec<&str>) -> Result<Self, Box<dyn Error>> {
130 let mut ret_fastq = Fastq::new(Vec::new());
131 for fastq_path in fastq_vec.iter() {
133 let fastq_tmp = Fastq::from_file(fastq_path)?;
135 ret_fastq.extent(&fastq_tmp);
136 }
137 Ok(ret_fastq)
138 }
139
140 pub fn to_file(&self, file_path: &str) -> Result<(), Box<dyn Error>> {
141 let mut out_encoder = GzEncoder::new(Vec::new(), Compression::default());
143 for reads in self.reads.iter() {
144 out_encoder.write_all(reads.display().as_bytes())?;
145 }
146 let compressed_bytes = out_encoder.finish()?;
148 let mut file = File::create(file_path).expect("create failed");
149 file.write_all(&compressed_bytes).expect("write failed");
150 Ok(())
151 }
152}
153
154impl Iterator for Fastq {
155 type Item = Reads;
156
157 fn next(&mut self) -> Option<Self::Item> {
158 match self.reads.iter().next() {
159 None => { None }
160 Some(reads) => { Some(reads.clone())}
161 }
162 }
163}
164
165
166#[cfg(test)]
167mod tests {
168 use std::error::Error;
169 use crate::Fastq;
170
171 #[test]
172 fn test_merge_fastq() -> Result<(), Box<dyn Error>>{
173 let fastq_file_vec = vec!["data/s1062207050023_1.fastq.gz", "data/s1062207050023_2.fastq.gz"];
174 let m_fastq = Fastq::merge_fastq(fastq_file_vec)?;
175 m_fastq.to_file("data/s1062207050023.fastq.gz");
176 Ok(())
177 }
178
179 #[test]
180 fn test_iter_fastq() -> Result<(), Box<dyn Error>>{
181 let m_fastq = Fastq::from_file("data/s1062207050023_1.fastq.gz")?;
182 for read in m_fastq {
183 println!("{}", read.display());
184 break;
185 }
186 Ok(())
187 }
188}