1use crate::utils;
2use bio::bio_types::strand::Strand;
3use bio::io::fasta;
4use bio::io::gff::GffType;
5use colored::Colorize;
6use comfy_table::presets::NOTHING;
7use comfy_table::{ContentArrangement, Table};
8use std::collections::HashMap;
9use std::fs;
10use std::path::{Path, PathBuf};
11
/// Output strategies for a per-file information report.
///
/// Every method is an associated function (no `self`) that takes ownership of
/// the list of input paths. In the implementations visible in this file the
/// three variants write a `.txt` file, print to standard output, and write a
/// `.csv`-named file, respectively.
pub trait InfoOutput {
    /// Produces the report for `paths` and stores it in a file.
    fn by_file(paths: Vec<PathBuf>);
    /// Produces the report for `paths` and prints it.
    fn by_println(paths: Vec<PathBuf>);
    /// Produces the report for `paths` and stores it in a CSV-named file.
    fn by_csv(paths: Vec<PathBuf>);
}
23
24pub struct InfoFa;
25impl InfoOutput for InfoFa {
26 fn by_file(paths: Vec<PathBuf>) {
27 let c = info_fa(paths);
28 let path = PathBuf::from("info_fa.txt");
29 write_file(path, &*c);
30 }
31
32 fn by_println(paths: Vec<PathBuf>) {
33 println!("{}", format_table(info_fa(paths.clone())));
34 }
35
36 fn by_csv(paths: Vec<PathBuf>) {
37 let c = info_fa(paths);
38 let path = PathBuf::from("info_fa.csv");
39 write_file(path, &c);
40 }
41}
42
43pub struct InfoFq;
44impl InfoOutput for InfoFq {
45 fn by_file(paths: Vec<PathBuf>) {
46 let c = info_fq(paths);
47 let path = PathBuf::from("info_fq.txt");
48 write_file(path, &c);
49 }
50
51 fn by_println(paths: Vec<PathBuf>) {
52 println!("{}", info_fq(paths));
53 }
54
55 fn by_csv(paths: Vec<PathBuf>) {
56 let c = info_fq(paths);
57 let path = PathBuf::from("info_fq.csv");
58 write_file(path, &c);
59 }
60}
61
62pub struct InfoGff;
63impl InfoOutput for InfoGff {
64 fn by_file(paths: Vec<PathBuf>) {
65 let c = info_gff(paths, GffType::GFF3);
66 let path = PathBuf::from("info_gff.txt");
67 write_file(path, &c);
68 }
69
70 fn by_println(paths: Vec<PathBuf>) {
71 println!("{}", info_gff(paths, GffType::GFF3));
72 }
73
74 fn by_csv(paths: Vec<PathBuf>) {
75 let c = info_gff(paths, GffType::GFF3);
76 let path = PathBuf::from("info_gff.txt");
77 write_file(path, &c);
78 }
79}
80
81fn info_fa(paths: Vec<PathBuf>) -> String {
82 let mut str_buf: Vec<String> = Vec::new();
83
84 for (i, path) in paths.iter().enumerate() {
85 let reader = fasta::Reader::from_file(&path)
86 .expect(format!("{} reading file {}.", "Error".red().bold(), &path.display()).as_str());
87 str_buf.push(format!("File: {:?} \n", path));
88 str_buf.push(format!(
89 "{}\t{}\t{}\t{}\t{}\t\n",
90 "ID", "Seq Type", "Description", "Length", "GC content"
91 ));
92 let (mut count, mut total_len) = (0, 0);
93
94 for record in reader.records() {
95 let record =
96 record.expect(format!("{} reading record.", "Error".red().bold()).as_str());
97 let s_type = utils::try_seq_type_seq(record.seq());
98 str_buf.push(format!(
99 "{}\t{}\t{}\t{}\t{:.2}\t\n",
100 record.id(),
101 s_type,
102 record.desc().unwrap_or("None"),
103 record.seq().len(),
104 match s_type.as_str() {
105 "DNA" => bio::seq_analysis::gc::gc_content(&*record.seq()),
106 _ => 0.0,
107 }
108 ));
109
110 total_len += record.seq().len();
111 count += 1;
112 }
113 str_buf.insert(
114 i,
115 format!(
116 "File'{}' Total length/count : {}/{} \n",
117 path.display(),
118 total_len,
119 count
120 ),
121 );
122 }
123 str_buf.push("\n".to_string());
124 str_buf.into_iter().collect::<String>()
125}
126
127fn info_fq(paths: Vec<PathBuf>) -> String {
128 let mut str_buf: Vec<String> = Vec::new();
129
130 for (i, path) in paths.iter().enumerate() {
131 let reader = bio::io::fastq::Reader::from_file(&path)
132 .expect(format!("{} reading file {}.", "Error".red().bold(), &path.display()).as_str());
133 str_buf.push(format!("File: {:?} \n", path));
134 str_buf.push(format!(
135 "{}\t{}\t{}\t{}\t\n",
136 "ID", "Description", "Length", "Quality"
137 ));
138 let (mut count, mut total_len) = (0, 0);
139
140 for record in reader.records() {
141 let record =
142 record.expect(format!("{} reading record.", "Error".red().bold()).as_str());
143 str_buf.push(format!(
144 "{}\t{}\t{}\t{}\t\n",
145 record.id(),
146 record.desc().unwrap_or("None"),
147 record.seq().len(),
148 record.qual().len()
149 ));
150
151 total_len += record.seq().len();
152 count += 1;
153 }
154 str_buf.insert(
155 i,
156 format!(
157 "File'{}' Total length/count : {}/{} \n",
158 path.display(),
159 total_len,
160 count
161 ),
162 );
163 }
164
165 str_buf.push("\n".to_string());
166 str_buf.into_iter().collect::<String>()
167}
168
169fn info_gff(paths: Vec<PathBuf>, gff_type: GffType) -> String {
170 let mut str_buf: Vec<String> = Vec::new();
171
172 for (i, path) in paths.iter().enumerate() {
173 let mut reader = bio::io::gff::Reader::from_file(&path, gff_type)
174 .expect(format!("{} reading file {}.", "Error".red().bold(), &path.display()).as_str());
175 str_buf.push(format!("File: {:?} \n", path));
176
177 let mut count = 0;
178 let (mut seq_id, mut source, mut feature_type, mut score, mut strand) = (
179 HashMap::new(),
180 HashMap::new(),
181 HashMap::new(),
182 HashMap::new(),
183 HashMap::new(),
184 );
185
186 for record in reader.records() {
187 let record = record.expect("Error reading record.");
188
189 *seq_id.entry(record.seqname().to_owned()).or_insert(0) += 1;
190 *source.entry(record.source().to_owned()).or_insert(0) += 1;
191 *feature_type
192 .entry(record.feature_type().to_owned())
193 .or_insert(0) += 1;
194 *score.entry(record.score().to_owned()).or_insert(0) += 1;
195 *strand
196 .entry(match record.strand().unwrap_or(Strand::Unknown) {
197 Strand::Forward => "+",
198 Strand::Reverse => "-",
199 Strand::Unknown => ".",
200 })
201 .or_insert(0) += 1;
202
203 count += 1;
204 }
205
206 str_buf.push(format!(
207 "Seq ID: \n {:?}\nSource: \n {:?}\nFeature Type: \n {:?}\nScore: \n {:?}\nStrand: \n {:?}\nCount: \n {}\n",
208 seq_id, source, feature_type, score, strand, count
209 ));
210
211 str_buf.insert(
212 i,
213 format!("File'{}' Total count : {} \n", path.display(), count),
214 );
215 }
216
217 str_buf.push("\n".to_string());
218 str_buf.into_iter().collect::<String>()
219}
220
221fn format_table(input: String) -> String {
222 let rows: Vec<Vec<&str>> = input
223 .split('\n')
224 .filter(|line| !line.trim().is_empty())
225 .map(|line| line.split('\t').collect())
226 .collect();
227
228 let mut table = Table::new();
229 table
230 .load_preset(NOTHING)
231 .set_content_arrangement(ContentArrangement::Dynamic);
232
233 if let Some(headers) = rows.first() {
234 table.set_header(headers);
235 }
236 for row in rows.iter().skip(1) {
237 table.add_row(row);
238 }
239
240 table.to_string()
241}
242
/// Writes `content` to `path`, creating the file or replacing its contents.
///
/// # Panics
///
/// Panics with `Unable to write file: <error>` if the write fails.
fn write_file<P: AsRef<Path>>(path: P, content: &str) {
    if let Err(io_err) = fs::write(path, content) {
        panic!("Unable to write file: {:?}", io_err);
    }
}