1use crate::utils;
2use crate::utils::write_file;
3use bio::bio_types::strand::Strand;
4use bio::io::fasta;
5use bio::io::gff::GffType;
6use colored::Colorize;
7use comfy_table::presets::NOTHING;
8use comfy_table::{ContentArrangement, Table};
9use std::collections::HashMap;
10use std::path::PathBuf;
11
/// Produces a textual report for a set of input files.
///
/// Implementors are used as type-level tags: `info` is an associated function
/// and takes no `self`.
pub trait InfoFetcher {
    /// Builds the report for `paths`.
    ///
    /// `args` carries implementor-specific options; each implementor decides
    /// which entries (if any) it reads.
    fn info(
        paths: Vec<PathBuf>,
        args: Vec<String>,
    ) -> String;
}
18
19pub trait InfoOutput: InfoFetcher {
26 fn by_file(paths: Vec<PathBuf>, args: Vec<String>) {
27 let c = Self::info(paths, args);
28 let path = PathBuf::from("info_fetch.txt");
29 write_file(path, &*c);
30 }
31 fn by_println(paths: Vec<PathBuf>, args: Vec<String>) {
32 println!("{}", format_table(Self::info(paths.clone(), args)));
33 }
34 fn by_csv(paths: Vec<PathBuf>, args: Vec<String>) {
35 let c = Self::info(paths, args);
36 let path = PathBuf::from("info_fetch.csv");
37 write_file(path, &c);
38 }
39}
40
41
42pub struct InfoFa;
43impl InfoFetcher for InfoFa {
44 fn info(paths: Vec<PathBuf>, _args: Vec<String>) -> String {
45 let mut str_buf: Vec<String> = Vec::new();
46
47 for (i, path) in paths.iter().enumerate() {
48 let reader = fasta::Reader::from_file(&path)
49 .expect(format!("{} reading file {}.", "Error".red().bold(), &path.display()).as_str());
50 str_buf.push(format!("File: {:?} \n", path));
51 str_buf.push(format!(
52 "{}\t{}\t{}\t{}\t{}\t\n",
53 "ID", "Seq Type", "Description", "Length", "GC content"
54 ));
55 let (mut count, mut total_len) = (0, 0);
56
57 for record in reader.records() {
58 let record =
59 record.expect(format!("{} reading record.", "Error".red().bold()).as_str());
60 let s_type = utils::try_seq_type_seq(record.seq());
61 str_buf.push(format!(
62 "{}\t{}\t{}\t{}\t{:.2}\t\n",
63 record.id(),
64 s_type,
65 record.desc().unwrap_or("None"),
66 record.seq().len(),
67 match s_type.as_str() {
68 "DNA" => bio::seq_analysis::gc::gc_content(&*record.seq()),
69 _ => 0.0,
70 }
71 ));
72
73 total_len += record.seq().len();
74 count += 1;
75 }
76 str_buf.insert(
77 i,
78 format!(
79 "File'{}' Total length/count : {}/{} \n",
80 path.display(),
81 total_len,
82 count
83 ),
84 );
85 }
86 str_buf.push("\n".to_string());
87 str_buf.into_iter().collect::<String>()
88 }
89}
90
91impl InfoOutput for InfoFa {}
92
93
94pub struct InfoFq;
95
96impl InfoFetcher for InfoFq {
97 fn info(paths: Vec<PathBuf>, _args: Vec<String>) -> String {
98 let mut str_buf: Vec<String> = Vec::new();
99
100 for (i, path) in paths.iter().enumerate() {
101 let reader = bio::io::fastq::Reader::from_file(&path)
102 .expect(format!("{} reading file {}.", "Error".red().bold(), &path.display()).as_str());
103 str_buf.push(format!("File: {:?} \n", path));
104 str_buf.push(format!(
105 "{}\t{}\t{}\t{}\t\n",
106 "ID", "Description", "Length", "Quality"
107 ));
108 let (mut count, mut total_len) = (0, 0);
109
110 for record in reader.records() {
111 let record =
112 record.expect(format!("{} reading record.", "Error".red().bold()).as_str());
113 str_buf.push(format!(
114 "{}\t{}\t{}\t{}\t\n",
115 record.id(),
116 record.desc().unwrap_or("None"),
117 record.seq().len(),
118 record.qual().len()
119 ));
120
121 total_len += record.seq().len();
122 count += 1;
123 }
124 str_buf.insert(
125 i,
126 format!(
127 "File'{}' Total length/count : {}/{} \n",
128 path.display(),
129 total_len,
130 count
131 ),
132 );
133 }
134
135 str_buf.push("\n".to_string());
136 str_buf.into_iter().collect::<String>()
137 }
138}
139impl InfoOutput for InfoFq {}
140
141
142pub struct InfoGff;
143
144impl InfoFetcher for InfoGff {
145 fn info(paths: Vec<PathBuf>, args: Vec<String>) -> String {
146 let mut str_buf: Vec<String> = Vec::new();
147 let gff_type = match args[0].as_str() {
148 "gff3" => GffType::GFF3,
149 "gtf" => GffType::GTF2,
150 _ => GffType::GFF3,
151 };
152
153 for (i, path) in paths.iter().enumerate() {
154 let mut reader = bio::io::gff::Reader::from_file(&path, gff_type)
155 .expect(format!("{} reading file {}.", "Error".red().bold(), &path.display()).as_str());
156 str_buf.push(format!("File: {:?} \n", path));
157
158 let mut count = 0;
159 let (mut seq_id, mut source, mut feature_type, mut score, mut strand) = (
160 HashMap::new(),
161 HashMap::new(),
162 HashMap::new(),
163 HashMap::new(),
164 HashMap::new(),
165 );
166
167 for record in reader.records() {
168 let record = record.expect("Error reading record.");
169
170 *seq_id.entry(record.seqname().to_owned()).or_insert(0) += 1;
171 *source.entry(record.source().to_owned()).or_insert(0) += 1;
172 *feature_type
173 .entry(record.feature_type().to_owned())
174 .or_insert(0) += 1;
175 *score.entry(record.score().to_owned()).or_insert(0) += 1;
176 *strand
177 .entry(match record.strand().unwrap_or(Strand::Unknown) {
178 Strand::Forward => "+",
179 Strand::Reverse => "-",
180 Strand::Unknown => ".",
181 })
182 .or_insert(0) += 1;
183
184 count += 1;
185 }
186
187 str_buf.push(format!(
188 "Seq ID: \n {:?}\nSource: \n {:?}\nFeature Type: \n {:?}\nScore: \n {:?}\nStrand: \n {:?}\nCount: \n {}\n",
189 seq_id, source, feature_type, score, strand, count
190 ));
191
192 str_buf.insert(
193 i,
194 format!("File'{}' Total count : {} \n", path.display(), count),
195 );
196 }
197
198 str_buf.push("\n".to_string());
199 str_buf.into_iter().collect::<String>()
200 }
201}
202
203impl InfoOutput for InfoGff {}
204
205
206fn format_table(input: String) -> String {
207 let rows: Vec<Vec<&str>> = input
208 .split('\n')
209 .filter(|line| !line.trim().is_empty())
210 .map(|line| line.split('\t').collect())
211 .collect();
212
213 let mut table = Table::new();
214 table
215 .load_preset(NOTHING)
216 .set_content_arrangement(ContentArrangement::Dynamic);
217
218 if let Some(headers) = rows.first() {
219 table.set_header(headers);
220 }
221 for row in rows.iter().skip(1) {
222 table.add_row(row);
223 }
224
225 table.to_string()
226}
227