seq_here/
info.rs

1use crate::utils;
2use crate::utils::write_file;
3use bio::bio_types::strand::Strand;
4use bio::io::fasta;
5use bio::io::gff::GffType;
6use colored::Colorize;
7use comfy_table::presets::NOTHING;
8use comfy_table::{ContentArrangement, Table};
9use std::collections::HashMap;
10use std::path::PathBuf;
11
/// Common entry point for collecting information about input files.
///
/// Every supported file format supplies its own implementation, because
/// each format exposes different fields worth reporting.
pub trait InfoFetcher {
    /// Builds the report for all of `paths` and returns it as text.
    ///
    /// `args` carries extra, implementation-specific options; an
    /// implementation is free to ignore it.
    fn info(
        paths: Vec<PathBuf>,
        args: Vec<String>,
    ) -> String;
}
18
19/// Define the output method for the different file types
20///
21/// 3 ways to output the information:
22/// `by file` to output to a text file,
23/// `by println` to output to the terminal,
24/// `by csv` to output to a csv file.
25pub trait InfoOutput: InfoFetcher {
26    fn by_file(paths: Vec<PathBuf>, args: Vec<String>) {
27        let c = Self::info(paths, args);
28        let path = PathBuf::from("info_fetch.txt");
29        write_file(path, &*c);
30    }
31    fn by_println(paths: Vec<PathBuf>, args: Vec<String>) {
32        println!("{}", format_table(Self::info(paths.clone(), args)));
33    }
34    fn by_csv(paths: Vec<PathBuf>, args: Vec<String>) {
35        let c = Self::info(paths, args);
36        let path = PathBuf::from("info_fetch.csv");
37        write_file(path, &c);
38    }
39}
40
41
42pub struct InfoFa;
43impl InfoFetcher for InfoFa {
44    fn info(paths: Vec<PathBuf>, _args: Vec<String>) -> String {
45        let mut str_buf: Vec<String> = Vec::new();
46
47        for (i, path) in paths.iter().enumerate() {
48            let reader = fasta::Reader::from_file(&path)
49                .expect(format!("{} reading file {}.", "Error".red().bold(), &path.display()).as_str());
50            str_buf.push(format!("File: {:?} \n", path));
51            str_buf.push(format!(
52                "{}\t{}\t{}\t{}\t{}\t\n",
53                "ID", "Seq Type", "Description", "Length", "GC content"
54            ));
55            let (mut count, mut total_len) = (0, 0);
56
57            for record in reader.records() {
58                let record =
59                    record.expect(format!("{} reading record.", "Error".red().bold()).as_str());
60                let s_type = utils::try_seq_type_seq(record.seq());
61                str_buf.push(format!(
62                    "{}\t{}\t{}\t{}\t{:.2}\t\n",
63                    record.id(),
64                    s_type,
65                    record.desc().unwrap_or("None"),
66                    record.seq().len(),
67                    match s_type.as_str() {
68                        "DNA" => bio::seq_analysis::gc::gc_content(&*record.seq()),
69                        _ => 0.0,
70                    }
71                ));
72
73                total_len += record.seq().len();
74                count += 1;
75            }
76            str_buf.insert(
77                i,
78                format!(
79                    "File'{}' Total length/count : {}/{} \n",
80                    path.display(),
81                    total_len,
82                    count
83                ),
84            );
85        }
86        str_buf.push("\n".to_string());
87        str_buf.into_iter().collect::<String>()
88    }
89}
90
91impl InfoOutput for InfoFa {}
92
93
94pub struct InfoFq;
95
96impl InfoFetcher for InfoFq {
97    fn info(paths: Vec<PathBuf>, _args: Vec<String>) -> String {
98        let mut str_buf: Vec<String> = Vec::new();
99
100        for (i, path) in paths.iter().enumerate() {
101            let reader = bio::io::fastq::Reader::from_file(&path)
102                .expect(format!("{} reading file {}.", "Error".red().bold(), &path.display()).as_str());
103            str_buf.push(format!("File: {:?} \n", path));
104            str_buf.push(format!(
105                "{}\t{}\t{}\t{}\t\n",
106                "ID", "Description", "Length", "Quality"
107            ));
108            let (mut count, mut total_len) = (0, 0);
109
110            for record in reader.records() {
111                let record =
112                    record.expect(format!("{} reading record.", "Error".red().bold()).as_str());
113                str_buf.push(format!(
114                    "{}\t{}\t{}\t{}\t\n",
115                    record.id(),
116                    record.desc().unwrap_or("None"),
117                    record.seq().len(),
118                    record.qual().len()
119                ));
120
121                total_len += record.seq().len();
122                count += 1;
123            }
124            str_buf.insert(
125                i,
126                format!(
127                    "File'{}' Total length/count : {}/{} \n",
128                    path.display(),
129                    total_len,
130                    count
131                ),
132            );
133        }
134
135        str_buf.push("\n".to_string());
136        str_buf.into_iter().collect::<String>()
137    }
138}
139impl InfoOutput for InfoFq {}
140
141
142pub struct InfoGff;
143
144impl InfoFetcher for InfoGff {
145    fn info(paths: Vec<PathBuf>, args: Vec<String>) -> String {
146        let mut str_buf: Vec<String> = Vec::new();
147        let gff_type = match args[0].as_str() {
148            "gff3" => GffType::GFF3,
149            "gtf" => GffType::GTF2,
150            _ => GffType::GFF3,
151        };
152
153        for (i, path) in paths.iter().enumerate() {
154            let mut reader = bio::io::gff::Reader::from_file(&path, gff_type)
155                .expect(format!("{} reading file {}.", "Error".red().bold(), &path.display()).as_str());
156            str_buf.push(format!("File: {:?} \n", path));
157
158            let mut count = 0;
159            let (mut seq_id, mut source, mut feature_type, mut score, mut strand) = (
160                HashMap::new(),
161                HashMap::new(),
162                HashMap::new(),
163                HashMap::new(),
164                HashMap::new(),
165            );
166
167            for record in reader.records() {
168                let record = record.expect("Error reading record.");
169
170                *seq_id.entry(record.seqname().to_owned()).or_insert(0) += 1;
171                *source.entry(record.source().to_owned()).or_insert(0) += 1;
172                *feature_type
173                    .entry(record.feature_type().to_owned())
174                    .or_insert(0) += 1;
175                *score.entry(record.score().to_owned()).or_insert(0) += 1;
176                *strand
177                    .entry(match record.strand().unwrap_or(Strand::Unknown) {
178                        Strand::Forward => "+",
179                        Strand::Reverse => "-",
180                        Strand::Unknown => ".",
181                    })
182                    .or_insert(0) += 1;
183
184                count += 1;
185            }
186
187            str_buf.push(format!(
188                "Seq ID: \n  {:?}\nSource: \n  {:?}\nFeature Type: \n  {:?}\nScore: \n  {:?}\nStrand: \n  {:?}\nCount: \n  {}\n",
189                seq_id, source, feature_type, score, strand, count
190            ));
191
192            str_buf.insert(
193                i,
194                format!("File'{}' Total count : {} \n", path.display(), count),
195            );
196        }
197
198        str_buf.push("\n".to_string());
199        str_buf.into_iter().collect::<String>()
200    }
201}
202
203impl InfoOutput for InfoGff {}
204
205
206fn format_table(input: String) -> String {
207    let rows: Vec<Vec<&str>> = input
208        .split('\n')
209        .filter(|line| !line.trim().is_empty())
210        .map(|line| line.split('\t').collect())
211        .collect();
212
213    let mut table = Table::new();
214    table
215        .load_preset(NOTHING)
216        .set_content_arrangement(ContentArrangement::Dynamic);
217
218    if let Some(headers) = rows.first() {
219        table.set_header(headers);
220    }
221    for row in rows.iter().skip(1) {
222        table.add_row(row);
223    }
224
225    table.to_string()
226}
227