seq_here/
info.rs

1use crate::utils;
2use bio::bio_types::strand::Strand;
3use bio::io::fasta;
4use bio::io::gff::GffType;
5use colored::Colorize;
6use comfy_table::presets::NOTHING;
7use comfy_table::{ContentArrangement, Table};
8use std::collections::HashMap;
9use std::fs;
10use std::path::{Path, PathBuf};
11
/// Reporting interface shared by every supported file type.
///
/// An implementor provides three sinks for the same report:
///
/// * [`by_file`](InfoOutput::by_file) - output to a text file,
/// * [`by_println`](InfoOutput::by_println) - output to the terminal,
/// * [`by_csv`](InfoOutput::by_csv) - output to a csv file.
pub trait InfoOutput {
    /// Produce the report for `paths` and store it in a text file.
    fn by_file(paths: Vec<PathBuf>);

    /// Produce the report for `paths` and print it to the terminal.
    fn by_println(paths: Vec<PathBuf>);

    /// Produce the report for `paths` and store it in a csv file.
    fn by_csv(paths: Vec<PathBuf>);
}
23
24pub struct InfoFa;
25impl InfoOutput for InfoFa {
26    fn by_file(paths: Vec<PathBuf>) {
27        let c = info_fa(paths);
28        let path = PathBuf::from("info_fa.txt");
29        write_file(path, &*c);
30    }
31
32    fn by_println(paths: Vec<PathBuf>) {
33        println!("{}", format_table(info_fa(paths.clone())));
34    }
35
36    fn by_csv(paths: Vec<PathBuf>) {
37        let c = info_fa(paths);
38        let path = PathBuf::from("info_fa.csv");
39        write_file(path, &c);
40    }
41}
42
43pub struct InfoFq;
44impl InfoOutput for InfoFq {
45    fn by_file(paths: Vec<PathBuf>) {
46        let c = info_fq(paths);
47        let path = PathBuf::from("info_fq.txt");
48        write_file(path, &c);
49    }
50
51    fn by_println(paths: Vec<PathBuf>) {
52        println!("{}", info_fq(paths));
53    }
54
55    fn by_csv(paths: Vec<PathBuf>) {
56        let c = info_fq(paths);
57        let path = PathBuf::from("info_fq.csv");
58        write_file(path, &c);
59    }
60}
61
62pub struct InfoGff;
63impl InfoOutput for InfoGff {
64    fn by_file(paths: Vec<PathBuf>) {
65        let c = info_gff(paths, GffType::GFF3);
66        let path = PathBuf::from("info_gff.txt");
67        write_file(path, &c);
68    }
69
70    fn by_println(paths: Vec<PathBuf>) {
71        println!("{}", info_gff(paths, GffType::GFF3));
72    }
73
74    fn by_csv(paths: Vec<PathBuf>) {
75        let c = info_gff(paths, GffType::GFF3);
76        let path = PathBuf::from("info_gff.txt");
77        write_file(path, &c);
78    }
79}
80
81fn info_fa(paths: Vec<PathBuf>) -> String {
82    let mut str_buf: Vec<String> = Vec::new();
83
84    for (i, path) in paths.iter().enumerate() {
85        let reader = fasta::Reader::from_file(&path)
86            .expect(format!("{} reading file {}.", "Error".red().bold(), &path.display()).as_str());
87        str_buf.push(format!("File: {:?} \n", path));
88        str_buf.push(format!(
89            "{}\t{}\t{}\t{}\t{}\t\n",
90            "ID", "Seq Type", "Description", "Length", "GC content"
91        ));
92        let (mut count, mut total_len) = (0, 0);
93
94        for record in reader.records() {
95            let record =
96                record.expect(format!("{} reading record.", "Error".red().bold()).as_str());
97            let s_type = utils::try_seq_type_seq(record.seq());
98            str_buf.push(format!(
99                "{}\t{}\t{}\t{}\t{:.2}\t\n",
100                record.id(),
101                s_type,
102                record.desc().unwrap_or("None"),
103                record.seq().len(),
104                match s_type.as_str() {
105                    "DNA" => bio::seq_analysis::gc::gc_content(&*record.seq()),
106                    _ => 0.0,
107                }
108            ));
109
110            total_len += record.seq().len();
111            count += 1;
112        }
113        str_buf.insert(
114            i,
115            format!(
116                "File'{}' Total length/count : {}/{} \n",
117                path.display(),
118                total_len,
119                count
120            ),
121        );
122    }
123    str_buf.push("\n".to_string());
124    str_buf.into_iter().collect::<String>()
125}
126
127fn info_fq(paths: Vec<PathBuf>) -> String {
128    let mut str_buf: Vec<String> = Vec::new();
129
130    for (i, path) in paths.iter().enumerate() {
131        let reader = bio::io::fastq::Reader::from_file(&path)
132            .expect(format!("{} reading file {}.", "Error".red().bold(), &path.display()).as_str());
133        str_buf.push(format!("File: {:?} \n", path));
134        str_buf.push(format!(
135            "{}\t{}\t{}\t{}\t\n",
136            "ID", "Description", "Length", "Quality"
137        ));
138        let (mut count, mut total_len) = (0, 0);
139
140        for record in reader.records() {
141            let record =
142                record.expect(format!("{} reading record.", "Error".red().bold()).as_str());
143            str_buf.push(format!(
144                "{}\t{}\t{}\t{}\t\n",
145                record.id(),
146                record.desc().unwrap_or("None"),
147                record.seq().len(),
148                record.qual().len()
149            ));
150
151            total_len += record.seq().len();
152            count += 1;
153        }
154        str_buf.insert(
155            i,
156            format!(
157                "File'{}' Total length/count : {}/{} \n",
158                path.display(),
159                total_len,
160                count
161            ),
162        );
163    }
164
165    str_buf.push("\n".to_string());
166    str_buf.into_iter().collect::<String>()
167}
168
169fn info_gff(paths: Vec<PathBuf>, gff_type: GffType) -> String {
170    let mut str_buf: Vec<String> = Vec::new();
171
172    for (i, path) in paths.iter().enumerate() {
173        let mut reader = bio::io::gff::Reader::from_file(&path, gff_type)
174            .expect(format!("{} reading file {}.", "Error".red().bold(), &path.display()).as_str());
175        str_buf.push(format!("File: {:?} \n", path));
176
177        let mut count = 0;
178        let (mut seq_id, mut source, mut feature_type, mut score, mut strand) = (
179            HashMap::new(),
180            HashMap::new(),
181            HashMap::new(),
182            HashMap::new(),
183            HashMap::new(),
184        );
185
186        for record in reader.records() {
187            let record = record.expect("Error reading record.");
188
189            *seq_id.entry(record.seqname().to_owned()).or_insert(0) += 1;
190            *source.entry(record.source().to_owned()).or_insert(0) += 1;
191            *feature_type
192                .entry(record.feature_type().to_owned())
193                .or_insert(0) += 1;
194            *score.entry(record.score().to_owned()).or_insert(0) += 1;
195            *strand
196                .entry(match record.strand().unwrap_or(Strand::Unknown) {
197                    Strand::Forward => "+",
198                    Strand::Reverse => "-",
199                    Strand::Unknown => ".",
200                })
201                .or_insert(0) += 1;
202
203            count += 1;
204        }
205
206        str_buf.push(format!(
207            "Seq ID: \n  {:?}\nSource: \n  {:?}\nFeature Type: \n  {:?}\nScore: \n  {:?}\nStrand: \n  {:?}\nCount: \n  {}\n",
208            seq_id, source, feature_type, score, strand, count
209        ));
210
211        str_buf.insert(
212            i,
213            format!("File'{}' Total count : {} \n", path.display(), count),
214        );
215    }
216
217    str_buf.push("\n".to_string());
218    str_buf.into_iter().collect::<String>()
219}
220
221fn format_table(input: String) -> String {
222    let rows: Vec<Vec<&str>> = input
223        .split('\n')
224        .filter(|line| !line.trim().is_empty())
225        .map(|line| line.split('\t').collect())
226        .collect();
227
228    let mut table = Table::new();
229    table
230        .load_preset(NOTHING)
231        .set_content_arrangement(ContentArrangement::Dynamic);
232
233    if let Some(headers) = rows.first() {
234        table.set_header(headers);
235    }
236    for row in rows.iter().skip(1) {
237        table.add_row(row);
238    }
239
240    table.to_string()
241}
242
/// Writes `content` to `path`, creating the file or replacing its contents.
///
/// # Panics
///
/// Panics if the file cannot be written. The message names the path so the
/// failing output file can be identified.
fn write_file<P: AsRef<Path>>(path: P, content: &str) {
    let target = path.as_ref();
    if let Err(err) = fs::write(target, content) {
        panic!("Unable to write file {}: {}", target.display(), err);
    }
}