count_constant_sites/
lib.rs1extern crate seq_io;
2extern crate flate2;
3
4use seq_io::fasta;
5use std::collections::HashMap;
6use flate2::read::GzDecoder;
7use std::io::{Read,BufReader};
8use std::fs::File;
9
10
11pub fn count_constant_sites(filename: &str) -> HashMap<char, u64> {
12 let infile = File::open(filename).unwrap();
13 let buf_reader = BufReader::new(infile);
14
15 let mut reader = fasta::Reader::new(if filename.ends_with(".gz") {
16 Box::new(GzDecoder::new(buf_reader)) as Box<dyn Read>
17 } else {
18 Box::new(buf_reader) as Box<dyn Read>
19 });
20 let mut first_sequence = true;
21 let mut sites: Vec<char> = Vec::new();
22 while let Some(result) = reader.next() {
23 let record = result.expect("Error reading record");
24 if first_sequence {
25 for line in record.seq_lines() {
26 for base in line {
27 sites.push((*base as char).to_ascii_lowercase());
28 }
29 }
30 first_sequence = false;
31 } else {
32 let mut i = 0;
33 for line in record.seq_lines() {
34 for base in line {
35 if sites[i] != (*base as char).to_ascii_lowercase() {
36 sites[i] = '-';
37 }
38 i += 1;
39 }
40 }
41 }
42 }
43
44 let mut constant_sites: HashMap<char, u64> = HashMap::new();
45 constant_sites.insert('a', 0);
46 constant_sites.insert('c', 0);
47 constant_sites.insert('g', 0);
48 constant_sites.insert('t', 0);
49
50 for base in sites {
51 if constant_sites.contains_key(&base) {
52 constant_sites.insert(base, constant_sites.get(&base).unwrap() + 1);
53 }
54 }
55
56 return constant_sites;
57}
58
59#[cfg(test)]
60mod tests {
61 use super::count_constant_sites;
62
63 #[test]
64 fn test_count_constant_sites() {
65 let constant_sites = count_constant_sites("data/input_fasta1.fasta");
66 assert_eq!(*constant_sites.get(&'a').unwrap(), 1);
67 assert_eq!(*constant_sites.get(&'c').unwrap(), 0);
68 assert_eq!(*constant_sites.get(&'g').unwrap(), 0);
69 assert_eq!(*constant_sites.get(&'t').unwrap(), 1);
70 }
71}