count_constant_sites/
lib.rs

1extern crate seq_io;
2extern crate flate2;
3
4use seq_io::fasta;
5use std::collections::HashMap;
6use flate2::read::GzDecoder;
7use std::io::{Read,BufReader};
8use std::fs::File;
9
10
11pub fn count_constant_sites(filename: &str) -> HashMap<char, u64> {
12    let infile = File::open(filename).unwrap();
13    let buf_reader = BufReader::new(infile);
14
15    let mut reader =  fasta::Reader::new(if filename.ends_with(".gz") {
16        Box::new(GzDecoder::new(buf_reader)) as Box<dyn Read>
17    } else {
18        Box::new(buf_reader) as Box<dyn Read>
19    });
20    let mut first_sequence = true;
21    let mut sites: Vec<char> = Vec::new();
22    while let Some(result) = reader.next() {
23        let record = result.expect("Error reading record");
24        if first_sequence {
25            for line in record.seq_lines() {
26                for base in line {
27                    sites.push((*base as char).to_ascii_lowercase());
28                }
29            }
30            first_sequence = false;
31        } else {
32            let mut i = 0;
33            for line in record.seq_lines() {
34                for base in line {
35                    if sites[i] != (*base as char).to_ascii_lowercase() {
36                        sites[i] = '-';
37                    }
38                    i += 1;
39                }
40            }
41        }
42    }
43
44    let mut constant_sites: HashMap<char, u64> = HashMap::new();
45    constant_sites.insert('a', 0);
46    constant_sites.insert('c', 0);
47    constant_sites.insert('g', 0);
48    constant_sites.insert('t', 0);
49
50    for base in sites {
51        if constant_sites.contains_key(&base) {
52            constant_sites.insert(base, constant_sites.get(&base).unwrap() + 1);
53        }
54    }
55
56    return constant_sites;
57}
58
59#[cfg(test)]
60mod tests {
61    use super::count_constant_sites;
62
63    #[test]
64    fn test_count_constant_sites() {
65        let constant_sites = count_constant_sites("data/input_fasta1.fasta");
66        assert_eq!(*constant_sites.get(&'a').unwrap(), 1);
67        assert_eq!(*constant_sites.get(&'c').unwrap(), 0);
68        assert_eq!(*constant_sites.get(&'g').unwrap(), 0);
69        assert_eq!(*constant_sites.get(&'t').unwrap(), 1);
70    }
71}