1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
extern crate seq_io;
extern crate flate2;

use seq_io::fasta;
use std::collections::HashMap;
use flate2::read::GzDecoder;
use std::io::{Read,BufReader};
use std::fs::File;


pub fn count_constant_sites(filename: &str) -> HashMap<char, u64> {
    let infile = File::open(filename).unwrap();
    let buf_reader = BufReader::new(infile);

    let mut reader =  fasta::Reader::new(if filename.ends_with(".gz") {
        Box::new(GzDecoder::new(buf_reader)) as Box<dyn Read>
    } else {
        Box::new(buf_reader) as Box<dyn Read>
    });
    let mut first_sequence = true;
    let mut sites: Vec<char> = Vec::new();
    while let Some(result) = reader.next() {
        let record = result.expect("Error reading record");
        if first_sequence {
            for line in record.seq_lines() {
                for base in line {
                    sites.push((*base as char).to_ascii_lowercase());
                }
            }
            first_sequence = false;
        } else {
            let mut i = 0;
            for line in record.seq_lines() {
                for base in line {
                    if sites[i] != (*base as char).to_ascii_lowercase() {
                        sites[i] = '-';
                    }
                    i += 1;
                }
            }
        }
    }

    let mut constant_sites: HashMap<char, u64> = HashMap::new();
    constant_sites.insert('a', 0);
    constant_sites.insert('c', 0);
    constant_sites.insert('g', 0);
    constant_sites.insert('t', 0);

    for base in sites {
        if constant_sites.contains_key(&base) {
            constant_sites.insert(base, constant_sites.get(&base).unwrap() + 1);
        }
    }

    return constant_sites;
}

#[cfg(test)]
mod tests {
    use super::count_constant_sites;

    #[test]
    fn test_count_constant_sites() {
        let constant_sites = count_constant_sites("data/input_fasta1.fasta");
        assert_eq!(*constant_sites.get(&'a').unwrap(), 1);
        assert_eq!(*constant_sites.get(&'c').unwrap(), 0);
        assert_eq!(*constant_sites.get(&'g').unwrap(), 0);
        assert_eq!(*constant_sites.get(&'t').unwrap(), 1);
    }
}