li_stephens/
lib.rs

1use ndarray::{Array1,Array2,Axis};
2use std::cmp;
3
4
5const BASES : [char;10] = ['a','c','g','t','n','A','C','G','T','N'];
6
7/// Quick helper function for testing whether or not a character is a valid base pair
8pub fn is_base(char_val : char) -> bool {
9    if BASES.contains(&char_val) {
10        return true;
11    }
12    return false;
13}
14
15/// Calculates the liklihood of a given test sequence being generated from the same population that generated the given haplotypes
16/// and a rate for mutations and recombinations. The haplotypes and test sequence must be input as a matrix of characters. The lenght
17/// of all the haplotypes and the test sequence must be the same and all characters in uppercase.
18pub fn li_stephens_probs_matrix(haplotypes: &ndarray::Array2::<char>, test_haplotype: &ndarray::Array1::<char>, mutation_prob: f64, recombination_prob: f64) -> f64 {
19    let mut max_val = haplotypes.len_of(Axis(1));
20
21    max_val = cmp::max(max_val,test_haplotype.len());
22
23
24    let new_test_array = test_haplotype.as_slice().unwrap();
25
26    for i in 0..new_test_array.len()-1 {
27        if !is_base(new_test_array[i]) {
28            panic!("Not all characters in the test haplotype are a base");
29        }
30    }
31
32    for i in 0..haplotypes.len_of(Axis(0)) {
33        for j in 0..haplotypes.len_of(Axis(1)) {
34            if !is_base(haplotypes[[i,j]]) {
35                panic!("Not all characters in haplotype {} are a base",i);
36            }
37        }
38    }   
39
40
41    let mut prob_array = Array2::<f64>::zeros((haplotypes.len_of(Axis(0)),max_val));
42
43    let char_array = haplotypes;
44
45    let test_array = test_haplotype;
46
47    for idx in 0..max_val {
48        for inner_idx in 0..haplotypes.len_of(Axis(0)) {
49            let mut transition: f64 = 1.0;
50            if idx != 0 {
51                transition = (1.0-recombination_prob)*prob_array[[inner_idx,idx-1]];
52                for inner_idx2 in 0..haplotypes.len_of(Axis(0)) {
53                    transition += (recombination_prob/(haplotypes.len() as f64))*prob_array[[inner_idx2,idx-1]];
54                }
55            }
56            let emission = if test_array[idx] == 'N' || char_array[[inner_idx,idx]] == 'N' {
57                1.0
58            } else if test_array[idx] == char_array[[inner_idx,idx]] {
59                1.0-mutation_prob
60            } else {
61                mutation_prob/3.0
62            };
63            prob_array[[inner_idx,idx]] = transition*emission;
64        }
65    }
66
67    let mut ret_val : f64 = 0.0;
68
69    for final_index in 0..haplotypes.len_of(Axis(0)) {
70            if prob_array[[final_index,max_val-1]] > ret_val {
71                ret_val = prob_array[[final_index,max_val-1]];
72            }
73    }
74
75    return ret_val;
76
77
78
79    
80}
81
82
83
84/// Calculates the liklihood of a given test sequence being generated from the same population that generated the given haplotypes
85/// and a rate for mutations and recombinations. The haplotypes and test sequence must be input as a vector of strings and a string
86/// respectively.
87pub fn li_stephens_probs(haplotypes: &Vec<String>, test_haplotype: &String, mutation_prob: f64, recombination_prob: f64) -> f64 {
88
89    let mut max_val = 0;
90
91    for x in haplotypes {
92        if x.len() > max_val {
93            max_val = x.len();
94        }
95    }
96
97
98    max_val = cmp::max(max_val,test_haplotype.len());
99
100    let mut char_array = Array2::<char>::from_elem((haplotypes.len(),max_val),'N');
101
102    let mut cur_index = 0;
103
104    for x in haplotypes {
105        for idx in 0..(x.len()) {
106            char_array[[cur_index,idx]] = (x.as_bytes()[idx] as char).to_ascii_uppercase();
107        }
108        cur_index += 1;
109    }
110
111    let mut test_array = Array1::<char>::from_elem(max_val,'N');
112
113    for idx in 0..(test_array.len()) {
114        test_array[[idx]] = test_haplotype.as_bytes()[idx] as char;
115    }
116
117
118
119    return li_stephens_probs_matrix(&char_array,&test_array,mutation_prob,recombination_prob);
120
121}