bio_seq_algos/
durbin_algo.rs1use utils::*;
2
3pub struct SaPartFuncMats {
4 pub sa_forward_part_func_mat: PartFuncMat,
5 pub sa_forward_part_func_mat_4_char_align: PartFuncMat,
6 pub sa_forward_part_func_mat_4_gap_1: PartFuncMat,
7 pub sa_forward_part_func_mat_4_gap_2: PartFuncMat,
8 pub sa_backward_part_func_mat_4_char_align: PartFuncMat,
9 pub sa_backward_part_func_mat_4_gap_1: PartFuncMat,
10 pub sa_backward_part_func_mat_4_gap_2: PartFuncMat,
11}
12
13impl SaPartFuncMats {
14 fn new(slp: &(usize, usize)) -> SaPartFuncMats {
15 let zero_mat = vec![vec![0.; slp.1 + 2]; slp.0 + 2];
16 SaPartFuncMats {
17 sa_forward_part_func_mat: zero_mat.clone(),
18 sa_forward_part_func_mat_4_char_align: zero_mat.clone(),
19 sa_forward_part_func_mat_4_gap_1: zero_mat.clone(),
20 sa_forward_part_func_mat_4_gap_2: zero_mat.clone(),
21 sa_backward_part_func_mat_4_char_align: zero_mat.clone(),
22 sa_backward_part_func_mat_4_gap_1: zero_mat.clone(),
23 sa_backward_part_func_mat_4_gap_2: zero_mat,
24 }
25 }
26}
27
28impl SaScoringParams {
29 pub fn new(ca_sm: &CaScoreMat, opening_gap_penalty: SaScore, extending_gap_penalty: SaScore) -> SaScoringParams {
30 SaScoringParams {
31 ca_sm: ca_sm.clone(),
32 opening_gap_penalty: opening_gap_penalty,
33 extending_gap_penalty: extending_gap_penalty,
34 }
35 }
36}
37
38pub fn durbin_algo(seq_pair: &SsPair, sa_sps: &SaScoringParams) -> ProbMat {
39 let seq_len_pair = (seq_pair.0.len(), seq_pair.1.len());
40 let (sa_part_func_mats, scale_param_mat) = get_sa_part_func_mats_and_scale_param_mat(seq_pair, &seq_len_pair, sa_sps);
41 get_char_align_prob_mat(&sa_part_func_mats, &seq_len_pair, &scale_param_mat)
42}
43
44pub fn get_cap_mat_and_unaligned_char_psp(sp: &SsPair, sa_sps: &SaScoringParams) -> (ProbMat, ProbSeqPair) {
45 let slp = (sp.0.len(), sp.1.len());
46 let cap_mat = durbin_algo(sp, sa_sps);
47 let mut ucp_seq_pair = (vec![0.; slp.0], vec![0.; slp.1]);
48 for i in 0 .. slp.0 {
49 let mut ucp = 1.;
50 for j in 0 .. slp.1 {
51 ucp -= cap_mat[i][j];
52 }
53 assert!(0. <= ucp && ucp <= 1.);
54 ucp_seq_pair.0[i] = ucp;
55 }
56 for j in 0 .. slp.1 {
57 let mut ucp = 1.;
58 for i in 0 .. slp.0 {
59 ucp -= cap_mat[i][j];
60 }
61 assert!(0. <= ucp && ucp <= 1.);
62 ucp_seq_pair.1[j] = ucp;
63 }
64 (cap_mat, ucp_seq_pair)
65}
66
67pub fn get_sa_part_func_mats_and_scale_param_mat(sp: &SsPair, slp: &(usize, usize), sa_sps: &SaScoringParams) -> (SaPartFuncMats, ScaleParamMat) {
68 let mut exp_sa_sps = sa_sps.clone();
69 for ca_score in exp_sa_sps.ca_sm.values_mut() {
70 *ca_score = ca_score.exp();
71 };
72 exp_sa_sps.opening_gap_penalty = exp_sa_sps.opening_gap_penalty.exp();
73 exp_sa_sps.extending_gap_penalty = exp_sa_sps.extending_gap_penalty.exp();
74 let mut sa_part_func_mats = SaPartFuncMats::new(slp);
75 let mut scale_param_mat = vec![vec![0.; slp.1 + 2]; slp.0 + 2];
76 for i in 0 .. slp.0 + 2 {
77 for j in 0 .. slp.1 + 2 {
78 if (i == 0 && j == 0) || (i == slp.0 + 1 && j == slp.1 + 1) {
79 sa_part_func_mats.sa_forward_part_func_mat[i][j] = 1.;
80 sa_part_func_mats.sa_forward_part_func_mat_4_char_align[i][j] = 1.;
81 scale_param_mat[i][j] = 1.;
82 continue;
83 } else if i == slp.0 + 1 || j == slp.1 + 1 {
84 scale_param_mat[i][j] = 1.;
85 continue;
86 }
87 let mut scale_param = 0.;
88 if i > 0 && j > 0 {
89 let ca_score = exp_sa_sps.ca_sm[&(sp.0[i - 1], sp.1[j - 1])];
90 let forward_part_func = sa_part_func_mats.sa_forward_part_func_mat[i - 1][j - 1] * ca_score;
91 scale_param += forward_part_func;
92 sa_part_func_mats.sa_forward_part_func_mat_4_char_align[i][j] = forward_part_func;
93 }
94 if i > 0 {
95 let forward_part_func = (sa_part_func_mats.sa_forward_part_func_mat_4_char_align[i - 1][j] + sa_part_func_mats.sa_forward_part_func_mat_4_gap_2[i - 1][j]) * exp_sa_sps.opening_gap_penalty + sa_part_func_mats.sa_forward_part_func_mat_4_gap_1[i - 1][j] * exp_sa_sps.extending_gap_penalty;
96 scale_param += forward_part_func;
97 sa_part_func_mats.sa_forward_part_func_mat_4_gap_1[i][j] = forward_part_func;
98 }
99 if j > 0 {
100 let forward_part_func = (sa_part_func_mats.sa_forward_part_func_mat_4_char_align[i][j - 1] + sa_part_func_mats.sa_forward_part_func_mat_4_gap_1[i][j - 1]) * exp_sa_sps.opening_gap_penalty + sa_part_func_mats.sa_forward_part_func_mat_4_gap_2[i][j - 1] * exp_sa_sps.extending_gap_penalty;
101 scale_param += forward_part_func;
102 sa_part_func_mats.sa_forward_part_func_mat_4_gap_2[i][j] = forward_part_func;
103 }
104 sa_part_func_mats.sa_forward_part_func_mat_4_char_align[i][j] /= scale_param;
105 sa_part_func_mats.sa_forward_part_func_mat_4_gap_1[i][j] /= scale_param;
106 sa_part_func_mats.sa_forward_part_func_mat_4_gap_2[i][j] /= scale_param;
107 sa_part_func_mats.sa_forward_part_func_mat[i][j] = 1.;
108 scale_param_mat[i][j] = scale_param;
109 }
110 }
111 for i in (0 .. slp.0 + 2).rev() {
112 for j in (0 .. slp.1 + 2).rev() {
113 if i == slp.0 + 1 && j == slp.1 + 1 {
114 sa_part_func_mats.sa_backward_part_func_mat_4_char_align[i][j] = 1.;
115 continue;
116 } else if i == slp.0 + 1 || j == slp.1 + 1 || i == 0 || j == 0 {
117 continue;
118 }
119 let ca_score = if i + 1 == slp.0 + 1 && j + 1 == slp.1 + 1 {1.} else if i + 1 == slp.0 + 1 || j + 1 == slp.1 + 1 {0.} else {exp_sa_sps.ca_sm[&(sp.0[i], sp.1[j])]};
120 let part_func = sa_part_func_mats.sa_backward_part_func_mat_4_char_align[i + 1][j + 1] * ca_score;
121 sa_part_func_mats.sa_backward_part_func_mat_4_char_align[i][j] += part_func;
122 sa_part_func_mats.sa_backward_part_func_mat_4_gap_1[i][j] += part_func;
123 sa_part_func_mats.sa_backward_part_func_mat_4_gap_2[i][j] += part_func;
124 sa_part_func_mats.sa_backward_part_func_mat_4_char_align[i][j] += sa_part_func_mats.sa_backward_part_func_mat_4_gap_1[i + 1][j] * exp_sa_sps.opening_gap_penalty;
125 sa_part_func_mats.sa_backward_part_func_mat_4_gap_1[i][j] += sa_part_func_mats.sa_backward_part_func_mat_4_gap_1[i + 1][j] * exp_sa_sps.extending_gap_penalty;
126 sa_part_func_mats.sa_backward_part_func_mat_4_gap_2[i][j] += sa_part_func_mats.sa_backward_part_func_mat_4_gap_1[i + 1][j] * exp_sa_sps.opening_gap_penalty;
127 sa_part_func_mats.sa_backward_part_func_mat_4_char_align[i][j] += sa_part_func_mats.sa_backward_part_func_mat_4_gap_2[i][j + 1] * exp_sa_sps.opening_gap_penalty;
128 sa_part_func_mats.sa_backward_part_func_mat_4_gap_1[i][j] += sa_part_func_mats.sa_backward_part_func_mat_4_gap_2[i][j + 1] * exp_sa_sps.opening_gap_penalty;
129 sa_part_func_mats.sa_backward_part_func_mat_4_gap_2[i][j] += sa_part_func_mats.sa_backward_part_func_mat_4_gap_2[i][j + 1] * exp_sa_sps.extending_gap_penalty;
130 let scale_param = scale_param_mat[i][j];
131 sa_part_func_mats.sa_backward_part_func_mat_4_char_align[i][j] /= scale_param;
132 sa_part_func_mats.sa_backward_part_func_mat_4_gap_1[i][j] /= scale_param;
133 sa_part_func_mats.sa_backward_part_func_mat_4_gap_2[i][j] /= scale_param;
134 }
135 }
136 (sa_part_func_mats, scale_param_mat)
137}
138
139fn get_char_align_prob_mat(sa_part_func_mats: &SaPartFuncMats, slp: &(usize, usize), scale_param_mat: &ScaleParamMat) -> ProbMat {
140 let mut cap_mat = vec![vec![0.; slp.1]; slp.0];
141 for i in 1 .. slp.0 + 1 {
142 for j in 1 .. slp.1 + 1 {
143 let cap = sa_part_func_mats.sa_forward_part_func_mat_4_char_align[i][j] * sa_part_func_mats.sa_backward_part_func_mat_4_char_align[i][j] * scale_param_mat[i][j];
144 assert!(0. <= cap && cap <= 1.);
145 cap_mat[i - 1][j - 1] = cap;
146 }
147 }
148 cap_mat
149}