bio_seq_algos/
durbin_algo.rs

1use utils::*;
2
3pub struct SaPartFuncMats {
4  pub sa_forward_part_func_mat: PartFuncMat,
5  pub sa_forward_part_func_mat_4_char_align: PartFuncMat,
6  pub sa_forward_part_func_mat_4_gap_1: PartFuncMat,
7  pub sa_forward_part_func_mat_4_gap_2: PartFuncMat,
8  pub sa_backward_part_func_mat_4_char_align: PartFuncMat,
9  pub sa_backward_part_func_mat_4_gap_1: PartFuncMat,
10  pub sa_backward_part_func_mat_4_gap_2: PartFuncMat,
11}
12
13impl SaPartFuncMats {
14  fn new(slp: &(usize, usize)) -> SaPartFuncMats {
15    let zero_mat = vec![vec![0.; slp.1 + 2]; slp.0 + 2];
16    SaPartFuncMats {
17      sa_forward_part_func_mat: zero_mat.clone(),
18      sa_forward_part_func_mat_4_char_align: zero_mat.clone(),
19      sa_forward_part_func_mat_4_gap_1: zero_mat.clone(),
20      sa_forward_part_func_mat_4_gap_2: zero_mat.clone(),
21      sa_backward_part_func_mat_4_char_align: zero_mat.clone(),
22      sa_backward_part_func_mat_4_gap_1: zero_mat.clone(),
23      sa_backward_part_func_mat_4_gap_2: zero_mat,
24    }
25  }
26}
27
28impl SaScoringParams {
29  pub fn new(ca_sm: &CaScoreMat, opening_gap_penalty: SaScore, extending_gap_penalty: SaScore) -> SaScoringParams {
30    SaScoringParams {
31      ca_sm: ca_sm.clone(),
32      opening_gap_penalty: opening_gap_penalty,
33      extending_gap_penalty: extending_gap_penalty,
34    }
35  }
36}
37
38pub fn durbin_algo(seq_pair: &SsPair, sa_sps: &SaScoringParams) -> ProbMat {
39  let seq_len_pair = (seq_pair.0.len(), seq_pair.1.len());
40  let (sa_part_func_mats, scale_param_mat) = get_sa_part_func_mats_and_scale_param_mat(seq_pair, &seq_len_pair, sa_sps);
41  get_char_align_prob_mat(&sa_part_func_mats, &seq_len_pair, &scale_param_mat)
42}
43
44pub fn get_cap_mat_and_unaligned_char_psp(sp: &SsPair, sa_sps: &SaScoringParams) -> (ProbMat, ProbSeqPair) {
45  let slp = (sp.0.len(), sp.1.len());
46  let cap_mat = durbin_algo(sp, sa_sps);
47  let mut ucp_seq_pair = (vec![0.; slp.0], vec![0.; slp.1]);
48  for i in 0 .. slp.0 {
49    let mut ucp = 1.;
50    for j in 0 .. slp.1 {
51      ucp -= cap_mat[i][j];
52    }
53    assert!(0. <= ucp && ucp <= 1.);
54    ucp_seq_pair.0[i] = ucp;
55  }
56  for j in 0 .. slp.1 {
57    let mut ucp = 1.;
58    for i in 0 .. slp.0 {
59      ucp -= cap_mat[i][j];
60    }
61    assert!(0. <= ucp && ucp <= 1.);
62    ucp_seq_pair.1[j] = ucp;
63  }
64  (cap_mat, ucp_seq_pair)
65}
66
67pub fn get_sa_part_func_mats_and_scale_param_mat(sp: &SsPair, slp: &(usize, usize), sa_sps: &SaScoringParams) -> (SaPartFuncMats, ScaleParamMat) {
68  let mut exp_sa_sps = sa_sps.clone();
69  for ca_score in exp_sa_sps.ca_sm.values_mut() {
70    *ca_score = ca_score.exp();
71  };
72  exp_sa_sps.opening_gap_penalty = exp_sa_sps.opening_gap_penalty.exp();
73  exp_sa_sps.extending_gap_penalty = exp_sa_sps.extending_gap_penalty.exp();
74  let mut sa_part_func_mats = SaPartFuncMats::new(slp);
75  let mut scale_param_mat = vec![vec![0.; slp.1 + 2]; slp.0 + 2];
76  for i in 0 .. slp.0 + 2 {
77    for j in 0 .. slp.1 + 2 {
78      if (i == 0 && j == 0) || (i == slp.0 + 1 && j == slp.1 + 1) {
79        sa_part_func_mats.sa_forward_part_func_mat[i][j] = 1.;
80        sa_part_func_mats.sa_forward_part_func_mat_4_char_align[i][j] = 1.;
81        scale_param_mat[i][j] = 1.;
82        continue;
83      } else if i == slp.0 + 1 || j == slp.1 + 1 {
84        scale_param_mat[i][j] = 1.;
85        continue;
86      }
87      let mut scale_param = 0.;
88      if i > 0 && j > 0 {
89        let ca_score = exp_sa_sps.ca_sm[&(sp.0[i - 1], sp.1[j - 1])];
90        let forward_part_func = sa_part_func_mats.sa_forward_part_func_mat[i - 1][j - 1] * ca_score;
91        scale_param += forward_part_func;
92        sa_part_func_mats.sa_forward_part_func_mat_4_char_align[i][j] = forward_part_func;
93      }
94      if i > 0 {
95        let forward_part_func = (sa_part_func_mats.sa_forward_part_func_mat_4_char_align[i - 1][j] + sa_part_func_mats.sa_forward_part_func_mat_4_gap_2[i - 1][j]) * exp_sa_sps.opening_gap_penalty + sa_part_func_mats.sa_forward_part_func_mat_4_gap_1[i - 1][j] * exp_sa_sps.extending_gap_penalty;
96        scale_param += forward_part_func;
97        sa_part_func_mats.sa_forward_part_func_mat_4_gap_1[i][j] = forward_part_func;
98      }
99      if j > 0 {
100        let forward_part_func = (sa_part_func_mats.sa_forward_part_func_mat_4_char_align[i][j - 1] + sa_part_func_mats.sa_forward_part_func_mat_4_gap_1[i][j - 1]) * exp_sa_sps.opening_gap_penalty + sa_part_func_mats.sa_forward_part_func_mat_4_gap_2[i][j - 1] * exp_sa_sps.extending_gap_penalty;
101        scale_param += forward_part_func;
102        sa_part_func_mats.sa_forward_part_func_mat_4_gap_2[i][j] = forward_part_func;
103      }
104      sa_part_func_mats.sa_forward_part_func_mat_4_char_align[i][j] /= scale_param;
105      sa_part_func_mats.sa_forward_part_func_mat_4_gap_1[i][j] /= scale_param;
106      sa_part_func_mats.sa_forward_part_func_mat_4_gap_2[i][j] /= scale_param;
107      sa_part_func_mats.sa_forward_part_func_mat[i][j] = 1.;
108      scale_param_mat[i][j] = scale_param;
109    }
110  }
111  for i in (0 .. slp.0 + 2).rev() {
112    for j in (0 .. slp.1 + 2).rev() {
113      if i == slp.0 + 1 && j == slp.1 + 1 {
114        sa_part_func_mats.sa_backward_part_func_mat_4_char_align[i][j] = 1.;
115        continue;
116      } else if i == slp.0 + 1 || j == slp.1 + 1 || i == 0 || j == 0 {
117        continue;
118      }
119      let ca_score = if i + 1 == slp.0 + 1 && j + 1 == slp.1 + 1 {1.} else if i + 1 == slp.0 + 1 || j + 1 == slp.1 + 1 {0.} else {exp_sa_sps.ca_sm[&(sp.0[i], sp.1[j])]};
120      let part_func = sa_part_func_mats.sa_backward_part_func_mat_4_char_align[i + 1][j + 1] * ca_score;
121      sa_part_func_mats.sa_backward_part_func_mat_4_char_align[i][j] += part_func;
122      sa_part_func_mats.sa_backward_part_func_mat_4_gap_1[i][j] += part_func;
123      sa_part_func_mats.sa_backward_part_func_mat_4_gap_2[i][j] += part_func;
124      sa_part_func_mats.sa_backward_part_func_mat_4_char_align[i][j] += sa_part_func_mats.sa_backward_part_func_mat_4_gap_1[i + 1][j] * exp_sa_sps.opening_gap_penalty;
125      sa_part_func_mats.sa_backward_part_func_mat_4_gap_1[i][j] += sa_part_func_mats.sa_backward_part_func_mat_4_gap_1[i + 1][j] * exp_sa_sps.extending_gap_penalty;
126      sa_part_func_mats.sa_backward_part_func_mat_4_gap_2[i][j] += sa_part_func_mats.sa_backward_part_func_mat_4_gap_1[i + 1][j] * exp_sa_sps.opening_gap_penalty;
127      sa_part_func_mats.sa_backward_part_func_mat_4_char_align[i][j] += sa_part_func_mats.sa_backward_part_func_mat_4_gap_2[i][j + 1] * exp_sa_sps.opening_gap_penalty;
128      sa_part_func_mats.sa_backward_part_func_mat_4_gap_1[i][j] += sa_part_func_mats.sa_backward_part_func_mat_4_gap_2[i][j + 1] * exp_sa_sps.opening_gap_penalty;
129      sa_part_func_mats.sa_backward_part_func_mat_4_gap_2[i][j] += sa_part_func_mats.sa_backward_part_func_mat_4_gap_2[i][j + 1] * exp_sa_sps.extending_gap_penalty;
130      let scale_param = scale_param_mat[i][j];
131      sa_part_func_mats.sa_backward_part_func_mat_4_char_align[i][j] /= scale_param;
132      sa_part_func_mats.sa_backward_part_func_mat_4_gap_1[i][j] /= scale_param;
133      sa_part_func_mats.sa_backward_part_func_mat_4_gap_2[i][j] /= scale_param;
134    }
135  }
136  (sa_part_func_mats, scale_param_mat)
137}
138
139fn get_char_align_prob_mat(sa_part_func_mats: &SaPartFuncMats, slp: &(usize, usize), scale_param_mat: &ScaleParamMat) -> ProbMat {
140  let mut cap_mat = vec![vec![0.; slp.1]; slp.0];
141  for i in 1 .. slp.0 + 1 {
142    for j in 1 .. slp.1 + 1 {
143      let cap = sa_part_func_mats.sa_forward_part_func_mat_4_char_align[i][j] * sa_part_func_mats.sa_backward_part_func_mat_4_char_align[i][j] * scale_param_mat[i][j];
144      assert!(0. <= cap && cap <= 1.);
145      cap_mat[i - 1][j - 1] = cap;
146    }
147  }
148  cap_mat
149}