1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155
use utils::*; pub struct SaPartFuncMats { pub sa_forward_part_func_mat_4_char_align: PartFuncMat, pub sa_forward_part_func_mat_4_gap_1: PartFuncMat, pub sa_forward_part_func_mat_4_gap_2: PartFuncMat, pub sa_backward_part_func_mat_4_char_align: PartFuncMat, pub sa_backward_part_func_mat_4_gap_1: PartFuncMat, pub sa_backward_part_func_mat_4_gap_2: PartFuncMat, } impl SaPartFuncMats { fn new(slp: &(usize, usize)) -> SaPartFuncMats { let zero_mat = vec![vec![0.; slp.1 + 2]; slp.0 + 2]; SaPartFuncMats { sa_forward_part_func_mat_4_char_align: zero_mat.clone(), sa_forward_part_func_mat_4_gap_1: zero_mat.clone(), sa_forward_part_func_mat_4_gap_2: zero_mat.clone(), sa_backward_part_func_mat_4_char_align: zero_mat.clone(), sa_backward_part_func_mat_4_gap_1: zero_mat.clone(), sa_backward_part_func_mat_4_gap_2: zero_mat, } } } impl SaScoringParams { pub fn new(ca_sm: &CaScoreMat, opening_gap_penalty: SaScore, extending_gap_penalty: SaScore) -> SaScoringParams { SaScoringParams { ca_sm: ca_sm.clone(), opening_gap_penalty: opening_gap_penalty, extending_gap_penalty: extending_gap_penalty, } } } #[inline] pub fn durbin_algo(seq_pair: &SsPair, sa_sps: &SaScoringParams) -> ProbMat { let seq_len_pair = (seq_pair.0.len(), seq_pair.1.len()); let (sa_part_func_mats, scale_param_mat) = get_sa_part_func_mats_and_scale_param_mat(seq_pair, &seq_len_pair, sa_sps); get_char_align_prob_mat(&sa_part_func_mats, &seq_len_pair, &scale_param_mat) } #[inline] pub fn get_cap_mat_and_unaligned_char_psp(sp: &SsPair, sa_sps: &SaScoringParams) -> (ProbMat, ProbSeqPair) { let slp = (sp.0.len(), sp.1.len()); let cap_mat = durbin_algo(sp, sa_sps); let mut ucp_seq_pair = (vec![0.; slp.0], vec![0.; slp.1]); for i in 0 .. slp.0 { let mut ucp = 1.; for j in 0 .. slp.1 { ucp -= cap_mat[i][j]; } ucp_seq_pair.0[i] = ucp; } for j in 0 .. slp.1 { let mut ucp = 1.; for i in 0 .. slp.0 { ucp -= cap_mat[i][j]; } ucp_seq_pair.1[j] = ucp; } (cap_mat, ucp_seq_pair) } #[inline] pub fn get_sa_part_func_mats_and_scale_param_mat(sp: &SsPair, slp: &(usize, usize), sa_sps: &SaScoringParams) -> (SaPartFuncMats, ScaleParamMat) { let mut exp_sa_sps = sa_sps.clone(); for ca_score in exp_sa_sps.ca_sm.values_mut() { *ca_score = ca_score.exp(); }; exp_sa_sps.opening_gap_penalty = exp_sa_sps.opening_gap_penalty.exp(); exp_sa_sps.extending_gap_penalty = exp_sa_sps.extending_gap_penalty.exp(); let mut sa_part_func_mats = SaPartFuncMats::new(slp); let mut scale_param_mat = vec![vec![0.; slp.1 + 2]; slp.0 + 2]; scale_param_mat[0][0] = 1.; sa_part_func_mats.sa_forward_part_func_mat_4_char_align[0][0] = 1.; for i in 1 .. slp.0 + 1 { scale_param_mat[i][0] = exp_sa_sps.opening_gap_penalty * get_egp(&(1, i - 1), sa_sps).exp(); sa_part_func_mats.sa_forward_part_func_mat_4_gap_1[i][0] = 1.; } for i in 1 .. slp.1 + 1 { scale_param_mat[0][i] = exp_sa_sps.opening_gap_penalty * get_egp(&(1, i - 1), sa_sps).exp(); sa_part_func_mats.sa_forward_part_func_mat_4_gap_2[0][i] = 1.; } for i in 1 .. slp.0 + 2 { for j in 1 .. slp.1 + 2 { let ca_score = if i == slp.0 + 1 && j == slp.1 + 1 { 1. } else if i == slp.0 + 1 || j == slp.1 + 1 { 0. } else { exp_sa_sps.ca_sm[&(sp.0[i - 1], sp.1[j - 1])] }; let mut scale_param = 0.; let forward_part_func = (sa_part_func_mats.sa_forward_part_func_mat_4_char_align[i - 1][j - 1] + sa_part_func_mats.sa_forward_part_func_mat_4_gap_1[i - 1][j - 1] + sa_part_func_mats.sa_forward_part_func_mat_4_gap_2[i - 1][j - 1]) * ca_score; scale_param += forward_part_func; sa_part_func_mats.sa_forward_part_func_mat_4_char_align[i][j] = forward_part_func; if i < slp.0 + 1 && j < slp.1 + 1 { let forward_part_func = sa_part_func_mats.sa_forward_part_func_mat_4_char_align[i - 1][j] * exp_sa_sps.opening_gap_penalty + sa_part_func_mats.sa_forward_part_func_mat_4_gap_1[i - 1][j] * exp_sa_sps.extending_gap_penalty; scale_param += forward_part_func; sa_part_func_mats.sa_forward_part_func_mat_4_gap_1[i][j] = forward_part_func; let forward_part_func = sa_part_func_mats.sa_forward_part_func_mat_4_char_align[i][j - 1] * exp_sa_sps.opening_gap_penalty + sa_part_func_mats.sa_forward_part_func_mat_4_gap_2[i][j - 1] * exp_sa_sps.extending_gap_penalty; scale_param += forward_part_func; sa_part_func_mats.sa_forward_part_func_mat_4_gap_2[i][j] = forward_part_func; } scale_param_mat[i][j] = scale_param; sa_part_func_mats.sa_forward_part_func_mat_4_char_align[i][j] /= scale_param; if i < slp.0 + 1 && j < slp.1 + 1 { sa_part_func_mats.sa_forward_part_func_mat_4_gap_1[i][j] /= scale_param; sa_part_func_mats.sa_forward_part_func_mat_4_gap_2[i][j] /= scale_param; } } } sa_part_func_mats.sa_backward_part_func_mat_4_char_align[slp.0 + 1][slp.1 + 1] = 1. / scale_param_mat[slp.0 + 1][slp.1 + 1]; for i in 0 .. slp.0 { sa_part_func_mats.sa_forward_part_func_mat_4_gap_1[i][slp.1 + 1] = exp_sa_sps.opening_gap_penalty * get_egp(&(i, slp.0 - 1), sa_sps).exp() / scale_param_mat[i][slp.1 + 1]; } for i in 0 .. slp.1 { sa_part_func_mats.sa_forward_part_func_mat_4_gap_1[slp.0 + 1][i] = exp_sa_sps.opening_gap_penalty * get_egp(&(i, slp.1 - 1), sa_sps).exp() / scale_param_mat[slp.0 + 1][i]; } for i in (0 .. slp.0 + 1).rev() { for j in (0 .. slp.1 + 1).rev() { let ca_score = if i == slp.0 && j == slp.1 { 1. } else if i == slp.0 || j == slp.1 { 0. } else { exp_sa_sps.ca_sm[&(sp.0[i], sp.1[j])] }; let scale_param = scale_param_mat[i][j]; sa_part_func_mats.sa_backward_part_func_mat_4_char_align[i][j] = (sa_part_func_mats.sa_backward_part_func_mat_4_char_align[i + 1][j + 1] * ca_score + (sa_part_func_mats.sa_backward_part_func_mat_4_gap_1[i + 1][j] + sa_part_func_mats.sa_backward_part_func_mat_4_gap_2[i][j + 1]) * exp_sa_sps.opening_gap_penalty) / scale_param; if i > 0 && j > 0 { sa_part_func_mats.sa_backward_part_func_mat_4_gap_1[i][j] = (sa_part_func_mats.sa_backward_part_func_mat_4_char_align[i + 1][j + 1] * ca_score + sa_part_func_mats.sa_backward_part_func_mat_4_gap_1[i + 1][j] * exp_sa_sps.extending_gap_penalty) / scale_param; sa_part_func_mats.sa_backward_part_func_mat_4_gap_2[i][j] = (sa_part_func_mats.sa_backward_part_func_mat_4_char_align[i + 1][j + 1] * ca_score + sa_part_func_mats.sa_backward_part_func_mat_4_gap_2[i][j + 1] * exp_sa_sps.extending_gap_penalty) / scale_param; } } } (sa_part_func_mats, scale_param_mat) } #[inline] fn get_char_align_prob_mat(sa_part_func_mats: &SaPartFuncMats, slp: &(usize, usize), scale_param_mat: &ScaleParamMat) -> ProbMat { let mut cap_mat = vec![vec![0.; slp.1]; slp.0]; for i in 1 .. slp.0 + 1 { for j in 1 .. slp.1 + 1 { cap_mat[i - 1][j - 1] = scale_param_mat[i][j] * sa_part_func_mats.sa_forward_part_func_mat_4_char_align[i][j] * sa_part_func_mats.sa_backward_part_func_mat_4_char_align[i][j]; } } cap_mat } #[inline] fn get_egp(pp: &PosPair, sa_sps: &SaScoringParams) -> SaScore { (pp.1 + 1 - pp.0) as SaScore * sa_sps.extending_gap_penalty }