1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93
extern crate phyloprob; pub use phyloprob::*; pub type Mea = Prob; #[derive(Clone)] pub struct MeaSs { pub bp_pos_pairs: PosPairs, pub ea: Mea, } pub type Meas = Vec<Mea>; pub type MeaMat = FxHashMap<PosPair, Mea>; pub type Poss = Vec<Pos>; pub type PosSeqsWithPoss = FxHashMap<Pos, Poss>; pub type PosPairs = Vec<PosPair>; pub type PosPairSeqsWithPosPairs = FxHashMap<PosPair, PosPairs>; pub type MeaSsChar = u8; pub type MeaSsStr = Vec<MeaSsChar>; impl MeaSs { pub fn new() -> MeaSs { MeaSs { bp_pos_pairs: PosPairs::new(), ea: 0., } } } pub const UNPAIRING_BASE: MeaSsChar = '.' as MeaSsChar; pub const BASE_PAIRING_LEFT_BASE: MeaSsChar = '(' as MeaSsChar; pub const BASE_PAIRING_RIGHT_BASE: MeaSsChar = ')' as MeaSsChar; pub fn phylofold(bpp_mat: &SparseProbMat, upp_mat: &Probs, seq_len: usize, gamma: Prob) -> MeaSs { let mut mea_mat = vec![vec![0.; seq_len]; seq_len]; let seq_len = seq_len as Pos; for sub_seq_len in 1 .. seq_len - 1 { for i in 1 .. seq_len - sub_seq_len { let j = i + sub_seq_len - 1; let (long_i, long_j) = (i as usize, j as usize); if i == j { mea_mat[long_i][long_j] = upp_mat[long_i]; continue; } let mut mea = mea_mat[long_i + 1][long_j] + upp_mat[long_i]; let ea = mea_mat[long_i][long_j - 1] + upp_mat[long_j]; if ea > mea { mea = ea; } let pos_pair = (i, j); if bpp_mat.contains_key(&pos_pair) { let ea = mea_mat[long_i + 1][long_j - 1] + gamma * bpp_mat[&pos_pair]; if ea > mea { mea = ea; } } for k in long_i .. long_j { let ea = mea_mat[long_i][k] + mea_mat[k + 1][long_j]; if ea > mea { mea = ea; } } mea_mat[long_i][long_j] = mea; } } let mut mea_ss = MeaSs::new(); let mut pos_pair_stack = vec![(1, seq_len - 2)]; while pos_pair_stack.len() > 0 { let pos_pair = pos_pair_stack.pop().unwrap(); let (i, j) = pos_pair; if j <= i {continue;} let (long_i, long_j) = (i as usize, j as usize); let mea = mea_mat[long_i][long_j]; if mea == mea_mat[long_i + 1][long_j] + upp_mat[long_i] { pos_pair_stack.push((i + 1, j)); } else if mea == mea_mat[long_i][long_j - 1] + upp_mat[long_j] { pos_pair_stack.push((i, j - 1)); } else if bpp_mat.contains_key(&pos_pair) && mea == mea_mat[long_i + 1][long_j - 1] + gamma * bpp_mat[&pos_pair] { pos_pair_stack.push((i + 1, j - 1)); mea_ss.bp_pos_pairs.push(pos_pair); } else { for k in i .. j { let long_k = k as usize; if mea == mea_mat[long_i][long_k] + mea_mat[long_k + 1][long_j] { pos_pair_stack.push((i, k)); pos_pair_stack.push((k + 1, j)); break; } } } } mea_ss.ea = mea_mat[1][seq_len as usize - 2]; mea_ss }