use std::mem;
use bio::stats::LogProb;
pub trait GapParameters {
fn prob_gap_x(&self) -> LogProb;
fn prob_gap_y(&self) -> LogProb;
fn prob_gap_x_extend(&self) -> LogProb;
fn prob_gap_y_extend(&self) -> LogProb;
}
pub trait StartEndGapParameters {
#[inline]
#[allow(unused_variables)]
fn prob_start_gap_x(&self, i: usize) -> LogProb {
if self.free_start_gap_x() {
LogProb::ln_one()
} else {
LogProb::ln_zero()
}
}
fn free_start_gap_x(&self) -> bool;
fn free_end_gap_x(&self) -> bool;
}
pub trait EmissionParameters {
fn prob_emit_xy(&self, i: usize, j: usize) -> LogProb;
fn prob_emit_x(&self, i: usize) -> LogProb;
fn prob_emit_y(&self, j: usize) -> LogProb;
fn len_x(&self) -> usize;
fn len_y(&self) -> usize;
}
pub struct PairHMM {
fm: [Vec<LogProb>; 2],
fx: [Vec<LogProb>; 2],
fy: [Vec<LogProb>; 2],
prob_cols: Vec<LogProb>
}
impl PairHMM {
pub fn new() -> Self {
PairHMM {
fm: [Vec::new(), Vec::new()],
fx: [Vec::new(), Vec::new()],
fy: [Vec::new(), Vec::new()],
prob_cols: Vec::new()
}
}
pub fn prob_related<G, E>(
&mut self,
gap_params: &G,
emission_params: &E
) -> LogProb where
G: GapParameters + StartEndGapParameters,
E: EmissionParameters
{
for k in 0..2 {
self.fm[k].clear();
self.fx[k].clear();
self.fy[k].clear();
self.prob_cols.clear();
self.fm[k].resize(emission_params.len_y() + 1, LogProb::ln_zero());
self.fx[k].resize(emission_params.len_y() + 1, LogProb::ln_zero());
self.fy[k].resize(emission_params.len_y() + 1, LogProb::ln_zero());
if gap_params.free_end_gap_x() {
let c = (emission_params.len_x() * 3).saturating_sub(self.prob_cols.capacity());
self.prob_cols.reserve_exact(c);
}
}
let prob_no_gap = gap_params.prob_gap_x().ln_add_exp(gap_params.prob_gap_y()).ln_one_minus_exp();
let prob_no_gap_x_extend = gap_params.prob_gap_x_extend().ln_one_minus_exp();
let prob_no_gap_y_extend = gap_params.prob_gap_y_extend().ln_one_minus_exp();
let prob_gap_x = gap_params.prob_gap_x();
let prob_gap_y = gap_params.prob_gap_y();
let prob_gap_x_extend = gap_params.prob_gap_x_extend();
let prob_gap_y_extend = gap_params.prob_gap_y_extend();
let mut prev = 0;
let mut curr = 1;
self.fm[prev][0] = LogProb::ln_one();
for i in 0..emission_params.len_x() {
self.fm[prev][0] = gap_params.prob_start_gap_x(i);
let prob_emit_x = emission_params.prob_emit_x(i);
for j in 0..emission_params.len_y() {
let j_ = j + 1;
self.fm[curr][j_] = emission_params.prob_emit_xy(i, j) + LogProb::ln_sum_exp(&[
prob_no_gap + self.fm[prev][j_ - 1],
prob_no_gap_x_extend + self.fx[prev][j_ - 1],
prob_no_gap_y_extend + self.fy[prev][j_ - 1]
]);
self.fx[curr][j_] = prob_emit_x + (
prob_gap_y + self.fm[prev][j_]
).ln_add_exp(
prob_gap_y_extend + self.fx[prev][j_]
);
self.fy[curr][j_] = emission_params.prob_emit_y(j) + (
prob_gap_x + self.fm[curr][j_ - 1]
).ln_add_exp(
prob_gap_x_extend + self.fy[curr][j_ - 1]
);
}
if gap_params.free_end_gap_x() {
self.prob_cols.push(self.fm[curr].last().unwrap().clone());
self.prob_cols.push(self.fx[curr].last().unwrap().clone());
self.prob_cols.push(self.fy[curr].last().unwrap().clone());
}
mem::swap(&mut curr, &mut prev);
for v in self.fm[curr].iter_mut() {
*v = LogProb::ln_zero();
}
}
let p = if gap_params.free_end_gap_x() {
LogProb::ln_sum_exp(&self.prob_cols)
} else {
LogProb::ln_sum_exp(&[
self.fm[prev].last().unwrap().clone(),
self.fx[prev].last().unwrap().clone(),
self.fy[prev].last().unwrap().clone()
])
};
assert!(!p.is_nan());
if p > LogProb::ln_one() {
LogProb::ln_one()
} else {
p
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use bio::stats::{Prob, LogProb};
use constants;
fn prob_emit_xy(x: &[u8], y: &[u8], i: usize, j: usize) -> LogProb {
if x[i] == y[j] {
LogProb::from(Prob(1.0) - constants::PROB_ILLUMINA_SUBST)
} else {
LogProb::from(constants::PROB_ILLUMINA_SUBST / Prob(3.0))
}
}
fn prob_emit_x_or_y() -> LogProb {
LogProb::from(Prob(1.0) - constants::PROB_ILLUMINA_SUBST)
}
struct TestEmissionParams {
x: &'static [u8],
y: &'static [u8]
}
impl EmissionParameters for TestEmissionParams {
fn prob_emit_xy(&self, i: usize, j: usize) -> LogProb {
if self.x[i] == self.y[j] {
LogProb::from(Prob(1.0) - constants::PROB_ILLUMINA_SUBST)
} else {
LogProb::from(constants::PROB_ILLUMINA_SUBST / Prob(3.0))
}
}
fn prob_emit_x(&self, _: usize) -> LogProb {
prob_emit_x_or_y()
}
fn prob_emit_y(&self, _: usize) -> LogProb {
prob_emit_x_or_y()
}
fn len_x(&self) -> usize {
self.x.len()
}
fn len_y(&self) -> usize {
self.y.len()
}
}
struct TestGapParams;
impl GapParameters for TestGapParams {
fn prob_gap_x(&self) -> LogProb {
LogProb::from(constants::PROB_ILLUMINA_INS)
}
fn prob_gap_y(&self) -> LogProb {
LogProb::from(constants::PROB_ILLUMINA_DEL)
}
fn prob_gap_x_extend(&self) -> LogProb {
LogProb::ln_zero()
}
fn prob_gap_y_extend(&self) -> LogProb {
LogProb::ln_zero()
}
}
impl StartEndGapParameters for TestGapParams {
fn free_start_gap_x(&self) -> bool {
false
}
fn free_end_gap_x(&self) -> bool {
false
}
}
#[test]
fn test_same() {
let x = b"AGCTCGATCGATCGATC";
let y = b"AGCTCGATCGATCGATC";
let emission_params = TestEmissionParams { x: x, y: y};
let gap_params = TestGapParams;
let mut pair_hmm = PairHMM::new();
let p = pair_hmm.prob_related(&gap_params, &emission_params);
assert!(*p <= 0.0);
assert_relative_eq!(*p, 0.0, epsilon=0.1);
}
#[test]
fn test_insertion() {
let x = b"AGCTCGATCGATCGATC";
let y = b"AGCTCGATCTGATCGATCT";
let emission_params = TestEmissionParams { x: x, y: y};
let gap_params = TestGapParams;
let mut pair_hmm = PairHMM::new();
let p = pair_hmm.prob_related(&gap_params, &emission_params);
assert!(*p <= 0.0);
assert_relative_eq!(p.exp(), constants::PROB_ILLUMINA_INS.powi(2), epsilon=1e-11);
}
#[test]
fn test_deletion() {
let x = b"AGCTCGATCTGATCGATCT";
let y = b"AGCTCGATCGATCGATC";
let emission_params = TestEmissionParams { x: x, y: y};
let gap_params = TestGapParams;
let mut pair_hmm = PairHMM::new();
let p = pair_hmm.prob_related(&gap_params, &emission_params);
assert!(*p <= 0.0);
assert_relative_eq!(p.exp(), constants::PROB_ILLUMINA_DEL.powi(2), epsilon=1e-10);
}
#[test]
fn test_mismatch() {
let x = b"AGCTCGAGCGATCGATC";
let y = b"TGCTCGATCGATCGATC";
let emission_params = TestEmissionParams { x: x, y: y};
let gap_params = TestGapParams;
let mut pair_hmm = PairHMM::new();
let p = pair_hmm.prob_related(&gap_params, &emission_params);
assert!(*p <= 0.0);
assert_relative_eq!(p.exp(), (constants::PROB_ILLUMINA_SUBST / Prob(3.0)).powi(2), epsilon=1e-6);
}
}