use bio::stats::{bayesian::model::Likelihood, LogProb};
use lru::LruCache;
use crate::utils::NUMERICAL_EPSILON;
use crate::variants::evidence::observations::pileup::Pileup;
use crate::variants::evidence::observations::read_observation::ProcessedReadObservation;
use crate::variants::model::bias::Artifacts;
use crate::variants::model::AlleleFreq;
/// LRU cache memoizing pileup likelihoods of contaminated-sample event pairs.
pub(crate) type ContaminatedSampleCache = LruCache<ContaminatedSampleEvent, LogProb>;
/// LRU cache memoizing pileup likelihoods of single-sample events.
pub(crate) type SingleSampleCache = LruCache<Event, LogProb>;
/// An event (hypothesis) for which a likelihood is computed; used as cache key.
#[derive(PartialEq, Eq, Debug, Clone, Hash)]
pub(crate) struct Event {
    // Allele frequency hypothesized for this event.
    pub(crate) allele_freq: AlleleFreq,
    // Artifact (bias) configuration considered for this event.
    pub(crate) artifacts: Artifacts,
    // Whether the allele frequency stems from a discrete spectrum
    // (presumably vs. a continuous one — confirm against callers).
    pub(crate) is_discrete: bool,
}
impl Event {
    /// Whether this event models an artifact, delegated to the artifact configuration.
    pub(crate) fn is_artifact(&self) -> bool {
        self.artifacts.is_artifact()
    }
}
/// Scale the observation's probability to sample the alt allele by the given
/// allele frequency (both in log space).
///
/// At allele frequency 1.0 (`ln_one`) the frequency itself is returned, i.e. the
/// scaling is skipped; otherwise the log-space product is capped to compensate
/// for numerical overshoot above probability 1.
fn prob_sample_alt(observation: &ProcessedReadObservation, allele_freq: LogProb) -> LogProb {
    match allele_freq {
        af if af == LogProb::ln_one() => af,
        af => (af + observation.prob_sample_alt).cap_numerical_overshoot(NUMERICAL_EPSILON),
    }
}
/// View into a pair of samples where the first is contaminated by the second.
pub(crate) trait ContaminatedSamplePairView<T> {
    /// The contaminated sample of interest.
    fn primary(&self) -> &T;
    /// The contaminating sample.
    fn secondary(&self) -> &T;
}
impl<T> ContaminatedSamplePairView<T> for Vec<T> {
    // NOTE(review): both accessors panic if the vector holds fewer than two
    // elements — callers are expected to guarantee a pair.
    fn primary(&self) -> &T {
        &self[0]
    }
    fn secondary(&self) -> &T {
        &self[1]
    }
}
/// Pair of events for a contaminated sample; used as cache key.
#[derive(PartialEq, Eq, Debug, Clone, Hash)]
pub(crate) struct ContaminatedSampleEvent {
    // Event of the contaminated sample of interest.
    pub(crate) primary: Event,
    // Event of the contaminating sample.
    pub(crate) secondary: Event,
}
/// Likelihood model for a sample contaminated by a second sample.
#[derive(Clone, Copy, Debug)]
pub(crate) struct ContaminatedSampleLikelihoodModel {
    // Fraction of reads stemming from the primary sample, in log space.
    purity: LogProb,
    // Complement of purity: fraction of reads from the contaminating sample.
    impurity: LogProb,
}
impl Default for ContaminatedSampleLikelihoodModel {
fn default() -> Self {
ContaminatedSampleLikelihoodModel::new(1.0)
}
}
impl ContaminatedSampleLikelihoodModel {
    /// Create a model with the given purity, i.e. the fraction of reads stemming
    /// from the primary sample.
    ///
    /// # Panics
    /// Panics if `purity` is not in the interval (0, 1].
    pub(crate) fn new(purity: f64) -> Self {
        assert!(purity > 0.0 && purity <= 1.0);
        let ln_purity = LogProb(purity.ln());
        let ln_impurity = ln_purity.ln_one_minus_exp();
        ContaminatedSampleLikelihoodModel {
            purity: ln_purity,
            impurity: ln_impurity,
        }
    }

    /// Likelihood of a single observation, mixing the primary and the
    /// contaminating sample weighted by purity/impurity.
    fn likelihood_observation(
        &self,
        allele_freq_primary: LogProb,
        allele_freq_secondary: LogProb,
        biases_primary: &Artifacts,
        biases_secondary: &Artifacts,
        observation: &ProcessedReadObservation,
    ) -> LogProb {
        // Contribution of reads drawn from the primary sample.
        let from_primary =
            self.purity + likelihood_mapping(allele_freq_primary, biases_primary, observation);
        // Contribution of reads drawn from the contaminating sample.
        let from_secondary = self.impurity
            + likelihood_mapping(allele_freq_secondary, biases_secondary, observation);
        // Correctly mapped reads follow the mixture above.
        let mapped = observation.prob_mapping() + from_primary.ln_add_exp(from_secondary);
        // Mismapped reads contribute via the probability of having missed the
        // allele, under the primary sample's bias model.
        let mismapped = observation.prob_mismapping()
            + observation.prob_missed_allele
            + biases_primary.prob_any(observation);
        let total = mapped.ln_add_exp(mismapped);
        assert!(!total.is_nan());
        total
    }
}
impl Likelihood<ContaminatedSampleCache> for ContaminatedSampleLikelihoodModel {
    type Event = ContaminatedSampleEvent;
    type Data = Pileup;

    /// Joint likelihood of the pileup given the event pair, memoized in `cache`.
    fn compute(
        &self,
        events: &Self::Event,
        pileup: &Self::Data,
        cache: &mut ContaminatedSampleCache,
    ) -> LogProb {
        if let Some(cached) = cache.get(events) {
            return *cached;
        }
        let ln_af_primary = LogProb(events.primary.allele_freq.ln());
        let ln_af_secondary = LogProb(events.secondary.allele_freq.ln());
        // Observations are independent given the event: multiply their
        // likelihoods, i.e. sum in log space.
        let likelihood = pileup
            .read_observations()
            .iter()
            .map(|obs| {
                self.likelihood_observation(
                    ln_af_primary,
                    ln_af_secondary,
                    &events.primary.artifacts,
                    &events.secondary.artifacts,
                    obs,
                )
            })
            .sum::<LogProb>();
        assert!(!likelihood.is_nan());
        cache.put(events.clone(), likelihood);
        likelihood
    }
}
/// Likelihood model for a single, uncontaminated sample.
#[derive(Clone, Copy, Debug, Default)]
pub(crate) struct SampleLikelihoodModel {}
impl SampleLikelihoodModel {
    /// Create a new single-sample likelihood model.
    pub(crate) fn new() -> Self {
        SampleLikelihoodModel {}
    }

    /// Likelihood of a single observation given the allele frequency and the
    /// artifact configuration.
    fn likelihood_observation(
        &self,
        allele_freq: LogProb,
        biases: &Artifacts,
        observation: &ProcessedReadObservation,
    ) -> LogProb {
        // Correctly mapped reads follow the allele-frequency/bias model.
        let mapped =
            observation.prob_mapping() + likelihood_mapping(allele_freq, biases, observation);
        // Mismapped reads contribute via the probability of having missed the allele.
        let mismapped = observation.prob_mismapping()
            + observation.prob_missed_allele
            + biases.prob_any(observation);
        let total = mapped.ln_add_exp(mismapped);
        assert!(!total.is_nan());
        total
    }
}
/// Likelihood of an observation under the assumption that the read maps
/// correctly: a two-way mixture of sampling the alt vs. the ref allele, each
/// branch weighted by the corresponding bias probability.
fn likelihood_mapping(
    allele_freq: LogProb,
    biases: &Artifacts,
    observation: &ProcessedReadObservation,
) -> LogProb {
    // Probability to sample the alt allele at this frequency, and its complement.
    let p_sample_alt = prob_sample_alt(observation, allele_freq);
    let p_sample_ref = p_sample_alt.ln_one_minus_exp();
    // Alt branch: alt sampled, bias yields alt, read evidence supports alt.
    let alt_branch = p_sample_alt + biases.prob_alt(observation) + observation.prob_alt;
    // Ref branch: ref sampled, read evidence supports ref, bias yields ref.
    let ref_branch = p_sample_ref + observation.prob_ref + biases.prob_ref(observation);
    let prob = LogProb::ln_sum_exp(&[alt_branch, ref_branch]);
    assert!(!prob.is_nan());
    prob
}
impl Likelihood<SingleSampleCache> for SampleLikelihoodModel {
    type Event = Event;
    type Data = Pileup;

    /// Joint likelihood of the pileup given the event, memoized in `cache`.
    fn compute(&self, event: &Event, pileup: &Pileup, cache: &mut SingleSampleCache) -> LogProb {
        if let Some(cached) = cache.get(event) {
            return *cached;
        }
        let ln_af = LogProb(event.allele_freq.ln());
        // Observations are independent given the event: multiply their
        // likelihoods, i.e. sum in log space.
        let likelihood = pileup
            .read_observations()
            .iter()
            .map(|obs| self.likelihood_observation(ln_af, &event.artifacts, obs))
            .sum::<LogProb>();
        assert!(!likelihood.is_nan());
        cache.put(event.clone(), likelihood);
        likelihood
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::variants::model::bias::Artifacts;
    use crate::variants::model::likelihood;
    use crate::variants::model::tests::observation;
    use bio::stats::LogProb;
    use itertools_num::linspace;

    /// Artifact configuration without any bias.
    fn biases() -> Artifacts {
        Artifacts::none()
    }

    /// Build a discrete, artifact-free event for the given allele frequency.
    fn event(allele_freq: f64) -> Event {
        Event {
            allele_freq: AlleleFreq(allele_freq),
            artifacts: biases(),
            is_discrete: true,
        }
    }

    #[test]
    fn test_likelihood_observation_absent_single() {
        let observation = observation(LogProb::ln_one(), LogProb::ln_zero(), LogProb::ln_one());
        let model = SampleLikelihoodModel::new();
        let lh =
            model.likelihood_observation(LogProb(AlleleFreq(0.0).ln()), &biases(), &observation);
        assert_relative_eq!(*lh, *biases().prob_ref(&observation));
    }

    #[test]
    fn test_likelihood_observation_absent() {
        let model = ContaminatedSampleLikelihoodModel::new(1.0);
        let observation = observation(LogProb::ln_one(), LogProb::ln_zero(), LogProb::ln_one());
        let lh = model.likelihood_observation(
            LogProb(AlleleFreq(0.0).ln()),
            LogProb(AlleleFreq(0.0).ln()),
            &biases(),
            &biases(),
            &observation,
        );
        assert_relative_eq!(*lh, *biases().prob_ref(&observation));
    }

    #[test]
    fn test_likelihood_pileup_absent() {
        let model = ContaminatedSampleLikelihoodModel::new(1.0);
        let mut observations = Pileup::default();
        for _ in 0..10 {
            observations.read_observations_mut().push(observation(
                LogProb::ln_one(),
                LogProb::ln_zero(),
                LogProb::ln_one(),
            ));
        }
        let mut cache = likelihood::ContaminatedSampleCache::new(100);
        let lh = model.compute(
            &ContaminatedSampleEvent {
                primary: event(0.0),
                secondary: event(0.0),
            },
            &observations,
            &mut cache,
        );
        // `observation` is already a reference; no extra borrow needed (clippy::needless_borrow).
        assert_relative_eq!(
            *lh,
            *observations
                .read_observations()
                .iter()
                .map(|observation| biases().prob_ref(observation))
                .sum::<LogProb>()
        );
    }

    #[test]
    fn test_likelihood_pileup_absent_single() {
        let model = SampleLikelihoodModel::new();
        let mut observations = Pileup::default();
        for _ in 0..10 {
            observations.read_observations_mut().push(observation(
                LogProb::ln_one(),
                LogProb::ln_zero(),
                LogProb::ln_one(),
            ));
        }
        let mut cache = likelihood::SingleSampleCache::new(100);
        let evt = event(0.0);
        let lh = model.compute(&evt, &observations, &mut cache);
        // `observation` is already a reference; no extra borrow needed (clippy::needless_borrow).
        assert_relative_eq!(
            *lh,
            *observations
                .read_observations()
                .iter()
                .map(|observation| biases().prob_ref(observation))
                .sum::<LogProb>()
        );
        assert!(cache.get(&evt).is_some())
    }

    #[test]
    #[allow(clippy::float_cmp)]
    fn test_likelihood_pileup() {
        let model = ContaminatedSampleLikelihoodModel::new(1.0);
        let mut observations = Pileup::default();
        for _ in 0..5 {
            observations.read_observations_mut().push(observation(
                LogProb::ln_one(),
                LogProb::ln_one(),
                LogProb::ln_zero(),
            ));
        }
        for _ in 0..5 {
            observations.read_observations_mut().push(observation(
                LogProb::ln_one(),
                LogProb::ln_zero(),
                LogProb::ln_one(),
            ));
        }
        let mut cache = likelihood::ContaminatedSampleCache::new(100);
        let lh = model.compute(
            &ContaminatedSampleEvent {
                primary: event(0.5),
                secondary: event(0.0),
            },
            &observations,
            &mut cache,
        );
        // The likelihood should peak at the true allele frequency of 0.5.
        for af in linspace(0.0, 1.0, 10) {
            if af != 0.5 {
                let evt = ContaminatedSampleEvent {
                    primary: event(af),
                    secondary: event(0.0),
                };
                let l = model.compute(&evt, &observations, &mut cache);
                assert!(lh > l);
                assert!(cache.get(&evt).is_some());
            }
        }
    }
}