libprosic/model/priors/
tumor_normal.rs

1use std::f64;
2
3use bio::stats::{LogProb, Prob};
4use itertools::Itertools;
5use itertools_num::linspace;
6use ordered_float::NotNan;
7
8use model::{AlleleFreq, ContinuousAlleleFreqs, DiscreteAlleleFreqs, PairPileup, Variant};
9
10use priors::InfiniteSitesNeutralVariationModel;
11use priors::PairModel;
12
13/// Tumor-normal prior model using ploidy, heterozygosity (in normal tissue) and tumor mutation rate
14/// per effective cell division.
15/// The latter is the quotient mu/beta, with mu being the mutation rate and beta being the fraction
16/// of effective cell divisions (both lineages survive). Alone, the parameters are not observable.
17/// However, mu/beta can be estimated from e.g. SNV calls. It is the slope of the linear model
18/// `y = mu/beta * (x -  1 / fmax)``, with `x` being the reciprocal of the observed allele frequencies
19/// and y being the number of observed mutations corresponding to each frequency
20/// (see Williams et al. Nature Genetics 2016).
21///
22/// Based on the Williams model, the tail probability of a somatic allele frequency F > f can be expressed
23/// as
24/// `Pr(F > f) = M(f) / n = mu/beta (1 / f - 1 / fmax) / n`
25/// with `n` being the size of the genome and `fmax` is the expected allele frequency of clonal variants
26/// at the beginning of tumor evolution.
27/// From this, we can obtain the cumulative distribution function as `Pr(F <= f) = 1 - Pr(F > f)`.
28/// Consequently, the density becomes the first derivative, i.e. `Pr(F = f) = - M(f)' / n = mu/beta * 1/n * 1/f²` for f>=fmin
29/// with `fmin = sqrt(mu/beta * 1/n)`.
30///
31/// The prior probability for a germline allele frequency f (e.g. 0.0, 0.5, 1.0) in the tumor is
32/// calculated with an `InfiniteSitesNeutralVariationModel`. This is valid since clonal variants
33/// come from the underlying normal tissue and Williams model assumes that allele frequencies
34/// do not change during tumor evolution (no genetic drift, no selection).
35///
36/// For the final prior, we consider a given tumor purity and calculate the combined prior
37/// for all possible allele frequency combinations satisfying `af = purity * af_tumor + (1-purity) * af_normal`.
38pub struct TumorNormalModel {
39    pub normal_model: InfiniteSitesNeutralVariationModel,
40    effective_mutation_rate: f64,
41    deletion_factor: f64,
42    insertion_factor: f64,
43    genome_size: u64,
44    pub allele_freqs_tumor: ContinuousAlleleFreqs,
45    pub allele_freqs_normal: DiscreteAlleleFreqs,
46    pub grid_points: usize,
47    af_min: AlleleFreq,
48    ploidy: u32,
49}
50
51impl TumorNormalModel {
52    /// Create new model.
53    ///
54    /// # Arguments
55    ///
56    /// * `ploidy` - the ploidy in the corresponding normal sample (e.g. 2 for diploid)
57    /// * `effective_mutation_rate` - the SNV mutation rate per effective cell division in the tumor
58    /// * `deletion_factor` - ratio of deletions compared to SNV mutation rate
59    /// * `insertion_factor` - ratio of insertions compared to SNV mutation rate
60    /// * `genome_size` - the size of the genome
61    /// * `heterozygosity` - expected heterozygosity in the corresponding normal
62    pub fn new(
63        ploidy: u32,
64        effective_mutation_rate: f64,
65        deletion_factor: f64,
66        insertion_factor: f64,
67        genome_size: u64,
68        heterozygosity: Prob,
69    ) -> Self {
70        assert!(effective_mutation_rate < genome_size as f64);
71        let af_min = AlleleFreq((effective_mutation_rate / genome_size as f64).sqrt());
72
73        TumorNormalModel {
74            normal_model: InfiniteSitesNeutralVariationModel::new(1, ploidy, heterozygosity),
75            effective_mutation_rate: effective_mutation_rate,
76            deletion_factor: deletion_factor,
77            insertion_factor: insertion_factor,
78            genome_size: genome_size,
79            allele_freqs_tumor: ContinuousAlleleFreqs::inclusive(0.0..1.0),
80            // TODO make max_amplification parameter (=1) configurable and consider it properly
81            // in the normal model.
82            allele_freqs_normal: DiscreteAlleleFreqs::feasible(ploidy),
83            grid_points: 51,
84            af_min: af_min,
85            ploidy: ploidy,
86        }
87    }
88
89    pub fn somatic_prior_prob(&self, af_somatic: AlleleFreq, variant: &Variant) -> LogProb {
90        // af_somatic can become negative, meaning that at some point a variant from normal was lost
91        // in one cell (LOH!!). Again, the frequency corresponds to time in tumor evolution since the model
92        // assumes that all frequencies stay constant. Hence, we can simply take the absolute value
93        // of af_somatic here. This is equivalent to calculating
94        // af_tumor = af_normal - af_somatic
95        // for that case.
96        let af_somatic = af_somatic.abs();
97
98        // mu/beta * 1 / (af**2 * n)
99        if af_somatic <= *self.af_min {
100            return LogProb::ln_one();
101        }
102
103        // adjust effective mutation rate by type-specific factor
104        let factor = match variant {
105            &Variant::Deletion(_) => self.deletion_factor.ln(),
106            &Variant::Insertion(_) => self.insertion_factor.ln(),
107            &Variant::SNV(_) => 0.0, // no factor for SNVs
108            &Variant::None => 0.0,   // no factor for potential homozygous reference sites
109        };
110
111        LogProb(
112            self.effective_mutation_rate.ln() + factor
113                - (2.0 * af_somatic.ln() + (self.genome_size as f64).ln()),
114        )
115    }
116
117    pub fn normal_prior_prob(&self, af_normal: AlleleFreq, _: &Variant) -> LogProb {
118        let m = *af_normal * self.ploidy as f64;
119        if relative_eq!(m % 1.0, 0.0) {
120            // if m is discrete
121            self.normal_model.prior_prob(m.round() as u32)
122        } else {
123            // invalid allele frequency
124            LogProb::ln_zero()
125        }
126    }
127}
128
129impl PairModel<ContinuousAlleleFreqs, DiscreteAlleleFreqs> for TumorNormalModel {
130    fn prior_prob(
131        &self,
132        af_tumor: AlleleFreq,
133        af_normal: AlleleFreq,
134        variant: &Variant,
135    ) -> LogProb {
136        // af_tumor = af_normal + af_somatic
137        let af_somatic = af_tumor - af_normal;
138        let p = self.somatic_prior_prob(af_somatic, variant)
139            + self.normal_prior_prob(af_normal, variant);
140        assert!(*p <= 0.0);
141        p
142    }
143
144    fn joint_prob(
145        &self,
146        af_tumor: &ContinuousAlleleFreqs,
147        af_normal: &DiscreteAlleleFreqs,
148        pileup: &mut PairPileup<ContinuousAlleleFreqs, DiscreteAlleleFreqs, Self>,
149    ) -> LogProb {
150        let prob = LogProb::ln_sum_exp(
151            &af_normal
152                .iter()
153                .map(|&af_normal| {
154                    let p_tumor: LogProb;
155                    {
156                        let mut density = |af_tumor| {
157                            let af_tumor = AlleleFreq(af_tumor);
158                            self.prior_prob(af_tumor, af_normal, &pileup.variant)
159                                + pileup.case_likelihood(af_tumor, Some(af_normal))
160                        };
161
162                        p_tumor = if af_tumor.start == af_tumor.end {
163                            density(*af_tumor.start)
164                        } else {
165                            LogProb::ln_simpsons_integrate_exp(
166                                density,
167                                *af_tumor.start,
168                                *af_tumor.end,
169                                self.grid_points,
170                            )
171                        };
172                    }
173                    let p_normal = pileup.control_likelihood(af_normal, None);
174                    println!(
175                        "prior model probs: af={} {:?} {:?}",
176                        af_normal, p_tumor, p_normal
177                    );
178                    let prob = p_tumor + p_normal;
179
180                    prob
181                })
182                .collect_vec(),
183        );
184
185        prob
186    }
187
188    fn marginal_prob(
189        &self,
190        pileup: &mut PairPileup<ContinuousAlleleFreqs, DiscreteAlleleFreqs, Self>,
191    ) -> LogProb {
192        let p = self
193            .joint_prob(self.allele_freqs().0, self.allele_freqs().1, pileup)
194            .ln_add_exp(
195                // add prob for allele frequency zero (the density is non-continuous there)
196                self.joint_prob(
197                    &ContinuousAlleleFreqs::inclusive(0.0..0.0),
198                    &DiscreteAlleleFreqs::new(vec![AlleleFreq(0.0)]),
199                    pileup,
200                ),
201            );
202        p
203    }
204
205    fn map(
206        &self,
207        pileup: &mut PairPileup<ContinuousAlleleFreqs, DiscreteAlleleFreqs, Self>,
208    ) -> (AlleleFreq, AlleleFreq) {
209        let af_case = linspace(
210            *self.allele_freqs_tumor.start,
211            *self.allele_freqs_tumor.end,
212            self.grid_points,
213        );
214        let (_, (map_normal, map_tumor)) = self
215            .allele_freqs_normal
216            .iter()
217            .cartesian_product(af_case)
218            .minmax_by_key(|&(&af_normal, af_tumor)| {
219                let af_tumor = AlleleFreq(af_tumor);
220                let p = self.prior_prob(af_tumor, af_normal, &pileup.variant)
221                    + pileup.case_likelihood(af_tumor, Some(af_normal))
222                    + pileup.control_likelihood(af_normal, None);
223                NotNan::new(*p).expect("posterior probability is NaN")
224            })
225            .into_option()
226            .expect("prior has empty allele frequency spectrum");
227
228        (AlleleFreq(map_tumor), *map_normal)
229    }
230
231    fn allele_freqs(&self) -> (&ContinuousAlleleFreqs, &DiscreteAlleleFreqs) {
232        (&self.allele_freqs_tumor, &self.allele_freqs_normal)
233    }
234}
235
236#[cfg(test)]
237mod tests {
238    use super::*;
239    use bio::stats::{LogProb, Prob};
240    use itertools_num::linspace;
241    use model::priors::tests::create_obs_vector;
242    use model::priors::PairModel;
243    use model::{likelihood, AlleleFreq, ContinuousAlleleFreqs, PairPileup, Variant};
244
245    #[test]
246    fn print_priors() {
247        let variant = Variant::SNV(b'A');
248        let model = TumorNormalModel::new(2, 30000.0, 0.5, 0.5, 3e9 as u64, Prob(1.25E-4));
249        for af_normal in &[0.0, 0.5, 1.0] {
250            println!("af_normal={}:", af_normal);
251            print!("[");
252            for p in linspace(0.0, 1.0, 20).map(|af_tumor| {
253                model.prior_prob(AlleleFreq(af_tumor), AlleleFreq(*af_normal), &variant)
254            }) {
255                print!("{}, ", p.exp());
256            }
257            println!("]");
258        }
259    }
260
261    #[test]
262    fn test_tnm_het_zero() {
263        let heterozygosity = Prob(0.0); //Prob(1.25E-4);
264        let model = TumorNormalModel::new(2, 3000.0, 0.5, 0.5, 3e9 as u64, heterozygosity);
265
266        // tumor and normal both hom ref
267        let af_tumor = ContinuousAlleleFreqs::inclusive(0.0..0.0);
268        let af_normal = DiscreteAlleleFreqs::new(vec![AlleleFreq(0.0)]);
269
270        let variant = Variant::SNV(b'T');
271
272        let tumor_sample_model = likelihood::LatentVariableModel::new(1.0);
273        let normal_sample_model = likelihood::LatentVariableModel::new(1.0);
274
275        let tumor_obs = create_obs_vector(5, 0);
276        let normal_obs = create_obs_vector(5, 0);
277
278        let mut pileup = PairPileup::new(
279            tumor_obs.clone(),
280            normal_obs.clone(),
281            variant.clone(),
282            &model,
283            tumor_sample_model,
284            normal_sample_model,
285        );
286        assert_eq!(
287            model.prior_prob(af_tumor.start, af_normal[0], &variant),
288            LogProb::ln_one()
289        );
290        assert_eq!(pileup.joint_prob(&af_tumor, &af_normal), LogProb::ln_one());
291        assert_relative_eq!(
292            pileup.posterior_prob(&af_tumor, &af_normal).exp(),
293            1.0,
294            epsilon = 0.008
295        );
296        assert_eq!(
297            pileup.map_allele_freqs(),
298            (AlleleFreq(0.0), AlleleFreq(0.0))
299        );
300    }
301
302    #[test]
303    fn test_tnm_het_real() {
304        let heterozygosity = Prob(1.25E-4);
305        let model = TumorNormalModel::new(2, 3000.0, 0.5, 0.5, 3e9 as u64, heterozygosity);
306
307        // tumor and normal both hom ref
308        let af_tumor = ContinuousAlleleFreqs::inclusive(0.0..0.0);
309        let af_normal = DiscreteAlleleFreqs::new(vec![AlleleFreq(0.0)]);
310
311        let variant = Variant::SNV(b'T');
312
313        let tumor_sample_model = likelihood::LatentVariableModel::new(1.0);
314        let normal_sample_model = likelihood::LatentVariableModel::with_single_sample();
315
316        let tumor_obs = create_obs_vector(5, 0);
317        let normal_obs = create_obs_vector(5, 0);
318
319        let mut pileup = PairPileup::new(
320            tumor_obs.clone(),
321            normal_obs.clone(),
322            variant.clone(),
323            &model,
324            tumor_sample_model,
325            normal_sample_model,
326        );
327
328        // priors assuming: heterozygosity = 1.25E-4, ploidy = 2
329        let normal_prior_prob = 0.9998125;
330        println!(
331            "TNM.somatic_prior_prob(af_tumor.start = {}): {}",
332            af_tumor.start,
333            model.somatic_prior_prob(af_tumor.start, &variant).exp()
334        );
335        assert_eq!(
336            model.somatic_prior_prob(af_tumor.start, &variant),
337            LogProb::ln_one()
338        );
339        println!(
340            "TNM.normal_prior_prob(af_normal[0] = {}): {}",
341            af_normal[0],
342            model.normal_prior_prob(af_normal[0], &variant).exp()
343        );
344        assert_relative_eq!(
345            model.normal_prior_prob(af_normal[0], &variant).exp(),
346            normal_prior_prob,
347            epsilon = 0.000001
348        );
349        println!(
350            "TNM.prior_prob(af_tumor.start = {}, af_normal[0] = {}): {}",
351            af_tumor.start,
352            af_normal[0],
353            model
354                .prior_prob(af_tumor.start, af_normal[0], &variant)
355                .exp()
356        );
357        assert_relative_eq!(
358            model
359                .prior_prob(af_tumor.start, af_normal[0], &variant)
360                .exp(),
361            normal_prior_prob,
362            epsilon = 0.000001
363        );
364        let aft_full = model.allele_freqs().0;
365        let afn_full = model.allele_freqs().1;
366        assert_relative_eq!(
367            pileup.joint_prob(&af_tumor, &af_normal).exp(),
368            normal_prior_prob,
369            epsilon = 0.0000001
370        );
371        println!(
372            "pileup.joint_prob(af_tumor, af_normal): {}",
373            pileup.joint_prob(&af_tumor, &af_normal).exp()
374        );
375        println!(
376            "pileup.joint_prob(full spectrum): {}",
377            pileup.joint_prob(&aft_full, &afn_full).exp()
378        );
379        println!("pileup.marginal_prob: {}", pileup.marginal_prob().exp());
380        println!(
381            "pileup.posterior_prob: {}",
382            pileup.posterior_prob(&af_tumor, &af_normal).exp()
383        );
384        assert_relative_eq!(
385            pileup.posterior_prob(&af_tumor, &af_normal).exp(),
386            0.9933008088509733,
387            epsilon = 0.00000001
388        );
389        assert_eq!(
390            pileup.map_allele_freqs(),
391            (AlleleFreq(0.0), AlleleFreq(0.0))
392        );
393    }
394}