ecdna_lib/
abc.rs

1use anyhow::{ensure, Context};
2use hdrhistogram::Histogram;
3use serde::{ser::SerializeStruct, Deserialize, Serialize, Serializer};
4use std::{fs, path::Path};
5
6use derive_builder::Builder;
7
8use crate::distribution::EcDNADistribution;
9
10#[derive(Debug, Deserialize)]
11pub struct ABCResultFitness {
12    pub result: ABCResult,
13    pub rates: Vec<f32>,
14}
15
16impl Serialize for ABCResultFitness {
17    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
18    where
19        S: Serializer,
20    {
21        let mut state = serializer.serialize_struct("ABCResultFitness", 12)?;
22        state.serialize_field("idx", &self.result.idx)?;
23        state.serialize_field("mean", &self.result.mean)?;
24        state.serialize_field("mean_stat", &self.result.mean_stat)?;
25        state.serialize_field("frequency", &self.result.frequency)?;
26        state.serialize_field("frequency_stat", &self.result.frequency_stat)?;
27        state.serialize_field("entropy", &self.result.entropy)?;
28        state.serialize_field("entropy_stat", &self.result.entropy_stat)?;
29        state.serialize_field("ecdna_stat", &self.result.ecdna_stat)?;
30        state.serialize_field("kmax", &self.result.kmax)?;
31        state.serialize_field("kmax_stat", &self.result.kmax_stat)?;
32        state.serialize_field("pop_size", &self.result.pop_size)?;
33        state.serialize_field("b0", &self.rates[0])?;
34        state.serialize_field("b1", &self.rates[1])?;
35        if self.rates.len() > 2 {
36            state.serialize_field("d0", &self.rates[2])?;
37            state.serialize_field("d1", &self.rates[3])?;
38            if self.rates.len() > 4 {
39                unreachable!()
40            }
41        } else {
42            state.serialize_field("d0", &0f32)?;
43            state.serialize_field("d1", &0f32)?;
44        }
45        state.serialize_field("dropped_nminus", &self.result.dropped_nminus)?;
46
47        state.end()
48    }
49}
50
51pub struct ABCResultsFitness(pub Vec<ABCResultFitness>);
52
53impl ABCResultsFitness {
54    pub fn save(self, path2folder: &Path, verbosity: u8) -> anyhow::Result<()> {
55        fs::create_dir_all(path2folder).with_context(|| "Cannot create dir".to_string())?;
56
57        let mut abc = path2folder.join("abc");
58        abc.set_extension("csv");
59        if verbosity > 1 {
60            println!("Saving ABC results to {:#?}", abc);
61        }
62        let mut wtr = csv::Writer::from_path(abc)?;
63
64        for res in self.0 {
65            if verbosity > 0 {
66                println!(
67                    "Statistics of run {}: Mean: {}, Freq: {}, Entropy: {}",
68                    res.result.idx, res.result.mean, res.result.frequency, res.result.entropy
69                );
70            }
71            wtr.serialize(&res)
72                .with_context(|| "Cannot serialize the results from ABC inference".to_string())?;
73        }
74        wtr.flush()?;
75        Ok(())
76    }
77
78    pub fn from_csv(path2csv: &Path) -> anyhow::Result<Self> {
79        ensure!(path2csv.is_file());
80        ensure!(path2csv.extension().unwrap() == "csv");
81
82        let mut records: Vec<ABCResultFitness> = Vec::new();
83
84        let mut rdr = csv::Reader::from_path(path2csv)
85            .with_context(|| format!("cannot open csv file {:#?}", path2csv))?;
86        for result in rdr.deserialize() {
87            let record = result?;
88            records.push(record);
89        }
90        Ok(ABCResultsFitness(records))
91    }
92}
93
94/// The data used to perform ABC.
95#[derive(Debug)]
96pub struct Data {
97    pub distribution: Option<EcDNADistribution>,
98    pub mean: Option<f32>,
99    pub frequency: Option<f32>,
100    pub entropy: Option<f32>,
101}
102
103/// Perform the ABC rejection algorithm for one run to infer the most probable
104/// values of the rates based on the patient's data.
105///
106/// ABC infer the most probable values of the birth-death rates (proliferation
107/// rates and death rates) by comparing the summary statistics of the run
108/// generated by the birth-death process against the summary statistics of the
109/// patient's data.
110///
111/// When testing multiple statistics with ABC, save runs only if all statistics
112/// pass the tests
113pub struct ABCRejection;
114
115impl ABCRejection {
116    pub fn run(
117        mut builder: ABCResultBuilder,
118        distribution: &EcDNADistribution,
119        target: &Data,
120        drop_nminus: bool,
121        verbosity: u8,
122    ) -> ABCResult {
123        //! Run the ABC rejection method by comparing a ecDNA `distribution`
124        //! against the `target`.
125        let pop_size = distribution.get_nminus() + distribution.compute_nplus();
126        builder.pop_size(pop_size);
127        if let Some(target_distribution) = &target.distribution {
128            if target_distribution.compute_nplus() <= 7 || distribution.compute_nplus() <= 7 {
129                if verbosity > 0 {
130                    println!(
131                        "Not enough samples in the distributions: {} and {}",
132                        target_distribution.compute_nplus(),
133                        distribution.compute_nplus()
134                    );
135                }
136            } else {
137                builder.ecdna_stat(Some(target_distribution.ks_distance(distribution)));
138            }
139        };
140
141        let frequency = distribution.compute_frequency();
142        builder.frequency(frequency);
143        let frequency_stat = target.frequency.as_ref().map(|target_frequency| {
144            if !drop_nminus {
145                relative_change(target_frequency, &frequency)
146            } else {
147                f32::NAN
148            }
149        });
150        builder.frequency_stat(frequency_stat);
151
152        let entropy = distribution.compute_entropy();
153        builder.entropy(entropy);
154        let entropy_stat = target
155            .entropy
156            .as_ref()
157            .map(|target_entropy| relative_change(target_entropy, &entropy));
158        builder.entropy_stat(entropy_stat);
159
160        let k_upper_bound = 4000;
161        let mut hist = Histogram::<u64>::new_with_max(k_upper_bound, 2).unwrap();
162        for (ecdna, nb_cells) in distribution.create_histogram().into_iter() {
163            hist.record_n(ecdna as u64, nb_cells)
164                .expect("should be in range");
165        }
166
167        let mean = hist.mean() as f32;
168        builder.mean(mean);
169        let mean_stat = target
170            .mean
171            .as_ref()
172            .map(|target_mean| relative_change(target_mean, &mean));
173        builder.mean_stat(mean_stat);
174
175        if let Some(target_distribution) = target.distribution.as_ref() {
176            // take the quantile such that there are 10 cells in the quantile
177            let quantile_target = Self::compute_quantile_to_get_10_cells(
178                *target_distribution.get_nminus() + target_distribution.compute_nplus(),
179            );
180            // different quantiles because we dont assume same population size
181            let quantile = Self::compute_quantile_to_get_10_cells(
182                *distribution.get_nminus() + distribution.compute_nplus(),
183            );
184
185            let mut hist_target = Histogram::<u64>::new_with_max(k_upper_bound, 2).unwrap();
186            for (ecdna, nb_cells) in target_distribution.create_histogram().into_iter() {
187                hist_target
188                    .record_n(ecdna as u64, nb_cells)
189                    .expect("should be in range");
190            }
191            // else it means no cells left
192            if let Some(q) = quantile {
193                let value_at_q = hist.value_at_quantile(q) as u16;
194                builder.kmax(Some(value_at_q));
195
196                if let Some(q_target) = quantile_target {
197                    builder.kmax_stat(Some(relative_change(
198                        &(hist_target.value_at_quantile(q_target) as f32),
199                        &(value_at_q as f32),
200                    )));
201                }
202            }
203        }
204
205        if verbosity > 0 {
206            println!(
207                "The stats are: ks:{:#?}, mean: {:#?}, freq: {:#?}, entropy: {:#?}, k_max: {:#?}",
208                builder.ecdna_stat.unwrap_or(None),
209                mean_stat,
210                frequency_stat,
211                entropy_stat,
212                builder.kmax_stat.unwrap_or(None)
213            );
214        }
215        builder.dropped_nminus(drop_nminus);
216
217        builder.build().expect("Cannot build ABC results")
218    }
219
220    fn compute_quantile_to_get_10_cells(nb_cells: u64) -> Option<f64> {
221        //! Returns `None` if `nb_cells` is 0;
222        //! returns 0.999 if `nb_cells` <= `10`;
223        //! else returns Some 1 - 10/`nb_cells`.
224        if nb_cells == 0u64 {
225            return None;
226        } else if nb_cells <= 10 {
227            return Some(0.999);
228        }
229        Some(1. - 10f64 / (nb_cells as f64))
230    }
231}
232
233#[derive(Builder, Debug, Deserialize)]
234pub struct ABCResult {
235    pub idx: usize,
236    pub mean: f32,
237    #[builder(default)]
238    pub mean_stat: Option<f32>,
239    pub frequency: f32,
240    #[builder(default)]
241    pub frequency_stat: Option<f32>,
242    pub entropy: f32,
243    #[builder(default)]
244    pub entropy_stat: Option<f32>,
245    #[builder(default)]
246    pub ecdna_stat: Option<f32>,
247    #[builder(default)]
248    pub kmax: Option<u16>,
249    #[builder(default)]
250    pub kmax_stat: Option<f32>,
251    pub pop_size: u64,
252    pub dropped_nminus: bool,
253}
254
255/// Relative change between two scalars
256pub fn relative_change(x1: &f32, &x2: &f32) -> f32 {
257    (x1 - x2).abs() / x1
258}
259
260#[cfg(test)]
261mod tests {
262    use quickcheck_macros::quickcheck;
263
264    use crate::test_util::NonEmptyDistribtionWithNPlusCells;
265
266    use super::*;
267
268    #[quickcheck]
269    fn abc_run_small_sample_with_nminus_target(
270        distribution: NonEmptyDistribtionWithNPlusCells,
271        drop_nminus: bool,
272    ) -> bool {
273        let target_distr = EcDNADistribution::from(vec![1, 2, 0]);
274        let mut builder = ABCResultBuilder::default();
275        builder.idx(1);
276        let target = Data {
277            distribution: Some(target_distr),
278            mean: None,
279            frequency: None,
280            entropy: None,
281        };
282        let results = ABCRejection::run(builder, &distribution.0, &target, drop_nminus, 0);
283        results.ecdna_stat.is_none() && results.dropped_nminus == drop_nminus
284    }
285
286    #[quickcheck]
287    fn abc_run_small_sample_with_nminus(
288        target_distr: NonEmptyDistribtionWithNPlusCells,
289        drop_nminus: bool,
290    ) -> bool {
291        let distribution = EcDNADistribution::from(vec![1, 2, 0]);
292        let mut builder = ABCResultBuilder::default();
293        builder.idx(1);
294        let target = Data {
295            distribution: Some(target_distr.0),
296            mean: None,
297            frequency: None,
298            entropy: None,
299        };
300        let results = ABCRejection::run(builder, &distribution, &target, drop_nminus, 0);
301        results.ecdna_stat.is_none() && results.dropped_nminus == drop_nminus
302    }
303
304    #[quickcheck]
305    fn abc_run_test(
306        distribution: NonEmptyDistribtionWithNPlusCells,
307        idx: usize,
308        drop_nminus: bool,
309    ) -> bool {
310        let mut builder = ABCResultBuilder::default();
311        builder.idx(idx);
312        let mean = distribution.0.compute_mean();
313        let frequency = distribution.0.compute_frequency();
314        let entropy = distribution.0.compute_entropy();
315        builder.mean(mean);
316        builder.frequency(frequency);
317        builder.entropy(entropy);
318        let target = Data {
319            distribution: Some(distribution.0.clone()),
320            mean: Some(mean),
321            frequency: Some(frequency),
322            entropy: Some(entropy),
323        };
324
325        let results = ABCRejection::run(builder, &distribution.0, &target, drop_nminus, 0);
326        let freq_test = if !drop_nminus {
327            (results.frequency_stat.unwrap() - 0f32).abs() < f32::EPSILON
328        } else {
329            results.frequency_stat.unwrap().is_nan()
330        };
331        (results.ecdna_stat.unwrap() - 0f32).abs() < f32::EPSILON
332            && (results.mean_stat.unwrap() - 0f32).abs() < f32::EPSILON
333            && freq_test
334            && (results.entropy_stat.unwrap() - 0f32).abs() < 10f32 * f32::EPSILON
335            && results.dropped_nminus == drop_nminus
336            && (results.kmax_stat.unwrap() - 0f32).abs() < f32::EPSILON
337    }
338
339    #[quickcheck]
340    fn abc_run_no_distribution_test(
341        distribution: NonEmptyDistribtionWithNPlusCells,
342        idx: usize,
343        drop_nminus: bool,
344    ) -> bool {
345        let mut builder = ABCResultBuilder::default();
346        builder.idx(idx);
347        let mean = distribution.0.compute_mean();
348        let frequency = distribution.0.compute_frequency();
349        let entropy = distribution.0.compute_entropy();
350        builder.mean(mean);
351        builder.frequency(frequency);
352        builder.entropy(entropy);
353        let target = Data {
354            distribution: None,
355            mean: Some(mean),
356            frequency: Some(frequency),
357            entropy: Some(entropy),
358        };
359
360        let results = ABCRejection::run(builder, &distribution.0, &target, drop_nminus, 0);
361        let freq_test = if !drop_nminus {
362            (results.frequency_stat.unwrap() - 0f32).abs() < f32::EPSILON
363        } else {
364            results.frequency_stat.unwrap().is_nan()
365        };
366        results.ecdna_stat.is_none()
367            && (results.mean_stat.unwrap() - 0f32).abs() < f32::EPSILON
368            && freq_test
369            && (results.entropy_stat.unwrap() - 0f32).abs() < 10f32 * f32::EPSILON
370            && results.dropped_nminus == drop_nminus
371            && results.kmax_stat.is_none()
372    }
373
374    #[quickcheck]
375    fn abc_run_distribution_only_test(
376        distribution: NonEmptyDistribtionWithNPlusCells,
377        idx: usize,
378        drop_nminus: bool,
379    ) -> bool {
380        let mut builder = ABCResultBuilder::default();
381        builder.idx(idx);
382        let target = Data {
383            distribution: Some(distribution.0.clone()),
384            mean: None,
385            frequency: None,
386            entropy: None,
387        };
388
389        let results = ABCRejection::run(builder, &distribution.0, &target, drop_nminus, 0);
390        (results.ecdna_stat.unwrap() - 0f32).abs() < f32::EPSILON
391            && results.mean_stat.is_none()
392            && results.frequency_stat.is_none()
393            && results.entropy_stat.is_none()
394            && results.dropped_nminus == drop_nminus
395    }
396
397    #[quickcheck]
398    fn compute_quantile_to_get_10_cells_test_overflow(nb_cells: u64) -> bool {
399        let res = ABCRejection::compute_quantile_to_get_10_cells(nb_cells);
400        if nb_cells == 0 {
401            res.is_none()
402        } else {
403            res.unwrap().is_normal()
404        }
405    }
406
407    #[test]
408    fn compute_quantile_to_get_10_cells_test() {
409        assert!(
410            (ABCRejection::compute_quantile_to_get_10_cells(10).unwrap() - 0.999).abs()
411                < f64::EPSILON
412        );
413        assert!(
414            (ABCRejection::compute_quantile_to_get_10_cells(100).unwrap() - (1. - 0.1f64)).abs()
415                < f64::EPSILON
416        );
417        assert!(
418            (ABCRejection::compute_quantile_to_get_10_cells(1_000_000).unwrap()
419                - (1. - (1f64 / 100_000f64)))
420                .abs()
421                < f64::EPSILON
422        );
423    }
424}