1use anyhow::{ensure, Context};
2use hdrhistogram::Histogram;
3use serde::{ser::SerializeStruct, Deserialize, Serialize, Serializer};
4use std::{fs, path::Path};
5
6use derive_builder::Builder;
7
8use crate::distribution::EcDNADistribution;
9
10#[derive(Debug, Deserialize)]
11pub struct ABCResultFitness {
12 pub result: ABCResult,
13 pub rates: Vec<f32>,
14}
15
16impl Serialize for ABCResultFitness {
17 fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
18 where
19 S: Serializer,
20 {
21 let mut state = serializer.serialize_struct("ABCResultFitness", 12)?;
22 state.serialize_field("idx", &self.result.idx)?;
23 state.serialize_field("mean", &self.result.mean)?;
24 state.serialize_field("mean_stat", &self.result.mean_stat)?;
25 state.serialize_field("frequency", &self.result.frequency)?;
26 state.serialize_field("frequency_stat", &self.result.frequency_stat)?;
27 state.serialize_field("entropy", &self.result.entropy)?;
28 state.serialize_field("entropy_stat", &self.result.entropy_stat)?;
29 state.serialize_field("ecdna_stat", &self.result.ecdna_stat)?;
30 state.serialize_field("kmax", &self.result.kmax)?;
31 state.serialize_field("kmax_stat", &self.result.kmax_stat)?;
32 state.serialize_field("pop_size", &self.result.pop_size)?;
33 state.serialize_field("b0", &self.rates[0])?;
34 state.serialize_field("b1", &self.rates[1])?;
35 if self.rates.len() > 2 {
36 state.serialize_field("d0", &self.rates[2])?;
37 state.serialize_field("d1", &self.rates[3])?;
38 if self.rates.len() > 4 {
39 unreachable!()
40 }
41 } else {
42 state.serialize_field("d0", &0f32)?;
43 state.serialize_field("d1", &0f32)?;
44 }
45 state.serialize_field("dropped_nminus", &self.result.dropped_nminus)?;
46
47 state.end()
48 }
49}
50
51pub struct ABCResultsFitness(pub Vec<ABCResultFitness>);
52
53impl ABCResultsFitness {
54 pub fn save(self, path2folder: &Path, verbosity: u8) -> anyhow::Result<()> {
55 fs::create_dir_all(path2folder).with_context(|| "Cannot create dir".to_string())?;
56
57 let mut abc = path2folder.join("abc");
58 abc.set_extension("csv");
59 if verbosity > 1 {
60 println!("Saving ABC results to {:#?}", abc);
61 }
62 let mut wtr = csv::Writer::from_path(abc)?;
63
64 for res in self.0 {
65 if verbosity > 0 {
66 println!(
67 "Statistics of run {}: Mean: {}, Freq: {}, Entropy: {}",
68 res.result.idx, res.result.mean, res.result.frequency, res.result.entropy
69 );
70 }
71 wtr.serialize(&res)
72 .with_context(|| "Cannot serialize the results from ABC inference".to_string())?;
73 }
74 wtr.flush()?;
75 Ok(())
76 }
77
78 pub fn from_csv(path2csv: &Path) -> anyhow::Result<Self> {
79 ensure!(path2csv.is_file());
80 ensure!(path2csv.extension().unwrap() == "csv");
81
82 let mut records: Vec<ABCResultFitness> = Vec::new();
83
84 let mut rdr = csv::Reader::from_path(path2csv)
85 .with_context(|| format!("cannot open csv file {:#?}", path2csv))?;
86 for result in rdr.deserialize() {
87 let record = result?;
88 records.push(record);
89 }
90 Ok(ABCResultsFitness(records))
91 }
92}
93
94#[derive(Debug)]
96pub struct Data {
97 pub distribution: Option<EcDNADistribution>,
98 pub mean: Option<f32>,
99 pub frequency: Option<f32>,
100 pub entropy: Option<f32>,
101}
102
103pub struct ABCRejection;
114
115impl ABCRejection {
116 pub fn run(
117 mut builder: ABCResultBuilder,
118 distribution: &EcDNADistribution,
119 target: &Data,
120 drop_nminus: bool,
121 verbosity: u8,
122 ) -> ABCResult {
123 let pop_size = distribution.get_nminus() + distribution.compute_nplus();
126 builder.pop_size(pop_size);
127 if let Some(target_distribution) = &target.distribution {
128 if target_distribution.compute_nplus() <= 7 || distribution.compute_nplus() <= 7 {
129 if verbosity > 0 {
130 println!(
131 "Not enough samples in the distributions: {} and {}",
132 target_distribution.compute_nplus(),
133 distribution.compute_nplus()
134 );
135 }
136 } else {
137 builder.ecdna_stat(Some(target_distribution.ks_distance(distribution)));
138 }
139 };
140
141 let frequency = distribution.compute_frequency();
142 builder.frequency(frequency);
143 let frequency_stat = target.frequency.as_ref().map(|target_frequency| {
144 if !drop_nminus {
145 relative_change(target_frequency, &frequency)
146 } else {
147 f32::NAN
148 }
149 });
150 builder.frequency_stat(frequency_stat);
151
152 let entropy = distribution.compute_entropy();
153 builder.entropy(entropy);
154 let entropy_stat = target
155 .entropy
156 .as_ref()
157 .map(|target_entropy| relative_change(target_entropy, &entropy));
158 builder.entropy_stat(entropy_stat);
159
160 let k_upper_bound = 4000;
161 let mut hist = Histogram::<u64>::new_with_max(k_upper_bound, 2).unwrap();
162 for (ecdna, nb_cells) in distribution.create_histogram().into_iter() {
163 hist.record_n(ecdna as u64, nb_cells)
164 .expect("should be in range");
165 }
166
167 let mean = hist.mean() as f32;
168 builder.mean(mean);
169 let mean_stat = target
170 .mean
171 .as_ref()
172 .map(|target_mean| relative_change(target_mean, &mean));
173 builder.mean_stat(mean_stat);
174
175 if let Some(target_distribution) = target.distribution.as_ref() {
176 let quantile_target = Self::compute_quantile_to_get_10_cells(
178 *target_distribution.get_nminus() + target_distribution.compute_nplus(),
179 );
180 let quantile = Self::compute_quantile_to_get_10_cells(
182 *distribution.get_nminus() + distribution.compute_nplus(),
183 );
184
185 let mut hist_target = Histogram::<u64>::new_with_max(k_upper_bound, 2).unwrap();
186 for (ecdna, nb_cells) in target_distribution.create_histogram().into_iter() {
187 hist_target
188 .record_n(ecdna as u64, nb_cells)
189 .expect("should be in range");
190 }
191 if let Some(q) = quantile {
193 let value_at_q = hist.value_at_quantile(q) as u16;
194 builder.kmax(Some(value_at_q));
195
196 if let Some(q_target) = quantile_target {
197 builder.kmax_stat(Some(relative_change(
198 &(hist_target.value_at_quantile(q_target) as f32),
199 &(value_at_q as f32),
200 )));
201 }
202 }
203 }
204
205 if verbosity > 0 {
206 println!(
207 "The stats are: ks:{:#?}, mean: {:#?}, freq: {:#?}, entropy: {:#?}, k_max: {:#?}",
208 builder.ecdna_stat.unwrap_or(None),
209 mean_stat,
210 frequency_stat,
211 entropy_stat,
212 builder.kmax_stat.unwrap_or(None)
213 );
214 }
215 builder.dropped_nminus(drop_nminus);
216
217 builder.build().expect("Cannot build ABC results")
218 }
219
220 fn compute_quantile_to_get_10_cells(nb_cells: u64) -> Option<f64> {
221 if nb_cells == 0u64 {
225 return None;
226 } else if nb_cells <= 10 {
227 return Some(0.999);
228 }
229 Some(1. - 10f64 / (nb_cells as f64))
230 }
231}
232
233#[derive(Builder, Debug, Deserialize)]
234pub struct ABCResult {
235 pub idx: usize,
236 pub mean: f32,
237 #[builder(default)]
238 pub mean_stat: Option<f32>,
239 pub frequency: f32,
240 #[builder(default)]
241 pub frequency_stat: Option<f32>,
242 pub entropy: f32,
243 #[builder(default)]
244 pub entropy_stat: Option<f32>,
245 #[builder(default)]
246 pub ecdna_stat: Option<f32>,
247 #[builder(default)]
248 pub kmax: Option<u16>,
249 #[builder(default)]
250 pub kmax_stat: Option<f32>,
251 pub pop_size: u64,
252 pub dropped_nminus: bool,
253}
254
255pub fn relative_change(x1: &f32, &x2: &f32) -> f32 {
257 (x1 - x2).abs() / x1
258}
259
260#[cfg(test)]
261mod tests {
262 use quickcheck_macros::quickcheck;
263
264 use crate::test_util::NonEmptyDistribtionWithNPlusCells;
265
266 use super::*;
267
268 #[quickcheck]
269 fn abc_run_small_sample_with_nminus_target(
270 distribution: NonEmptyDistribtionWithNPlusCells,
271 drop_nminus: bool,
272 ) -> bool {
273 let target_distr = EcDNADistribution::from(vec![1, 2, 0]);
274 let mut builder = ABCResultBuilder::default();
275 builder.idx(1);
276 let target = Data {
277 distribution: Some(target_distr),
278 mean: None,
279 frequency: None,
280 entropy: None,
281 };
282 let results = ABCRejection::run(builder, &distribution.0, &target, drop_nminus, 0);
283 results.ecdna_stat.is_none() && results.dropped_nminus == drop_nminus
284 }
285
286 #[quickcheck]
287 fn abc_run_small_sample_with_nminus(
288 target_distr: NonEmptyDistribtionWithNPlusCells,
289 drop_nminus: bool,
290 ) -> bool {
291 let distribution = EcDNADistribution::from(vec![1, 2, 0]);
292 let mut builder = ABCResultBuilder::default();
293 builder.idx(1);
294 let target = Data {
295 distribution: Some(target_distr.0),
296 mean: None,
297 frequency: None,
298 entropy: None,
299 };
300 let results = ABCRejection::run(builder, &distribution, &target, drop_nminus, 0);
301 results.ecdna_stat.is_none() && results.dropped_nminus == drop_nminus
302 }
303
304 #[quickcheck]
305 fn abc_run_test(
306 distribution: NonEmptyDistribtionWithNPlusCells,
307 idx: usize,
308 drop_nminus: bool,
309 ) -> bool {
310 let mut builder = ABCResultBuilder::default();
311 builder.idx(idx);
312 let mean = distribution.0.compute_mean();
313 let frequency = distribution.0.compute_frequency();
314 let entropy = distribution.0.compute_entropy();
315 builder.mean(mean);
316 builder.frequency(frequency);
317 builder.entropy(entropy);
318 let target = Data {
319 distribution: Some(distribution.0.clone()),
320 mean: Some(mean),
321 frequency: Some(frequency),
322 entropy: Some(entropy),
323 };
324
325 let results = ABCRejection::run(builder, &distribution.0, &target, drop_nminus, 0);
326 let freq_test = if !drop_nminus {
327 (results.frequency_stat.unwrap() - 0f32).abs() < f32::EPSILON
328 } else {
329 results.frequency_stat.unwrap().is_nan()
330 };
331 (results.ecdna_stat.unwrap() - 0f32).abs() < f32::EPSILON
332 && (results.mean_stat.unwrap() - 0f32).abs() < f32::EPSILON
333 && freq_test
334 && (results.entropy_stat.unwrap() - 0f32).abs() < 10f32 * f32::EPSILON
335 && results.dropped_nminus == drop_nminus
336 && (results.kmax_stat.unwrap() - 0f32).abs() < f32::EPSILON
337 }
338
339 #[quickcheck]
340 fn abc_run_no_distribution_test(
341 distribution: NonEmptyDistribtionWithNPlusCells,
342 idx: usize,
343 drop_nminus: bool,
344 ) -> bool {
345 let mut builder = ABCResultBuilder::default();
346 builder.idx(idx);
347 let mean = distribution.0.compute_mean();
348 let frequency = distribution.0.compute_frequency();
349 let entropy = distribution.0.compute_entropy();
350 builder.mean(mean);
351 builder.frequency(frequency);
352 builder.entropy(entropy);
353 let target = Data {
354 distribution: None,
355 mean: Some(mean),
356 frequency: Some(frequency),
357 entropy: Some(entropy),
358 };
359
360 let results = ABCRejection::run(builder, &distribution.0, &target, drop_nminus, 0);
361 let freq_test = if !drop_nminus {
362 (results.frequency_stat.unwrap() - 0f32).abs() < f32::EPSILON
363 } else {
364 results.frequency_stat.unwrap().is_nan()
365 };
366 results.ecdna_stat.is_none()
367 && (results.mean_stat.unwrap() - 0f32).abs() < f32::EPSILON
368 && freq_test
369 && (results.entropy_stat.unwrap() - 0f32).abs() < 10f32 * f32::EPSILON
370 && results.dropped_nminus == drop_nminus
371 && results.kmax_stat.is_none()
372 }
373
374 #[quickcheck]
375 fn abc_run_distribution_only_test(
376 distribution: NonEmptyDistribtionWithNPlusCells,
377 idx: usize,
378 drop_nminus: bool,
379 ) -> bool {
380 let mut builder = ABCResultBuilder::default();
381 builder.idx(idx);
382 let target = Data {
383 distribution: Some(distribution.0.clone()),
384 mean: None,
385 frequency: None,
386 entropy: None,
387 };
388
389 let results = ABCRejection::run(builder, &distribution.0, &target, drop_nminus, 0);
390 (results.ecdna_stat.unwrap() - 0f32).abs() < f32::EPSILON
391 && results.mean_stat.is_none()
392 && results.frequency_stat.is_none()
393 && results.entropy_stat.is_none()
394 && results.dropped_nminus == drop_nminus
395 }
396
397 #[quickcheck]
398 fn compute_quantile_to_get_10_cells_test_overflow(nb_cells: u64) -> bool {
399 let res = ABCRejection::compute_quantile_to_get_10_cells(nb_cells);
400 if nb_cells == 0 {
401 res.is_none()
402 } else {
403 res.unwrap().is_normal()
404 }
405 }
406
407 #[test]
408 fn compute_quantile_to_get_10_cells_test() {
409 assert!(
410 (ABCRejection::compute_quantile_to_get_10_cells(10).unwrap() - 0.999).abs()
411 < f64::EPSILON
412 );
413 assert!(
414 (ABCRejection::compute_quantile_to_get_10_cells(100).unwrap() - (1. - 0.1f64)).abs()
415 < f64::EPSILON
416 );
417 assert!(
418 (ABCRejection::compute_quantile_to_get_10_cells(1_000_000).unwrap()
419 - (1. - (1f64 / 100_000f64)))
420 .abs()
421 < f64::EPSILON
422 );
423 }
424}