1use std::collections::{BTreeMap, BTreeSet, HashMap};
2use std::convert::TryFrom;
3use std::fmt;
4use std::fs::File;
5use std::io::Read;
6use std::ops::{Deref, DerefMut};
7use std::path::Path;
8use std::sync::Mutex;
9
10use anyhow::{Context, Result};
11use vec_map::VecMap;
12
13pub(crate) mod formula;
14pub(crate) mod vaftree;
15
16use crate::errors;
17use crate::grammar::formula::FormulaTerminal;
18pub(crate) use crate::grammar::formula::{Formula, VAFRange, VAFSpectrum, VAFUniverse};
19pub(crate) use crate::grammar::vaftree::VAFTree;
20use crate::variants::model::{AlleleFreq, VariantType};
21use itertools::Itertools;
22use serde::{de, Deserializer};
23
24#[derive(Clone, Debug)]
27pub(crate) struct SampleInfo<T> {
28 inner: Vec<T>,
29}
30
31impl<T> SampleInfo<T> {
32 pub(crate) fn map<U, F: Fn(&T) -> U>(&self, f: F) -> SampleInfo<U> {
34 SampleInfo {
35 inner: self.inner.iter().map(f).collect(),
36 }
37 }
38
39 pub(crate) fn iter_mut(&mut self) -> impl Iterator<Item = &mut T> {
40 self.inner.iter_mut()
41 }
42}
43
44impl<T> SampleInfo<Option<T>> {
45 pub(crate) fn first_not_none(&self) -> Result<&T> {
46 self.iter_not_none()
47 .next()
48 .ok_or_else(|| errors::Error::EmptyObservations.into())
49 }
50
51 pub(crate) fn first_not_none_mut(&mut self) -> Result<&mut T> {
52 self.iter_not_none_mut()
53 .next()
54 .ok_or_else(|| errors::Error::EmptyObservations.into())
55 }
56
57 pub(crate) fn iter_not_none(&self) -> impl Iterator<Item = &T> {
58 self.inner.iter().filter_map(|item| item.as_ref())
59 }
60
61 pub(crate) fn iter_not_none_mut(&mut self) -> impl Iterator<Item = &mut T> {
62 self.inner.iter_mut().filter_map(|item| item.as_mut())
63 }
64}
65
66impl<T> Default for SampleInfo<T> {
67 fn default() -> Self {
68 SampleInfo {
69 inner: Vec::default(),
70 }
71 }
72}
73
74impl<T> Deref for SampleInfo<T> {
75 type Target = Vec<T>;
76
77 fn deref(&self) -> &Self::Target {
78 &self.inner
79 }
80}
81
82impl<T> DerefMut for SampleInfo<T> {
83 fn deref_mut(&mut self) -> &mut Self::Target {
84 &mut self.inner
85 }
86}
87
88impl<T> From<Vec<T>> for SampleInfo<T> {
89 fn from(vec: Vec<T>) -> Self {
90 SampleInfo { inner: vec }
91 }
92}
93
94#[derive(new, Debug)]
96pub(crate) struct SampleInfoBuilder<T> {
97 #[new(default)]
98 inner: VecMap<T>,
99 sample_idx: HashMap<String, usize>,
100}
101
102impl<T> SampleInfoBuilder<T> {
103 pub(crate) fn push(mut self, sample_name: &str, value: T) -> Self {
104 let idx = *self
105 .sample_idx
106 .get(sample_name)
107 .expect("unknown sample name, it does not occur in the scenario");
108 self.inner.insert(idx, value);
109
110 self
111 }
112
113 pub(crate) fn build(self) -> SampleInfo<T> {
114 SampleInfo {
115 inner: self.inner.into_iter().map(|(_, v)| v).collect(),
116 }
117 }
118}
119
120#[derive(Derefable, PartialEq, Eq, PartialOrd, Ord, Debug, Clone, Hash, Deserialize)]
121pub(crate) struct ExpressionIdentifier(#[deref] String);
122
123impl std::fmt::Display for ExpressionIdentifier {
124 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
125 write!(f, "{}", self.0)
126 }
127}
128
129#[derive(Deserialize, Getters)]
130#[get = "pub(crate)"]
131#[serde(deny_unknown_fields)]
132pub(crate) struct Scenario {
133 #[serde(default)]
135 expressions: HashMap<ExpressionIdentifier, Formula>,
136 events: BTreeMap<String, Formula>,
138 samples: BTreeMap<String, Sample>,
140 #[serde(skip)]
141 sample_idx: Mutex<Option<HashMap<String, usize>>>,
142 #[serde(default)]
143 species: Option<Species>,
144}
145
146impl Scenario {
147 pub(crate) fn from_path<P: AsRef<Path>>(path: P) -> Result<Self> {
148 let mut scenario_content = String::new();
149 File::open(path)?.read_to_string(&mut scenario_content)?;
150
151 let mut scenario: Self = serde_yaml::from_str(&scenario_content)?;
152
153 let mut event_expressions = HashMap::new();
154
155 for (name, formula) in scenario.events() {
157 let identifier = ExpressionIdentifier(name.clone());
158 if !scenario.expressions().contains_key(&identifier) {
159 event_expressions.insert(identifier, formula.clone());
160 }
161 }
162 let absent_identifier = ExpressionIdentifier("absent".to_owned());
163 if !scenario.expressions.contains_key(&absent_identifier) {
164 event_expressions.insert(absent_identifier, Formula::absent(&scenario));
165 }
166 scenario.expressions.extend(event_expressions);
167 Ok(scenario)
168 }
169
170 pub(crate) fn variant_type_fractions(&self) -> VariantTypeFraction {
171 self.species()
172 .as_ref()
173 .map_or(VariantTypeFraction::default(), |species| {
174 species.variant_type_fractions().clone()
175 })
176 }
177
178 pub(crate) fn sample_info<T>(&self) -> SampleInfoBuilder<T> {
179 let mut sample_idx = self.sample_idx.lock().unwrap();
180 if sample_idx.is_none() {
181 sample_idx.get_or_insert(
182 self.samples()
183 .keys()
184 .enumerate()
185 .map(|(i, s)| (s.to_owned(), i))
186 .collect(),
187 );
188 }
189 SampleInfoBuilder::new(sample_idx.as_ref().unwrap().clone())
190 }
191
192 pub(crate) fn idx(&self, sample: &str) -> Option<usize> {
193 let mut sample_idx = self.sample_idx.lock().unwrap();
194 if sample_idx.is_none() {
195 sample_idx.get_or_insert(
196 self.samples()
197 .keys()
198 .enumerate()
199 .map(|(i, s)| (s.to_owned(), i))
200 .collect(),
201 );
202 }
203 sample_idx.as_ref().unwrap().get(sample).copied()
204 }
205
206 pub(crate) fn vaftrees(&self, contig: &str) -> Result<HashMap<String, VAFTree>> {
207 info!("Preprocessing events for contig {}", contig);
208 let trees = self
209 .events()
210 .iter()
211 .map(|(name, formula)| {
212 let normalized = formula
213 .normalize(self, contig)
214 .with_context(|| format!("invalid event definition for {}", name))?;
215 info!(" {}: {}", name, normalized);
216 let vaftree = VAFTree::new(&normalized, self, contig)?;
217 Ok((name.to_owned(), vaftree))
218 })
219 .collect();
220 self.validate(contig)?;
221 trees
222 }
223
224 pub(crate) fn validate(&self, contig: &str) -> Result<()> {
225 let names = self
226 .events()
227 .iter()
228 .filter(|(name, _)| *name != "absent")
229 .map(|(name, formula)| {
230 (
231 formula.normalize(self, contig).map(Formula::from).unwrap(),
234 name,
235 )
236 })
237 .into_group_map();
238 let mut overlapping = vec![];
239 let events: Vec<_> = names.keys().sorted().cloned().collect();
240 for (e1, e2) in events.iter().tuple_combinations() {
241 if e1 == e2 {
243 continue;
244 }
245 let terms = [e1, e2]
246 .iter()
247 .filter(|e| !matches!(e.to_terminal(), Some(FormulaTerminal::False)))
248 .map(|&v| v.clone())
249 .collect_vec();
250
251 if terms.len() != 2 {
253 continue;
254 }
255
256 let disjunction =
258 Formula::from(Formula::Disjunction { operands: terms }.normalize(self, contig)?);
259 if events.contains(&disjunction) {
260 overlapping.push((
261 names[e1].clone(),
262 names[e2].clone(),
263 names[&disjunction].clone(),
264 ));
265 }
266 }
267 if !overlapping.is_empty() {
268 return Err(crate::errors::Error::OverlappingEvents {
269 expressions: overlapping
270 .iter()
271 .map(|(a1, a2, f)| format!("({:?} | {:?}) = {:?}", a1, a2, f))
272 .join(", "),
273 }
274 .into());
275 } else {
276 Ok(())
277 }
278 }
279}
280
281impl<'a> TryFrom<&'a str> for Scenario {
282 type Error = serde_yaml::Error;
283
284 fn try_from(yaml: &str) -> Result<Self, Self::Error> {
285 serde_yaml::from_str(yaml)
286 }
287}
288
289#[derive(Deserialize)]
290#[serde(untagged)]
291pub(crate) enum PloidyDefinition {
292 Simple(u32),
293 Map(HashMap<String, u32>),
294}
295
296impl PloidyDefinition {
297 pub(crate) fn contig_ploidy(&self, contig: &str) -> Result<u32> {
298 Ok(match self {
299 PloidyDefinition::Simple(ploidy) => *ploidy,
300 PloidyDefinition::Map(map) => {
301 match map.get(contig) {
302 Some(ploidy) => *ploidy,
303 None => map.get("all").copied().ok_or_else(|| {
304 errors::Error::PloidyContigNotFound {
305 contig: contig.to_owned(),
306 }
307 })?,
308 }
309 }
310 })
311 }
312}
313
314#[derive(Deserialize)]
315#[serde(untagged)]
316pub(crate) enum SexPloidyDefinition {
317 Generic(PloidyDefinition),
318 Specific(HashMap<Sex, PloidyDefinition>),
319}
320
321impl SexPloidyDefinition {
322 pub(crate) fn contig_ploidy(&self, sex: Option<Sex>, contig: &str) -> Result<u32> {
323 match (self, sex) {
324 (SexPloidyDefinition::Generic(p), _) => p.contig_ploidy(contig),
325 (SexPloidyDefinition::Specific(p), Some(s)) => p.get(&s).map_or_else(
326 || {
327 Err(errors::Error::InvalidPriorConfiguration {
328 msg: format!("ploidy definition for {} not found", s),
329 }
330 .into())
331 },
332 |p| p.contig_ploidy(contig),
333 ),
334 (SexPloidyDefinition::Specific(_), None) => {
335 Err(errors::Error::InvalidPriorConfiguration {
336 msg: "sex specific ploidy definition found but no sex specified in sample"
337 .to_owned(),
338 }
339 .into())
340 }
341 }
342 }
343}
344
345#[derive(Deserialize, Getters)]
346#[get = "pub(crate)"]
347#[serde(deny_unknown_fields)]
348pub(crate) struct Species {
349 #[serde(default)]
350 heterozygosity: Option<f64>,
351 #[serde(default, rename = "germline-mutation-rate")]
352 germline_mutation_rate: Option<f64>,
353 #[serde(default, rename = "somatic-effective-mutation-rate")]
354 somatic_effective_mutation_rate: Option<f64>,
355 #[serde(default, rename = "variant-fractions")]
356 variant_type_fractions: VariantTypeFraction,
357 #[serde(default)]
358 ploidy: Option<SexPloidyDefinition>,
359 #[serde(default, rename = "genome-size")]
360 #[allow(dead_code)]
361 genome_size: Option<f64>,
363}
364
365impl Species {
366 pub(crate) fn contig_ploidy(&self, contig: &str, sex: Option<Sex>) -> Result<Option<u32>> {
367 if let Some(ploidy) = &self.ploidy {
368 Ok(Some(ploidy.contig_ploidy(sex, contig)?))
369 } else {
370 Ok(None)
371 }
372 }
373}
374
375fn default_indel_fraction() -> f64 {
376 0.0125
377}
378
379fn default_mnv_fraction() -> f64 {
380 0.001
381}
382
383fn default_sv_fraction() -> f64 {
384 0.01
385}
386
387#[derive(Deserialize, Getters, Clone, Debug)]
393#[get = "pub(crate)"]
394#[serde(deny_unknown_fields)]
395pub(crate) struct VariantTypeFraction {
396 #[serde(default = "default_indel_fraction")]
397 indel: f64,
398 #[serde(default = "default_mnv_fraction")]
399 mnv: f64,
400 #[serde(default = "default_sv_fraction")]
401 sv: f64,
402}
403
404impl VariantTypeFraction {
405 pub(crate) fn get(&self, variant_type: &VariantType) -> f64 {
406 match variant_type {
407 VariantType::Insertion(_) | VariantType::Deletion(_) | VariantType::Replacement => {
408 self.indel
409 }
410 VariantType::Mnv => self.mnv,
411 VariantType::Inversion | VariantType::Breakend | VariantType::Duplication => self.sv,
412 _ => 1.0,
413 }
414 }
415}
416
417impl Default for VariantTypeFraction {
418 fn default() -> Self {
419 VariantTypeFraction {
420 indel: default_indel_fraction(),
421 mnv: default_mnv_fraction(),
422 sv: default_sv_fraction(),
423 }
424 }
425}
426
427fn default_resolution() -> Resolution {
428 Resolution(AlleleFreq(0.01))
429}
430
431#[derive(Derefable, Debug, Clone)]
432pub(crate) struct Resolution(#[deref] AlleleFreq);
433
434impl<'de> de::Deserialize<'de> for Resolution {
435 fn deserialize<D>(deserializer: D) -> Result<Resolution, D::Error>
436 where
437 D: Deserializer<'de>,
438 {
439 deserializer.deserialize_str(ResolutionVisitor)
440 }
441}
442
443struct ResolutionVisitor;
444
445impl<'de> de::Visitor<'de> for ResolutionVisitor {
446 type Value = Resolution;
447
448 fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
449 formatter.write_str(
450 "an allele frequency resolution given as floating point value between 0.0 and 1.0 (exclusive, e.g. 0.01, 0.1, etc.)",
451 )
452 }
453
454 fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
455 where
456 E: de::Error,
457 {
458 if let Ok(vaf) = v.parse::<f64>() {
459 if vaf > 0.0 && vaf < 1.0 {
460 return Ok(Resolution(AlleleFreq(vaf)));
461 }
462 }
463
464 Err(de::Error::invalid_value(
465 serde::de::Unexpected::Other("VAF resolution"),
466 &self,
467 ))
468 }
469}
470
471#[derive(Deserialize, Getters)]
472#[serde(deny_unknown_fields)]
473pub(crate) struct Sample {
474 #[get = "pub(crate)"]
476 contamination: Option<Contamination>,
477 #[serde(default = "default_resolution")]
479 #[get = "pub(crate)"]
480 resolution: Resolution,
481 #[serde(default)]
483 #[get = "pub(crate)"]
484 universe: Option<UniverseDefinition>,
485 #[serde(default, rename = "somatic-effective-mutation-rate")]
486 somatic_effective_mutation_rate: Option<f64>,
487 #[serde(default, rename = "germline-mutation-rate")]
488 germline_mutation_rate: Option<f64>,
489 #[serde(default)]
490 #[get = "pub(crate)"]
491 ploidy: Option<PloidyDefinition>,
492 #[get = "pub(crate)"]
493 inheritance: Option<Inheritance>,
494 #[serde(default)]
495 sex: Option<Sex>,
496}
497
498impl Sample {
499 pub(crate) fn has_uniform_prior(&self) -> bool {
500 self.universe.is_some()
501 }
502
503 pub(crate) fn contig_universe(
504 &self,
505 contig: &str,
506 species: &Option<Species>,
507 ) -> Result<VAFUniverse> {
508 if let Some(universe) = &self.universe {
509 Ok(match universe {
510 UniverseDefinition::Simple(ref universe) => universe.clone(),
511 UniverseDefinition::Map(ref map) => match map.get(contig) {
512 Some(universe) => universe.clone(),
513 None => map
514 .get("all")
515 .ok_or_else(|| errors::Error::UniverseContigNotFound {
516 contig: contig.to_owned(),
517 })?
518 .clone(),
519 },
520 })
521 } else {
522 let ploidy_derived_spectrum = |ploidy| -> BTreeSet<AlleleFreq> {
523 (0..=ploidy)
524 .map(|n_alt| {
525 if ploidy > 0 {
526 AlleleFreq(n_alt as f64 / ploidy as f64)
527 } else {
528 assert_eq!(n_alt, 0);
529 AlleleFreq(0.0)
530 }
531 })
532 .collect()
533 };
534 Ok(
535 match (
536 self.contig_ploidy(contig, species)?,
537 self.somatic_effective_mutation_rate.is_some(),
538 ) {
539 (Some(ploidy), false) => {
540 let mut universe = VAFUniverse::default();
541 universe.insert(VAFSpectrum::Set(ploidy_derived_spectrum(ploidy)));
542 universe
543 }
544 (Some(ploidy), true) => {
545 let ploidy_spectrum = ploidy_derived_spectrum(ploidy);
546
547 let mut universe = VAFUniverse::default();
548
549 let mut last = ploidy_spectrum.iter().next().unwrap();
550 for vaf in ploidy_spectrum.iter().skip(1) {
551 universe.insert(VAFSpectrum::Range(VAFRange::builder()
552 .inner(*last..*vaf)
553 .left_exclusive(true)
554 .right_exclusive(true)
555 .build()
556 ));
557 last = vaf;
558 }
559 universe.insert(VAFSpectrum::Set(ploidy_spectrum));
560 universe
561 }
562 (None, true) => {
563 let mut universe = VAFUniverse::default();
564 universe.insert(VAFSpectrum::Range(VAFRange::builder()
565 .inner(AlleleFreq(0.0)..AlleleFreq(1.0))
566 .left_exclusive(false)
567 .right_exclusive(false)
568 .build()
569 ));
570 universe
571 }
572 (None, false) => return Err(errors::Error::InvalidPriorConfiguration {
573 msg: "sample needs to define either universe, ploidy or somatic_mutation_rate".to_owned(),
574 }
575 .into()),
576 },
577 )
578 }
579 }
580
581 pub(crate) fn contig_ploidy(
582 &self,
583 contig: &str,
584 species: &Option<Species>,
585 ) -> Result<Option<u32>> {
586 if let Some(ploidy) = &self.ploidy {
587 Ok(Some(ploidy.contig_ploidy(contig)?))
588 } else {
589 species.as_ref().map_or(Ok(None), |species| {
590 species.contig_ploidy(contig, self.sex.clone())
591 })
592 }
593 }
594
595 pub(crate) fn germline_mutation_rate(&self, species: &Option<Species>) -> Option<f64> {
596 if let Some(rate) = self.germline_mutation_rate {
597 Some(rate)
598 } else {
599 species
600 .as_ref()
601 .and_then(|species| species.germline_mutation_rate)
602 }
603 }
604
605 pub(crate) fn somatic_effective_mutation_rate(&self, species: &Option<Species>) -> Option<f64> {
606 if let Some(rate) = self.somatic_effective_mutation_rate {
607 Some(rate)
608 } else {
609 species
610 .as_ref()
611 .and_then(|species| species.somatic_effective_mutation_rate)
612 }
613 }
614}
615
616#[derive(Deserialize, Getters)]
617#[get = "pub(crate)"]
618#[serde(deny_unknown_fields)]
619pub(crate) struct Contamination {
620 by: String,
622 fraction: f64,
624}
625
626#[derive(
627 Display, Debug, Clone, Serialize, Deserialize, IntoStaticStr, VariantNames, PartialEq, Eq, Hash,
628)]
629#[strum(serialize_all = "kebab_case")]
630pub(crate) enum Inheritance {
631 #[serde(rename = "mendelian")]
632 Mendelian { from: (String, String) },
633 #[serde(rename = "clonal")]
634 Clonal { from: String, somatic: bool },
635 #[serde(rename = "subclonal")]
636 Subclonal { from: String },
637}
638
639#[derive(
640 Display, Debug, Clone, Serialize, Deserialize, IntoStaticStr, VariantNames, PartialEq, Eq, Hash,
641)]
642#[serde(untagged, rename_all = "lowercase")]
643pub(crate) enum Sex {
644 Male,
645 Female,
646 Other(String),
647}
648
649#[derive(Deserialize, Debug)]
650#[serde(untagged)]
651pub(crate) enum UniverseDefinition {
652 Map(BTreeMap<String, VAFUniverse>),
653 Simple(VAFUniverse),
654}