1use std::cmp::{Ord, Ordering, PartialOrd};
2use std::collections::{BTreeSet, HashMap, HashSet, VecDeque};
3use std::fmt;
4use std::ops;
5
6use anyhow::Result;
7use boolean_expression::Expr;
8use itertools::Itertools;
9use pest::iterators::{Pair, Pairs};
10use pest::Parser;
11use serde::de;
12use serde::Deserialize;
13
14use crate::errors;
15use crate::grammar::{ExpressionIdentifier, Scenario};
16use crate::utils::comparison::ComparisonOperator;
17use crate::utils::log2_fold_change::Log2FoldChangePredicate;
18use crate::variants::model::AlleleFreq;
19
20#[derive(Shrinkwrap, Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
21pub(crate) struct Iupac(u8);
22
23impl Iupac {
24 pub(crate) fn contains(&self, base: u8) -> bool {
25 if base == **self {
26 return true;
27 }
28 match **self {
29 b'R' if base == b'A' || base == b'G' => true,
30 b'Y' if base == b'C' || base == b'T' => true,
31 b'S' if base == b'G' || base == b'C' => true,
32 b'W' if base == b'A' || base == b'T' => true,
33 b'K' if base == b'G' || base == b'T' => true,
34 b'M' if base == b'A' || base == b'C' => true,
35 b'B' if base == b'C' || base == b'G' || base == b'T' => true,
36 b'D' if base == b'A' || base == b'G' || base == b'T' => true,
37 b'H' if base == b'A' || base == b'C' || base == b'T' => true,
38 b'V' if base == b'A' || base == b'C' || base == b'G' => true,
39 b'N' => true,
40 _ => false,
41 }
42 }
43}
44
45impl std::fmt::Display for Iupac {
46 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
47 write!(f, "{}", std::char::from_u32(**self as u32).unwrap())
48 }
49}
50
51#[derive(Parser)]
52#[grammar = "grammar/formula.pest"]
53pub(crate) struct FormulaParser;
54
55#[derive(PartialEq, Eq, Clone, Debug, Hash, PartialOrd, Ord)]
56pub(crate) enum Formula {
57 Conjunction { operands: Vec<Formula> },
58 Disjunction { operands: Vec<Formula> },
59 Negation { operand: Box<Formula> },
60 Terminal(FormulaTerminal),
61}
62
63impl From<NormalizedFormula> for Formula {
64 fn from(formula: NormalizedFormula) -> Self {
65 fn from_normalized(formula: NormalizedFormula) -> Formula {
66 match formula {
67 NormalizedFormula::Conjunction { operands } => Formula::Conjunction {
68 operands: operands.into_iter().map(from_normalized).collect(),
69 },
70 NormalizedFormula::Disjunction { operands } => Formula::Disjunction {
71 operands: operands.into_iter().map(from_normalized).collect(),
72 },
73 NormalizedFormula::Atom { sample, vafs } => {
74 Formula::Terminal(FormulaTerminal::Atom { sample, vafs })
75 }
76 NormalizedFormula::Variant {
77 altbase,
78 positive,
79 refbase,
80 } => Formula::Terminal(FormulaTerminal::Variant {
81 altbase,
82 positive,
83 refbase,
84 }),
85 NormalizedFormula::False => Formula::Terminal(FormulaTerminal::False),
86 NormalizedFormula::True => Formula::Terminal(FormulaTerminal::True),
87 NormalizedFormula::Log2FoldChange {
88 sample_a,
89 sample_b,
90 predicate,
91 } => Formula::Terminal(FormulaTerminal::Log2FoldChange {
92 sample_a,
93 sample_b,
94 predicate,
95 }),
96 }
97 }
98 from_normalized(formula)
99 }
100}
101
102#[derive(PartialEq, Eq, Clone, Debug, Hash, PartialOrd, Ord)]
103pub(crate) enum FormulaTerminal {
104 Atom {
105 sample: String,
106 vafs: VAFSpectrum,
107 },
108 Variant {
109 positive: bool,
110 refbase: Iupac,
111 altbase: Iupac,
112 },
113 Expression {
114 identifier: ExpressionIdentifier,
115 negated: bool,
116 },
117 Log2FoldChange {
118 sample_a: String,
119 sample_b: String,
120 predicate: Log2FoldChangePredicate,
121 },
122 False,
123 True,
124}
125
126impl FormulaTerminal {
127 fn merge_conjunctions(&mut self, other: &FormulaTerminal) {
128 match (self, other) {
129 (
130 FormulaTerminal::Atom {
131 sample: sample_a,
132 vafs: ref mut vafs_a,
133 },
134 FormulaTerminal::Atom {
135 sample: sample_b,
136 vafs: vafs_b,
137 },
138 ) if sample_a == sample_b => match (&vafs_a, &vafs_b) {
139 (VAFSpectrum::Range(a), VAFSpectrum::Range(b)) => {
140 let intersection = a & b;
141 if intersection.is_singleton() {
142 *vafs_a = VAFSpectrum::singleton(a.start)
143 } else {
144 *vafs_a = VAFSpectrum::Range(a & b)
145 }
146 }
147 (VAFSpectrum::Range(a), VAFSpectrum::Set(b)) => {
148 *vafs_a = VAFSpectrum::Set(
149 b.iter().filter(|vaf| a.contains(**vaf)).cloned().collect(),
150 );
151 }
152 (VAFSpectrum::Set(a), VAFSpectrum::Range(b)) => {
153 *vafs_a = VAFSpectrum::Set(
154 a.iter().filter(|vaf| b.contains(**vaf)).cloned().collect(),
155 );
156 }
157 (VAFSpectrum::Set(a), VAFSpectrum::Set(b)) => {
158 *vafs_a = VAFSpectrum::Set(a.intersection(b).cloned().collect());
159 }
160 },
161 _ => {
162 panic!("bug: trying to merge FormulaTerminals that are not both atoms and for the same sample")
163 }
164 }
165 }
166
167 fn try_merge_disjunction(&self, other: &FormulaTerminal) -> Option<VAFSpectrum> {
171 match (self, other) {
172 (
173 FormulaTerminal::Atom {
174 sample: sample_a,
175 vafs: vafs_a,
176 },
177 FormulaTerminal::Atom {
178 sample: sample_b,
179 vafs: vafs_b,
180 },
181 ) if sample_a == sample_b => match (&vafs_a, &vafs_b) {
182 (VAFSpectrum::Range(a), VAFSpectrum::Range(b)) => match a.overlap(b) {
183 VAFRangeOverlap::None => None,
184 _ => Some(VAFSpectrum::Range((a | b).0)),
185 },
186 (VAFSpectrum::Range(a), VAFSpectrum::Set(b)) => {
187 if b.iter().all(|v| a.contains(*v)) {
188 Some(VAFSpectrum::Range(a.clone()))
189 } else {
190 None
191 }
192 }
193 (VAFSpectrum::Set(a), VAFSpectrum::Range(b)) => {
194 if a.iter().all(|v| b.contains(*v)) {
195 Some(VAFSpectrum::Range(b.clone()))
196 } else {
197 None
198 }
199 }
200 (VAFSpectrum::Set(a), VAFSpectrum::Set(b)) => {
201 Some(VAFSpectrum::Set(a.union(b).cloned().collect()))
202 }
203 },
204 _ => {
205 panic!("bug: trying to merge FormulaTerminals that are not both atoms and for the same sample")
206 }
207 }
208 }
209}
210
211impl std::fmt::Display for Formula {
212 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
213 let fmt_operand = |formula: &Formula| match formula {
214 Formula::Terminal(_) => format!("{}", formula),
215 Formula::Negation { operand } if operand.is_terminal() => format!("{}", formula),
216 _ => format!("({})", formula),
217 };
218
219 let formatted = match self {
220 Formula::Terminal(FormulaTerminal::False) => "false".to_owned(),
221 Formula::Terminal(FormulaTerminal::True) => "true".to_owned(),
222 Formula::Terminal(FormulaTerminal::Atom {
223 sample,
224 vafs: VAFSpectrum::Set(vafs),
225 }) => match vafs.len() {
226 1 => format!("{}:{}", sample, vafs.iter().next().unwrap()),
227 x if x > 1 => format!(
228 "{}:{{{}}}",
229 sample,
230 vafs.iter().map(|vaf| format!("{:.3}", vaf)).join(", "),
231 ),
232 _ => "false".to_owned(),
233 },
234 Formula::Terminal(FormulaTerminal::Atom {
235 sample,
236 vafs: VAFSpectrum::Range(vafrange),
237 }) => {
238 let left_bracket = if vafrange.left_exclusive { ']' } else { '[' };
239 let right_bracket = if vafrange.right_exclusive { '[' } else { ']' };
240 format!(
241 "{}:{}{},{}{}",
242 sample, left_bracket, vafrange.start, vafrange.end, right_bracket
243 )
244 }
245 Formula::Terminal(FormulaTerminal::Variant {
246 positive,
247 refbase,
248 altbase,
249 }) => format!(
250 "{negate}({refbase}>{altbase})",
251 negate = if *positive { "" } else { "!" },
252 refbase = refbase,
253 altbase = altbase,
254 ),
255 Formula::Terminal(FormulaTerminal::Expression {
256 identifier,
257 negated,
258 }) => format!(
259 "{negate}${expr}",
260 negate = if *negated { "!" } else { "" },
261 expr = **identifier
262 ),
263 Formula::Negation { operand } => format!("!{operand}", operand = fmt_operand(operand)),
264 Formula::Conjunction { operands } => operands.iter().map(&fmt_operand).join(" & "),
265 Formula::Disjunction { operands } => operands.iter().map(&fmt_operand).join(" | "),
266 Formula::Terminal(FormulaTerminal::Log2FoldChange {
267 sample_a,
268 sample_b,
269 predicate,
270 }) => {
271 format!("l2fc({}, {}) {}", sample_a, sample_b, predicate)
272 }
273 };
274 write!(f, "{}", formatted)
275 }
276}
277
278impl From<Expr<FormulaTerminal>> for Formula {
279 fn from(expr: Expr<FormulaTerminal>) -> Self {
280 match expr {
281 Expr::Terminal(terminal) => Formula::Terminal(terminal),
282 Expr::Not(a) => Formula::Negation {
283 operand: Box::new((*a).into()),
284 },
285 Expr::And(a, b) => match ((*a).into(), (*b).into()) {
286 (
287 Formula::Conjunction {
288 operands: mut left_operands,
289 },
290 Formula::Conjunction {
291 operands: right_operands,
292 },
293 ) => {
294 left_operands.extend(right_operands);
295 Formula::Conjunction {
296 operands: left_operands,
297 }
298 }
299 (
300 Formula::Conjunction {
301 operands: mut left_operands,
302 },
303 right,
304 ) => {
305 left_operands.push(right);
306 Formula::Conjunction {
307 operands: left_operands,
308 }
309 }
310 (
311 left,
312 Formula::Conjunction {
313 operands: mut right_operands,
314 },
315 ) => {
316 right_operands.push(left);
317 Formula::Conjunction {
318 operands: right_operands,
319 }
320 }
321 (left, right) => Formula::Conjunction {
322 operands: vec![left, right],
323 },
324 },
325 Expr::Or(a, b) => match ((*a).into(), (*b).into()) {
326 (
327 Formula::Disjunction {
328 operands: mut left_operands,
329 },
330 Formula::Disjunction {
331 operands: right_operands,
332 },
333 ) => {
334 left_operands.extend(right_operands);
335 Formula::Disjunction {
336 operands: left_operands,
337 }
338 }
339 (
340 Formula::Disjunction {
341 operands: mut left_operands,
342 },
343 right,
344 ) => {
345 left_operands.push(right);
346 Formula::Disjunction {
347 operands: left_operands,
348 }
349 }
350 (
351 left,
352 Formula::Disjunction {
353 operands: mut right_operands,
354 },
355 ) => {
356 right_operands.push(left);
357 Formula::Disjunction {
358 operands: right_operands,
359 }
360 }
361 (left, right) => Formula::Disjunction {
362 operands: vec![left, right],
363 },
364 },
365 Expr::Const(false) => Formula::Terminal(FormulaTerminal::False),
366 Expr::Const(true) => Formula::Terminal(FormulaTerminal::True),
367 }
368 }
369}
370
371impl From<Formula> for Expr<FormulaTerminal> {
372 fn from(val: Formula) -> Self {
373 if val.is_terminal_false() {
374 return Expr::Const(false);
375 }
376 if val.is_terminal_true() {
377 return Expr::Const(true);
378 }
379 match val {
380 Formula::Terminal(terminal) => Expr::Terminal(terminal),
381 Formula::Conjunction { mut operands } => {
382 let mut expr = operands.pop().unwrap().into();
383 for operand in operands {
384 expr &= operand.into();
385 }
386 expr
387 }
388 Formula::Disjunction { mut operands } => {
389 let mut expr = operands.pop().unwrap().into();
390 for operand in operands {
391 expr |= operand.into();
392 }
393 expr
394 }
395 Formula::Negation { operand } => Expr::Not(Box::new((*operand).into())),
396 }
397 }
398}
399
400impl Formula {
401 pub(crate) fn is_terminal(&self) -> bool {
402 matches!(self, Formula::Terminal(_))
403 }
404
405 pub(crate) fn to_terminal(&self) -> Option<&FormulaTerminal> {
406 if let Formula::Terminal(terminal) = self {
407 Some(terminal)
408 } else {
409 None
410 }
411 }
412
413 pub(crate) fn is_terminal_false(&self) -> bool {
415 match self {
416 Formula::Terminal(FormulaTerminal::Atom { vafs, .. }) => vafs.is_empty(),
417 Formula::Terminal(FormulaTerminal::False) => true,
418 _ => false,
419 }
420 }
421
422 pub(crate) fn is_terminal_true(&self) -> bool {
424 match self {
425 Formula::Terminal(FormulaTerminal::Atom { vafs, .. }) => vafs.is_complete(),
426 Formula::Terminal(FormulaTerminal::True) => true,
427 _ => false,
428 }
429 }
430
431 pub(crate) fn into_terminal(self) -> Option<FormulaTerminal> {
432 if let Formula::Terminal(terminal) = self {
433 Some(terminal)
434 } else {
435 None
436 }
437 }
438
439 pub(crate) fn absent(scenario: &Scenario) -> Self {
441 Formula::Conjunction {
442 operands: scenario
443 .samples()
444 .keys()
445 .map(|sample| {
446 Formula::Terminal(FormulaTerminal::Atom {
447 sample: sample.to_owned(),
448 vafs: VAFSpectrum::singleton(AlleleFreq(0.0)),
449 })
450 })
451 .collect(),
452 }
453 }
454
455 fn sort(&mut self) {
456 match self {
457 Formula::Conjunction { operands } | Formula::Disjunction { operands } => {
458 for operand in operands.iter_mut() {
459 operand.sort();
460 }
461 operands.sort();
462
463 operands.sort_by_key(|f| match f {
464 Formula::Terminal(FormulaTerminal::Log2FoldChange { .. }) => 0,
465
466 _ => 1,
467 });
468 }
469 _ => (),
470 }
471 }
472
473 pub(crate) fn normalize(&self, scenario: &Scenario, contig: &str) -> Result<NormalizedFormula> {
474 let mut simplified = self
477 .expand_expressions(scenario)?
478 .apply_negations(scenario, contig)?
479 .simplify()
480 .merge_atoms()
481 .simplify();
482 simplified.strip_false();
483 simplified.sort();
484 Ok(simplified.to_normalized_formula())
485 }
486
487 fn expand_expressions(&self, scenario: &Scenario) -> Result<Self> {
488 Ok(match self {
489 Formula::Conjunction { operands } => Formula::Conjunction {
490 operands: operands
491 .iter()
492 .map(|operand| operand.expand_expressions(scenario))
493 .collect::<Result<Vec<_>>>()?,
494 },
495 Formula::Disjunction { operands } => Formula::Disjunction {
496 operands: operands
497 .iter()
498 .map(|operand| operand.expand_expressions(scenario))
499 .collect::<Result<Vec<_>>>()?,
500 },
501 Formula::Negation { operand } => Formula::Negation {
502 operand: Box::new(operand.expand_expressions(scenario)?),
503 },
504 Formula::Terminal(terminal) => {
505 if let FormulaTerminal::Expression {
506 identifier,
507 negated,
508 } = terminal
509 {
510 if let Some(formula) = scenario.expressions().get(identifier) {
511 if *negated {
512 Formula::Negation {
513 operand: Box::new(formula.clone()),
514 }
515 } else {
516 formula.clone()
517 }
518 } else {
519 return Err(errors::Error::UndefinedExpression {
520 identifier: identifier.to_string(),
521 }
522 .into());
523 }
524 } else {
525 Formula::Terminal(terminal.clone())
526 }
527 }
528 })
529 }
530
531 fn to_normalized_formula(&self) -> NormalizedFormula {
532 match self {
533 Formula::Terminal(FormulaTerminal::Atom { sample, vafs }) => NormalizedFormula::Atom {
534 sample: sample.to_owned(),
535 vafs: vafs.to_owned(),
536 },
537 Formula::Conjunction { operands } => NormalizedFormula::Conjunction {
538 operands: operands.iter().map(|o| o.to_normalized_formula()).collect(),
539 },
540 Formula::Disjunction { operands } => NormalizedFormula::Disjunction {
541 operands: operands.iter().map(|o| o.to_normalized_formula()).collect(),
542 },
543 &Formula::Terminal(FormulaTerminal::Variant {
544 positive,
545 refbase,
546 altbase,
547 }) => NormalizedFormula::Variant {
548 positive,
549 refbase,
550 altbase,
551 },
552 &Formula::Terminal(FormulaTerminal::Expression {
553 identifier: _,
554 negated: _,
555 }) => {
556 panic!("bug: expressions should be expanded before normalization");
557 }
558 Formula::Negation { operand: _ } => {
559 panic!("bug: negations should have been applied before normalization")
560 }
561 Formula::Terminal(FormulaTerminal::False) => NormalizedFormula::False,
562 Formula::Terminal(FormulaTerminal::True) => NormalizedFormula::True,
563 Formula::Terminal(FormulaTerminal::Log2FoldChange {
564 sample_a,
565 sample_b,
566 predicate,
567 }) => NormalizedFormula::Log2FoldChange {
568 sample_a: sample_a.into(),
569 sample_b: sample_b.into(),
570 predicate: *predicate,
571 },
572 }
573 }
574
575 fn merge_atoms(&self) -> Self {
576 let group_operands = |operands: &Vec<Formula>| -> HashMap<Option<String>, Vec<Formula>> {
577 operands.iter().cloned().into_group_map_by(|operand| {
578 if let Formula::Terminal(FormulaTerminal::Atom { sample, vafs: _ }) = operand {
579 Some(sample.to_owned())
580 } else {
581 None
583 }
584 })
585 };
586 match self {
587 Formula::Conjunction { operands } => {
588 let mut grouped_operands = group_operands(operands);
590
591 for (sample, statements) in &mut grouped_operands {
593 if let Some(_sample) = sample {
594 let mut merged_statement =
595 statements.pop().unwrap().into_terminal().unwrap();
596 for statement in statements.iter() {
597 merged_statement.merge_conjunctions(statement.to_terminal().unwrap());
598 }
599 if let FormulaTerminal::Atom { vafs, .. } = &merged_statement {
602 if vafs.is_empty() {
603 return Formula::Terminal(FormulaTerminal::False);
604 }
605 }
606
607 *statements = vec![Formula::Terminal(merged_statement)];
608 } else {
609 for statement in statements {
611 *statement = statement.merge_atoms();
612 }
613 }
614 }
615
616 let operands = grouped_operands.into_values().flatten().collect();
617
618 Formula::Conjunction { operands }
619 }
620 Formula::Disjunction { operands } => {
621 let mut grouped_operands = group_operands(operands);
623
624 for (sample, statements) in &mut grouped_operands {
626 if let Some(sample) = sample {
627 statements.sort_unstable_by_key(|stmt| {
630 if let Formula::Terminal(FormulaTerminal::Atom { sample: _, vafs }) =
631 stmt
632 {
633 match vafs {
634 VAFSpectrum::Set(s) => s.iter().min().copied(),
635 VAFSpectrum::Range(r) => Some(r.start),
636 }
637 } else {
638 None
639 }
640 });
641 let mut merged_statements = vec![];
642 let mut current_statement = statements.remove(0).into_terminal().unwrap();
644 for statement in statements.iter() {
645 let other_statement = statement.to_terminal().unwrap();
649
650 if let Some(merged) =
652 current_statement.try_merge_disjunction(other_statement)
653 {
654 current_statement = FormulaTerminal::Atom {
656 sample: sample.into(),
657 vafs: merged,
658 };
659 } else {
660 merged_statements.push(Formula::Terminal(current_statement));
664 current_statement = other_statement.clone();
665 }
666 }
667 merged_statements.push(Formula::Terminal(current_statement));
669 *statements = merged_statements;
670 } else {
671 for statement in statements {
673 *statement = statement.merge_atoms();
674 }
675 }
676 }
677 let operands = grouped_operands
678 .into_iter()
679 .flat_map(|(_, statements)| statements)
680 .collect();
681
682 Formula::Disjunction { operands }
683 }
684 Formula::Negation { operand } => Formula::Negation {
685 operand: Box::new(operand.merge_atoms()),
686 },
687 terminal => terminal.clone(),
688 }
689 }
690
691 fn strip_false(&mut self) {
692 if let Formula::Disjunction { ref mut operands } = self {
693 *operands = operands
694 .iter()
695 .filter(|operand| {
696 if operand.is_terminal_false() {
697 false
698 } else if let Formula::Conjunction { operands } = operand {
699 !operands.iter().any(|operand| operand.is_terminal_false())
700 } else {
701 true
702 }
703 })
704 .cloned()
705 .collect();
706 }
707 }
708
709 fn simplify(self) -> Self {
711 let expr: Expr<FormulaTerminal> = self.into();
712 let simplified = expr.simplify_via_bdd();
713 simplified.into()
714 }
715
716 fn negate(&self, scenario: &Scenario, contig: &str) -> Result<Self> {
718 Ok(match self {
719 Formula::Terminal(FormulaTerminal::False) => Formula::Terminal(FormulaTerminal::True),
720 Formula::Terminal(FormulaTerminal::True) => Formula::Terminal(FormulaTerminal::False),
721 Formula::Conjunction { operands } => Formula::Disjunction {
722 operands: operands
723 .iter()
724 .map(|o| o.negate(scenario, contig))
725 .collect::<Result<Vec<Formula>>>()?,
726 },
727 Formula::Disjunction { operands } => Formula::Conjunction {
728 operands: operands
729 .iter()
730 .map(|o| o.negate(scenario, contig))
731 .collect::<Result<Vec<Formula>>>()?,
732 },
733 Formula::Negation { operand } => operand.as_ref().clone(),
734 &Formula::Terminal(FormulaTerminal::Variant {
735 positive,
736 refbase,
737 altbase,
738 }) => Formula::Terminal(FormulaTerminal::Variant {
739 positive: !positive,
740 refbase,
741 altbase,
742 }),
743 Formula::Terminal(FormulaTerminal::Expression {
744 identifier,
745 negated,
746 }) => Formula::Terminal(FormulaTerminal::Expression {
747 identifier: identifier.clone(),
748 negated: !negated,
749 }),
750 Formula::Terminal(FormulaTerminal::Log2FoldChange {
751 sample_a,
752 sample_b,
753 predicate,
754 }) => Formula::Terminal(FormulaTerminal::Log2FoldChange {
755 sample_a: sample_a.into(),
756 sample_b: sample_b.into(),
757 predicate: !*predicate,
758 }),
759 Formula::Terminal(FormulaTerminal::Atom { sample, vafs }) => {
760 let universe = scenario
761 .samples()
762 .get(sample)
763 .ok_or_else(|| errors::Error::InvalidSampleName {
764 name: sample.to_owned(),
765 })?
766 .contig_universe(contig, scenario.species())?;
767
768 let mut disjunction = Vec::new();
769 match vafs {
770 VAFSpectrum::Set(vafs) => {
771 let mut uvaf_stack: VecDeque<_> = universe.iter().cloned().collect();
772 while let Some(uvafs) = uvaf_stack.pop_front() {
773 match uvafs {
774 VAFSpectrum::Set(uvafs) => {
775 let difference: BTreeSet<_> =
776 uvafs.difference(vafs).cloned().collect();
777 if !difference.is_empty() {
778 disjunction.push(VAFSpectrum::Set(difference));
779 }
780 }
781 VAFSpectrum::Range(urange) => {
782 for &vaf in vafs {
783 if urange.contains(vaf) {
784 let (left_urange, right_urange) = urange.split_at(vaf);
785 if let Some(right_urange) = right_urange {
786 uvaf_stack.push_back(right_urange);
787 }
788 if let Some(left_urange) = left_urange {
789 disjunction.push(left_urange);
790 }
791 } else {
792 disjunction.push(VAFSpectrum::Range(urange.clone()));
793 }
794 }
795 }
796 }
797 }
798 }
799 VAFSpectrum::Range(range) => {
800 for uvafs in universe.iter() {
801 match uvafs {
802 VAFSpectrum::Set(uvafs) => {
803 let set: BTreeSet<_> = uvafs
804 .iter()
805 .filter(|uvaf| !range.contains(**uvaf))
806 .cloned()
807 .collect();
808 if !set.is_empty() {
809 disjunction.push(VAFSpectrum::Set(set));
810 }
811 }
812 VAFSpectrum::Range(urange) => match range.overlap(urange) {
813 VAFRangeOverlap::Equal => {
814 }
816 VAFRangeOverlap::Contained => {
817 if let Some(left) = urange.split_at(range.start).0 {
818 disjunction.push(left);
819 }
820 if let Some(right) = urange.split_at(range.end).1 {
821 disjunction.push(right);
822 }
823 }
824 VAFRangeOverlap::End => {
825 if let Some(spec) = urange.split_at(range.end).1 {
826 disjunction.push(spec);
827 }
828 }
829 VAFRangeOverlap::Start => {
830 if let Some(spec) = urange.split_at(range.start).0 {
831 disjunction.push(spec);
832 }
833 }
834 VAFRangeOverlap::None => {
835 disjunction.push(VAFSpectrum::Range(urange.clone()))
836 }
837 VAFRangeOverlap::Contains => (),
838 },
839 }
840 }
841 }
842 }
843
844 if disjunction.is_empty() {
845 Formula::Terminal(FormulaTerminal::Atom {
847 sample: sample.to_owned(),
848 vafs: VAFSpectrum::empty(),
849 })
850 } else {
851 Formula::Disjunction {
852 operands: disjunction
853 .into_iter()
854 .map(|vafs| {
855 Formula::Terminal(FormulaTerminal::Atom {
856 sample: sample.clone(),
857 vafs,
858 })
859 })
860 .collect(),
861 }
862 }
863 }
864 })
865 }
866
867 fn apply_negations(&self, scenario: &Scenario, contig: &str) -> Result<Self> {
869 Ok(match self {
870 Formula::Negation { operand } => operand
871 .negate(scenario, contig)?
872 .apply_negations(scenario, contig)?,
873 Formula::Terminal(FormulaTerminal::Atom { sample, vafs }) => {
874 Formula::Terminal(FormulaTerminal::Atom {
875 sample: sample.to_owned(),
876 vafs: vafs.to_owned(),
877 })
878 }
879 Formula::Conjunction { operands } => {
880 let operands = operands
881 .iter()
882 .map(|o| o.apply_negations(scenario, contig))
883 .collect::<Result<Vec<Formula>>>()?;
884
885 Formula::Conjunction { operands }
886 }
887 Formula::Disjunction { operands } => Formula::Disjunction {
888 operands: operands
889 .iter()
890 .map(|o| o.apply_negations(scenario, contig))
891 .collect::<Result<Vec<Formula>>>()?,
892 },
893 &Formula::Terminal(FormulaTerminal::Variant {
894 positive,
895 refbase,
896 altbase,
897 }) => Formula::Terminal(FormulaTerminal::Variant {
898 positive,
899 refbase,
900 altbase,
901 }),
902 &Formula::Terminal(FormulaTerminal::Expression {
903 identifier: _,
904 negated: _,
905 }) => {
906 panic!("bug: expressions should be expanded before applying negations");
907 }
908 Formula::Terminal(FormulaTerminal::False) => Formula::Terminal(FormulaTerminal::False),
909 Formula::Terminal(FormulaTerminal::True) => Formula::Terminal(FormulaTerminal::True),
910 Formula::Terminal(FormulaTerminal::Log2FoldChange {
911 sample_a,
912 sample_b,
913 predicate,
914 }) => Formula::Terminal(FormulaTerminal::Log2FoldChange {
915 sample_a: sample_a.into(),
916 sample_b: sample_b.into(),
917 predicate: *predicate,
918 }),
919 })
920 }
921}
922
923#[derive(PartialEq, Eq, Clone, Hash, PartialOrd, Ord)]
924pub(crate) enum NormalizedFormula {
925 Conjunction {
926 operands: Vec<NormalizedFormula>,
927 },
928 Disjunction {
929 operands: Vec<NormalizedFormula>,
930 },
931 Atom {
932 sample: String,
933 vafs: VAFSpectrum,
934 },
935 Variant {
936 positive: bool,
937 refbase: Iupac,
938 altbase: Iupac,
939 },
940 Log2FoldChange {
941 sample_a: String,
942 sample_b: String,
943 predicate: Log2FoldChangePredicate,
944 },
945 False,
946 True,
947}
948
949impl std::fmt::Debug for NormalizedFormula {
950 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
951 std::fmt::Display::fmt(self, f)
952 }
953}
954
955impl std::fmt::Display for NormalizedFormula {
956 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
957 let fmt_operand = |formula: &NormalizedFormula| match formula {
958 NormalizedFormula::Atom { .. } | NormalizedFormula::Variant { .. } => {
959 format!("{}", formula)
960 }
961 _ => format!("({})", formula),
962 };
963
964 let formatted = match self {
965 NormalizedFormula::Atom {
966 sample,
967 vafs: VAFSpectrum::Set(vafs),
968 } => match vafs.len() {
969 1 => format!("{}:{}", sample, vafs.iter().next().unwrap()),
970 x if x > 1 => format!(
971 "{}:{{{}}}",
972 sample,
973 vafs.iter().map(|vaf| format!("{:.3}", vaf)).join(", "),
974 ),
975 _ => "false".to_owned(),
976 },
977 NormalizedFormula::Atom {
978 sample,
979 vafs: VAFSpectrum::Range(vafrange),
980 } => {
981 let left_bracket = if vafrange.left_exclusive { ']' } else { '[' };
982 let right_bracket = if vafrange.right_exclusive { '[' } else { ']' };
983 format!(
984 "{}:{}{:.3},{:.3}{}",
985 sample, left_bracket, vafrange.start, vafrange.end, right_bracket
986 )
987 }
988 NormalizedFormula::Variant {
989 positive,
990 refbase,
991 altbase,
992 } => format!(
993 "{negate}({refbase}>{altbase})",
994 negate = if *positive { "" } else { "!" },
995 refbase = refbase,
996 altbase = altbase,
997 ),
998 NormalizedFormula::Conjunction { operands } => {
999 operands.iter().map(&fmt_operand).join(" & ")
1000 }
1001 NormalizedFormula::Disjunction { operands } => {
1002 operands.iter().map(&fmt_operand).join(" | ")
1003 }
1004 NormalizedFormula::False => "false".to_owned(),
1005 NormalizedFormula::True => "true".to_owned(),
1006 NormalizedFormula::Log2FoldChange {
1007 sample_a,
1008 sample_b,
1009 predicate,
1010 } => {
1011 format!("l2fc({}, {}) {}", sample_a, sample_b, predicate)
1012 }
1013 };
1014 write!(f, "{}", formatted)
1015 }
1016}
1017
1018#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
1019pub(crate) enum VAFSpectrum {
1020 Set(BTreeSet<AlleleFreq>),
1021 Range(VAFRange),
1022}
1023
1024impl VAFSpectrum {
1025 pub(crate) fn singleton(vaf: AlleleFreq) -> Self {
1026 let mut set = BTreeSet::new();
1027 set.insert(vaf);
1028 VAFSpectrum::Set(set)
1029 }
1030
1031 pub(crate) fn empty() -> Self {
1032 VAFSpectrum::Set(BTreeSet::new())
1033 }
1034
1035 pub(crate) fn contains(&self, vaf: AlleleFreq) -> bool {
1036 match self {
1037 VAFSpectrum::Set(ref set) => set.contains(&vaf),
1038 VAFSpectrum::Range(ref range) => range.contains(vaf),
1039 }
1040 }
1041
1042 pub(crate) fn is_empty(&self) -> bool {
1043 match self {
1044 VAFSpectrum::Set(set) => set.is_empty(),
1045 VAFSpectrum::Range(range) => range.is_empty(),
1046 }
1047 }
1048
1049 pub(crate) fn is_complete(&self) -> bool {
1050 match self {
1051 VAFSpectrum::Set(_) => false,
1052 VAFSpectrum::Range(range) => range.is_complete(),
1053 }
1054 }
1055}
1056
1057#[derive(Clone, Debug, PartialEq, Eq, TypedBuilder, Hash, CopyGetters)]
1058pub(crate) struct VAFRange {
1059 inner: ops::Range<AlleleFreq>,
1060 #[getset(get_copy = "pub")]
1061 left_exclusive: bool,
1062 #[getset(get_copy = "pub")]
1063 right_exclusive: bool,
1064}
1065
1066#[derive(Debug, PartialEq, Eq, Copy, Clone, Hash)]
1067pub(crate) enum VAFRangeOverlap {
1068 Contained,
1069 Contains,
1070 End,
1071 Start,
1072 Equal,
1073 None,
1074}
1075
1076impl VAFRange {
1077 pub(crate) fn empty() -> Self {
1078 Self {
1079 inner: AlleleFreq(0.0)..AlleleFreq(0.0),
1080 left_exclusive: true,
1081 right_exclusive: true,
1082 }
1083 }
1084
1085 pub(crate) fn is_empty(&self) -> bool {
1086 self.start == self.end && (self.left_exclusive || self.right_exclusive)
1087 }
1088
1089 pub(crate) fn is_complete(&self) -> bool {
1090 *self.start == 0.0 && *self.end == 1.0 && !self.left_exclusive && !self.right_exclusive
1091 }
1092
1093 pub(crate) fn is_singleton(&self) -> bool {
1094 self.start == self.end && !(self.left_exclusive || self.right_exclusive)
1095 }
1096
1097 pub(crate) fn contains(&self, vaf: AlleleFreq) -> bool {
1098 match (self.left_exclusive, self.right_exclusive) {
1099 (true, true) => self.start < vaf && self.end > vaf,
1100 (true, false) => self.start < vaf && self.end >= vaf,
1101 (false, true) => self.start <= vaf && self.end > vaf,
1102 (false, false) => self.start <= vaf && self.end >= vaf,
1103 }
1104 }
1105
1106 pub(crate) fn split_at(&self, vaf: AlleleFreq) -> (Option<VAFSpectrum>, Option<VAFSpectrum>) {
1107 assert!(
1108 self.contains(vaf),
1109 "bug: split_at is only defined if given VAF is contained in range"
1110 );
1111 let left = VAFRange {
1112 inner: self.start..vaf,
1113 left_exclusive: self.left_exclusive,
1114 right_exclusive: true,
1115 };
1116 let right = VAFRange {
1117 inner: vaf..self.end,
1118 left_exclusive: true,
1119 right_exclusive: self.right_exclusive,
1120 };
1121
1122 let to_spectrum = |range: VAFRange| {
1123 if range.start == range.end {
1124 if !(range.left_exclusive && self.right_exclusive) {
1125 Some(VAFSpectrum::singleton(range.start))
1126 } else {
1127 None
1128 }
1129 } else {
1130 Some(VAFSpectrum::Range(range))
1131 }
1132 };
1133
1134 (to_spectrum(left), to_spectrum(right))
1135 }
1136
1137 pub(crate) fn overlap(&self, vafs: &VAFRange) -> VAFRangeOverlap {
1138 if self == vafs {
1139 return VAFRangeOverlap::Equal;
1140 }
1141 let range = self;
1142 let other_range = vafs;
1143 let start_is_right_of_start = match (self.left_exclusive, other_range.left_exclusive) {
1144 (true, true) => self.start > other_range.start,
1145 (true, false) => self.start >= other_range.start,
1146 (false, true) => self.start > other_range.start,
1147 (false, false) => self.start > other_range.start,
1148 };
1149 let end_is_left_of_end = match (self.right_exclusive, other_range.right_exclusive) {
1150 (true, true) => range.end < other_range.end,
1151 (true, false) => range.end <= other_range.end,
1152 (false, true) => range.end < other_range.end,
1153 (false, false) => range.end < other_range.end,
1154 };
1155 if (range.end < other_range.start || range.start > other_range.end)
1156 || (range.end <= other_range.start
1157 && (range.right_exclusive || other_range.left_exclusive))
1158 || (range.start >= other_range.end
1159 && (range.left_exclusive || other_range.right_exclusive))
1160 {
1161 VAFRangeOverlap::None
1162 } else {
1163 match (start_is_right_of_start, end_is_left_of_end) {
1164 (true, true) => VAFRangeOverlap::Contained,
1165 (true, false) => VAFRangeOverlap::Start,
1166 (false, true) => VAFRangeOverlap::End,
1167 (false, false) => VAFRangeOverlap::Contains,
1168 }
1169 }
1170 }
1171
1172 pub(crate) fn observable_min(&self, n_obs: usize) -> AlleleFreq {
1173 let min_vaf = if n_obs < 10 || !self.is_adjustment_possible(n_obs) {
1174 self.start
1175 } else {
1176 let obs_count = Self::expected_observation_count(self.start, n_obs);
1177 let adjust_allelefreq = |obs_count: f64| AlleleFreq(obs_count.ceil() / n_obs as f64);
1178
1179 if self.left_exclusive && obs_count % 1.0 == 0.0 {
1180 let adjusted_end = self.observable_max(n_obs);
1183
1184 for offset in &[1.0, 0.0] {
1185 let adjusted_obs_count = obs_count + offset;
1186 let adjusted_start = adjust_allelefreq(adjusted_obs_count);
1187 if *adjusted_start <= 1.0 && adjusted_start <= adjusted_end {
1188 return adjusted_start;
1189 }
1190 }
1191 }
1192
1193 adjust_allelefreq(obs_count)
1194 };
1195 if min_vaf >= self.observable_max(n_obs) {
1196 self.start
1199 } else {
1200 min_vaf
1201 }
1202 }
1203
1204 pub(crate) fn observable_max(&self, n_obs: usize) -> AlleleFreq {
1205 assert!(
1206 *self.end != 0.0,
1207 "bug: observable_max may not be called if end=0.0."
1208 );
1209 if n_obs < 10 || !self.is_adjustment_possible(n_obs) {
1210 self.end
1211 } else {
1212 let mut obs_count = Self::expected_observation_count(self.end, n_obs);
1213 if self.right_exclusive && obs_count % 1.0 == 0.0 {
1214 obs_count -= 1.0;
1215 }
1216 obs_count = obs_count.floor();
1217 if obs_count == 0.0 {
1218 self.end
1220 } else {
1221 AlleleFreq(obs_count.floor() / n_obs as f64)
1222 }
1223 }
1224 }
1225
1226 fn expected_observation_count(freq: AlleleFreq, n_obs: usize) -> f64 {
1227 n_obs as f64 * *freq
1228 }
1229
1230 fn is_adjustment_possible(&self, n_obs: usize) -> bool {
1231 Self::expected_observation_count(self.end - self.start, n_obs) > 1.0
1232 }
1233
1234 pub(crate) fn intersect(&self, other: &Self) -> Self {
1235 let overlap = self.overlap(other);
1236 if overlap == VAFRangeOverlap::None {
1237 return VAFRange::empty();
1238 }
1239
1240 let inner = self.inner.start.max(other.inner.start)..self.inner.end.min(other.inner.end);
1241
1242 let left_exclusive = if self.start > other.start {
1243 self.left_exclusive
1244 } else if self.start < other.start {
1245 other.left_exclusive
1246 } else {
1247 self.left_exclusive || other.left_exclusive
1248 };
1249 let right_exclusive = if self.end < other.end {
1250 self.right_exclusive
1251 } else if self.end > other.end {
1252 other.right_exclusive
1253 } else {
1254 self.right_exclusive || other.right_exclusive
1255 };
1256
1257 VAFRange::builder()
1258 .inner(inner)
1259 .left_exclusive(left_exclusive)
1260 .right_exclusive(right_exclusive)
1261 .build()
1262 }
1263}
1264
1265use auto_ops::impl_op_ex;
1266use ordered_float::NotNan;
1267
1268impl_op_ex!(&|a: &VAFRange, b: &VAFRange| -> VAFRange {
1269 match a.overlap(b) {
1270 VAFRangeOverlap::Contained => a.clone(),
1271 VAFRangeOverlap::Contains => b.clone(),
1272 VAFRangeOverlap::Start => VAFRange {
1273 inner: a.inner.start..b.inner.end,
1274 left_exclusive: a.left_exclusive,
1275 right_exclusive: b.right_exclusive,
1276 },
1277 VAFRangeOverlap::End => VAFRange {
1278 inner: b.inner.start..a.inner.end,
1279 left_exclusive: b.left_exclusive,
1280 right_exclusive: a.right_exclusive,
1281 },
1282 VAFRangeOverlap::Equal => a.clone(),
1283 VAFRangeOverlap::None => VAFRange::empty(),
1284 }
1285});
1286
1287impl_op_ex!(| |a: &VAFRange, b: &VAFRange| -> (VAFRange, Option<VAFRange>) {
1288 match a.overlap(b) {
1289 VAFRangeOverlap::Contained => (b.clone(), None),
1290 VAFRangeOverlap::Contains => (a.clone(), None),
1291 VAFRangeOverlap::Start => (VAFRange {
1292 inner: b.inner.start..a.inner.end,
1293 left_exclusive: b.left_exclusive,
1294 right_exclusive: a.right_exclusive,
1295 }, None),
1296 VAFRangeOverlap::End => (VAFRange {
1297 inner: a.inner.start..b.inner.end,
1298 left_exclusive: a.left_exclusive,
1299 right_exclusive: b.right_exclusive,
1300 }, None),
1301 VAFRangeOverlap::Equal => (a.clone(), None),
1302 VAFRangeOverlap::None => (a.clone(), Some(b.clone()))
1303 }
1304});
1305
1306impl ops::Deref for VAFRange {
1307 type Target = ops::Range<AlleleFreq>;
1308
1309 fn deref(&self) -> &ops::Range<AlleleFreq> {
1310 &self.inner
1311 }
1312}
1313
1314impl Ord for VAFRange {
1315 fn cmp(&self, other: &Self) -> Ordering {
1316 match self.start.cmp(&other.start) {
1317 Ordering::Equal => self.end.cmp(&other.end),
1318 ord => ord,
1319 }
1320 }
1321}
1322
1323impl PartialOrd for VAFRange {
1324 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
1325 Some(self.cmp(other))
1326 }
1327}
1328
1329#[derive(Debug, Clone, Default, Derefable)]
1330pub(crate) struct VAFUniverse(#[deref(mutable)] HashSet<VAFSpectrum>);
1331
1332impl VAFUniverse {
1333 pub(crate) fn contains(&self, vaf: AlleleFreq) -> bool {
1334 for atom in &**self {
1335 if atom.contains(vaf) {
1336 return true;
1337 }
1338 }
1339 false
1340 }
1341}
1342
1343impl<'de> Deserialize<'de> for VAFUniverse {
1344 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
1345 where
1346 D: de::Deserializer<'de>,
1347 {
1348 deserializer.deserialize_string(VAFUniverseVisitor)
1349 }
1350}
1351
1352struct VAFUniverseVisitor;
1353
1354impl<'de> de::Visitor<'de> for VAFUniverseVisitor {
1355 type Value = VAFUniverse;
1356
1357 fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
1358 formatter.write_str(
1359 "a disjunction of possible VAFs (see https://varlociraptor.github.io/docs/calling)",
1360 )
1361 }
1362
1363 fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
1364 where
1365 E: de::Error,
1366 {
1367 let res = FormulaParser::parse(Rule::universe, v);
1368 match res {
1369 Ok(pairs) => {
1370 let mut operands = HashSet::new();
1371 for pair in pairs {
1372 match pair.as_rule() {
1373 Rule::vaf => {
1374 operands.insert(parse_vaf(pair));
1375 }
1376 Rule::vafrange => {
1377 let inner = pair.into_inner();
1378 operands.insert(parse_vafrange(inner));
1379 }
1380 Rule::vafset => {
1381 let inner = pair.into_inner();
1382 operands.insert(parse_vafset(inner));
1383 }
1384 Rule::EOI => (),
1385 _ => unreachable!(),
1386 }
1387 }
1388 Ok(VAFUniverse(operands))
1389 }
1390 Err(e) => {
1391 eprintln!("{}", e);
1392 Err(de::Error::invalid_value(
1393 serde::de::Unexpected::Other("invalid VAF formula"),
1394 &self,
1395 ))
1396 }
1397 }
1398 }
1399}
1400
1401impl<'de> Deserialize<'de> for Formula {
1402 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
1403 where
1404 D: de::Deserializer<'de>,
1405 {
1406 deserializer.deserialize_string(FormulaVisitor)
1407 }
1408}
1409
1410struct FormulaVisitor;
1411
1412impl<'de> de::Visitor<'de> for FormulaVisitor {
1413 type Value = Formula;
1414
1415 fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
1416 formatter
1417 .write_str("a valid VAF formula (see https://varlociraptor.github.io/docs/calling)")
1418 }
1419
1420 fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
1421 where
1422 E: de::Error,
1423 {
1424 let res = FormulaParser::parse(Rule::formula, v);
1425 match res {
1426 Ok(mut pairs) => {
1427 let pair = pairs.next().expect("bug: expecting formula");
1428 parse_formula(pair)
1429 }
1430 Err(e) => Err(de::Error::invalid_value(
1431 serde::de::Unexpected::Other(&format!("invalid VAF formula:\n{}", e)),
1432 &self,
1433 )),
1434 }
1435 }
1436}
1437
1438fn parse_vaf(pair: Pair<Rule>) -> VAFSpectrum {
1439 let vaf = pair.as_str().parse().expect("bug: unable to parse VAF");
1440 VAFSpectrum::singleton(AlleleFreq(vaf))
1441}
1442
1443fn parse_vafrange(mut inner: Pairs<Rule>) -> VAFSpectrum {
1444 let left = inner.next().unwrap().as_str();
1445 let lower = inner.next().unwrap().as_str().parse().unwrap();
1446 let upper = inner.next().unwrap().as_str().parse().unwrap();
1447 let right = inner.next().unwrap().as_str();
1448
1449 let range = VAFRange {
1450 inner: lower..upper,
1451 left_exclusive: left == "]",
1452 right_exclusive: right == "[",
1453 };
1454
1455 VAFSpectrum::Range(range)
1456}
1457
1458fn parse_vafset(inner: Pairs<Rule>) -> VAFSpectrum {
1459 let set = inner.map(|vaf| vaf.as_str().parse().unwrap()).collect();
1460 VAFSpectrum::Set(set)
1461}
1462
1463fn parse_formula<E>(pair: Pair<Rule>) -> Result<Formula, E>
1464where
1465 E: de::Error,
1466{
1467 Ok(match pair.as_rule() {
1468 Rule::expression => {
1469 let mut inner = pair.into_inner();
1470 let identifier = inner.next().unwrap().as_str();
1471 Formula::Terminal(FormulaTerminal::Expression {
1472 identifier: ExpressionIdentifier(identifier.to_owned()),
1473 negated: false,
1474 })
1475 }
1476 Rule::variant => {
1477 let mut inner = pair.into_inner();
1478 let refbase = inner.next().unwrap().as_str().as_bytes()[0];
1479 let altbase = inner.next().unwrap().as_str().as_bytes()[0];
1480 Formula::Terminal(FormulaTerminal::Variant {
1481 refbase: Iupac(refbase),
1482 altbase: Iupac(altbase),
1483 positive: true,
1484 })
1485 }
1486 Rule::sample_vaf => {
1487 let mut inner = pair.into_inner();
1488 let sample = inner.next().unwrap().as_str().to_owned();
1489 Formula::Terminal(FormulaTerminal::Atom {
1490 sample,
1491 vafs: parse_vaf(inner.next().unwrap()),
1492 })
1493 }
1494 Rule::sample_vafrange => {
1495 let mut inner = pair.into_inner();
1496 let sample = inner.next().unwrap().as_str().to_owned();
1497 Formula::Terminal(FormulaTerminal::Atom {
1498 sample,
1499 vafs: parse_vafrange(inner.next().unwrap().into_inner()),
1500 })
1501 }
1502 Rule::sample_vafset => {
1503 let mut inner = pair.into_inner();
1504 let sample = inner.next().unwrap().as_str().to_owned();
1505 Formula::Terminal(FormulaTerminal::Atom {
1506 sample,
1507 vafs: parse_vafset(inner.next().unwrap().into_inner()),
1508 })
1509 }
1510 Rule::conjunction => {
1511 let inner = pair.into_inner();
1512 let mut operands = Vec::new();
1513 for operand in inner {
1514 operands.push(parse_formula(operand)?);
1515 }
1516 Formula::Conjunction { operands }
1517 }
1518 Rule::disjunction => {
1519 let inner = pair.into_inner();
1520 let mut operands = Vec::new();
1521 for operand in inner {
1522 operands.push(parse_formula(operand)?);
1523 }
1524 Formula::Disjunction { operands }
1525 }
1526 Rule::negation => {
1527 let mut inner = pair.into_inner();
1528 Formula::Negation {
1529 operand: Box::new(parse_formula(inner.next().unwrap())?),
1530 }
1531 }
1532 Rule::cmp => {
1533 let mut inner = pair.into_inner();
1534 let sample_a = inner.next().unwrap().as_str().to_owned();
1535 let op = parse_cmp_op(inner.next().unwrap());
1536 let sample_b = inner.next().unwrap().as_str().to_owned();
1537 Formula::Terminal(FormulaTerminal::Log2FoldChange {
1538 sample_a,
1539 sample_b,
1540 predicate: Log2FoldChangePredicate {
1541 comparison: op,
1542 value: NotNan::new(0.0).unwrap(),
1543 },
1544 })
1545 }
1546 Rule::lfc => {
1547 let mut inner = pair.into_inner();
1548 let sample_a = inner.next().unwrap().as_str().to_owned();
1549 let sample_b = inner.next().unwrap().as_str().to_owned();
1550 let operand = parse_cmp_op(inner.next().unwrap());
1551 let value = inner.next().unwrap().as_str().parse().unwrap();
1552 let predicate = Log2FoldChangePredicate {
1553 comparison: operand,
1554 value,
1555 };
1556 Formula::Terminal(FormulaTerminal::Log2FoldChange {
1557 sample_a,
1558 sample_b,
1559 predicate,
1560 })
1561 }
1562 Rule::false_literal => Formula::Terminal(FormulaTerminal::False),
1563 Rule::true_literal => Formula::Terminal(FormulaTerminal::True),
1564 Rule::cmp_ops => unreachable!(),
1565 Rule::formula => unreachable!(),
1566 Rule::subformula => unreachable!(),
1567 Rule::vafdef => unreachable!(),
1568 Rule::bound => unreachable!(),
1569 Rule::universe => unreachable!(),
1570 Rule::vafrange => unreachable!(),
1571 Rule::vafset => unreachable!(),
1572 Rule::identifier => unreachable!(),
1573 Rule::number => unreachable!(),
1574 Rule::vaf => unreachable!(),
1575 Rule::sample_vafdef => unreachable!(),
1576 Rule::EOI => unreachable!(),
1577 Rule::WHITESPACE => unreachable!(),
1578 Rule::COMMENT => unreachable!(),
1579 Rule::iupac => unreachable!(),
1580 })
1581}
1582
1583fn parse_cmp_op(pair: Pair<Rule>) -> ComparisonOperator {
1584 match pair.as_str() {
1585 "==" => ComparisonOperator::Equal,
1586 "!=" => ComparisonOperator::NotEqual,
1587 ">" => ComparisonOperator::Greater,
1588 ">=" => ComparisonOperator::GreaterEqual,
1589 "<" => ComparisonOperator::Less,
1590 "<=" => ComparisonOperator::LessEqual,
1591 _ => panic!(),
1592 }
1593}
1594
1595#[cfg(test)]
1596mod test {
1597 use crate::grammar::Scenario;
1598 use crate::grammar::{Formula, VAFRange};
1599 use crate::variants::model::AlleleFreq;
1600
1601 #[test]
1602 fn test_vaf_range_overlap() {
1603 let r1 = VAFRange {
1604 inner: AlleleFreq(0.0)..AlleleFreq(0.7),
1605 left_exclusive: false,
1606 right_exclusive: true,
1607 };
1608 let r2 = VAFRange {
1609 inner: AlleleFreq(0.3)..AlleleFreq(1.0),
1610 left_exclusive: false,
1611 right_exclusive: false,
1612 };
1613 let expected = VAFRange {
1614 inner: AlleleFreq(0.3)..AlleleFreq(0.7),
1615 left_exclusive: false,
1616 right_exclusive: true,
1617 };
1618 assert_eq!(expected, r1 & r2);
1619 }
1620
1621 #[test]
1622 fn test_merge_atoms() {
1623 let scenario: Scenario = serde_yaml::from_str(
1624 r#"
1625species:
1626 heterozygosity: 0.001
1627 germline-mutation-rate: 1e-3
1628 ploidy:
1629 male:
1630 all: 2
1631 X: 1
1632 Y: 1
1633 female:
1634 all: 2
1635 X: 2
1636 Y: 0
1637 genome-size: 3.5e9
1638
1639samples:
1640 tumor:
1641 sex: female
1642 somatic-effective-mutation-rate: 1e-6
1643 inheritance:
1644 clonal:
1645 from: normal
1646 somatic: false
1647 contamination:
1648 by: normal
1649 fraction: 0.11
1650 normal:
1651 sex: female
1652 somatic-effective-mutation-rate: 1e-10
1653
1654expressions:
1655 loh: "normal:0.5 & tumor:1.0"
1656 loh_or_amplification: "normal:0.5 & tumor:[0.9,1.0["
1657events:
1658 germline: "(normal:0.5 | normal:1.0) & !($loh | $loh_or_amplification)"
1659 expected: "(normal:0.5 & tumor:{0.0, 0.5}) | (normal:0.5 & tumor:]0.0,0.5[) | (normal:0.5 & tumor:]0.5,0.9[) | normal:1.0""#,
1660 )
1661 .unwrap();
1662 let expected = scenario.events["expected"]
1663 .clone()
1664 .normalize(&scenario, "all")
1665 .unwrap();
1666 let germline = scenario.events["germline"]
1667 .clone()
1668 .normalize(&scenario, "all")
1669 .unwrap();
1670 assert_eq!(germline, expected);
1671 }
1672
1673 #[test]
1674 fn test_range_conjunction() {
1675 let scenario: Scenario = serde_yaml::from_str(
1676 r#"samples:
1677 normal:
1678 resolution: 0.01
1679 universe: "[0.0,1.0]"
1680events:
1681 full: "normal:[0.0,1.0]"
1682 part1: "normal:[0.0,0.7]"
1683 part2: "normal:[0.3,1.0]"
1684 expected: "normal:[0.3,0.7]""#,
1685 )
1686 .unwrap();
1687 let expected = scenario.events["expected"].clone();
1688 let full = scenario.events["full"].clone();
1689 let part1 = scenario.events["part1"].clone();
1690 let part2 = scenario.events["part2"].clone();
1691 let conjunction = Formula::Conjunction {
1692 operands: vec![part1, part2],
1693 }
1694 .normalize(&scenario, "all")
1695 .unwrap();
1696 assert_eq!(conjunction, expected.normalize(&scenario, "all").unwrap());
1697 assert_ne!(conjunction, full.normalize(&scenario, "all").unwrap());
1698 }
1699
1700 #[test]
1701 fn test_nested_range_disjunction() {
1702 let scenario: Scenario = serde_yaml::from_str(
1703 r#"samples:
1704 normal:
1705 resolution: 0.01
1706 universe: "[0.0,1.0]"
1707events:
1708 full: "(normal:[0.0, 0.25] | normal:[0.5,0.75]) | (normal:[0.25,0.5] | normal:[0.75,1.0]) | normal:[0.1,0.4] | normal:0.1"
1709 expected: "normal:[0.0,1.0]""#,
1710 )
1711 .unwrap();
1712 let expected = scenario.events["expected"].clone();
1713 let full = scenario.events["full"].clone();
1714 let full = full.normalize(&scenario, "all").unwrap();
1715 assert_eq!(full, expected.normalize(&scenario, "all").unwrap());
1716 }
1717
1718 #[test]
1719 fn test_two_separate_range_disjunctions() {
1720 let scenario: Scenario = serde_yaml::from_str(
1721 r#"samples:
1722 normal:
1723 resolution: 0.01
1724 universe: "[0.0,1.0]"
1725events:
1726 full: "(normal:[0.0, 0.25] | normal:[0.5,0.6]) | ((normal:[0.25,0.5] | normal:[0.7,0.9]) | normal:[0.9,1.0]) | normal:]0.8,0.9[ | normal:0.75"
1727 expected: "normal:[0.0,0.6] | normal:[0.7,1.0]""#,
1728 )
1729 .unwrap();
1730 let expected = scenario.events["expected"].clone();
1731 let full = scenario.events["full"].clone();
1732 let full = full.normalize(&scenario, "all").unwrap();
1733 assert_eq!(full, expected.normalize(&scenario, "all").unwrap());
1734 }
1735}