Skip to main content

varlociraptor/grammar/
formula.rs

1use std::cmp::{Ord, Ordering, PartialOrd};
2use std::collections::{BTreeSet, HashMap, HashSet, VecDeque};
3use std::fmt;
4use std::ops;
5
6use anyhow::Result;
7use boolean_expression::Expr;
8use itertools::Itertools;
9use pest::iterators::{Pair, Pairs};
10use pest::Parser;
11use serde::de;
12use serde::Deserialize;
13
14use crate::errors;
15use crate::grammar::{ExpressionIdentifier, Scenario};
16use crate::utils::comparison::ComparisonOperator;
17use crate::utils::log2_fold_change::Log2FoldChangePredicate;
18use crate::variants::model::AlleleFreq;
19
20#[derive(Shrinkwrap, Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
21pub(crate) struct Iupac(u8);
22
23impl Iupac {
24    pub(crate) fn contains(&self, base: u8) -> bool {
25        if base == **self {
26            return true;
27        }
28        match **self {
29            b'R' if base == b'A' || base == b'G' => true,
30            b'Y' if base == b'C' || base == b'T' => true,
31            b'S' if base == b'G' || base == b'C' => true,
32            b'W' if base == b'A' || base == b'T' => true,
33            b'K' if base == b'G' || base == b'T' => true,
34            b'M' if base == b'A' || base == b'C' => true,
35            b'B' if base == b'C' || base == b'G' || base == b'T' => true,
36            b'D' if base == b'A' || base == b'G' || base == b'T' => true,
37            b'H' if base == b'A' || base == b'C' || base == b'T' => true,
38            b'V' if base == b'A' || base == b'C' || base == b'G' => true,
39            b'N' => true,
40            _ => false,
41        }
42    }
43}
44
45impl std::fmt::Display for Iupac {
46    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
47        write!(f, "{}", std::char::from_u32(**self as u32).unwrap())
48    }
49}
50
51#[derive(Parser)]
52#[grammar = "grammar/formula.pest"]
53pub(crate) struct FormulaParser;
54
55#[derive(PartialEq, Eq, Clone, Debug, Hash, PartialOrd, Ord)]
56pub(crate) enum Formula {
57    Conjunction { operands: Vec<Formula> },
58    Disjunction { operands: Vec<Formula> },
59    Negation { operand: Box<Formula> },
60    Terminal(FormulaTerminal),
61}
62
63impl From<NormalizedFormula> for Formula {
64    fn from(formula: NormalizedFormula) -> Self {
65        fn from_normalized(formula: NormalizedFormula) -> Formula {
66            match formula {
67                NormalizedFormula::Conjunction { operands } => Formula::Conjunction {
68                    operands: operands.into_iter().map(from_normalized).collect(),
69                },
70                NormalizedFormula::Disjunction { operands } => Formula::Disjunction {
71                    operands: operands.into_iter().map(from_normalized).collect(),
72                },
73                NormalizedFormula::Atom { sample, vafs } => {
74                    Formula::Terminal(FormulaTerminal::Atom { sample, vafs })
75                }
76                NormalizedFormula::Variant {
77                    altbase,
78                    positive,
79                    refbase,
80                } => Formula::Terminal(FormulaTerminal::Variant {
81                    altbase,
82                    positive,
83                    refbase,
84                }),
85                NormalizedFormula::False => Formula::Terminal(FormulaTerminal::False),
86                NormalizedFormula::True => Formula::Terminal(FormulaTerminal::True),
87                NormalizedFormula::Log2FoldChange {
88                    sample_a,
89                    sample_b,
90                    predicate,
91                } => Formula::Terminal(FormulaTerminal::Log2FoldChange {
92                    sample_a,
93                    sample_b,
94                    predicate,
95                }),
96            }
97        }
98        from_normalized(formula)
99    }
100}
101
102#[derive(PartialEq, Eq, Clone, Debug, Hash, PartialOrd, Ord)]
103pub(crate) enum FormulaTerminal {
104    Atom {
105        sample: String,
106        vafs: VAFSpectrum,
107    },
108    Variant {
109        positive: bool,
110        refbase: Iupac,
111        altbase: Iupac,
112    },
113    Expression {
114        identifier: ExpressionIdentifier,
115        negated: bool,
116    },
117    Log2FoldChange {
118        sample_a: String,
119        sample_b: String,
120        predicate: Log2FoldChangePredicate,
121    },
122    False,
123    True,
124}
125
126impl FormulaTerminal {
127    fn merge_conjunctions(&mut self, other: &FormulaTerminal) {
128        match (self, other) {
129            (
130                FormulaTerminal::Atom {
131                    sample: sample_a,
132                    vafs: ref mut vafs_a,
133                },
134                FormulaTerminal::Atom {
135                    sample: sample_b,
136                    vafs: vafs_b,
137                },
138            ) if sample_a == sample_b => match (&vafs_a, &vafs_b) {
139                (VAFSpectrum::Range(a), VAFSpectrum::Range(b)) => {
140                    let intersection = a & b;
141                    if intersection.is_singleton() {
142                        *vafs_a = VAFSpectrum::singleton(a.start)
143                    } else {
144                        *vafs_a = VAFSpectrum::Range(a & b)
145                    }
146                }
147                (VAFSpectrum::Range(a), VAFSpectrum::Set(b)) => {
148                    *vafs_a = VAFSpectrum::Set(
149                        b.iter().filter(|vaf| a.contains(**vaf)).cloned().collect(),
150                    );
151                }
152                (VAFSpectrum::Set(a), VAFSpectrum::Range(b)) => {
153                    *vafs_a = VAFSpectrum::Set(
154                        a.iter().filter(|vaf| b.contains(**vaf)).cloned().collect(),
155                    );
156                }
157                (VAFSpectrum::Set(a), VAFSpectrum::Set(b)) => {
158                    *vafs_a = VAFSpectrum::Set(a.intersection(b).cloned().collect());
159                }
160            },
161            _ => {
162                panic!("bug: trying to merge FormulaTerminals that are not both atoms and for the same sample")
163            }
164        }
165    }
166
167    /// Try building the union of two instances of VAFSpectrum.
168    /// Returns None in case ranges do not overlap or a set is not fully contained within a range,
169    /// otherwise returns the union.
170    fn try_merge_disjunction(&self, other: &FormulaTerminal) -> Option<VAFSpectrum> {
171        match (self, other) {
172            (
173                FormulaTerminal::Atom {
174                    sample: sample_a,
175                    vafs: vafs_a,
176                },
177                FormulaTerminal::Atom {
178                    sample: sample_b,
179                    vafs: vafs_b,
180                },
181            ) if sample_a == sample_b => match (&vafs_a, &vafs_b) {
182                (VAFSpectrum::Range(a), VAFSpectrum::Range(b)) => match a.overlap(b) {
183                    VAFRangeOverlap::None => None,
184                    _ => Some(VAFSpectrum::Range((a | b).0)),
185                },
186                (VAFSpectrum::Range(a), VAFSpectrum::Set(b)) => {
187                    if b.iter().all(|v| a.contains(*v)) {
188                        Some(VAFSpectrum::Range(a.clone()))
189                    } else {
190                        None
191                    }
192                }
193                (VAFSpectrum::Set(a), VAFSpectrum::Range(b)) => {
194                    if a.iter().all(|v| b.contains(*v)) {
195                        Some(VAFSpectrum::Range(b.clone()))
196                    } else {
197                        None
198                    }
199                }
200                (VAFSpectrum::Set(a), VAFSpectrum::Set(b)) => {
201                    Some(VAFSpectrum::Set(a.union(b).cloned().collect()))
202                }
203            },
204            _ => {
205                panic!("bug: trying to merge FormulaTerminals that are not both atoms and for the same sample")
206            }
207        }
208    }
209}
210
211impl std::fmt::Display for Formula {
212    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
213        let fmt_operand = |formula: &Formula| match formula {
214            Formula::Terminal(_) => format!("{}", formula),
215            Formula::Negation { operand } if operand.is_terminal() => format!("{}", formula),
216            _ => format!("({})", formula),
217        };
218
219        let formatted = match self {
220            Formula::Terminal(FormulaTerminal::False) => "false".to_owned(),
221            Formula::Terminal(FormulaTerminal::True) => "true".to_owned(),
222            Formula::Terminal(FormulaTerminal::Atom {
223                sample,
224                vafs: VAFSpectrum::Set(vafs),
225            }) => match vafs.len() {
226                1 => format!("{}:{}", sample, vafs.iter().next().unwrap()),
227                x if x > 1 => format!(
228                    "{}:{{{}}}",
229                    sample,
230                    vafs.iter().map(|vaf| format!("{:.3}", vaf)).join(", "),
231                ),
232                _ => "false".to_owned(),
233            },
234            Formula::Terminal(FormulaTerminal::Atom {
235                sample,
236                vafs: VAFSpectrum::Range(vafrange),
237            }) => {
238                let left_bracket = if vafrange.left_exclusive { ']' } else { '[' };
239                let right_bracket = if vafrange.right_exclusive { '[' } else { ']' };
240                format!(
241                    "{}:{}{},{}{}",
242                    sample, left_bracket, vafrange.start, vafrange.end, right_bracket
243                )
244            }
245            Formula::Terminal(FormulaTerminal::Variant {
246                positive,
247                refbase,
248                altbase,
249            }) => format!(
250                "{negate}({refbase}>{altbase})",
251                negate = if *positive { "" } else { "!" },
252                refbase = refbase,
253                altbase = altbase,
254            ),
255            Formula::Terminal(FormulaTerminal::Expression {
256                identifier,
257                negated,
258            }) => format!(
259                "{negate}${expr}",
260                negate = if *negated { "!" } else { "" },
261                expr = **identifier
262            ),
263            Formula::Negation { operand } => format!("!{operand}", operand = fmt_operand(operand)),
264            Formula::Conjunction { operands } => operands.iter().map(&fmt_operand).join(" & "),
265            Formula::Disjunction { operands } => operands.iter().map(&fmt_operand).join(" | "),
266            Formula::Terminal(FormulaTerminal::Log2FoldChange {
267                sample_a,
268                sample_b,
269                predicate,
270            }) => {
271                format!("l2fc({}, {}) {}", sample_a, sample_b, predicate)
272            }
273        };
274        write!(f, "{}", formatted)
275    }
276}
277
278impl From<Expr<FormulaTerminal>> for Formula {
279    fn from(expr: Expr<FormulaTerminal>) -> Self {
280        match expr {
281            Expr::Terminal(terminal) => Formula::Terminal(terminal),
282            Expr::Not(a) => Formula::Negation {
283                operand: Box::new((*a).into()),
284            },
285            Expr::And(a, b) => match ((*a).into(), (*b).into()) {
286                (
287                    Formula::Conjunction {
288                        operands: mut left_operands,
289                    },
290                    Formula::Conjunction {
291                        operands: right_operands,
292                    },
293                ) => {
294                    left_operands.extend(right_operands);
295                    Formula::Conjunction {
296                        operands: left_operands,
297                    }
298                }
299                (
300                    Formula::Conjunction {
301                        operands: mut left_operands,
302                    },
303                    right,
304                ) => {
305                    left_operands.push(right);
306                    Formula::Conjunction {
307                        operands: left_operands,
308                    }
309                }
310                (
311                    left,
312                    Formula::Conjunction {
313                        operands: mut right_operands,
314                    },
315                ) => {
316                    right_operands.push(left);
317                    Formula::Conjunction {
318                        operands: right_operands,
319                    }
320                }
321                (left, right) => Formula::Conjunction {
322                    operands: vec![left, right],
323                },
324            },
325            Expr::Or(a, b) => match ((*a).into(), (*b).into()) {
326                (
327                    Formula::Disjunction {
328                        operands: mut left_operands,
329                    },
330                    Formula::Disjunction {
331                        operands: right_operands,
332                    },
333                ) => {
334                    left_operands.extend(right_operands);
335                    Formula::Disjunction {
336                        operands: left_operands,
337                    }
338                }
339                (
340                    Formula::Disjunction {
341                        operands: mut left_operands,
342                    },
343                    right,
344                ) => {
345                    left_operands.push(right);
346                    Formula::Disjunction {
347                        operands: left_operands,
348                    }
349                }
350                (
351                    left,
352                    Formula::Disjunction {
353                        operands: mut right_operands,
354                    },
355                ) => {
356                    right_operands.push(left);
357                    Formula::Disjunction {
358                        operands: right_operands,
359                    }
360                }
361                (left, right) => Formula::Disjunction {
362                    operands: vec![left, right],
363                },
364            },
365            Expr::Const(false) => Formula::Terminal(FormulaTerminal::False),
366            Expr::Const(true) => Formula::Terminal(FormulaTerminal::True),
367        }
368    }
369}
370
371impl From<Formula> for Expr<FormulaTerminal> {
372    fn from(val: Formula) -> Self {
373        if val.is_terminal_false() {
374            return Expr::Const(false);
375        }
376        if val.is_terminal_true() {
377            return Expr::Const(true);
378        }
379        match val {
380            Formula::Terminal(terminal) => Expr::Terminal(terminal),
381            Formula::Conjunction { mut operands } => {
382                let mut expr = operands.pop().unwrap().into();
383                for operand in operands {
384                    expr &= operand.into();
385                }
386                expr
387            }
388            Formula::Disjunction { mut operands } => {
389                let mut expr = operands.pop().unwrap().into();
390                for operand in operands {
391                    expr |= operand.into();
392                }
393                expr
394            }
395            Formula::Negation { operand } => Expr::Not(Box::new((*operand).into())),
396        }
397    }
398}
399
400impl Formula {
401    pub(crate) fn is_terminal(&self) -> bool {
402        matches!(self, Formula::Terminal(_))
403    }
404
405    pub(crate) fn to_terminal(&self) -> Option<&FormulaTerminal> {
406        if let Formula::Terminal(terminal) = self {
407            Some(terminal)
408        } else {
409            None
410        }
411    }
412
413    /// Return true if this formula is a terminal that is always false (an empty VAF set).
414    pub(crate) fn is_terminal_false(&self) -> bool {
415        match self {
416            Formula::Terminal(FormulaTerminal::Atom { vafs, .. }) => vafs.is_empty(),
417            Formula::Terminal(FormulaTerminal::False) => true,
418            _ => false,
419        }
420    }
421
422    /// Return true if this formula is a terminal that is always true (an full VAF range or "true").
423    pub(crate) fn is_terminal_true(&self) -> bool {
424        match self {
425            Formula::Terminal(FormulaTerminal::Atom { vafs, .. }) => vafs.is_complete(),
426            Formula::Terminal(FormulaTerminal::True) => true,
427            _ => false,
428        }
429    }
430
431    pub(crate) fn into_terminal(self) -> Option<FormulaTerminal> {
432        if let Formula::Terminal(terminal) = self {
433            Some(terminal)
434        } else {
435            None
436        }
437    }
438
439    /// Generate formula representing the absent event
440    pub(crate) fn absent(scenario: &Scenario) -> Self {
441        Formula::Conjunction {
442            operands: scenario
443                .samples()
444                .keys()
445                .map(|sample| {
446                    Formula::Terminal(FormulaTerminal::Atom {
447                        sample: sample.to_owned(),
448                        vafs: VAFSpectrum::singleton(AlleleFreq(0.0)),
449                    })
450                })
451                .collect(),
452        }
453    }
454
455    fn sort(&mut self) {
456        match self {
457            Formula::Conjunction { operands } | Formula::Disjunction { operands } => {
458                for operand in operands.iter_mut() {
459                    operand.sort();
460                }
461                operands.sort();
462
463                operands.sort_by_key(|f| match f {
464                    Formula::Terminal(FormulaTerminal::Log2FoldChange { .. }) => 0,
465
466                    _ => 1,
467                });
468            }
469            _ => (),
470        }
471    }
472
473    pub(crate) fn normalize(&self, scenario: &Scenario, contig: &str) -> Result<NormalizedFormula> {
474        // METHOD: Expand all expressions and move negations down to atoms. Then, simplify via BDDs,
475        // merge atoms (VAF intervals) of same sample in the same conjuction, and simplify again.
476        let mut simplified = self
477            .expand_expressions(scenario)?
478            .apply_negations(scenario, contig)?
479            .simplify()
480            .merge_atoms()
481            .simplify();
482        simplified.strip_false();
483        simplified.sort();
484        Ok(simplified.to_normalized_formula())
485    }
486
487    fn expand_expressions(&self, scenario: &Scenario) -> Result<Self> {
488        Ok(match self {
489            Formula::Conjunction { operands } => Formula::Conjunction {
490                operands: operands
491                    .iter()
492                    .map(|operand| operand.expand_expressions(scenario))
493                    .collect::<Result<Vec<_>>>()?,
494            },
495            Formula::Disjunction { operands } => Formula::Disjunction {
496                operands: operands
497                    .iter()
498                    .map(|operand| operand.expand_expressions(scenario))
499                    .collect::<Result<Vec<_>>>()?,
500            },
501            Formula::Negation { operand } => Formula::Negation {
502                operand: Box::new(operand.expand_expressions(scenario)?),
503            },
504            Formula::Terminal(terminal) => {
505                if let FormulaTerminal::Expression {
506                    identifier,
507                    negated,
508                } = terminal
509                {
510                    if let Some(formula) = scenario.expressions().get(identifier) {
511                        if *negated {
512                            Formula::Negation {
513                                operand: Box::new(formula.clone()),
514                            }
515                        } else {
516                            formula.clone()
517                        }
518                    } else {
519                        return Err(errors::Error::UndefinedExpression {
520                            identifier: identifier.to_string(),
521                        }
522                        .into());
523                    }
524                } else {
525                    Formula::Terminal(terminal.clone())
526                }
527            }
528        })
529    }
530
531    fn to_normalized_formula(&self) -> NormalizedFormula {
532        match self {
533            Formula::Terminal(FormulaTerminal::Atom { sample, vafs }) => NormalizedFormula::Atom {
534                sample: sample.to_owned(),
535                vafs: vafs.to_owned(),
536            },
537            Formula::Conjunction { operands } => NormalizedFormula::Conjunction {
538                operands: operands.iter().map(|o| o.to_normalized_formula()).collect(),
539            },
540            Formula::Disjunction { operands } => NormalizedFormula::Disjunction {
541                operands: operands.iter().map(|o| o.to_normalized_formula()).collect(),
542            },
543            &Formula::Terminal(FormulaTerminal::Variant {
544                positive,
545                refbase,
546                altbase,
547            }) => NormalizedFormula::Variant {
548                positive,
549                refbase,
550                altbase,
551            },
552            &Formula::Terminal(FormulaTerminal::Expression {
553                identifier: _,
554                negated: _,
555            }) => {
556                panic!("bug: expressions should be expanded before normalization");
557            }
558            Formula::Negation { operand: _ } => {
559                panic!("bug: negations should have been applied before normalization")
560            }
561            Formula::Terminal(FormulaTerminal::False) => NormalizedFormula::False,
562            Formula::Terminal(FormulaTerminal::True) => NormalizedFormula::True,
563            Formula::Terminal(FormulaTerminal::Log2FoldChange {
564                sample_a,
565                sample_b,
566                predicate,
567            }) => NormalizedFormula::Log2FoldChange {
568                sample_a: sample_a.into(),
569                sample_b: sample_b.into(),
570                predicate: *predicate,
571            },
572        }
573    }
574
575    fn merge_atoms(&self) -> Self {
576        let group_operands = |operands: &Vec<Formula>| -> HashMap<Option<String>, Vec<Formula>> {
577            operands.iter().cloned().into_group_map_by(|operand| {
578                if let Formula::Terminal(FormulaTerminal::Atom { sample, vafs: _ }) = operand {
579                    Some(sample.to_owned())
580                } else {
581                    // group all non-atoms together
582                    None
583                }
584            })
585        };
586        match self {
587            Formula::Conjunction { operands } => {
588                // collect statements per sample
589                let mut grouped_operands = group_operands(operands);
590
591                // merge atoms of the same sample
592                for (sample, statements) in &mut grouped_operands {
593                    if let Some(_sample) = sample {
594                        let mut merged_statement =
595                            statements.pop().unwrap().into_terminal().unwrap();
596                        for statement in statements.iter() {
597                            merged_statement.merge_conjunctions(statement.to_terminal().unwrap());
598                        }
599                        // Return false overall if any operand contains an empty spectrum (as this will evaluate to
600                        // a probability of zero).
601                        if let FormulaTerminal::Atom { vafs, .. } = &merged_statement {
602                            if vafs.is_empty() {
603                                return Formula::Terminal(FormulaTerminal::False);
604                            }
605                        }
606
607                        *statements = vec![Formula::Terminal(merged_statement)];
608                    } else {
609                        // non-atoms, apply recursively
610                        for statement in statements {
611                            *statement = statement.merge_atoms();
612                        }
613                    }
614                }
615
616                let operands = grouped_operands.into_values().flatten().collect();
617
618                Formula::Conjunction { operands }
619            }
620            Formula::Disjunction { operands } => {
621                // collect statements per sample
622                let mut grouped_operands = group_operands(operands);
623
624                // merge atoms of the same sample
625                for (sample, statements) in &mut grouped_operands {
626                    if let Some(sample) = sample {
627                        // Sort by start position of VAFRange or minimum of VAFSet.
628                        // The idea is to try to keep merging neighbouring ranges/sets, greedily.
629                        statements.sort_unstable_by_key(|stmt| {
630                            if let Formula::Terminal(FormulaTerminal::Atom { sample: _, vafs }) =
631                                stmt
632                            {
633                                match vafs {
634                                    VAFSpectrum::Set(s) => s.iter().min().copied(),
635                                    VAFSpectrum::Range(r) => Some(r.start),
636                                }
637                            } else {
638                                None
639                            }
640                        });
641                        let mut merged_statements = vec![];
642                        // Pick off the first terminal from the list of statements..
643                        let mut current_statement = statements.remove(0).into_terminal().unwrap();
644                        for statement in statements.iter() {
645                            // then look at the (current) next one if the merge was
646                            // successful. Otherwise, `other_statement` will be the last statement
647                            // that could *not* be merged with `current_statement`
648                            let other_statement = statement.to_terminal().unwrap();
649
650                            // Try merging (i.e. unionise!) the two spectra
651                            if let Some(merged) =
652                                current_statement.try_merge_disjunction(other_statement)
653                            {
654                                // if it succeeds, use the merge result as `current_statement`
655                                current_statement = FormulaTerminal::Atom {
656                                    sample: sample.into(),
657                                    vafs: merged,
658                                };
659                            } else {
660                                // if it fails, stash our `current_statement` and replace it with
661                                // the other statement which couldn't be merged with
662                                // `current_statement`.
663                                merged_statements.push(Formula::Terminal(current_statement));
664                                current_statement = other_statement.clone();
665                            }
666                        }
667                        // don't forget to push the last result.
668                        merged_statements.push(Formula::Terminal(current_statement));
669                        *statements = merged_statements;
670                    } else {
671                        // non-atoms, apply recursively
672                        for statement in statements {
673                            *statement = statement.merge_atoms();
674                        }
675                    }
676                }
677                let operands = grouped_operands
678                    .into_iter()
679                    .flat_map(|(_, statements)| statements)
680                    .collect();
681
682                Formula::Disjunction { operands }
683            }
684            Formula::Negation { operand } => Formula::Negation {
685                operand: Box::new(operand.merge_atoms()),
686            },
687            terminal => terminal.clone(),
688        }
689    }
690
691    fn strip_false(&mut self) {
692        if let Formula::Disjunction { ref mut operands } = self {
693            *operands = operands
694                .iter()
695                .filter(|operand| {
696                    if operand.is_terminal_false() {
697                        false
698                    } else if let Formula::Conjunction { operands } = operand {
699                        !operands.iter().any(|operand| operand.is_terminal_false())
700                    } else {
701                        true
702                    }
703                })
704                .cloned()
705                .collect();
706        }
707    }
708
709    /// Simplify formula via a BDD
710    fn simplify(self) -> Self {
711        let expr: Expr<FormulaTerminal> = self.into();
712        let simplified = expr.simplify_via_bdd();
713        simplified.into()
714    }
715
716    /// Negate formula.
717    fn negate(&self, scenario: &Scenario, contig: &str) -> Result<Self> {
718        Ok(match self {
719            Formula::Terminal(FormulaTerminal::False) => Formula::Terminal(FormulaTerminal::True),
720            Formula::Terminal(FormulaTerminal::True) => Formula::Terminal(FormulaTerminal::False),
721            Formula::Conjunction { operands } => Formula::Disjunction {
722                operands: operands
723                    .iter()
724                    .map(|o| o.negate(scenario, contig))
725                    .collect::<Result<Vec<Formula>>>()?,
726            },
727            Formula::Disjunction { operands } => Formula::Conjunction {
728                operands: operands
729                    .iter()
730                    .map(|o| o.negate(scenario, contig))
731                    .collect::<Result<Vec<Formula>>>()?,
732            },
733            Formula::Negation { operand } => operand.as_ref().clone(),
734            &Formula::Terminal(FormulaTerminal::Variant {
735                positive,
736                refbase,
737                altbase,
738            }) => Formula::Terminal(FormulaTerminal::Variant {
739                positive: !positive,
740                refbase,
741                altbase,
742            }),
743            Formula::Terminal(FormulaTerminal::Expression {
744                identifier,
745                negated,
746            }) => Formula::Terminal(FormulaTerminal::Expression {
747                identifier: identifier.clone(),
748                negated: !negated,
749            }),
750            Formula::Terminal(FormulaTerminal::Log2FoldChange {
751                sample_a,
752                sample_b,
753                predicate,
754            }) => Formula::Terminal(FormulaTerminal::Log2FoldChange {
755                sample_a: sample_a.into(),
756                sample_b: sample_b.into(),
757                predicate: !*predicate,
758            }),
759            Formula::Terminal(FormulaTerminal::Atom { sample, vafs }) => {
760                let universe = scenario
761                    .samples()
762                    .get(sample)
763                    .ok_or_else(|| errors::Error::InvalidSampleName {
764                        name: sample.to_owned(),
765                    })?
766                    .contig_universe(contig, scenario.species())?;
767
768                let mut disjunction = Vec::new();
769                match vafs {
770                    VAFSpectrum::Set(vafs) => {
771                        let mut uvaf_stack: VecDeque<_> = universe.iter().cloned().collect();
772                        while let Some(uvafs) = uvaf_stack.pop_front() {
773                            match uvafs {
774                                VAFSpectrum::Set(uvafs) => {
775                                    let difference: BTreeSet<_> =
776                                        uvafs.difference(vafs).cloned().collect();
777                                    if !difference.is_empty() {
778                                        disjunction.push(VAFSpectrum::Set(difference));
779                                    }
780                                }
781                                VAFSpectrum::Range(urange) => {
782                                    for &vaf in vafs {
783                                        if urange.contains(vaf) {
784                                            let (left_urange, right_urange) = urange.split_at(vaf);
785                                            if let Some(right_urange) = right_urange {
786                                                uvaf_stack.push_back(right_urange);
787                                            }
788                                            if let Some(left_urange) = left_urange {
789                                                disjunction.push(left_urange);
790                                            }
791                                        } else {
792                                            disjunction.push(VAFSpectrum::Range(urange.clone()));
793                                        }
794                                    }
795                                }
796                            }
797                        }
798                    }
799                    VAFSpectrum::Range(range) => {
800                        for uvafs in universe.iter() {
801                            match uvafs {
802                                VAFSpectrum::Set(uvafs) => {
803                                    let set: BTreeSet<_> = uvafs
804                                        .iter()
805                                        .filter(|uvaf| !range.contains(**uvaf))
806                                        .cloned()
807                                        .collect();
808                                    if !set.is_empty() {
809                                        disjunction.push(VAFSpectrum::Set(set));
810                                    }
811                                }
812                                VAFSpectrum::Range(urange) => match range.overlap(urange) {
813                                    VAFRangeOverlap::Equal => {
814                                        // range is already covered entirely, nothing to add
815                                    }
816                                    VAFRangeOverlap::Contained => {
817                                        if let Some(left) = urange.split_at(range.start).0 {
818                                            disjunction.push(left);
819                                        }
820                                        if let Some(right) = urange.split_at(range.end).1 {
821                                            disjunction.push(right);
822                                        }
823                                    }
824                                    VAFRangeOverlap::End => {
825                                        if let Some(spec) = urange.split_at(range.end).1 {
826                                            disjunction.push(spec);
827                                        }
828                                    }
829                                    VAFRangeOverlap::Start => {
830                                        if let Some(spec) = urange.split_at(range.start).0 {
831                                            disjunction.push(spec);
832                                        }
833                                    }
834                                    VAFRangeOverlap::None => {
835                                        disjunction.push(VAFSpectrum::Range(urange.clone()))
836                                    }
837                                    VAFRangeOverlap::Contains => (),
838                                },
839                            }
840                        }
841                    }
842                }
843
844                if disjunction.is_empty() {
845                    // impossible, return empty set
846                    Formula::Terminal(FormulaTerminal::Atom {
847                        sample: sample.to_owned(),
848                        vafs: VAFSpectrum::empty(),
849                    })
850                } else {
851                    Formula::Disjunction {
852                        operands: disjunction
853                            .into_iter()
854                            .map(|vafs| {
855                                Formula::Terminal(FormulaTerminal::Atom {
856                                    sample: sample.clone(),
857                                    vafs,
858                                })
859                            })
860                            .collect(),
861                    }
862                }
863            }
864        })
865    }
866
867    /// Move Negation operators into the atoms.
868    fn apply_negations(&self, scenario: &Scenario, contig: &str) -> Result<Self> {
869        Ok(match self {
870            Formula::Negation { operand } => operand
871                .negate(scenario, contig)?
872                .apply_negations(scenario, contig)?,
873            Formula::Terminal(FormulaTerminal::Atom { sample, vafs }) => {
874                Formula::Terminal(FormulaTerminal::Atom {
875                    sample: sample.to_owned(),
876                    vafs: vafs.to_owned(),
877                })
878            }
879            Formula::Conjunction { operands } => {
880                let operands = operands
881                    .iter()
882                    .map(|o| o.apply_negations(scenario, contig))
883                    .collect::<Result<Vec<Formula>>>()?;
884
885                Formula::Conjunction { operands }
886            }
887            Formula::Disjunction { operands } => Formula::Disjunction {
888                operands: operands
889                    .iter()
890                    .map(|o| o.apply_negations(scenario, contig))
891                    .collect::<Result<Vec<Formula>>>()?,
892            },
893            &Formula::Terminal(FormulaTerminal::Variant {
894                positive,
895                refbase,
896                altbase,
897            }) => Formula::Terminal(FormulaTerminal::Variant {
898                positive,
899                refbase,
900                altbase,
901            }),
902            &Formula::Terminal(FormulaTerminal::Expression {
903                identifier: _,
904                negated: _,
905            }) => {
906                panic!("bug: expressions should be expanded before applying negations");
907            }
908            Formula::Terminal(FormulaTerminal::False) => Formula::Terminal(FormulaTerminal::False),
909            Formula::Terminal(FormulaTerminal::True) => Formula::Terminal(FormulaTerminal::True),
910            Formula::Terminal(FormulaTerminal::Log2FoldChange {
911                sample_a,
912                sample_b,
913                predicate,
914            }) => Formula::Terminal(FormulaTerminal::Log2FoldChange {
915                sample_a: sample_a.into(),
916                sample_b: sample_b.into(),
917                predicate: *predicate,
918            }),
919        })
920    }
921}
922
923#[derive(PartialEq, Eq, Clone, Hash, PartialOrd, Ord)]
924pub(crate) enum NormalizedFormula {
925    Conjunction {
926        operands: Vec<NormalizedFormula>,
927    },
928    Disjunction {
929        operands: Vec<NormalizedFormula>,
930    },
931    Atom {
932        sample: String,
933        vafs: VAFSpectrum,
934    },
935    Variant {
936        positive: bool,
937        refbase: Iupac,
938        altbase: Iupac,
939    },
940    Log2FoldChange {
941        sample_a: String,
942        sample_b: String,
943        predicate: Log2FoldChangePredicate,
944    },
945    False,
946    True,
947}
948
949impl std::fmt::Debug for NormalizedFormula {
950    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
951        std::fmt::Display::fmt(self, f)
952    }
953}
954
955impl std::fmt::Display for NormalizedFormula {
956    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
957        let fmt_operand = |formula: &NormalizedFormula| match formula {
958            NormalizedFormula::Atom { .. } | NormalizedFormula::Variant { .. } => {
959                format!("{}", formula)
960            }
961            _ => format!("({})", formula),
962        };
963
964        let formatted = match self {
965            NormalizedFormula::Atom {
966                sample,
967                vafs: VAFSpectrum::Set(vafs),
968            } => match vafs.len() {
969                1 => format!("{}:{}", sample, vafs.iter().next().unwrap()),
970                x if x > 1 => format!(
971                    "{}:{{{}}}",
972                    sample,
973                    vafs.iter().map(|vaf| format!("{:.3}", vaf)).join(", "),
974                ),
975                _ => "false".to_owned(),
976            },
977            NormalizedFormula::Atom {
978                sample,
979                vafs: VAFSpectrum::Range(vafrange),
980            } => {
981                let left_bracket = if vafrange.left_exclusive { ']' } else { '[' };
982                let right_bracket = if vafrange.right_exclusive { '[' } else { ']' };
983                format!(
984                    "{}:{}{:.3},{:.3}{}",
985                    sample, left_bracket, vafrange.start, vafrange.end, right_bracket
986                )
987            }
988            NormalizedFormula::Variant {
989                positive,
990                refbase,
991                altbase,
992            } => format!(
993                "{negate}({refbase}>{altbase})",
994                negate = if *positive { "" } else { "!" },
995                refbase = refbase,
996                altbase = altbase,
997            ),
998            NormalizedFormula::Conjunction { operands } => {
999                operands.iter().map(&fmt_operand).join(" & ")
1000            }
1001            NormalizedFormula::Disjunction { operands } => {
1002                operands.iter().map(&fmt_operand).join(" | ")
1003            }
1004            NormalizedFormula::False => "false".to_owned(),
1005            NormalizedFormula::True => "true".to_owned(),
1006            NormalizedFormula::Log2FoldChange {
1007                sample_a,
1008                sample_b,
1009                predicate,
1010            } => {
1011                format!("l2fc({}, {}) {}", sample_a, sample_b, predicate)
1012            }
1013        };
1014        write!(f, "{}", formatted)
1015    }
1016}
1017
1018#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
1019pub(crate) enum VAFSpectrum {
1020    Set(BTreeSet<AlleleFreq>),
1021    Range(VAFRange),
1022}
1023
1024impl VAFSpectrum {
1025    pub(crate) fn singleton(vaf: AlleleFreq) -> Self {
1026        let mut set = BTreeSet::new();
1027        set.insert(vaf);
1028        VAFSpectrum::Set(set)
1029    }
1030
1031    pub(crate) fn empty() -> Self {
1032        VAFSpectrum::Set(BTreeSet::new())
1033    }
1034
1035    pub(crate) fn contains(&self, vaf: AlleleFreq) -> bool {
1036        match self {
1037            VAFSpectrum::Set(ref set) => set.contains(&vaf),
1038            VAFSpectrum::Range(ref range) => range.contains(vaf),
1039        }
1040    }
1041
1042    pub(crate) fn is_empty(&self) -> bool {
1043        match self {
1044            VAFSpectrum::Set(set) => set.is_empty(),
1045            VAFSpectrum::Range(range) => range.is_empty(),
1046        }
1047    }
1048
1049    pub(crate) fn is_complete(&self) -> bool {
1050        match self {
1051            VAFSpectrum::Set(_) => false,
1052            VAFSpectrum::Range(range) => range.is_complete(),
1053        }
1054    }
1055}
1056
1057#[derive(Clone, Debug, PartialEq, Eq, TypedBuilder, Hash, CopyGetters)]
1058pub(crate) struct VAFRange {
1059    inner: ops::Range<AlleleFreq>,
1060    #[getset(get_copy = "pub")]
1061    left_exclusive: bool,
1062    #[getset(get_copy = "pub")]
1063    right_exclusive: bool,
1064}
1065
1066#[derive(Debug, PartialEq, Eq, Copy, Clone, Hash)]
1067pub(crate) enum VAFRangeOverlap {
1068    Contained,
1069    Contains,
1070    End,
1071    Start,
1072    Equal,
1073    None,
1074}
1075
1076impl VAFRange {
1077    pub(crate) fn empty() -> Self {
1078        Self {
1079            inner: AlleleFreq(0.0)..AlleleFreq(0.0),
1080            left_exclusive: true,
1081            right_exclusive: true,
1082        }
1083    }
1084
1085    pub(crate) fn is_empty(&self) -> bool {
1086        self.start == self.end && (self.left_exclusive || self.right_exclusive)
1087    }
1088
1089    pub(crate) fn is_complete(&self) -> bool {
1090        *self.start == 0.0 && *self.end == 1.0 && !self.left_exclusive && !self.right_exclusive
1091    }
1092
1093    pub(crate) fn is_singleton(&self) -> bool {
1094        self.start == self.end && !(self.left_exclusive || self.right_exclusive)
1095    }
1096
1097    pub(crate) fn contains(&self, vaf: AlleleFreq) -> bool {
1098        match (self.left_exclusive, self.right_exclusive) {
1099            (true, true) => self.start < vaf && self.end > vaf,
1100            (true, false) => self.start < vaf && self.end >= vaf,
1101            (false, true) => self.start <= vaf && self.end > vaf,
1102            (false, false) => self.start <= vaf && self.end >= vaf,
1103        }
1104    }
1105
1106    pub(crate) fn split_at(&self, vaf: AlleleFreq) -> (Option<VAFSpectrum>, Option<VAFSpectrum>) {
1107        assert!(
1108            self.contains(vaf),
1109            "bug: split_at is only defined if given VAF is contained in range"
1110        );
1111        let left = VAFRange {
1112            inner: self.start..vaf,
1113            left_exclusive: self.left_exclusive,
1114            right_exclusive: true,
1115        };
1116        let right = VAFRange {
1117            inner: vaf..self.end,
1118            left_exclusive: true,
1119            right_exclusive: self.right_exclusive,
1120        };
1121
1122        let to_spectrum = |range: VAFRange| {
1123            if range.start == range.end {
1124                if !(range.left_exclusive && self.right_exclusive) {
1125                    Some(VAFSpectrum::singleton(range.start))
1126                } else {
1127                    None
1128                }
1129            } else {
1130                Some(VAFSpectrum::Range(range))
1131            }
1132        };
1133
1134        (to_spectrum(left), to_spectrum(right))
1135    }
1136
1137    pub(crate) fn overlap(&self, vafs: &VAFRange) -> VAFRangeOverlap {
1138        if self == vafs {
1139            return VAFRangeOverlap::Equal;
1140        }
1141        let range = self;
1142        let other_range = vafs;
1143        let start_is_right_of_start = match (self.left_exclusive, other_range.left_exclusive) {
1144            (true, true) => self.start > other_range.start,
1145            (true, false) => self.start >= other_range.start,
1146            (false, true) => self.start > other_range.start,
1147            (false, false) => self.start > other_range.start,
1148        };
1149        let end_is_left_of_end = match (self.right_exclusive, other_range.right_exclusive) {
1150            (true, true) => range.end < other_range.end,
1151            (true, false) => range.end <= other_range.end,
1152            (false, true) => range.end < other_range.end,
1153            (false, false) => range.end < other_range.end,
1154        };
1155        if (range.end < other_range.start || range.start > other_range.end)
1156            || (range.end <= other_range.start
1157                && (range.right_exclusive || other_range.left_exclusive))
1158            || (range.start >= other_range.end
1159                && (range.left_exclusive || other_range.right_exclusive))
1160        {
1161            VAFRangeOverlap::None
1162        } else {
1163            match (start_is_right_of_start, end_is_left_of_end) {
1164                (true, true) => VAFRangeOverlap::Contained,
1165                (true, false) => VAFRangeOverlap::Start,
1166                (false, true) => VAFRangeOverlap::End,
1167                (false, false) => VAFRangeOverlap::Contains,
1168            }
1169        }
1170    }
1171
1172    pub(crate) fn observable_min(&self, n_obs: usize) -> AlleleFreq {
1173        let min_vaf = if n_obs < 10 || !self.is_adjustment_possible(n_obs) {
1174            self.start
1175        } else {
1176            let obs_count = Self::expected_observation_count(self.start, n_obs);
1177            let adjust_allelefreq = |obs_count: f64| AlleleFreq(obs_count.ceil() / n_obs as f64);
1178
1179            if self.left_exclusive && obs_count % 1.0 == 0.0 {
1180                // We are left exclusive and need to find a supremum from the right.
1181
1182                let adjusted_end = self.observable_max(n_obs);
1183
1184                for offset in &[1.0, 0.0] {
1185                    let adjusted_obs_count = obs_count + offset;
1186                    let adjusted_start = adjust_allelefreq(adjusted_obs_count);
1187                    if *adjusted_start <= 1.0 && adjusted_start <= adjusted_end {
1188                        return adjusted_start;
1189                    }
1190                }
1191            }
1192
1193            adjust_allelefreq(obs_count)
1194        };
1195        if min_vaf >= self.observable_max(n_obs) {
1196            // If the adjustments destroys the order of the boundaries, we don't do it.
1197            // This can happen if the two boundaries are close together and we have only few observations.
1198            self.start
1199        } else {
1200            min_vaf
1201        }
1202    }
1203
1204    pub(crate) fn observable_max(&self, n_obs: usize) -> AlleleFreq {
1205        assert!(
1206            *self.end != 0.0,
1207            "bug: observable_max may not be called if end=0.0."
1208        );
1209        if n_obs < 10 || !self.is_adjustment_possible(n_obs) {
1210            self.end
1211        } else {
1212            let mut obs_count = Self::expected_observation_count(self.end, n_obs);
1213            if self.right_exclusive && obs_count % 1.0 == 0.0 {
1214                obs_count -= 1.0;
1215            }
1216            obs_count = obs_count.floor();
1217            if obs_count == 0.0 {
1218                // too few observations to handle exclusiveness
1219                self.end
1220            } else {
1221                AlleleFreq(obs_count.floor() / n_obs as f64)
1222            }
1223        }
1224    }
1225
1226    fn expected_observation_count(freq: AlleleFreq, n_obs: usize) -> f64 {
1227        n_obs as f64 * *freq
1228    }
1229
1230    fn is_adjustment_possible(&self, n_obs: usize) -> bool {
1231        Self::expected_observation_count(self.end - self.start, n_obs) > 1.0
1232    }
1233
1234    pub(crate) fn intersect(&self, other: &Self) -> Self {
1235        let overlap = self.overlap(other);
1236        if overlap == VAFRangeOverlap::None {
1237            return VAFRange::empty();
1238        }
1239
1240        let inner = self.inner.start.max(other.inner.start)..self.inner.end.min(other.inner.end);
1241
1242        let left_exclusive = if self.start > other.start {
1243            self.left_exclusive
1244        } else if self.start < other.start {
1245            other.left_exclusive
1246        } else {
1247            self.left_exclusive || other.left_exclusive
1248        };
1249        let right_exclusive = if self.end < other.end {
1250            self.right_exclusive
1251        } else if self.end > other.end {
1252            other.right_exclusive
1253        } else {
1254            self.right_exclusive || other.right_exclusive
1255        };
1256
1257        VAFRange::builder()
1258            .inner(inner)
1259            .left_exclusive(left_exclusive)
1260            .right_exclusive(right_exclusive)
1261            .build()
1262    }
1263}
1264
1265use auto_ops::impl_op_ex;
1266use ordered_float::NotNan;
1267
1268impl_op_ex!(&|a: &VAFRange, b: &VAFRange| -> VAFRange {
1269    match a.overlap(b) {
1270        VAFRangeOverlap::Contained => a.clone(),
1271        VAFRangeOverlap::Contains => b.clone(),
1272        VAFRangeOverlap::Start => VAFRange {
1273            inner: a.inner.start..b.inner.end,
1274            left_exclusive: a.left_exclusive,
1275            right_exclusive: b.right_exclusive,
1276        },
1277        VAFRangeOverlap::End => VAFRange {
1278            inner: b.inner.start..a.inner.end,
1279            left_exclusive: b.left_exclusive,
1280            right_exclusive: a.right_exclusive,
1281        },
1282        VAFRangeOverlap::Equal => a.clone(),
1283        VAFRangeOverlap::None => VAFRange::empty(),
1284    }
1285});
1286
1287impl_op_ex!(| |a: &VAFRange, b: &VAFRange| -> (VAFRange, Option<VAFRange>) {
1288    match a.overlap(b) {
1289        VAFRangeOverlap::Contained => (b.clone(), None),
1290        VAFRangeOverlap::Contains => (a.clone(), None),
1291        VAFRangeOverlap::Start => (VAFRange {
1292            inner: b.inner.start..a.inner.end,
1293            left_exclusive: b.left_exclusive,
1294            right_exclusive: a.right_exclusive,
1295        }, None),
1296        VAFRangeOverlap::End => (VAFRange {
1297            inner: a.inner.start..b.inner.end,
1298            left_exclusive: a.left_exclusive,
1299            right_exclusive: b.right_exclusive,
1300        }, None),
1301        VAFRangeOverlap::Equal => (a.clone(), None),
1302        VAFRangeOverlap::None => (a.clone(), Some(b.clone()))
1303    }
1304});
1305
1306impl ops::Deref for VAFRange {
1307    type Target = ops::Range<AlleleFreq>;
1308
1309    fn deref(&self) -> &ops::Range<AlleleFreq> {
1310        &self.inner
1311    }
1312}
1313
1314impl Ord for VAFRange {
1315    fn cmp(&self, other: &Self) -> Ordering {
1316        match self.start.cmp(&other.start) {
1317            Ordering::Equal => self.end.cmp(&other.end),
1318            ord => ord,
1319        }
1320    }
1321}
1322
1323impl PartialOrd for VAFRange {
1324    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
1325        Some(self.cmp(other))
1326    }
1327}
1328
1329#[derive(Debug, Clone, Default, Derefable)]
1330pub(crate) struct VAFUniverse(#[deref(mutable)] HashSet<VAFSpectrum>);
1331
1332impl VAFUniverse {
1333    pub(crate) fn contains(&self, vaf: AlleleFreq) -> bool {
1334        for atom in &**self {
1335            if atom.contains(vaf) {
1336                return true;
1337            }
1338        }
1339        false
1340    }
1341}
1342
1343impl<'de> Deserialize<'de> for VAFUniverse {
1344    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
1345    where
1346        D: de::Deserializer<'de>,
1347    {
1348        deserializer.deserialize_string(VAFUniverseVisitor)
1349    }
1350}
1351
1352struct VAFUniverseVisitor;
1353
1354impl<'de> de::Visitor<'de> for VAFUniverseVisitor {
1355    type Value = VAFUniverse;
1356
1357    fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
1358        formatter.write_str(
1359            "a disjunction of possible VAFs (see https://varlociraptor.github.io/docs/calling)",
1360        )
1361    }
1362
1363    fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
1364    where
1365        E: de::Error,
1366    {
1367        let res = FormulaParser::parse(Rule::universe, v);
1368        match res {
1369            Ok(pairs) => {
1370                let mut operands = HashSet::new();
1371                for pair in pairs {
1372                    match pair.as_rule() {
1373                        Rule::vaf => {
1374                            operands.insert(parse_vaf(pair));
1375                        }
1376                        Rule::vafrange => {
1377                            let inner = pair.into_inner();
1378                            operands.insert(parse_vafrange(inner));
1379                        }
1380                        Rule::vafset => {
1381                            let inner = pair.into_inner();
1382                            operands.insert(parse_vafset(inner));
1383                        }
1384                        Rule::EOI => (),
1385                        _ => unreachable!(),
1386                    }
1387                }
1388                Ok(VAFUniverse(operands))
1389            }
1390            Err(e) => {
1391                eprintln!("{}", e);
1392                Err(de::Error::invalid_value(
1393                    serde::de::Unexpected::Other("invalid VAF formula"),
1394                    &self,
1395                ))
1396            }
1397        }
1398    }
1399}
1400
1401impl<'de> Deserialize<'de> for Formula {
1402    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
1403    where
1404        D: de::Deserializer<'de>,
1405    {
1406        deserializer.deserialize_string(FormulaVisitor)
1407    }
1408}
1409
1410struct FormulaVisitor;
1411
1412impl<'de> de::Visitor<'de> for FormulaVisitor {
1413    type Value = Formula;
1414
1415    fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
1416        formatter
1417            .write_str("a valid VAF formula (see https://varlociraptor.github.io/docs/calling)")
1418    }
1419
1420    fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
1421    where
1422        E: de::Error,
1423    {
1424        let res = FormulaParser::parse(Rule::formula, v);
1425        match res {
1426            Ok(mut pairs) => {
1427                let pair = pairs.next().expect("bug: expecting formula");
1428                parse_formula(pair)
1429            }
1430            Err(e) => Err(de::Error::invalid_value(
1431                serde::de::Unexpected::Other(&format!("invalid VAF formula:\n{}", e)),
1432                &self,
1433            )),
1434        }
1435    }
1436}
1437
1438fn parse_vaf(pair: Pair<Rule>) -> VAFSpectrum {
1439    let vaf = pair.as_str().parse().expect("bug: unable to parse VAF");
1440    VAFSpectrum::singleton(AlleleFreq(vaf))
1441}
1442
1443fn parse_vafrange(mut inner: Pairs<Rule>) -> VAFSpectrum {
1444    let left = inner.next().unwrap().as_str();
1445    let lower = inner.next().unwrap().as_str().parse().unwrap();
1446    let upper = inner.next().unwrap().as_str().parse().unwrap();
1447    let right = inner.next().unwrap().as_str();
1448
1449    let range = VAFRange {
1450        inner: lower..upper,
1451        left_exclusive: left == "]",
1452        right_exclusive: right == "[",
1453    };
1454
1455    VAFSpectrum::Range(range)
1456}
1457
1458fn parse_vafset(inner: Pairs<Rule>) -> VAFSpectrum {
1459    let set = inner.map(|vaf| vaf.as_str().parse().unwrap()).collect();
1460    VAFSpectrum::Set(set)
1461}
1462
1463fn parse_formula<E>(pair: Pair<Rule>) -> Result<Formula, E>
1464where
1465    E: de::Error,
1466{
1467    Ok(match pair.as_rule() {
1468        Rule::expression => {
1469            let mut inner = pair.into_inner();
1470            let identifier = inner.next().unwrap().as_str();
1471            Formula::Terminal(FormulaTerminal::Expression {
1472                identifier: ExpressionIdentifier(identifier.to_owned()),
1473                negated: false,
1474            })
1475        }
1476        Rule::variant => {
1477            let mut inner = pair.into_inner();
1478            let refbase = inner.next().unwrap().as_str().as_bytes()[0];
1479            let altbase = inner.next().unwrap().as_str().as_bytes()[0];
1480            Formula::Terminal(FormulaTerminal::Variant {
1481                refbase: Iupac(refbase),
1482                altbase: Iupac(altbase),
1483                positive: true,
1484            })
1485        }
1486        Rule::sample_vaf => {
1487            let mut inner = pair.into_inner();
1488            let sample = inner.next().unwrap().as_str().to_owned();
1489            Formula::Terminal(FormulaTerminal::Atom {
1490                sample,
1491                vafs: parse_vaf(inner.next().unwrap()),
1492            })
1493        }
1494        Rule::sample_vafrange => {
1495            let mut inner = pair.into_inner();
1496            let sample = inner.next().unwrap().as_str().to_owned();
1497            Formula::Terminal(FormulaTerminal::Atom {
1498                sample,
1499                vafs: parse_vafrange(inner.next().unwrap().into_inner()),
1500            })
1501        }
1502        Rule::sample_vafset => {
1503            let mut inner = pair.into_inner();
1504            let sample = inner.next().unwrap().as_str().to_owned();
1505            Formula::Terminal(FormulaTerminal::Atom {
1506                sample,
1507                vafs: parse_vafset(inner.next().unwrap().into_inner()),
1508            })
1509        }
1510        Rule::conjunction => {
1511            let inner = pair.into_inner();
1512            let mut operands = Vec::new();
1513            for operand in inner {
1514                operands.push(parse_formula(operand)?);
1515            }
1516            Formula::Conjunction { operands }
1517        }
1518        Rule::disjunction => {
1519            let inner = pair.into_inner();
1520            let mut operands = Vec::new();
1521            for operand in inner {
1522                operands.push(parse_formula(operand)?);
1523            }
1524            Formula::Disjunction { operands }
1525        }
1526        Rule::negation => {
1527            let mut inner = pair.into_inner();
1528            Formula::Negation {
1529                operand: Box::new(parse_formula(inner.next().unwrap())?),
1530            }
1531        }
1532        Rule::cmp => {
1533            let mut inner = pair.into_inner();
1534            let sample_a = inner.next().unwrap().as_str().to_owned();
1535            let op = parse_cmp_op(inner.next().unwrap());
1536            let sample_b = inner.next().unwrap().as_str().to_owned();
1537            Formula::Terminal(FormulaTerminal::Log2FoldChange {
1538                sample_a,
1539                sample_b,
1540                predicate: Log2FoldChangePredicate {
1541                    comparison: op,
1542                    value: NotNan::new(0.0).unwrap(),
1543                },
1544            })
1545        }
1546        Rule::lfc => {
1547            let mut inner = pair.into_inner();
1548            let sample_a = inner.next().unwrap().as_str().to_owned();
1549            let sample_b = inner.next().unwrap().as_str().to_owned();
1550            let operand = parse_cmp_op(inner.next().unwrap());
1551            let value = inner.next().unwrap().as_str().parse().unwrap();
1552            let predicate = Log2FoldChangePredicate {
1553                comparison: operand,
1554                value,
1555            };
1556            Formula::Terminal(FormulaTerminal::Log2FoldChange {
1557                sample_a,
1558                sample_b,
1559                predicate,
1560            })
1561        }
1562        Rule::false_literal => Formula::Terminal(FormulaTerminal::False),
1563        Rule::true_literal => Formula::Terminal(FormulaTerminal::True),
1564        Rule::cmp_ops => unreachable!(),
1565        Rule::formula => unreachable!(),
1566        Rule::subformula => unreachable!(),
1567        Rule::vafdef => unreachable!(),
1568        Rule::bound => unreachable!(),
1569        Rule::universe => unreachable!(),
1570        Rule::vafrange => unreachable!(),
1571        Rule::vafset => unreachable!(),
1572        Rule::identifier => unreachable!(),
1573        Rule::number => unreachable!(),
1574        Rule::vaf => unreachable!(),
1575        Rule::sample_vafdef => unreachable!(),
1576        Rule::EOI => unreachable!(),
1577        Rule::WHITESPACE => unreachable!(),
1578        Rule::COMMENT => unreachable!(),
1579        Rule::iupac => unreachable!(),
1580    })
1581}
1582
1583fn parse_cmp_op(pair: Pair<Rule>) -> ComparisonOperator {
1584    match pair.as_str() {
1585        "==" => ComparisonOperator::Equal,
1586        "!=" => ComparisonOperator::NotEqual,
1587        ">" => ComparisonOperator::Greater,
1588        ">=" => ComparisonOperator::GreaterEqual,
1589        "<" => ComparisonOperator::Less,
1590        "<=" => ComparisonOperator::LessEqual,
1591        _ => panic!(),
1592    }
1593}
1594
1595#[cfg(test)]
1596mod test {
1597    use crate::grammar::Scenario;
1598    use crate::grammar::{Formula, VAFRange};
1599    use crate::variants::model::AlleleFreq;
1600
1601    #[test]
1602    fn test_vaf_range_overlap() {
1603        let r1 = VAFRange {
1604            inner: AlleleFreq(0.0)..AlleleFreq(0.7),
1605            left_exclusive: false,
1606            right_exclusive: true,
1607        };
1608        let r2 = VAFRange {
1609            inner: AlleleFreq(0.3)..AlleleFreq(1.0),
1610            left_exclusive: false,
1611            right_exclusive: false,
1612        };
1613        let expected = VAFRange {
1614            inner: AlleleFreq(0.3)..AlleleFreq(0.7),
1615            left_exclusive: false,
1616            right_exclusive: true,
1617        };
1618        assert_eq!(expected, r1 & r2);
1619    }
1620
1621    #[test]
1622    fn test_merge_atoms() {
1623        let scenario: Scenario = serde_yaml::from_str(
1624            r#"
1625species:
1626  heterozygosity: 0.001
1627  germline-mutation-rate: 1e-3
1628  ploidy:
1629    male:
1630        all: 2
1631        X: 1
1632        Y: 1
1633    female:
1634        all: 2
1635        X: 2
1636        Y: 0
1637  genome-size: 3.5e9
1638
1639samples:
1640  tumor:
1641    sex: female
1642    somatic-effective-mutation-rate: 1e-6
1643    inheritance:
1644      clonal:
1645        from: normal
1646        somatic: false
1647    contamination:
1648      by: normal
1649      fraction: 0.11
1650  normal:
1651    sex: female
1652    somatic-effective-mutation-rate: 1e-10
1653
1654expressions:
1655  loh: "normal:0.5 & tumor:1.0"
1656  loh_or_amplification: "normal:0.5 & tumor:[0.9,1.0["
1657events:
1658  germline: "(normal:0.5 | normal:1.0) & !($loh | $loh_or_amplification)"
1659  expected: "(normal:0.5 & tumor:{0.0, 0.5}) | (normal:0.5 & tumor:]0.0,0.5[) | (normal:0.5 & tumor:]0.5,0.9[) | normal:1.0""#,
1660        )
1661        .unwrap();
1662        let expected = scenario.events["expected"]
1663            .clone()
1664            .normalize(&scenario, "all")
1665            .unwrap();
1666        let germline = scenario.events["germline"]
1667            .clone()
1668            .normalize(&scenario, "all")
1669            .unwrap();
1670        assert_eq!(germline, expected);
1671    }
1672
1673    #[test]
1674    fn test_range_conjunction() {
1675        let scenario: Scenario = serde_yaml::from_str(
1676            r#"samples:
1677  normal:
1678    resolution: 0.01
1679    universe: "[0.0,1.0]"
1680events:
1681  full: "normal:[0.0,1.0]"
1682  part1: "normal:[0.0,0.7]"
1683  part2: "normal:[0.3,1.0]"
1684  expected: "normal:[0.3,0.7]""#,
1685        )
1686        .unwrap();
1687        let expected = scenario.events["expected"].clone();
1688        let full = scenario.events["full"].clone();
1689        let part1 = scenario.events["part1"].clone();
1690        let part2 = scenario.events["part2"].clone();
1691        let conjunction = Formula::Conjunction {
1692            operands: vec![part1, part2],
1693        }
1694        .normalize(&scenario, "all")
1695        .unwrap();
1696        assert_eq!(conjunction, expected.normalize(&scenario, "all").unwrap());
1697        assert_ne!(conjunction, full.normalize(&scenario, "all").unwrap());
1698    }
1699
1700    #[test]
1701    fn test_nested_range_disjunction() {
1702        let scenario: Scenario = serde_yaml::from_str(
1703            r#"samples:
1704  normal:
1705    resolution: 0.01
1706    universe: "[0.0,1.0]"
1707events:
1708  full: "(normal:[0.0, 0.25] | normal:[0.5,0.75]) | (normal:[0.25,0.5] | normal:[0.75,1.0]) | normal:[0.1,0.4] | normal:0.1"
1709  expected: "normal:[0.0,1.0]""#,
1710        )
1711            .unwrap();
1712        let expected = scenario.events["expected"].clone();
1713        let full = scenario.events["full"].clone();
1714        let full = full.normalize(&scenario, "all").unwrap();
1715        assert_eq!(full, expected.normalize(&scenario, "all").unwrap());
1716    }
1717
1718    #[test]
1719    fn test_two_separate_range_disjunctions() {
1720        let scenario: Scenario = serde_yaml::from_str(
1721            r#"samples:
1722  normal:
1723    resolution: 0.01
1724    universe: "[0.0,1.0]"
1725events:
1726  full: "(normal:[0.0, 0.25] | normal:[0.5,0.6]) | ((normal:[0.25,0.5] | normal:[0.7,0.9]) | normal:[0.9,1.0]) | normal:]0.8,0.9[ | normal:0.75"
1727  expected: "normal:[0.0,0.6] | normal:[0.7,1.0]""#,
1728        )
1729            .unwrap();
1730        let expected = scenario.events["expected"].clone();
1731        let full = scenario.events["full"].clone();
1732        let full = full.normalize(&scenario, "all").unwrap();
1733        assert_eq!(full, expected.normalize(&scenario, "all").unwrap());
1734    }
1735}