1use std::collections::HashMap;
7use std::convert::{From, TryFrom};
8use std::fs::File;
9use std::path::{Path, PathBuf};
10use std::str::FromStr;
11use std::sync::Arc;
12
13use crate::calling;
14use crate::calling::variants::calling::{
15 call_generic, CallWriter, DefaultCandidateFilter, SampleInfos,
16};
17use crate::calling::variants::preprocessing::haplotype_feature_index::HaplotypeFeatureIndex;
18use crate::candidates;
19use crate::candidates::methylation::MethylationMotif;
20use crate::conversion;
21use crate::errors;
22use crate::estimation;
23use crate::estimation::alignment_properties::AlignmentProperties;
24use anyhow::{bail, Context, Result};
25use bio::stats::bayesian::bayes_factors::evidence::KassRaftery;
26use bio::stats::{LogProb, Prob};
27use itertools::Itertools;
28use structopt::StructOpt;
29use strum::IntoEnumIterator;
30use crate::filtration;
33use crate::grammar;
34use crate::reference;
35use crate::testcase;
36use crate::utils::PathMap;
37use crate::variants::evidence::realignment;
38
39use crate::variants::model::prior::CheckablePrior;
40use crate::variants::model::prior::Prior;
41use crate::variants::model::{AlleleFreq, VariantType};
42use crate::variants::sample::{estimate_alignment_properties, MethylationReadtype};
43
44use crate::SimpleEvent;
45
46#[derive(Debug, StructOpt, Serialize, Deserialize, Clone)]
47#[structopt(
48 name = "varlociraptor",
49 about = "Flexible, arbitrary-scenario, uncertainty-aware variant calling \
50 with parameter free filtration via FDR control for small and structural variants.",
51 setting = structopt::clap::AppSettings::ColoredHelp,
52)]
53pub enum Varlociraptor {
54 #[structopt(
55 name = "preprocess",
56 about = "Preprocess variants",
57 setting = structopt::clap::AppSettings::ColoredHelp,
58 )]
59 Preprocess {
60 #[structopt(subcommand)]
61 kind: PreprocessKind,
62 },
63 #[structopt(
64 name = "call",
65 about = "Call variants.",
66 setting = structopt::clap::AppSettings::ColoredHelp,
67 )]
68 Call {
69 #[structopt(subcommand)]
70 kind: CallKind,
71 },
72 #[structopt(
73 name = "filter-calls",
74 about = "Filter calls by either controlling the false discovery rate (FDR) at given level, or by posterior odds against the given events.",
75 setting = structopt::clap::AppSettings::ColoredHelp,
76 )]
77 FilterCalls {
78 #[structopt(subcommand)]
79 method: FilterMethod,
80 },
81 #[structopt(
82 name = "decode-phred",
83 about = "Decode PHRED-scaled values to human readable probabilities.",
84 usage = "varlociraptor decode-phred < in.bcf > out.bcf",
85 setting = structopt::clap::AppSettings::ColoredHelp,
86 )]
87 DecodePHRED,
88 #[structopt(
89 name = "estimate",
90 about = "Perform estimations.",
91 setting = structopt::clap::AppSettings::ColoredHelp,
92 )]
93 Estimate {
94 #[structopt(subcommand)]
95 kind: EstimateKind,
96 },
97 #[structopt(
98 name = "plot",
99 about = "Create plots",
100 setting = structopt::clap::AppSettings::ColoredHelp,
101 )]
102 Plot {
103 #[structopt(subcommand)]
104 kind: PlotKind,
105 },
106 #[structopt(
107 name = "genotype",
108 about = "Infer classical genotypes from Varlociraptor's AF field (1.0: 1/1, 0.5: 0/1, 0.0: 0/0, otherwise: 0/1). This assumes diploid samples.",
109 usage = "varlociraptor genotype < in.bcf > out.bcf",
110 setting = structopt::clap::AppSettings::ColoredHelp,
111 )]
112 Genotype,
113 #[structopt(
114 name = "methylation-candidates",
115 about = "Generate BCF with methylation candidates",
116 usage = "varlociraptor methylation-candidates input.fasta output.bcf",
117 setting = structopt::clap::AppSettings::ColoredHelp,
118 )]
119 MethylationCandidates {
120 #[structopt(
121 name = "input",
122 parse(from_os_str),
123 required = true,
124 help = "Input FASTA File"
125 )]
126 input: PathBuf,
127 #[structopt(
128 long = "motifs",
129 help = "Comma-separated list of methylation motifs to search for in the input chromosome. Supported motifs: CG, CHG, CHH, GATC.",
130 required = false,
131 use_delimiter = true
132 )]
133 #[serde(default = "default_methylation_motifs")]
134 motifs: Vec<MethylationMotif>,
135 #[structopt(name = "output", parse(from_os_str), help = "Output BCF File")]
136 output: Option<PathBuf>,
137 },
138}
139
140pub struct PreprocessInput {
141 reference: PathBuf,
142 bam: PathBuf,
143}
144
145impl Varlociraptor {
146 fn preprocess_input(&self) -> PreprocessInput {
147 if let Varlociraptor::Preprocess {
148 kind:
149 PreprocessKind::Variants {
150 ref reference,
151 ref bam,
152 ..
153 },
154 } = &self
155 {
156 PreprocessInput {
157 reference: reference.to_owned(),
158 bam: bam.to_owned(),
159 }
160 } else {
161 panic!("bug: these are not preprocess options.");
162 }
163 }
164}
165
166impl FromStr for MethylationMotif {
167 type Err = String;
168
169 fn from_str(s: &str) -> Result<Self, Self::Err> {
170 match s.to_uppercase().as_str() {
171 "CG" => Ok(MethylationMotif::CG),
172 "CHG" => Ok(MethylationMotif::CHG),
173 "CHH" => Ok(MethylationMotif::CHH),
174 "GATC" => Ok(MethylationMotif::GATC),
175 _ => Err(format!("Invalid methylation motif: {}", s)),
176 }
177 }
178}
179
180fn default_methylation_motifs() -> Vec<MethylationMotif> {
181 vec![MethylationMotif::CG]
182}
183
184fn default_reference_buffer_size() -> usize {
185 10
186}
187
188fn default_pairhmm_mode() -> String {
189 "exact".to_owned()
190}
191
192fn default_log_mode() -> String {
193 "default".to_owned()
194}
195
196fn default_min_bam_refetch_distance() -> u64 {
197 1
198}
199
200#[derive(Debug, StructOpt, Serialize, Deserialize, Clone)]
201pub enum PreprocessKind {
202 #[structopt(
203 name = "variants",
204 about = "Preprocess given variants by obtaining internal observations (allele likelihoods, strand information, ...)\
205 for each fragment. \
206 The obtained observations are printed to STDOUT in BCF format. Note that the resulting BCFs \
207 will be very large and are only intended for internal use (e.g. for piping into 'varlociraptor \
208 call variants generic').",
209 usage = "varlociraptor preprocess variants reference.fasta --alignment-properties alignment-properties.json \
210 --candidates candidates.bcf --bam sample.bam > sample.observations.bcf",
211 setting = structopt::clap::AppSettings::ColoredHelp,
212 )]
213 Variants {
214 #[structopt(
215 parse(from_os_str),
216 help = "FASTA file with reference genome. Has to be indexed with (e.g. with samtools faidx)."
217 )]
218 reference: PathBuf,
219 #[structopt(
220 parse(from_os_str),
221 long,
222 required = true,
223 help = "sorted VCF/BCF file to process (if omitted, read from STDIN)."
224 )]
225 candidates: PathBuf,
226 #[structopt(
227 long,
228 required = true,
229 help = "BAM file with aligned reads from a single sample. BAM file must be indexed (e.g. with samtools index)."
230 )]
231 bam: PathBuf,
232 #[structopt(
233 long,
234 help = "Name of INFO field containing an expected heterozygosity per variant allele (can be e.g. a population allele frequency). Field needs to have as many entries as ALT alleles per VCF/BCF record."
235 )]
236 variant_heterozygosity_field: Option<String>,
237 #[structopt(
238 long,
239 help = "Name of INFO field containing an expected somatic effective mutation rate per variant allele (can be e.g. a poplulation scale prevalence of a known cancer variant from a database). Field needs to have as many entries as ALT alleles per VCF/BCF record."
240 )]
241 variant_somatic_effective_mutation_rate_field: Option<String>,
242 #[structopt(
243 long,
244 help = "Report fragment IDs in output BCF. This information can be used for phasing."
245 )]
246 #[serde(default)]
247 report_fragment_ids: bool,
248 #[structopt(
249 long,
250 help = "Assume that candidate variants are given in atomic form (unlike e.g. \
251 provided by freebayes which combines adjacent variants that appear to be in \
252 phase into longer haplotypes). If variants are atomic, we do not want to perform \
253 realignment against SNVs and MNVs because those can lead to false positives if \
254 they are immediately followed or preceeded by indels."
255 )]
256 #[serde(default)]
257 atomic_candidate_variants: bool,
258 #[structopt(
259 long,
260 help = "Do not adjust mapping quality (MAPQ). By default Varlociraptor will adjust mapping qualities \
261 in order to avoid false positive hits caused by inflated MAPQ values at ambiguous loci. \
262 This happens by conservatively averaging MAPQs of all reads that overlap a given locus. \
263 While this is usually a good idea and has been validated by extensive benchmarking, there can be cases \
264 where this is not desired, e.g. when solely evaluating known variants."
265 )]
266 #[serde(default)]
267 omit_mapq_adjustment: bool,
268 #[structopt(
269 long = "reference-buffer-size",
270 short = "b",
271 default_value = "10",
272 help = "Number of reference sequences to keep in buffer. Use a smaller value \
273 to save memory at the expense of sometimes reduced parallelization."
274 )]
275 #[serde(default = "default_reference_buffer_size")]
276 reference_buffer_size: usize,
277 #[structopt(
278 long = "min-bam-refetch-distance",
279 default_value = "1",
280 help = "Base pair distance to last fetched BAM interval such that a refetching is performed \
281 instead of reading through until the next interval is reached. Making this too small \
282 can cause unnecessary random access. Making this too large can lead to unneccessary \
283 iteration over irrelevant records. Benchmarking has shown that at least for short reads, \
284 a value of 1 (e.g. always refetch) does not incur additional costs and is a reasonable \
285 default."
286 )]
287 #[serde(default = "default_min_bam_refetch_distance")]
288 min_bam_refetch_distance: u64,
289 #[structopt(
290 long = "alignment-properties",
291 help = "Alignment properties JSON file for sample. If not provided, properties \
292 will be estimated from the given BAM file. It is recommended to estimate alignment \
293 properties separately, see 'varlociraptor estimate alignment-properties --help'."
294 )]
295 alignment_properties: Option<PathBuf>,
296 #[structopt(
297 parse(from_os_str),
298 long,
299 help = "BCF file that shall contain the results (if omitted, write to STDOUT)."
300 )]
301 output: Option<PathBuf>,
302 #[structopt(
303 long = "propagate-info-fields",
304 help = "Additional INFO fields in the input BCF that shall be propagated to the output BCF \
305 (not supported for variants connected via EVENT tags except breakends)."
306 )]
307 #[serde(default)]
308 propagate_info_fields: Vec<String>,
309 #[structopt(
310 long = "indel-window",
311 default_value = "64",
312 help = "Number of bases to consider left and right of breakpoint when \
313 calculating read support. Currently implemented maximum \
314 value is 64."
315 )]
316 realignment_window: u64,
317 #[structopt(
318 long = "max-depth",
319 default_value = "200",
320 help = "Maximum number of observations to use for calling. If locus is exceeding this \
321 number, downsampling is performed."
322 )]
323 max_depth: usize,
324 #[structopt(
325 long = "omit-insert-size",
326 help = "Do not consider insert size when calculating support for a variant. Use this flag when \
327 processing amplicon data, where indels do not impact the observed insert size"
328 )]
329 #[serde(default)]
330 omit_insert_size: bool,
331 #[structopt(
332 long = "pairhmm-mode",
333 possible_values = &["fast", "exact", "homopolymer"],
334 default_value = "exact",
335 help = "PairHMM computation mode (either fast, exact or homopolymer). Fast mode means that only the best \
336 alignment path is considered for probability calculation. In rare cases, this can lead \
337 to wrong results for single reads. Hence, we advice to not use it when \
338 discrete allele frequences are of interest (0.5, 1.0). For continuous \
339 allele frequencies, fast mode should cause almost no deviations from the \
340 exact results. Also, if per sample allele frequencies are irrelevant (e.g. \
341 in large cohorts), fast mode can be safely used. \
342 Note that fast and exact mode are not suitable for ONT data.\
343 The homopolymer mode should be used for ONT data; it is similar to the exact mode \
344 but considers homopolymer errors to be different from gaps."
345 )]
346 #[serde(default = "default_pairhmm_mode")]
347 pairhmm_mode: String,
348 #[structopt(
349 long = "log-mode",
350 possible_values = &["default", "each-record"],
351 default_value = "default",
352 help = "Specify how progress should be logged. By default, a record count will be printed. With 'each-record', \
353 Varlociraptor will additionally print contig and position of each processed candidate variant record. This is \
354 useful for debugging."
355 )]
356 #[serde(default = "default_log_mode")]
357 log_mode: String,
358 #[structopt(
359 parse(from_os_str),
360 long = "output-raw-observations",
361 help = "Output raw observations as TSV files at the given prefix. Attention, only use this for debugging when processing \
362 a single variant. Otherwise it will cause a lots of files and a significant performance hit."
363 )]
364 #[serde(default)]
365 output_raw_observations: Option<PathBuf>,
366 #[structopt(
367 long = "methylation-read-type",
368 possible_values = &MethylationReadtype::iter().map(|v| v.into()).collect_vec(),
369 help = "Type of methylation information encoded in the reads. Use 'converted' for reads treated with bisulfite or EMSeq. Use 'annotated' for reads where methylation information is encoded in the MM and ML tags."
370 )]
371 methylation_readtype: Option<MethylationReadtype>,
372 },
373}
374
375#[derive(Debug, StructOpt, Serialize, Deserialize, Clone)]
376pub enum PlotKind {
377 #[structopt(
378 name = "variant-calling-prior",
379 about = "Plot variant calling prior given a scenario. Plot is printed to STDOUT in Vega-lite format.",
380 usage = "varlociraptor plot variant-calling-prior --scenario scenario.yaml > plot.vl.json",
381 setting = structopt::clap::AppSettings::ColoredHelp,
382 )]
383 VariantCallingPrior {
384 #[structopt(
385 parse(from_os_str),
386 long = "scenario",
387 required = true,
388 help = "Variant calling scenario that configures the prior."
389 )]
390 scenario: PathBuf,
391 #[structopt(
392 long = "contig",
393 required = true,
394 help = "Contig to consider for ploidy information."
395 )]
396 contig: String,
397 #[structopt(long = "sample", required = true, help = "Sample to plot.")]
398 sample: String,
399 },
400 #[structopt(
401 name = "scatter",
402 about = "Plot variant allelic fraction scatter plot overlayed with a contour plot between two sample groups",
403 usage = "varlociraptor plot scatter --sample-x sample1 \
404 --sample-y sample2 sample3 < calls.bcf | vg2svg > scatter.svg",
405 setting = structopt::clap::AppSettings::ColoredHelp,
406 )]
407 Scatter {
408 #[structopt(
409 long = "sample-x",
410 help = "Name of the first sample in the given VCF/BCF."
411 )]
412 sample_x: String,
413 #[structopt(
414 long = "sample-y",
415 help = "Name(s) of the alternative sample(s) in the given VCF/BCF. Multiple samples can be given."
416 )]
417 sample_y: Vec<String>,
418 },
419}
420
421#[derive(Debug, StructOpt, Serialize, Deserialize, Clone)]
422pub enum EstimateKind {
423 #[structopt(
424 name = "alignment-properties",
425 about = "Estimate properties like insert size, maximum softclip length, and the PCR homopolymer error model.",
426 usage = "varlociraptor estimate alignment-properties reference.fasta --bams sample.bam > sample.alignment-properties.json",
427 setting = structopt::clap::AppSettings::ColoredHelp,
428 )]
429 AlignmentProperties {
430 #[structopt(
431 parse(from_os_str),
432 help = "FASTA file with reference genome. Has to be indexed with samtools faidx."
433 )]
434 reference: PathBuf,
435
436 #[structopt(
437 long,
438 required = true,
439 help = "BAM files with aligned reads from a single sample or a set of \
440 samples that have been sequenced and prepared in exactly the same way. \
441 Use multiple BAM files in order to increase estimation robustness in case \
442 the samples only have comparably few reads (e.g. in case of panel sequencing)."
443 )]
444 bams: Vec<PathBuf>,
445
446 #[structopt(long, help = "Number of records to sample from the BAM file")]
447 num_records: Option<usize>,
448 },
449 #[structopt(
450 name = "contamination",
451 about = "Estimate contamination between samples (still experimental). Takes two samples, the one that \
452 is presumably contaminated and the contaminating one, and calculates the posterior distribution of \
453 the contamination fraction, which is printed into a TSV table. The most likely fraction comes at the top. \
454 Uses a uniform prior distribution by default. Optionally, e.g. a pathologist's estimate can be provided, \
455 such that the calculated posterior distribution can be sharpened by the model with the given prior \
456 knowledge. For now, it is always advisable to check the visual output generated by --output-plot.",
457 usage = "varlociraptor estimate contamination --sample sample-a.bcf --contaminant sample-b.bcf --output-plot contamination-qc-plot.vl.json > contamination.tsv",
458 setting = structopt::clap::AppSettings::ColoredHelp,
459 )]
460 Contamination {
461 #[structopt(long = "sample", help = "Presumably contaminated sample.")]
462 sample: PathBuf,
463 #[structopt(long = "contaminant", help = "Presumably contaminating sample.")]
464 contaminant: PathBuf,
465 #[structopt(
466 long = "prior-estimate",
467 help = "Prior estimate of the contamination (1-purity), e.g. \
468 obtained by counting tumor cells in a histology sample. Has to be used \
469 together with the number of cells considered for the estimate (--prior-considered-cells)."
470 )]
471 prior_estimate: Option<f64>,
472 #[structopt(
473 long = "prior-considered-cells",
474 help = "Number of cells considered for the prior estimate (see --prior-estimate)."
475 )]
476 prior_considered_cells: Option<u32>,
477 #[structopt(
478 long = "output",
479 help = "Output file; if not specified, output is printed to STDOUT."
480 )]
481 output: Option<PathBuf>,
482 #[structopt(
483 long = "output-plot",
484 help = "Path to store vega-lite plot of contamination/purity."
485 )]
486 output_plot: Option<PathBuf>,
487 #[structopt(
488 long = "output-max-vaf-variants",
489 help = "Path to store TSV of variant positions with maximum VAF in the contaminated sample (for QC)."
490 )]
491 output_max_vaf_variants: Option<PathBuf>,
492 },
493 #[structopt(
494 name = "mutational-burden",
495 about = "Estimate mutational burden. Takes Varlociraptor calls (must be annotated \
496 with e.g. VEP but using ANN instead of CSQ) from STDIN, prints mutational burden estimate in Vega-lite JSON format to STDOUT. \
497 It can be converted to an image via vega-lite-cli (see conda package).",
498 usage = "varlociraptor estimate mutational-burden --mode curve --coding-genome-size 3e7 --events SOMATIC_TUMOR \
499 --sample tumor < calls.bcf | vg2svg > tmb.svg\n \
500 varlociraptor estimate mutational-burden --mode table --coding-genome-size 3e7 --events SOMATIC_TUMOR \
501 --sample tumor < calls.bcf > tmb.tsv",
502 setting = structopt::clap::AppSettings::ColoredHelp,
503 )]
504 MutationalBurden {
505 #[structopt(long = "events", help = "Events to consider (e.g. SOMATIC_TUMOR).")]
506 events: Vec<String>,
507 #[structopt(
508 long = "sample",
509 help = "Name(s) of the sample(s) in the given VCF/BCF. Multiple samples can be given when using the multibar plot mode."
510 )]
511 sample: Vec<String>,
512 #[structopt(
513 long = "coding-genome-size",
514 default_value = "3e7",
515 help = "Size (in bases) of the covered coding genome."
516 )]
517 coding_genome_size: f64,
518 #[structopt(
519 long = "mode",
520 possible_values = &estimation::mutational_burden::Mode::iter().map(|v| v.into()).collect_vec(),
521 help = "How to output to STDOUT (as stratified curve, histogram, or multi-sample barplot, or TSV table)."
522 )]
523 mode: estimation::mutational_burden::Mode,
524 #[structopt(
525 long = "vaf-cutoff",
526 default_value = "0.2",
527 help = "Minimal variant allelic fraction to consider for mutli-sample barplot"
528 )]
529 cutoff: f64,
530 },
531}
532
533#[derive(Debug, StructOpt, Serialize, Deserialize, Clone)]
534pub enum CallKind {
535 #[structopt(
536 name = "variants",
537 about = "Call variants.",
538 setting = structopt::clap::AppSettings::ColoredHelp,
539 )]
540 Variants {
541 #[structopt(subcommand)]
542 mode: VariantCallMode,
543 #[structopt(
544 long = "propagate-info-fields",
545 help = "Additional INFO fields in the input BCF that shall be propagated to the output BCF."
546 )]
547 propagate_info_fields: Vec<String>,
548 #[structopt(
549 long = "omit-strand-bias",
550 help = "Do not consider strand bias when calculating the probability of an artifact.\
551 Use this flag when processing (panel) sequencing data, where the wet-lab \
552 methodology leads to strand bias in the coverage of genuine variants."
553 )]
554 #[serde(default)]
555 omit_strand_bias: bool,
556 #[structopt(
557 long = "omit-read-orientation-bias",
558 help = "Do not consider read orientation bias when calculating the probability of an \
559 artifact."
560 )]
561 #[serde(default)]
562 omit_read_orientation_bias: bool,
563 #[structopt(
564 long = "omit-read-position-bias",
565 help = "Do not consider read position bias when calculating the probability of an \
566 artifact. Use this flag when processing (panel) sequencing data, where the \
567 wet-lab methodology leads to stacks of reads starting at the same position."
568 )]
569 #[serde(default)]
570 omit_read_position_bias: bool,
571 #[structopt(
572 long = "omit-softclip-bias",
573 help = "Do not consider softclip bias when calculating the probability of an \
574 artifact. Use this flag when processing (panel) sequencing data, where the \
575 wet-lab methodology leads to stacks of reads starting at the same position."
576 )]
577 #[serde(default)]
578 omit_softclip_bias: bool,
579 #[structopt(
580 long = "omit-homopolymer-artifact-detection",
581 help = "Do not perform PCR homopolymer artifact detection when calculating the probability of an \
582 artifact. If you are sure that your protocol did not use any PCR you should use this flag."
583 )]
584 #[serde(default)]
585 omit_homopolymer_artifact_detection: bool,
586 #[structopt(
587 long = "omit-alt-locus-bias",
588 help = "Do not consider alt locus bias when calculating the probability of an artifact. \
589 Use this flag when you have e.g. prior knowledge about all candidate variants being not \
590 caused by mapping artifacts."
591 )]
592 #[serde(default)]
593 omit_alt_locus_bias: bool,
594 #[structopt(
595 long = "full-prior",
596 help = "Compute the full prior distribution for any allele frequency combination. \
597 This is in contrast to the default behavior, where the prior is only used to \
598 distinguish between absence and presence of variants \
599 (in essence we calculate the prior probability for the variant to be not present given all \
600 provided mutation rates and the heterozygosity). \
601 The advantage of this default behavior is that observations become in any case more important \
602 than the prior to distinguish between different events, while the prior still effectively protects \
603 from false positives."
604 )]
605 #[serde(default)]
606 full_prior: bool,
607 #[structopt(
608 long = "testcase-locus",
609 help = "Create a test case for the given locus. Locus must be given in the form \
610 CHROM:POS. Alternatively, for single variant VCFs, you can \
611 specify 'all'."
612 )]
613 testcase_locus: Option<String>,
614 #[structopt(
615 long = "testcase-prefix",
616 help = "Create test case files in the given directory."
617 )]
618 testcase_prefix: Option<String>,
619 #[structopt(
620 long = "testcase-anonymous",
621 help = "Anonymize any identifiers (via uuid4) and the sequences (by randomly permuting the alphabet) in the test case."
622 )]
623 testcase_anonymous: bool,
624 #[structopt(
625 long,
626 short,
627 help = "Output variant calls to given path (in BCF format). If omitted, prints calls to STDOUT."
628 )]
629 output: Option<PathBuf>,
630 #[structopt(
631 long = "log-mode",
632 possible_values = &["default", "each-record"],
633 default_value = "default",
634 help = "Specify how progress should be logged. By default, a record count will be printed. With 'each-record', \
635 Varlociraptor will additionally print contig and position of each processed candidate variant record. This is \
636 useful for debugging."
637 )]
638 #[serde(default = "default_log_mode")]
639 log_mode: String,
640 },
641 }
682
683#[derive(Debug, StructOpt, Serialize, Deserialize, Clone)]
684pub enum VariantCallMode {
685 #[structopt(
686 name = "tumor-normal",
687 about = "Call somatic and germline variants from a tumor-normal sample pair and a VCF/BCF with candidate variants.",
688 usage = "varlociraptor call variants tumor-normal --purity 0.75 --tumor tumor.bcf --normal normal.bcf > calls.bcf",
689 setting = structopt::clap::AppSettings::ColoredHelp,
690 )]
691 TumorNormal {
692 #[structopt(
693 parse(from_os_str),
694 long = "tumor",
695 required = true,
696 help = "BCF file with varlociraptor preprocess results for the tumor sample."
697 )]
698 tumor_observations: PathBuf,
699 #[structopt(
700 parse(from_os_str),
701 long = "normal",
702 required = true,
703 help = "BCF file with varlociraptor preprocess results for the normal sample."
704 )]
705 normal_observations: PathBuf,
706 #[structopt(short, long, default_value = "1.0", help = "Purity of tumor sample.")]
707 purity: f64,
708 },
709 #[structopt(
710 name = "generic",
711 about = "Call variants for a given scenario specified with the varlociraptor calling \
712 grammar and a VCF/BCF with candidate variants.",
713 usage = "varlociraptor call variants --output calls.bcf generic --scenario scenario.yaml \
714 --obs relapse=relapse.bcf tumor=tumor.bcf normal=normal.bcf",
715 setting = structopt::clap::AppSettings::ColoredHelp,
716 )]
717 Generic {
718 #[structopt(
719 parse(from_os_str),
720 long,
721 required = true,
722 help = "Scenario defined in the varlociraptor calling grammar."
723 )]
724 scenario: PathBuf,
725 #[structopt(
726 long = "obs",
727 required = true,
728 help = "BCF file with varlociraptor preprocess results for samples defined in the given scenario (given as samplename=path/to/calls.bcf). It is possible to omit a sample here (e.g. model tumor/normal in the scenario, but only call on the tumor sample when there is no normal sequenced). In that case, the resulting probabilities will be accordingly uncertain, because observations of the omitted sample are missing (which is equivalent to having no coverage in the sample)."
729 )]
730 sample_observations: Vec<String>,
731 },
732}
733
734#[derive(Debug, Serialize, Deserialize, Clone, EnumString, Display)]
735#[strum(serialize_all = "kebab_case")]
736pub enum ControlFDRMode {
737 LocalSmart,
738 LocalStrict,
739 GlobalSmart,
740 GlobalStrict,
741}
742
743impl ControlFDRMode {
744 pub fn is_local(&self) -> bool {
745 match self {
746 ControlFDRMode::LocalSmart | ControlFDRMode::LocalStrict => true,
747 ControlFDRMode::GlobalSmart | ControlFDRMode::GlobalStrict => false,
748 }
749 }
750
751 pub fn is_smart(&self) -> bool {
752 match self {
753 ControlFDRMode::LocalSmart | ControlFDRMode::GlobalSmart => true,
754 ControlFDRMode::LocalStrict | ControlFDRMode::GlobalStrict => false,
755 }
756 }
757}
758
759#[derive(Debug, StructOpt, Serialize, Deserialize, Clone)]
760pub enum FilterMethod {
761 #[structopt(
762 name = "control-fdr",
763 about = "Filter variant calls by controlling FDR. Filtered calls are printed to STDOUT.",
764 usage = "varlociraptor filter-calls control-fdr calls.bcf --events SOMATIC_TUMOR --fdr 0.05 \
765 --var SNV > calls.filtered.bcf",
766 setting = structopt::clap::AppSettings::ColoredHelp,
767 )]
768 ControlFDR {
769 #[structopt(parse(from_os_str), help = "BCF file with varlociraptor calls.")]
770 calls: PathBuf,
771 #[structopt(
772 long = "var",
773 possible_values = &VariantType::iter().map(|v| v.into()).collect_vec(),
774 help = "Variant type to consider. When controlling global FDR (not using --local) this should \
775 be used to control FDR for each type separately. Otherwise, less certain variant types will be \
776 underrepresented."
777 )]
778 vartype: Option<VariantType>,
779 #[structopt(long, help = "FDR to control for.")]
780 fdr: f64,
781 #[structopt(
782 long = "mode",
783 help = "Mode of FDR control (recommended: local-smart). Choose between local or global and strict or smart, combined via a hyphen (e.g. local-smart). \
784 Local means that for each record, the posterior of the selected events has to be at least 1-fdr. Global means that the \
785 posterior FDR of all selected records may not exceed the given fdr threshold. Strict means that the given events \
786 are directly used for above calculations. Smart means that FDR is controlled just by the probability of variants \
787 to be not absent or artifacts, and the given events are used to further filter out those where the summed probability \
788 of the particular events is <50%. Smart mode is recommended, as the strict mode can lead to unexpected behavior. \
789 For example: if want to control for somatic variants, but there is a degree of ambiguity between the somatic and some other event, \
790 strict mode will filter such variants, while smart mode will keep them and you will see the ambiguity in the record. \
791 The former behavior is much more intuitive than loosing such variants entirely."
792 )]
793 mode: ControlFDRMode,
794 #[structopt(
795 long,
796 help = "Whether smart mode shall retain artifact calls \
797 instead of taking the sum of artifact probability and \
798 absent probability for \
799 determining whether a variant shall be filtered. Setting this causes \
800 variants that are marked as artifacts to be kept.
801 "
802 )]
803 smart_retain_artifacts: bool,
804 #[structopt(long, help = "Events to consider.")]
805 events: Vec<String>,
806 #[structopt(long, help = "Minimum indel length to consider.")]
807 minlen: Option<u64>,
808 #[structopt(long, help = "Maximum indel length to consider (exclusive).")]
809 maxlen: Option<u64>,
810 },
811 #[structopt(
812 name = "posterior-odds",
813 about = "Filter variant calls by posterior odds of given events against the rest of events. \
814 Calls are taken from STDIN, filtered calls are printed to STDOUT.",
815 usage = "varlociraptor filter-calls posterior-odds --events SOMATIC_TUMOR --odds strong < calls.bcf",
816 setting = structopt::clap::AppSettings::ColoredHelp,
817 )]
818 PosteriorOdds {
819 #[structopt(
820 long,
821 possible_values = &KassRaftery::iter().map(|v| v.into()).collect_vec(),
822 help = "Kass-Raftery score to filter against."
823 )]
824 odds: KassRaftery,
825 #[structopt(long, help = "Events to consider.")]
826 events: Vec<String>,
827 },
828}
829
830fn parse_key_values(values: &[String]) -> Option<PathMap> {
831 let mut map = HashMap::new();
832 for value in values {
833 let kv = value.split_terminator('=').collect_vec();
834 if kv.len() == 2 {
835 map.insert(kv[0].to_owned(), PathBuf::from(kv[1]));
836 } else {
837 return None;
838 }
839 }
840 Some(map)
841}
842
843impl Default for Varlociraptor {
844 fn default() -> Self {
845 Varlociraptor::from_iter(vec!["--help"])
846 }
847}
848
849pub fn run(opt: Varlociraptor) -> Result<()> {
850 let opt_clone = opt.clone();
851 match opt {
852 Varlociraptor::Preprocess { kind } => {
853 match kind {
854 PreprocessKind::Variants {
855 reference,
856 candidates,
857 bam,
858 report_fragment_ids,
859 atomic_candidate_variants,
860 omit_mapq_adjustment,
861 alignment_properties,
862 output,
863 propagate_info_fields,
864 realignment_window,
865 max_depth,
866 omit_insert_size,
867 pairhmm_mode,
868 reference_buffer_size,
869 min_bam_refetch_distance,
870 log_mode,
871 output_raw_observations,
872 methylation_readtype,
873 variant_heterozygosity_field,
874 variant_somatic_effective_mutation_rate_field,
875 } => {
876 if realignment_window > (128 / 2) {
878 return Err(
879 structopt::clap::Error::with_description(
880 "Command-line option --indel-window requires a value <= 64 with the current implementation.",
881 structopt::clap::ErrorKind::ValueValidation
882 ).into()
883 );
884 };
885
886 let variant_heterozygosity_field = variant_heterozygosity_field.map(Vec::from);
887 let variant_somatic_effective_mutation_rate_field =
888 variant_somatic_effective_mutation_rate_field.map(Vec::from);
889
890 let mut reference_buffer = Arc::new(
891 reference::Buffer::from_path(&reference, reference_buffer_size)
892 .context("Unable to read genome reference.")?,
893 );
894
895 let alignment_properties = est_or_load_alignment_properties(
896 &alignment_properties,
897 &bam,
898 omit_insert_size,
899 Arc::get_mut(&mut reference_buffer).unwrap(),
900 Some(crate::estimation::alignment_properties::NUM_FRAGMENTS),
901 )?;
902
903 let gap_params = alignment_properties.gap_params.clone();
904
905 let log_each_record = log_mode == "each-record";
906
907 let propagate_info_fields = propagate_info_fields
908 .iter()
909 .map(|s| s.as_bytes().to_owned())
910 .collect();
911
912 match pairhmm_mode.as_ref() {
913 "homopolymer" => {
914 let hop_params = alignment_properties.hop_params.clone();
915 let mut processor =
916 calling::variants::preprocessing::ObservationProcessor::builder()
917 .report_fragment_ids(report_fragment_ids)
918 .adjust_prob_mapping(!omit_mapq_adjustment)
919 .alignment_properties(alignment_properties)
920 .max_depth(max_depth)
921 .inbam(bam)
922 .min_bam_refetch_distance(min_bam_refetch_distance)
923 .reference_buffer(Arc::clone(&reference_buffer))
924 .haplotype_feature_index(HaplotypeFeatureIndex::new(
925 &candidates,
926 )?)
927 .inbcf(candidates)
928 .aux_info_fields(propagate_info_fields)
929 .options(opt_clone)
930 .outbcf(output)
931 .raw_observation_output(output_raw_observations)
932 .log_each_record(log_each_record)
933 .realigner(realignment::HomopolyPairHMMRealigner::new(
934 reference_buffer,
935 gap_params,
936 hop_params,
937 realignment_window,
938 ))
939 .atomic_candidate_variants(atomic_candidate_variants)
940 .methylation_readtype(methylation_readtype)
941 .variant_heterozygosity_field(variant_heterozygosity_field)
942 .variant_somatic_effective_mutation_rate_field(
943 variant_somatic_effective_mutation_rate_field,
944 )
945 .build();
946 processor.process()?;
947 }
948 "fast" => {
949 let mut processor =
950 calling::variants::preprocessing::ObservationProcessor::builder()
951 .report_fragment_ids(report_fragment_ids)
952 .adjust_prob_mapping(!omit_mapq_adjustment)
953 .alignment_properties(alignment_properties)
954 .max_depth(max_depth)
955 .inbam(bam)
956 .min_bam_refetch_distance(min_bam_refetch_distance)
957 .reference_buffer(Arc::clone(&reference_buffer))
958 .haplotype_feature_index(HaplotypeFeatureIndex::new(
959 &candidates,
960 )?)
961 .inbcf(candidates)
962 .aux_info_fields(propagate_info_fields)
963 .options(opt_clone)
964 .outbcf(output)
965 .raw_observation_output(output_raw_observations)
966 .log_each_record(log_each_record)
967 .realigner(realignment::PathHMMRealigner::new(
968 gap_params,
969 realignment_window,
970 reference_buffer,
971 ))
972 .atomic_candidate_variants(atomic_candidate_variants)
973 .methylation_readtype(methylation_readtype)
974 .variant_heterozygosity_field(variant_heterozygosity_field)
975 .variant_somatic_effective_mutation_rate_field(
976 variant_somatic_effective_mutation_rate_field,
977 )
978 .build();
979 processor.process()?;
980 }
981 "exact" => {
982 if Prob::from(gap_params.prob_deletion_extend_artifact) > Prob(0.3)
983 || Prob::from(gap_params.prob_insertion_extend_artifact) > Prob(0.3)
984 {
985 warn!(
986 "Unexpectedly high prob_deletion_extend_artifact or \
987 prob_insertion_extend_artifact in alignment properties (>30%). \
988 Consider estimating alignment properties with multiple samples \
989 in case you have only few reads."
990 );
991 }
992 let mut processor =
993 calling::variants::preprocessing::ObservationProcessor::builder()
994 .report_fragment_ids(report_fragment_ids)
995 .adjust_prob_mapping(!omit_mapq_adjustment)
996 .alignment_properties(alignment_properties)
997 .max_depth(max_depth)
998 .inbam(bam)
999 .min_bam_refetch_distance(min_bam_refetch_distance)
1000 .reference_buffer(Arc::clone(&reference_buffer))
1001 .haplotype_feature_index(HaplotypeFeatureIndex::new(
1002 &candidates,
1003 )?)
1004 .inbcf(candidates)
1005 .aux_info_fields(propagate_info_fields)
1006 .options(opt_clone)
1007 .outbcf(output)
1008 .raw_observation_output(output_raw_observations)
1009 .log_each_record(log_each_record)
1010 .realigner(realignment::PairHMMRealigner::new(
1011 reference_buffer,
1012 gap_params,
1013 realignment_window,
1014 ))
1015 .atomic_candidate_variants(atomic_candidate_variants)
1016 .methylation_readtype(methylation_readtype)
1017 .variant_heterozygosity_field(variant_heterozygosity_field)
1018 .variant_somatic_effective_mutation_rate_field(
1019 variant_somatic_effective_mutation_rate_field,
1020 )
1021 .build();
1022 processor.process()?;
1023 }
1024 _ => panic!("Unknown pairhmm mode '{}'", pairhmm_mode),
1025 };
1026 }
1027 }
1028 }
1029 Varlociraptor::Call { kind } => {
1030 match kind {
1031 CallKind::Variants {
1032 mode,
1033 omit_strand_bias,
1034 omit_read_orientation_bias,
1035 omit_read_position_bias,
1036 omit_softclip_bias,
1037 omit_homopolymer_artifact_detection,
1038 omit_alt_locus_bias,
1039 testcase_locus,
1040 testcase_prefix,
1041 testcase_anonymous,
1042 output,
1043 log_mode,
1044 propagate_info_fields,
1045 full_prior,
1046 } => {
1047 let testcase_builder = if let Some(testcase_locus) = testcase_locus {
1048 if let Some(testcase_prefix) = testcase_prefix {
1049 Some(
1051 testcase::builder::TestcaseBuilder::default()
1052 .prefix(PathBuf::from(testcase_prefix))
1053 .anonymize(testcase_anonymous)
1054 .locus(&testcase_locus)?,
1055 )
1056 } else {
1057 return Err(errors::Error::MissingPrefix.into());
1058 }
1059 } else {
1060 None
1061 };
1062
1063 let log_each_record = log_mode == "each-record";
1064
1065 match mode {
1066 VariantCallMode::Generic {
1067 scenario,
1068 sample_observations,
1069 } => {
1070 if let Some(sample_observations) =
1071 parse_key_values(&sample_observations)
1072 {
1073 if let Some(mut testcase_builder) = testcase_builder {
1074 for (i, (sample_name, obspath)) in
1075 sample_observations.iter().enumerate()
1076 {
1077 let options = calling::variants::preprocessing::read_preprocess_options(obspath)?;
1078 let preprocess_input = options.preprocess_input();
1079 testcase_builder = testcase_builder.register_sample(
1080 sample_name,
1081 preprocess_input.bam,
1082 options,
1083 )?;
1084 if i == 0 {
1085 testcase_builder =
1086 testcase_builder.candidates(obspath.to_owned());
1087 testcase_builder = testcase_builder
1088 .reference(preprocess_input.reference)?;
1089 }
1090 }
1091
1092 let mut testcase = testcase_builder
1093 .scenario(Some(scenario))
1094 .mode(testcase::builder::Mode::Generic)
1095 .build()
1096 .unwrap();
1097 info!("Writing testcase.");
1098 testcase.write()?;
1099 return Ok(());
1100 }
1101
1102 let scenario = grammar::Scenario::from_path(scenario)?;
1103
1104 call_generic(
1105 scenario,
1106 sample_observations,
1107 omit_strand_bias,
1108 omit_read_orientation_bias,
1109 omit_read_position_bias,
1110 omit_softclip_bias,
1111 omit_homopolymer_artifact_detection,
1112 omit_alt_locus_bias,
1113 output,
1114 log_each_record,
1115 CallWriter::new(),
1116 DefaultCandidateFilter::new(),
1117 propagate_info_fields,
1118 full_prior,
1119 )?;
1120 } else {
1121 return Err(errors::Error::InvalidObservationsSpec.into());
1122 }
1123 }
1124 VariantCallMode::TumorNormal {
1125 tumor_observations,
1126 normal_observations,
1127 purity,
1128 } => {
1129 if let Some(testcase_builder) = testcase_builder {
1130 let tumor_options =
1131 calling::variants::preprocessing::read_preprocess_options(
1132 &tumor_observations,
1133 )?;
1134 let normal_options =
1135 calling::variants::preprocessing::read_preprocess_options(
1136 &normal_observations,
1137 )?;
1138 let mut testcase = testcase_builder
1139 .candidates(tumor_observations)
1140 .reference(tumor_options.preprocess_input().reference)?
1141 .register_sample(
1142 "tumor",
1143 tumor_options.preprocess_input().bam,
1144 tumor_options,
1145 )?
1146 .register_sample(
1147 "normal",
1148 normal_options.preprocess_input().bam,
1149 normal_options,
1150 )?
1151 .scenario(None)
1152 .mode(testcase::builder::Mode::TumorNormal)
1153 .purity(Some(purity))
1154 .build()
1155 .unwrap();
1156
1157 testcase.write()?;
1158 return Ok(());
1159 }
1160
1161 let scenario = grammar::Scenario::try_from(
1162 format!(
1163 r#"
1164 samples:
1165 tumor:
1166 resolution: 0.01
1167 contamination:
1168 by: normal
1169 fraction: {impurity}
1170 universe: "[0.0,1.0]"
1171 normal:
1172 resolution: 0.1
1173 universe: "[0.0,0.5[ | 0.5 | 1.0"
1174 events:
1175 somatic_tumor: "tumor:]0.0,1.0] & normal:0.0"
1176 somatic_normal: "tumor:]0.0,1.0] & normal:]0.0,0.5["
1177 germline_het: "tumor:]0.0,1.0] & normal:0.5"
1178 germline_hom: "tumor:]0.0,1.0] & normal:1.0"
1179 "#,
1180 impurity = 1.0 - purity
1181 )
1182 .as_str(),
1183 )?;
1184
1185 let mut observations = PathMap::default();
1186 observations.insert("tumor".to_owned(), tumor_observations);
1187 observations.insert("normal".to_owned(), normal_observations);
1188
1189 call_generic(
1190 scenario,
1191 observations,
1192 omit_strand_bias,
1193 omit_read_orientation_bias,
1194 omit_read_position_bias,
1195 omit_softclip_bias,
1196 omit_homopolymer_artifact_detection,
1197 omit_alt_locus_bias,
1198 output,
1199 log_each_record,
1200 CallWriter::new(),
1201 DefaultCandidateFilter::new(),
1202 propagate_info_fields,
1203 full_prior,
1204 )?;
1205 }
1206 }
1207 } }
1233 }
1234 Varlociraptor::FilterCalls { method } => match method {
1235 FilterMethod::ControlFDR {
1236 calls,
1237 events,
1238 fdr,
1239 mode,
1240 vartype,
1241 minlen,
1242 maxlen,
1243 smart_retain_artifacts,
1244 } => {
1245 let events = events
1246 .iter()
1247 .map(|event| SimpleEvent::new(event))
1248 .collect_vec();
1249 let vartype = match (vartype, minlen, maxlen) {
1250 (Some(VariantType::Insertion(None)), Some(minlen), Some(maxlen)) => {
1251 Some(VariantType::Insertion(Some(minlen..maxlen)))
1252 }
1253 (Some(VariantType::Deletion(None)), Some(minlen), Some(maxlen)) => {
1254 Some(VariantType::Deletion(Some(minlen..maxlen)))
1255 }
1256 (vartype, _, _) => vartype,
1257 };
1258
1259 let local = mode.is_local();
1260 let smart = mode.is_smart();
1261
1262 filtration::fdr::control_fdr::<&PathBuf, &str>(
1263 &calls,
1264 None,
1265 &events,
1266 vartype.as_ref(),
1267 LogProb::from(Prob::checked(fdr)?),
1268 local,
1269 smart,
1270 smart_retain_artifacts,
1271 )?;
1272 }
1273 FilterMethod::PosteriorOdds { ref events, odds } => {
1274 let events = events
1275 .iter()
1276 .map(|event| SimpleEvent {
1277 name: event.to_owned(),
1278 })
1279 .collect_vec();
1280
1281 filtration::posterior_odds::filter_by_odds::<_, &PathBuf, &PathBuf>(
1282 None, None, &events, odds,
1283 )?;
1284 }
1285 },
1286 Varlociraptor::DecodePHRED => {
1287 conversion::decode_phred::decode_phred()?;
1288 }
1289 Varlociraptor::Genotype => {
1290 conversion::genotype::genotype()?;
1291 }
1292 Varlociraptor::Estimate { kind } => match kind {
1293 EstimateKind::Contamination {
1294 sample,
1295 contaminant,
1296 prior_estimate,
1297 prior_considered_cells,
1298 output,
1299 output_plot,
1300 output_max_vaf_variants,
1301 } => {
1302 let prior_estimate = match (prior_estimate, prior_considered_cells) {
1303 (Some(p), Some(n)) if n > 0 => Some(
1304 estimation::contamination::PriorEstimate::new(AlleleFreq(p), n),
1305 ),
1306 (None, None) => None,
1307 _ => bail!(errors::Error::InvalidPriorContaminationEstimate),
1308 };
1309 estimation::contamination::estimate_contamination(
1310 sample,
1311 contaminant,
1312 output,
1313 output_plot,
1314 output_max_vaf_variants,
1315 prior_estimate,
1316 )?;
1317 }
1318 EstimateKind::MutationalBurden {
1319 events,
1320 sample,
1321 coding_genome_size,
1322 mode,
1323 cutoff,
1324 } => estimation::mutational_burden::collect_estimates(
1325 &events,
1326 &sample,
1327 coding_genome_size as u64,
1328 mode,
1329 cutoff,
1330 )?,
1331 EstimateKind::AlignmentProperties {
1332 reference,
1333 bams,
1334 num_records,
1335 } => {
1336 let mut reference_buffer = reference::Buffer::from_path(&reference, 1)?;
1337 let alignment_properties = estimate_alignment_properties(
1338 &bams,
1339 false,
1340 &mut reference_buffer,
1341 num_records,
1342 )?;
1343 println!("{}", serde_json::to_string_pretty(&alignment_properties)?);
1344 }
1345 },
1346 Varlociraptor::Plot { kind } => match kind {
1347 PlotKind::VariantCallingPrior {
1348 scenario,
1349 contig,
1350 sample,
1351 } => {
1352 let scenario = grammar::Scenario::from_path(scenario)?;
1353 let sample_infos = SampleInfos::try_from(&scenario)?;
1354
1355 let mut universes = scenario.sample_info();
1356 let mut ploidies = scenario.sample_info();
1357 for (sample_name, sample) in scenario.samples().iter() {
1358 universes = universes.push(
1359 sample_name,
1360 sample.contig_universe(&contig, scenario.species())?,
1361 );
1362 ploidies = ploidies.push(
1363 sample_name,
1364 sample.contig_ploidy(&contig, scenario.species())?,
1365 );
1366 }
1367 let universes = universes.build();
1368 let ploidies = ploidies.build();
1369
1370 let prior = Prior::builder()
1371 .variant_type_fractions(scenario.variant_type_fractions())
1372 .ploidies(Some(ploidies))
1373 .universe(Some(universes))
1374 .uniform(sample_infos.uniform_prior().clone())
1375 .germline_mutation_rate(sample_infos.germline_mutation_rates().clone())
1376 .somatic_effective_mutation_rate(
1377 sample_infos.somatic_effective_mutation_rates().clone(),
1378 )
1379 .inheritance(sample_infos.inheritance().clone())
1380 .heterozygosity(scenario.species().as_ref().and_then(|species| {
1381 species.heterozygosity().map(|het| LogProb::from(Prob(het)))
1382 }))
1383 .variant_type(Some(VariantType::Snv))
1384 .is_absent_only(false) .build();
1386 prior.check()?;
1387
1388 prior.plot(&sample, sample_infos.names())?;
1389 }
1390 PlotKind::Scatter { sample_x, sample_y } => {
1391 estimation::sample_variants::vaf_scatter(&sample_x, &sample_y)?
1392 }
1393 },
1394 Varlociraptor::MethylationCandidates {
1395 input,
1396 motifs,
1397 output,
1398 } => {
1399 candidates::methylation::find_candidates(input, motifs, output)?;
1400 }
1401 }
1402 Ok(())
1403}
1404
1405pub(crate) fn est_or_load_alignment_properties(
1406 alignment_properties_file: &Option<impl AsRef<Path>>,
1407 bam_file: impl AsRef<Path>,
1408 omit_insert_size: bool,
1409 reference_buffer: &mut reference::Buffer,
1410 num_records: Option<usize>,
1411) -> Result<AlignmentProperties> {
1412 if let Some(alignment_properties_file) = alignment_properties_file {
1413 Ok(serde_json::from_reader(File::open(
1414 alignment_properties_file,
1415 )?)?)
1416 } else {
1417 estimate_alignment_properties(&[bam_file], omit_insert_size, reference_buffer, num_records)
1418 }
1419}