diploid_contam_estimator/
cli.rs

1use clap::{App, Arg, ArgMatches};
2
/// Long description passed to the command-line parser's `about` text.
///
/// The literal spans several lines on purpose: the line breaks and the
/// leading indentation are part of the string handed to the parser.
const PROGRAM_DESC: &str = "Estimating contamination level from a diploid VCF file\n\n
    The program assumes we are dealing with a diploid genome, and uses the
    deviation of allelic balance from the expected allelic frequency for homozygous
    or heterozygous variant calls to compute a contamination value.

    For homozygous variants, we assume deviation from allelic frequency of 1 is all introduced by contamination.

    For heterozygous variants, it is a little more complex, because it could be due to:
        1. contamination that doesn't look like the HET ALT allele: we expect lower HET alt allele frequency
        2. contamination that doesn't look like the HOM ALT allele: we expect High HET alt allele frequency
        3. contamination that looks like the ALT allele: we expect higher alt allele frequency
        4. contamination that looks like the REF allele: we expect lower alt allele frequency
        5. contamination being called as ALT
";
/// Program name passed to the command-line parser.
const PROGRAM_NAME: &str = "diploid-contam-estimator";
18
19/// arg parser to get input from command line
20pub fn parse_args() -> ArgMatches {
21    let matches: ArgMatches = App::new(PROGRAM_NAME)
22        .version("0.1.0")
23        .author("Douglas Wu <wckdouglas@gmail.com>")
24        .about(PROGRAM_DESC)
25        .arg(
26            Arg::with_name("in_vcf")
27                .short('i')
28                .long("in-vcf")
29                .takes_value(true)
30                .required(true)
31                .help("A diploid vcf file for estimating contamination"),
32        )
33        .arg(
34            Arg::with_name("out_json")
35                .short('o')
36                .long("out-json")
37                .takes_value(true)
38                .required(false)
39                .help("A json output file for storing the maximum likelihood contam level for the vcf file"),
40        )
41        .arg(
42            Arg::with_name("debug_json")
43                .short('d')
44                .long("debug-json")
45                .takes_value(true)
46                .required(false)
47                .help("A json output file for storing all intermediate log prob"),
48        )
49        .arg(
50            Arg::with_name("debug_variant_json")
51                .long("debug-variant-json")
52                .takes_value(true)
53                .required(false)
54                .help("A json output file for storing all input variants used for calculation"),
55        )
56        .arg(
57            Arg::with_name("snv_only")
58                .long("snv-only")
59                .takes_value(false)
60                .help("Only use SNV (ignore indel) for contamination estimations"),
61        )
62        .arg(
63            Arg::with_name("depth_threshold")
64                .short('m')
65                .long("min-depth")
66                .takes_value(true)
67                .default_value("0")
68                .help("Minimum depth for a variant to be considered (i.e. DP tag)"),
69        )
70        .arg(
71            Arg::with_name("loci_bed")
72                .short('b')
73                .long("bed")
74                .takes_value(true)
75                .required(false)
76                .help("bed file containing loci for extracting variants"),
77        )
78        .get_matches();
79    matches
80}