diploid_contam_estimator/
cli.rs1use clap::{App, Arg, ArgMatches};
2
/// Long description handed to clap as the program's `about` text.
///
/// The text is emitted verbatim, so the line breaks inside the literal are
/// part of the rendered output.
const PROGRAM_DESC: &str = "Estimating contamination level from a diploid VCF file\n\n
 The program assumes we are dealing with a diploid genome, and uses the
 deviation of allelic balance from the expected allelic frequency for homozygous
 or heterozygous variant calls to compute a contamination value.

 For homozygous variants, we assume any deviation from an allelic frequency of 1 is introduced by contamination.

 For heterozygous variants, it is a little more complex, because it could be due to:
 1. contamination that doesn't look like the HET ALT allele: we expect lower HET alt allele frequency
 2. contamination that doesn't look like the HOM ALT allele: we expect higher HET alt allele frequency
 3. contamination that looks like the ALT allele: we expect higher alt allele frequency
 4. contamination that looks like the REF allele: we expect lower alt allele frequency
 5. contamination being called as ALT
";

/// Program name handed to clap when the command-line parser is built.
const PROGRAM_NAME: &str = "diploid-contam-estimator";
18
19pub fn parse_args() -> ArgMatches {
21 let matches: ArgMatches = App::new(PROGRAM_NAME)
22 .version("0.1.0")
23 .author("Douglas Wu <wckdouglas@gmail.com>")
24 .about(PROGRAM_DESC)
25 .arg(
26 Arg::with_name("in_vcf")
27 .short('i')
28 .long("in-vcf")
29 .takes_value(true)
30 .required(true)
31 .help("A diploid vcf file for estimating contamination"),
32 )
33 .arg(
34 Arg::with_name("out_json")
35 .short('o')
36 .long("out-json")
37 .takes_value(true)
38 .required(false)
39 .help("A json output file for storing the maximum likelihood contam level for the vcf file"),
40 )
41 .arg(
42 Arg::with_name("debug_json")
43 .short('d')
44 .long("debug-json")
45 .takes_value(true)
46 .required(false)
47 .help("A json output file for storing all intermediate log prob"),
48 )
49 .arg(
50 Arg::with_name("debug_variant_json")
51 .long("debug-variant-json")
52 .takes_value(true)
53 .required(false)
54 .help("A json output file for storing all input variants used for calculation"),
55 )
56 .arg(
57 Arg::with_name("snv_only")
58 .long("snv-only")
59 .takes_value(false)
60 .help("Only use SNV (ignore indel) for contamination estimations"),
61 )
62 .arg(
63 Arg::with_name("depth_threshold")
64 .short('m')
65 .long("min-depth")
66 .takes_value(true)
67 .default_value("0")
68 .help("Minimum depth for a variant to be considered (i.e. DP tag)"),
69 )
70 .arg(
71 Arg::with_name("loci_bed")
72 .short('b')
73 .long("bed")
74 .takes_value(true)
75 .required(false)
76 .help("bed file containing loci for extracting variants"),
77 )
78 .get_matches();
79 matches
80}