barcode_count/
arguments.rs

1use anyhow::{Context, Result};
2use chrono::Local;
3use clap::{crate_version, App, Arg};
4
/// Holds every input argument of the program after command-line parsing.
///
/// All fields are public so the rest of the program can read them directly.
/// `Debug` and `Clone` are derived so the parsed configuration can be logged
/// or handed to several consumers.
#[derive(Debug, Clone)]
pub struct Args {
    /// FASTQ file path.
    pub fastq: String,
    /// Sequence format scheme file path.
    pub format: String,
    /// Sample barcode file path.  Optional.
    pub sample_barcodes_option: Option<String>,
    /// Building block (counted) barcode file path.  Optional.
    pub counted_barcodes_option: Option<String>,
    /// Output directory.  Defaults to `./`.
    pub output_dir: String,
    /// Number of threads to use.  Defaults to the number of CPUs reported for the machine.
    pub threads: u16,
    /// Prefix string for the output files.
    pub prefix: String,
    /// Whether or not to create an additional output file that merges all samples.
    pub merge_output: bool,
    /// How many errors are allowed in each building block barcode.
    /// `None` when not supplied; the documented fallback is 20% of the length.
    pub barcodes_errors_option: Option<u16>,
    /// How many errors are allowed in each sample barcode.
    /// `None` when not supplied; the documented fallback is 20% of the length.
    pub sample_errors_option: Option<u16>,
    /// How many errors are allowed in each constant region.
    /// `None` when not supplied; the documented fallback is 20% of the length.
    pub constant_errors_option: Option<u16>,
    /// Minimum average read quality score per barcode.
    pub min_average_quality_score: f32,
    /// Whether to create enrichment output files for single and double synthons/barcodes.
    pub enrich: bool,
}
21
22impl Args {
23    pub fn new() -> Result<Self> {
24        let total_cpus = num_cpus::get().to_string();
25        let today = Local::now().format("%Y-%m-%d").to_string();
26        // parse arguments
27        let args = App::new("NGS-Barcode-Count")
28        .version(crate_version!())
29        .author("Rory Coffey <coffeyrt@gmail.com>")
30        .about("Counts barcodes located in sequencing data")
31        .arg(
32            Arg::with_name("fastq")
33                .short("f")
34                .long("fastq")
35                .takes_value(true)
36                .required(true)
37                .help("FastQ file"),
38        )
39        .arg(
40            Arg::with_name("format_file")
41                .short("q")
42                .long("sequence-format")
43                .takes_value(true)
44                .required(true)
45                .help("Sequence format file"),
46        )
47        .arg(
48            Arg::with_name("sample_file")
49                .short("s")
50                .long("sample-barcodes")
51                .takes_value(true)
52                .help("Sample barcodes file"),
53        )
54        .arg(
55            Arg::with_name("barcode_file")
56                .short("c")
57                .long("counted-barcodes")
58                .takes_value(true)
59                .help("Counted barcodes file"),
60        )
61        .arg(
62            Arg::with_name("threads")
63                .short("t")
64                .long("threads")
65                .takes_value(true)
66                .default_value(&total_cpus)
67                .help("Number of threads"),
68        )
69        .arg(
70            Arg::with_name("dir")
71                .short("o")
72                .long("output-dir")
73                .takes_value(true)
74                .default_value("./")
75                .help("Directory to output the counts to"),
76        )
77        .arg(
78            Arg::with_name("prefix")
79                .short("p")
80                .long("prefix")
81                .takes_value(true)
82                .default_value(&today)
83                .help("File prefix name.  THe output will end with '_<sample_name>_counts.csv'"),
84        )
85        .arg(
86            Arg::with_name("merge-output")
87                .short("m")
88                .long("merge-output")
89                .takes_value(false)
90                .help("Merge sample output counts into a single file.  Not necessary when there is only one sample"),
91        )
92        .arg(
93            Arg::with_name("enrich")
94                .long("enrich")
95                .short("e")
96                .takes_value(false)
97                .help("Create output files of enrichment for single and double synthons/barcodes"),
98        )
99        .arg(
100            Arg::with_name("max_barcode")
101                .long("max-errors-counted-barcode")
102                .takes_value(true)
103                .help("Maximimum number of sequence errors allowed within each counted barcode. Defaults to 20% of the total."),
104        )
105        .arg(
106            Arg::with_name("max_sample")
107                .long("max-errors-sample")
108                .takes_value(true)
109                .help("Maximimum number of sequence errors allowed within sample barcode. Defaults to 20% of the total."),
110        )
111        .arg(
112            Arg::with_name("max_constant")
113                .long("max-errors-constant")
114                .takes_value(true)
115                .help("Maximimum number of sequence errors allowed within constant region. Defaults to 20% of the total."),
116        )
117        .arg(
118            Arg::with_name("min")
119                .long("min-quality")
120                .takes_value(true)
121                .default_value("0")
122                .help("Minimum average read quality score per barcode"),
123        )
124        .get_matches();
125
126        let sample_barcodes_option;
127        if let Some(sample) = args.value_of("sample_file") {
128            sample_barcodes_option = Some(sample.to_string())
129        } else {
130            sample_barcodes_option = None
131        }
132
133        let counted_barcodes_option;
134        if let Some(barcodes) = args.value_of("barcode_file") {
135            counted_barcodes_option = Some(barcodes.to_string())
136        } else {
137            counted_barcodes_option = None
138        }
139
140        let barcodes_errors_option;
141        if let Some(barcodes) = args.value_of("max_barcode") {
142            barcodes_errors_option = Some(
143                barcodes
144                    .parse::<u16>()
145                    .context("Unable to convert maximum barcode errors to an integer")?,
146            )
147        } else {
148            barcodes_errors_option = None
149        }
150
151        let sample_errors_option;
152        if let Some(sample) = args.value_of("max_sample") {
153            sample_errors_option = Some(
154                sample
155                    .parse::<u16>()
156                    .context("Unable to convert maximum sample errors to an integer")?,
157            )
158        } else {
159            sample_errors_option = None
160        }
161
162        let constant_errors_option;
163        if let Some(constant) = args.value_of("max_constant") {
164            constant_errors_option = Some(
165                constant
166                    .parse::<u16>()
167                    .context("Unable to convert maximum constant errors to an integer")?,
168            )
169        } else {
170            constant_errors_option = None
171        }
172
173        let merge_output = args.is_present("merge-output");
174        let enrich = args.is_present("enrich");
175        let fastq = args.value_of("fastq").unwrap().to_string();
176        let format = args.value_of("format_file").unwrap().to_string();
177        let output_dir = args.value_of("dir").unwrap().to_string();
178        let threads = args
179            .value_of("threads")
180            .unwrap()
181            .parse::<u16>()
182            .context("Unable to convert threads to an integer")?;
183        let prefix = args.value_of("prefix").unwrap().to_string();
184        let min_average_quality_score = args
185            .value_of("min")
186            .unwrap()
187            .parse::<f32>()
188            .context("Unable to convert min score to a float")?;
189
190        Ok(Args {
191            fastq,
192            format,
193            sample_barcodes_option,
194            counted_barcodes_option,
195            output_dir,
196            threads,
197            prefix,
198            merge_output,
199            barcodes_errors_option,
200            sample_errors_option,
201            constant_errors_option,
202            min_average_quality_score,
203            enrich,
204        })
205    }
206}