goat_cli/utils/
cli_matches.rs

1use crate::utils::{
2    expression, tax_ranks, url, utils,
3    variable_data::{GOAT_ASSEMBLY_VARIABLE_DATA, GOAT_TAXON_VARIABLE_DATA},
4};
5use crate::{IndexType, TaxType, GOAT_URL, TAXONOMY, UPPER_CLI_FILE_LIMIT, UPPER_CLI_SIZE_LIMIT};
6use anyhow::{bail, Result};
7use std::path::PathBuf;
8
9/// Take CLI arguments and parse them. Return a tuple of:
10///
11/// (the size arg you passed, vector of taxon ID's, vector of URLs, and a vector
12/// of unique ID's).
13pub fn process_cli_args(
14    matches: &clap::ArgMatches,
15    api: &str,
16    unique_ids: Vec<String>,
17    index_type: IndexType,
18) -> Result<(u64, Vec<String>, Vec<String>)> {
19    // command line args same between taxon/assembly
20    let print_url = *matches.get_one::<bool>("url").expect("cli defaults false");
21    let print_goat_ui_url = *matches
22        .get_one::<bool>("goat-ui-url")
23        .expect("cli defaults false");
24    let tax_tree_enum = match *matches
25        .get_one::<bool>("descendents")
26        .expect("cli defaults false")
27    {
28        true => TaxType::Tree,
29        false => TaxType::Name,
30    };
31    // I think lineage is of limited value for assembly? but keep here anyways
32    let tax_lineage_enum = match *matches.get_one::<bool>("lineage").unwrap_or(&false) {
33        true => TaxType::Lineage,
34        false => TaxType::Name,
35    };
36    let include_estimates = *matches
37        .get_one::<bool>("include-estimates")
38        .expect("cli defaults false");
39    let expression = match matches.get_one::<String>("expression") {
40        Some(s) => url::format_expression(s, index_type)?,
41        None => "".to_string(),
42    };
43    // map needed to convert Option<String> -> Option<&str>
44    let variable_string = matches.get_one::<String>("variables").map(|x| &**x);
45    // this output will differ depending on taxon/assembly
46    // but keep cli arg the same
47    let print_expression = *matches
48        .get_one::<bool>("print-expression")
49        .expect("cli defaults false");
50
51    let tax_rank = match matches.get_one::<String>("tax-rank") {
52        Some(t) => tax_ranks::TaxRanks::init().parse(t, false)?,
53        None => "".to_string(),
54    };
55    let size = *matches.get_one::<u64>("size").expect("cli default = 50");
56    let ranks = matches
57        .get_one::<String>("ranks")
58        .expect("cli default = none");
59    let tax_name_op = matches.get_one::<String>("taxon");
60    let filename_op = matches.get_one::<PathBuf>("file");
61    let result = index_type.to_string();
62    let summarise_values_by = "count";
63    // add in exclusion of missing and ancestral values by default, but allow the user
64    // to toggle this on the command line
65    let exclude = *matches.get_one::<bool>("exclude").unwrap_or(&false);
66
67    // command line args unique to taxon
68    let taxon_include_raw_values = *matches.get_one::<bool>("taxon-raw").unwrap_or(&false);
69    let taxon_tidy = match taxon_include_raw_values {
70        true => true,
71        false => *matches.get_one::<bool>("taxon-tidy").unwrap_or(&false),
72    };
73    let taxon_assembly = *matches.get_one::<bool>("taxon-assembly").unwrap_or(&false);
74    let taxon_cvalues = *matches.get_one::<bool>("taxon-c-values").unwrap_or(&false);
75    let taxon_karyotype = *matches.get_one::<bool>("taxon-karyotype").unwrap_or(&false);
76    let taxon_gs = *matches
77        .get_one::<bool>("taxon-genome-size")
78        .unwrap_or(&false);
79    let taxon_busco = *matches.get_one::<bool>("taxon-busco").unwrap_or(&false);
80    let taxon_gc_percent = *matches
81        .get_one::<bool>("taxon-gc-percent")
82        .unwrap_or(&false);
83    let taxon_mitochondrion = *matches
84        .get_one::<bool>("taxon-mitochondria")
85        .unwrap_or(&false);
86    let taxon_plastid = *matches.get_one::<bool>("taxon-plastid").unwrap_or(&false);
87    let taxon_ploidy = *matches.get_one::<bool>("taxon-ploidy").unwrap_or(&false);
88    let taxon_sex_determination = *matches
89        .get_one::<bool>("taxon-sex-determination")
90        .unwrap_or(&false);
91    let taxon_legislation = *matches
92        .get_one::<bool>("taxon-legislation")
93        .unwrap_or(&false);
94    let taxon_names = *matches.get_one::<bool>("taxon-names").unwrap_or(&false);
95    let taxon_target_lists = *matches
96        .get_one::<bool>("taxon-target-lists")
97        .unwrap_or(&false);
98    let taxon_n50 = *matches.get_one::<bool>("taxon-n50").unwrap_or(&false);
99    let taxon_bioproject = *matches
100        .get_one::<bool>("taxon-bioproject")
101        .unwrap_or(&false);
102    let taxon_gene_count = *matches
103        .get_one::<bool>("taxon-gene-count")
104        .unwrap_or(&false);
105    let taxon_date = *matches.get_one::<bool>("taxon-date").unwrap_or(&false);
106    let taxon_country_list = *matches
107        .get_one::<bool>("taxon-country-list")
108        .unwrap_or(&false);
109    let taxon_status = *matches.get_one::<bool>("taxon-status").unwrap_or(&false);
110
111    // command line args unique to assembly
112    let assembly_assembly = *matches
113        .get_one::<bool>("assembly-assembly")
114        .unwrap_or(&false);
115    let assembly_karyotype = *matches
116        .get_one::<bool>("assembly-karyotype")
117        .unwrap_or(&false);
118    let assembly_contig = *matches.get_one::<bool>("assembly-contig").unwrap_or(&false);
119    let assembly_scaffold = *matches
120        .get_one::<bool>("assembly-scaffold")
121        .unwrap_or(&false);
122    let assembly_gc = *matches.get_one::<bool>("assembly-gc").unwrap_or(&false);
123    let assembly_gene = *matches
124        .get_one::<bool>("assembly-gene-count")
125        .unwrap_or(&false);
126    let assembly_busco = *matches.get_one::<bool>("assembly-busco").unwrap_or(&false);
127    let assembly_btk = *matches.get_one::<bool>("assembly-btk").unwrap_or(&false);
128
129    if print_expression {
130        match index_type {
131            IndexType::Taxon => expression::print_variable_data(&*GOAT_TAXON_VARIABLE_DATA),
132            IndexType::Assembly => expression::print_variable_data(&*GOAT_ASSEMBLY_VARIABLE_DATA),
133        }
134        std::process::exit(0);
135    }
136
137    // merge the field flags
138    let fields = url::FieldBuilder {
139        taxon_assembly,
140        taxon_bioproject,
141        taxon_busco,
142        taxon_country_list,
143        taxon_cvalues,
144        taxon_date,
145        taxon_gc_percent,
146        taxon_gene_count,
147        taxon_gs,
148        taxon_karyotype,
149        taxon_legislation,
150        taxon_mitochondrion,
151        taxon_names,
152        taxon_n50,
153        taxon_plastid,
154        taxon_ploidy,
155        taxon_sex_determination,
156        taxon_status,
157        taxon_target_lists,
158        taxon_tidy,
159        assembly_assembly,
160        assembly_karyotype,
161        assembly_contig,
162        assembly_scaffold,
163        assembly_gc,
164        assembly_gene,
165        assembly_busco,
166        assembly_btk,
167    };
168
169    if size as usize > *UPPER_CLI_SIZE_LIMIT {
170        let limit_string = utils::pretty_print_usize(*UPPER_CLI_SIZE_LIMIT);
171        bail!(
172            "Searches with more than {} results are not currently supported.",
173            limit_string
174        )
175    }
176
177    // tree includes all descendents of a node
178    let tax_tree = match (tax_tree_enum, tax_lineage_enum) {
179        (TaxType::Tree, TaxType::Name) => "tree",
180        (TaxType::Name, TaxType::Lineage) => "lineage",
181        (TaxType::Name, TaxType::Name) => "name",
182        (_, _) => bail!("If we get here, I've done something wrong in the `TaxType` enum logic. Please file an issue."),
183    };
184
185    let url_vector: Vec<String>;
186    // if -t use this
187    match tax_name_op {
188        Some(s) => {
189            // catch empty string hanging here.
190            if s.is_empty() {
191                bail!("Empty string found, please specify a taxon.")
192            }
193            url_vector = utils::parse_comma_separated(s)
194        }
195        None => match filename_op {
196            Some(s) => {
197                url_vector = utils::lines_from_file(s)?;
198                // check length of vector and bail if > 1000
199                if url_vector.len() > *UPPER_CLI_FILE_LIMIT {
200                    let limit_string = utils::pretty_print_usize(*UPPER_CLI_FILE_LIMIT);
201                    bail!("Number of taxa specified cannot exceed {}.", limit_string)
202                }
203            }
204            None => bail!("One of -f (--file) or -t (--taxon) should be specified."),
205        },
206    }
207
208    let url_vector_api = url::make_goat_urls(
209        api,
210        &url_vector,
211        &*GOAT_URL,
212        tax_tree,
213        include_estimates,
214        // check again whether to include
215        // raw values in `assembly` index.
216        taxon_include_raw_values,
217        exclude,
218        summarise_values_by,
219        &result,
220        &*TAXONOMY,
221        size,
222        ranks,
223        fields,
224        variable_string,
225        &expression,
226        &tax_rank,
227        unique_ids,
228        index_type,
229    )?;
230
231    if print_url {
232        for (index, url) in url_vector_api.iter().enumerate() {
233            println!("{}.\tGoaT API URL: {}", index, url);
234        }
235        std::process::exit(0);
236    } else if print_goat_ui_url {
237        for (index, url) in url_vector_api.iter().enumerate() {
238            // remove api/v2/
239            let new_url = url.replace("api/v2/", "");
240            println!("{}.\tGoaT API URL: {}", index, new_url);
241        }
242        std::process::exit(0);
243    }
244
245    // return the url vector
246    Ok((size, url_vector, url_vector_api))
247}