1use crate::{
2 utils::{
3 expression::CLIexpression,
4 variable_data::{GOAT_ASSEMBLY_VARIABLE_DATA, GOAT_TAXON_VARIABLE_DATA},
5 variables::Variables,
6 },
7 IndexType,
8};
9
10use anyhow::Result;
11
/// Builds the `&ranks=` query fragment for the rank `r`.
///
/// The fragment lists `r` followed by every rank that comes after it in the
/// fixed ordering below (subspecies first, superkingdom last), joined with
/// the URL-encoded comma `%2C`. If `r` is not one of the listed ranks
/// (the comparison is exact, so this includes the empty string), an empty
/// string is returned.
fn format_rank(r: &str) -> String {
    const RANKS: [&str; 9] = [
        "subspecies",
        "species",
        "genus",
        "family",
        "order",
        "class",
        "phylum",
        "kingdom",
        "superkingdom",
    ];

    match RANKS.iter().position(|&rank| rank == r) {
        // Keep the selected rank and everything after it in the list.
        Some(start) => format!("&ranks={}", RANKS[start..].join("%2C")),
        None => String::new(),
    }
}
41
/// Returns the `&names=` query fragment when `flag` is set.
///
/// With `flag == true` the fragment requests `synonym`, `tol_id` and
/// `common_name` (joined by `%2C`); otherwise the result is an empty string.
fn format_names(flag: bool) -> String {
    if flag {
        String::from("&names=synonym%2Ctol_id%2Ccommon_name")
    } else {
        String::new()
    }
}
50
51pub fn format_expression(exp: &str, index_type: IndexType) -> Result<String> {
53 let mut new_exp = CLIexpression::new(exp);
54 let parsed_string = match index_type {
55 IndexType::Taxon => new_exp.parse(&*GOAT_TAXON_VARIABLE_DATA)?,
56 IndexType::Assembly => new_exp.parse(&*GOAT_ASSEMBLY_VARIABLE_DATA)?,
57 };
58 Ok(parsed_string)
59}
60
/// Boolean switches selecting which groups of fields go into a query URL.
///
/// Every flag except `taxon_names` and `taxon_tidy` maps to a fixed group of
/// field names (see `to_vec_tuples`). Those two flags are not part of the
/// field table; `make_goat_urls` reads them directly.
#[derive(Copy, Clone)]
pub struct FieldBuilder {
    /// Selects `assembly_level`, `assembly_span`.
    pub taxon_assembly: bool,
    /// Selects `bioproject`, `biosample`.
    pub taxon_bioproject: bool,
    /// Selects `busco_completeness`, `odb10_lineage`, `busco_lineage`, `busco_string`.
    pub taxon_busco: bool,
    /// Selects `country_list`.
    pub taxon_country_list: bool,
    /// Selects `c_value`.
    pub taxon_cvalues: bool,
    /// Selects `assembly_date`, `ebp_metric_date`.
    pub taxon_date: bool,
    /// Selects `gc_percent`.
    pub taxon_gc_percent: bool,
    /// Selects `gene_count`.
    pub taxon_gene_count: bool,
    /// Selects `genome_size`, `genome_size_kmer`, `genome_size_draft`.
    pub taxon_gs: bool,
    /// Selects `chromosome_number`, `haploid_number`.
    pub taxon_karyotype: bool,
    /// Selects `isb_wildlife_act_1976`, `HabReg_2017`, `MarHabReg-2017`,
    /// `waca_1981`, `Protection_of_Badgers_Act_1992`, `ECHabs92`.
    pub taxon_legislation: bool,
    /// Selects `mitochondrion_assembly_span`, `mitochondrion_gc_percent`.
    pub taxon_mitochondrion: bool,
    /// Selects `scaffold_n50`, `contig_n50`.
    pub taxon_n50: bool,
    /// Not part of the field table; `make_goat_urls` uses it to append the
    /// `&names=synonym%2Ctol_id%2Ccommon_name` fragment.
    pub taxon_names: bool,
    /// Selects `plastid_assembly_span`, `plastid_gc_percent`.
    pub taxon_plastid: bool,
    /// Selects `ploidy`.
    pub taxon_ploidy: bool,
    /// Selects `sex_determination`.
    pub taxon_sex_determination: bool,
    /// Selects `sequencing_status`, `sample_collected`, `sample_acquired`,
    /// `in_progress`, `insdc_submitted`, `insdc_open`, `published`,
    /// `sample_collected_by`.
    pub taxon_status: bool,
    /// Selects `long_list`, `other_priority`, `family_representative`.
    pub taxon_target_lists: bool,
    /// Not part of the field table; `make_goat_urls` uses it to append
    /// `&tidyData=true`.
    pub taxon_tidy: bool,
    /// Selects `assembly_level`, `assembly_span`.
    pub assembly_assembly: bool,
    /// Selects `chromosome_count`.
    pub assembly_karyotype: bool,
    /// Selects `contig_count`, `contig_l50`, `contig_n50`.
    pub assembly_contig: bool,
    /// Selects `scaffold_count`, `scaffold_l50`, `scaffold_n50`.
    pub assembly_scaffold: bool,
    /// Selects `gc_percent`.
    pub assembly_gc: bool,
    /// Selects `gene_count`, `noncoding_gene_count`.
    pub assembly_gene: bool,
    /// Selects `busco_completeness`, `busco_lineage`, `busco_string`.
    pub assembly_busco: bool,
    /// Selects `nohit`, `target`.
    pub assembly_btk: bool,
}

impl FieldBuilder {
    /// Pairs every field-group flag with the field names it stands for.
    ///
    /// The order of the returned entries is the order in which fields appear
    /// in the generated query strings, so it must not be changed casually.
    fn to_vec_tuples(&self) -> Vec<(bool, Vec<&str>)> {
        let table: [(bool, &[&str]); 26] = [
            // Taxon index groups.
            (self.taxon_assembly, &["assembly_level", "assembly_span"]),
            (self.taxon_bioproject, &["bioproject", "biosample"]),
            (
                self.taxon_busco,
                &[
                    "busco_completeness",
                    "odb10_lineage",
                    "busco_lineage",
                    "busco_string",
                ],
            ),
            (self.taxon_country_list, &["country_list"]),
            (self.taxon_cvalues, &["c_value"]),
            (self.taxon_date, &["assembly_date", "ebp_metric_date"]),
            (self.taxon_gc_percent, &["gc_percent"]),
            (self.taxon_gene_count, &["gene_count"]),
            (
                self.taxon_gs,
                &["genome_size", "genome_size_kmer", "genome_size_draft"],
            ),
            (
                self.taxon_karyotype,
                &["chromosome_number", "haploid_number"],
            ),
            (
                self.taxon_legislation,
                &[
                    "isb_wildlife_act_1976",
                    "HabReg_2017",
                    "MarHabReg-2017",
                    "waca_1981",
                    "Protection_of_Badgers_Act_1992",
                    "ECHabs92",
                ],
            ),
            (
                self.taxon_mitochondrion,
                &["mitochondrion_assembly_span", "mitochondrion_gc_percent"],
            ),
            (self.taxon_n50, &["scaffold_n50", "contig_n50"]),
            (
                self.taxon_plastid,
                &["plastid_assembly_span", "plastid_gc_percent"],
            ),
            (self.taxon_ploidy, &["ploidy"]),
            (self.taxon_sex_determination, &["sex_determination"]),
            (
                self.taxon_status,
                &[
                    "sequencing_status",
                    "sample_collected",
                    "sample_acquired",
                    "in_progress",
                    "insdc_submitted",
                    "insdc_open",
                    "published",
                    "sample_collected_by",
                ],
            ),
            (
                self.taxon_target_lists,
                &["long_list", "other_priority", "family_representative"],
            ),
            // Assembly index groups.
            (
                self.assembly_assembly,
                &["assembly_level", "assembly_span"],
            ),
            (self.assembly_btk, &["nohit", "target"]),
            (
                self.assembly_busco,
                &["busco_completeness", "busco_lineage", "busco_string"],
            ),
            (
                self.assembly_contig,
                &["contig_count", "contig_l50", "contig_n50"],
            ),
            (self.assembly_gc, &["gc_percent"]),
            (
                self.assembly_gene,
                &["gene_count", "noncoding_gene_count"],
            ),
            (self.assembly_karyotype, &["chromosome_count"]),
            (
                self.assembly_scaffold,
                &["scaffold_count", "scaffold_l50", "scaffold_n50"],
            ),
        ];

        table
            .iter()
            .map(|&(enabled, names)| (enabled, names.to_vec()))
            .collect()
    }

    /// Builds the `&fields=` query fragment from the enabled field groups.
    ///
    /// All field names of every enabled group are joined, in table order,
    /// with the URL-encoded comma `%2C`. When no group is enabled the result
    /// is an empty string (no dangling `&fields=`).
    pub fn build_fields_string(&self) -> String {
        let selected: Vec<&str> = self
            .to_vec_tuples()
            .into_iter()
            .filter(|(enabled, _)| *enabled)
            .flat_map(|(_, names)| names)
            .collect();

        if selected.is_empty() {
            String::new()
        } else {
            format!("&fields={}", selected.join("%2C"))
        }
    }

    /// Builds the `excludeAncestral` / `excludeMissing` query fragments for
    /// every field of every enabled group.
    ///
    /// Each field gets one pair of parameters that share a zero-based index
    /// written between URL-encoded square brackets (`%5B` / `%5D`); the index
    /// counts fields across all enabled groups in table order.
    fn generate_exculde_flags(&self) -> String {
        const ANCESTRAL: &str = "&excludeAncestral";
        const MISSING: &str = "&excludeMissing";
        const OPEN_BRACKET: &str = "%5B";
        const CLOSE_BRACKET: &str = "%5D";

        self.to_vec_tuples()
            .into_iter()
            .filter(|(enabled, _)| *enabled)
            .flat_map(|(_, names)| names)
            .enumerate()
            .map(|(position, field)| {
                // Both parameters carry the same `[index]=field` suffix.
                let indexed = format!("{OPEN_BRACKET}{position}{CLOSE_BRACKET}={field}");
                format!("{ANCESTRAL}{indexed}{MISSING}{indexed}")
            })
            .collect()
    }
}
374
375pub fn make_goat_urls(
378 api: &str,
379 taxids: &[String],
380 goat_url: &str,
381 tax_tree: &str,
382 include_estimates: bool,
383 include_raw_values: bool,
384 exclude: bool,
385 summarise_values_by: &str,
386 result: &str,
387 taxonomy: &str,
388 size: u64,
389 ranks: &str,
390 fields: FieldBuilder,
391 variables: Option<&str>,
392 expression: &str,
393 tax_rank: &str,
394 unique_ids: Vec<String>,
395 index_type: IndexType,
396) -> Result<Vec<String>> {
397 let mut res = Vec::new();
398
399 let rank_string = format_rank(ranks);
401 let fields_string = match variables {
406 Some(v) => match index_type {
407 IndexType::Taxon => Variables::new(v).parse(&*GOAT_TAXON_VARIABLE_DATA)?,
408 IndexType::Assembly => Variables::new(v).parse(&*GOAT_ASSEMBLY_VARIABLE_DATA)?,
409 },
410 None => fields.build_fields_string(),
411 };
412
413 let exclude_missing_or_ancestral = if exclude {
414 match variables {
415 Some(v) => match index_type {
416 IndexType::Taxon => Variables::new(v).parse_exclude(&*GOAT_TAXON_VARIABLE_DATA)?,
417 IndexType::Assembly => Variables::new(v).parse_exclude(&*GOAT_ASSEMBLY_VARIABLE_DATA)?,
418 },
419 None => fields.generate_exculde_flags(),
420 }
421 } else {
422 "".into()
423 };
424
425 let names = format_names(fields.taxon_names);
426
427 let tidy_data: &str = match fields.taxon_tidy {
428 true => "&tidyData=true",
429 false => "",
430 };
431
432 for (taxon, chars) in taxids.iter().zip(unique_ids.iter()) {
435 let query_id = format!("&queryId=goat_cli_{}", chars);
436 let url = format!(
437 "{goat_url}{api}?query=tax_{tax_tree}%28{taxon}%29{tax_rank}{expression}&includeEstimates={include_estimates}&includeRawValues={include_raw_values}&summaryValues={summarise_values_by}&result={result}&taxonomy={taxonomy}&size={size}{rank_string}{fields_string}{tidy_data}{names}{query_id}{exclude_missing_or_ancestral}"
439 );
440 res.push(url);
441 }
442 Ok(res)
443}