goat_cli/utils/
variable_data.rs

1use crate::utils::expression::{Function, TypeOf, Variable};
2use lazy_static::lazy_static;
3use std::collections::BTreeMap;
4
// Macro adapted from: https://stackoverflow.com/questions/27582739/how-do-i-create-a-hashmap-literal
6
/// Builds a collection from a comma-separated list of `key => value` pairs.
///
/// Each pair becomes a `(key, value)` tuple; the tuples are placed in an array
/// and collected, so the target type is chosen by the caller's type annotation
/// and may be any type implementing `FromIterator<(K, V)>`. In this file it is
/// used to fill the [`BTreeMap`] statics holding the GoaT variable data.
///
/// A trailing comma after the last pair is accepted.
macro_rules! collection {
    // map-like: zero or more `k => v` pairs, optional trailing comma
    ($($k:expr => $v:expr),* $(,)?) => {{
        // Fully-qualified paths keep the expansion from adding `use` items to
        // the caller's scope. `IntoIterator::into_iter` is called as a function
        // (not as `.into_iter()`) so the array is iterated by value in every
        // edition; before edition 2021 the method-call form on an array
        // resolves to the by-reference slice iterator.
        ::std::iter::Iterator::collect(::std::iter::IntoIterator::into_iter([$(($k, $v),)*]))
    }};
}
15
// Variable data for the `taxon` index.
17
18lazy_static! {
19    /// Automatically generated GoaT variable data from a Bash script in the
20    /// `/vars` directory.
21    pub static ref GOAT_TAXON_VARIABLE_DATA: BTreeMap<&'static str, Variable<'static>> = collection!(
22        // automated input start taxon
23		"c_value" => Variable { display_name: "C value", type_of: TypeOf::HalfFloat, functions: Function::Some(vec!["min", "max"]) },
24		"c_value_method" => Variable { display_name: "C value method", type_of: TypeOf::Keyword(vec!["biochemical analysis", "bulk fluorometric assay", "complete genome sequencing", "feulgen densitometry", "feulgen image analysis densitometry", "flow cytometry", "flow karyotyping", "fluorescence fading analysis", "gallocyanin chrom alum densitometry", "methyl green densitometry", "not specified", "static cell fluorometry", "ultraviolet microscopy", "unknown", "biochemical analysis", "feulgen image analysis densitometry", "flow cytometry", "feulgen densitometry", "feulgen densitometry & flow cytometry", "microdensitometry", "pulse field gel electrophoresis", "reassociation kinetics", "whole genome sequencing"]), functions: Function::None },
25		"c_value_cell_type" => Variable { display_name: "C value cell type", type_of: TypeOf::Keyword(vec!["antennae", "antennal gland", "blood cells", "brain", "buccal epithelium", "coelomocytes", "corneal epithelium", "digestive gland", "dorsal fin clip", "egg", "embyro", "epidermis", "exopodite", "fibroblasts", "fin clips", "germarium", "gills", "haemocytes", "heart cells", "individual chromosomes", "intestine", "kidney cells", "legs", "leukocytes", "liver", "lung (culture)", "mantle", "midgut", "muscle cells", "ne", "not specified", "oocytes", "ovaries", "pancreas", "pharynx", "polypide cells in suspension", "red blood cells", "retinal cells", "salivary gland", "somatic cells", "sperm", "spleen", "tentacles", "testes", "thymus", "tissue culture", "various", "ventral hypodermal chord", "whole body", "whole body squash"]), functions: Function::None },
26		"genome_size" => Variable { display_name: "Genome size", type_of: TypeOf::Long, functions: Function::Some(vec!["min", "max"]) },
27		"genome_size_kmer" => Variable { display_name: "Genome size kmer", type_of: TypeOf::Long, functions: Function::Some(vec!["min", "max"]) },
28		"genome_size_draft" => Variable { display_name: "Genome size draft", type_of: TypeOf::Long, functions: Function::Some(vec!["min", "max"]) },
29		"assembly_level" => Variable { display_name: "Assembly level", type_of: TypeOf::Keyword(vec!["complete genome", "chromosome", "scaffold", "contig"]), functions: Function::None },
30		"assembly_span" => Variable { display_name: "Assembly span", type_of: TypeOf::Long, functions: Function::Some(vec!["min", "max"]) },
31		"bioproject" => Variable { display_name: "bioproject", type_of: TypeOf::Keyword(vec![""]), functions: Function::None },
32		"biosample" => Variable { display_name: "biosample", type_of: TypeOf::Keyword(vec![""]), functions: Function::None },
33		"contig_n50" => Variable { display_name: "Contig N50", type_of: TypeOf::Long, functions: Function::Some(vec!["min", "max"]) },
34		"assembly_date" => Variable { display_name: "Last updated", type_of: TypeOf::Date, functions: Function::Some(vec!["min", "max"]) },
35		"scaffold_n50" => Variable { display_name: "Scaffold N50", type_of: TypeOf::Long, functions: Function::Some(vec!["min", "max"]) },
36		"gene_count" => Variable { display_name: "Gene count", type_of: TypeOf::Integer, functions: Function::Some(vec!["min", "max"]) },
37		"ebp_metric_date" => Variable { display_name: "EBP metric date", type_of: TypeOf::Date, functions: Function::Some(vec!["min", "max"]) },
38		"mitochondrion_assembly_span" => Variable { display_name: "mitochondrion span", type_of: TypeOf::Long, functions: Function::None },
39		"mitochondrion_gc_percent" => Variable { display_name: "mitochondrion GC%", type_of: TypeOf::TwoDP, functions: Function::None },
40		"plastid_assembly_span" => Variable { display_name: "plastid span", type_of: TypeOf::Long, functions: Function::None },
41		"plastid_gc_percent" => Variable { display_name: "plastid GC%", type_of: TypeOf::TwoDP, functions: Function::None },
42		"chromosome_number" => Variable { display_name: "Chromosome number", type_of: TypeOf::Short, functions: Function::Some(vec!["min", "max"]) },
43		"haploid_number" => Variable { display_name: "Haploid number", type_of: TypeOf::Short, functions: Function::Some(vec!["min", "max"]) },
44		"sex_determination" => Variable { display_name: "Sex Determination", type_of: TypeOf::Keyword(vec![""]), functions: Function::None },
45		"ploidy" => Variable { display_name: "Ploidy", type_of: TypeOf::Short, functions: Function::Some(vec!["min", "max"]) },
46		"echabs92" => Variable { display_name: "EC Habitats Directive 1992", type_of: TypeOf::Keyword(vec!["echabs92_annex_iib", "echabs92_annex_ivb", "echabs92_annex_iva"]), functions: Function::None },
47		"habreg_2017" => Variable { display_name: "Conservation of Habitats and Species Regulations 2017", type_of: TypeOf::Keyword(vec!["habreg-sch2", "habreg-sch5"]), functions: Function::None },
48		"marhabreg-2017" => Variable { display_name: "Conservation of Offshore Marine Habitats and Species Regulations 2017", type_of: TypeOf::Keyword(vec!["marhabreg-sch1"]), functions: Function::None },
49		"waca_1981" => Variable { display_name: "Wildlife and Countryside Act 1981", type_of: TypeOf::Keyword(vec!["waca-sch1", "waca-sch5"]), functions: Function::None },
50		"isb_wildlife_act_1976" => Variable { display_name: "Irish Statute Book Wildlife Act, 1976", type_of: TypeOf::Keyword(vec!["iwa-nsch3", "iwa-sch5"]), functions: Function::None },
51		"protection_of_badgers_act_1992" => Variable { display_name: "Protection of Badgers Act 1992", type_of: TypeOf::Keyword(vec!["badgers92"]), functions: Function::None },
52		"sample_sex" => Variable { display_name: "Sample sex", type_of: TypeOf::Keyword(vec![""]), functions: Function::None },
53		"sample_location" => Variable { display_name: "location", type_of: TypeOf::None, functions: Function::None },
54		"country_list" => Variable { display_name: "Country list", type_of: TypeOf::Keyword(vec!["gb", "ie"]), functions: Function::None },
55		"btk_nohit" => Variable { display_name: "BTK no hit", type_of: TypeOf::OneDP, functions: Function::None },
56		"btk_target" => Variable { display_name: "BTK_target", type_of: TypeOf::OneDP, functions: Function::None },
57		"busco_completeness" => Variable { display_name: "BUSCO completeness", type_of: TypeOf::OneDP, functions: Function::None },
58		"busco_lineage" => Variable { display_name: "BUSCO lineage", type_of: TypeOf::Keyword(vec![""]), functions: Function::None },
59		"busco_string" => Variable { display_name: "BUSCO string", type_of: TypeOf::Keyword(vec![""]), functions: Function::None },
60		"gc_percent" => Variable { display_name: "GC percent", type_of: TypeOf::OneDP, functions: Function::None },
61		"n_percent" => Variable { display_name: "N percent", type_of: TypeOf::OneDP, functions: Function::None },
62		"odb10_lineage" => Variable { display_name: "Busco_odb10 lineage", type_of: TypeOf::Keyword(vec!["aconoidasida_odb10", "actinopterygii_odb10", "agaricales_odb10", "agaricomycetes_odb10", "alveolata_odb10", "apicomplexa_odb10", "arachnida_odb10", "arthropoda_odb10", "ascomycota_odb10", "aves_odb10", "basidiomycota_odb10", "boletales_odb10", "brassicales_odb10", "capnodiales_odb10", "carnivora_odb10", "cetartiodactyla_odb10", "chaetothyriales_odb10", "chlorophyta_odb10", "coccidia_odb10", "cyprinodontiformes_odb10", "diptera_odb10", "dothideomycetes_odb10", "embryophyta_odb10", "endopterygota_odb10", "euarchontoglires_odb10", "eudicots_odb10", "euglenozoa_odb10", "eukaryota_odb10", "eurotiales_odb10", "eurotiomycetes_odb10", "eutheria_odb10", "fabales_odb10", "fungi_odb10", "glires_odb10", "glomerellales_odb10", "helotiales_odb10", "hemiptera_odb10", "hymenoptera_odb10", "hypocreales_odb10", "insecta_odb10", "laurasiatheria_odb10", "leotiomycetes_odb10", "lepidoptera_odb10", "liliopsida_odb10", "mammalia_odb10", "metazoa_odb10", "microsporidia_odb10", "mollusca_odb10", "mucorales_odb10", "mucoromycota_odb10", "nematoda_odb10", "onygenales_odb10", "passeriformes_odb10", "plasmodium_odb10", "pleosporales_odb10", "poales_odb10", "polyporales_odb10", "primates_odb10", "saccharomycetes_odb10", "sauropsida_odb10", "solanales_odb10", "sordariomycetes_odb10", "stramenopiles_odb10", "tetrapoda_odb10", "tremellomycetes_odb10", "vertebrata_odb10", "viridiplantae_odb10"]), functions: Function::None },
63		"sample_collected" => Variable { display_name: "sample_collected", type_of: TypeOf::Keyword(vec![""]), functions: Function::None },
64		"sample_collected_by" => Variable { display_name: "sample_collected_by", type_of: TypeOf::Keyword(vec!["dalu", "ghc", "mba", "nhm", "nsu", "psu", "qmul", "rbge", "kew", "san", "ubc", "derb", "oxf", "vien"]), functions: Function::None },
65		"sample_acquired" => Variable { display_name: "sample_acquired", type_of: TypeOf::Keyword(vec![""]), functions: Function::None },
66		"in_progress" => Variable { display_name: "in_progress", type_of: TypeOf::Keyword(vec![""]), functions: Function::None },
67		"insdc_open" => Variable { display_name: "insdc_open", type_of: TypeOf::Keyword(vec![""]), functions: Function::None },
68		"published" => Variable { display_name: "published", type_of: TypeOf::Keyword(vec![""]), functions: Function::None },
69		"sequencing_status" => Variable { display_name: "sequencing_status", type_of: TypeOf::Keyword(vec!["published", "insdc_open", "in_progress", "sample_acquired", "sample_collected"]), functions: Function::None },
70		"sequencing_status_ag100pest" => Variable { display_name: "sequencing_status_ag100pest", type_of: TypeOf::Keyword(vec!["published", "insdc_open", "in_progress", "sample_acquired", "sample_collected"]), functions: Function::None },
71		"sequencing_status_asg" => Variable { display_name: "sequencing_status_asg", type_of: TypeOf::Keyword(vec!["published", "insdc_open", "in_progress", "sample_acquired", "sample_collected"]), functions: Function::None },
72		"sequencing_status_b10k" => Variable { display_name: "sequencing_status_b10k", type_of: TypeOf::Keyword(vec!["published", "insdc_open", "in_progress", "sample_acquired", "sample_collected"]), functions: Function::None },
73		"sequencing_status_canbp" => Variable { display_name: "sequencing_status_canbp", type_of: TypeOf::Keyword(vec!["published", "insdc_open", "in_progress", "sample_acquired", "sample_collected"]), functions: Function::None },
74		"sequencing_status_cbp" => Variable { display_name: "sequencing_status_cbp", type_of: TypeOf::Keyword(vec!["published", "insdc_open", "in_progress", "sample_acquired", "sample_collected"]), functions: Function::None },
75		"sequencing_status_ccgp" => Variable { display_name: "sequencing_status_ccgp", type_of: TypeOf::Keyword(vec!["published", "insdc_open", "in_progress", "sample_acquired", "sample_collected"]), functions: Function::None },
76		"sequencing_status_cfgp" => Variable { display_name: "sequencing_status_cfgp", type_of: TypeOf::Keyword(vec!["published", "insdc_open", "in_progress", "sample_acquired", "sample_collected"]), functions: Function::None },
77		"sequencing_status_dtol" => Variable { display_name: "sequencing_status_dtol", type_of: TypeOf::Keyword(vec!["published", "insdc_open", "in_progress", "sample_acquired", "sample_collected"]), functions: Function::None },
78		"sequencing_status_ebpn" => Variable { display_name: "sequencing_status_ebpn", type_of: TypeOf::Keyword(vec!["published", "insdc_open", "in_progress", "sample_acquired", "sample_collected"]), functions: Function::None },
79		"sequencing_status_endemixit" => Variable { display_name: "sequencing_status_endemixit", type_of: TypeOf::Keyword(vec!["published", "insdc_open", "in_progress", "sample_acquired", "sample_collected"]), functions: Function::None },
80		"sequencing_status_erga" => Variable { display_name: "sequencing_status_erga", type_of: TypeOf::Keyword(vec!["published", "insdc_open", "in_progress", "sample_acquired", "sample_collected"]), functions: Function::None },
81		"sequencing_status_eurofish" => Variable { display_name: "sequencing_status_eurofish", type_of: TypeOf::Keyword(vec!["published", "insdc_open", "in_progress", "sample_acquired", "sample_collected"]), functions: Function::None },
82		"sequencing_status_gaga" => Variable { display_name: "sequencing_status_gaga", type_of: TypeOf::Keyword(vec!["published", "insdc_open", "in_progress", "sample_acquired", "sample_collected"]), functions: Function::None },
83		"sequencing_status_giga" => Variable { display_name: "sequencing_status_giga", type_of: TypeOf::Keyword(vec!["published", "insdc_open", "in_progress", "sample_acquired", "sample_collected"]), functions: Function::None },
84		"sequencing_status_ilebp" => Variable { display_name: "sequencing_status_ilebp", type_of: TypeOf::Keyword(vec!["published", "insdc_open", "in_progress", "sample_acquired", "sample_collected"]), functions: Function::None },
85		"sequencing_status_metainvert" => Variable { display_name: "sequencing_status_metainvert", type_of: TypeOf::Keyword(vec!["published", "insdc_open", "in_progress", "sample_acquired", "sample_collected"]), functions: Function::None },
86		"sequencing_status_pgp" => Variable { display_name: "sequencing_status_pgp", type_of: TypeOf::Keyword(vec!["published", "insdc_open", "in_progress", "sample_acquired", "sample_collected"]), functions: Function::None },
87		"sequencing_status_squalomix" => Variable { display_name: "sequencing_status_squalomix", type_of: TypeOf::Keyword(vec!["published", "insdc_open", "in_progress", "sample_acquired", "sample_collected"]), functions: Function::None },
88		"sequencing_status_vgp" => Variable { display_name: "sequencing_status_vgp", type_of: TypeOf::Keyword(vec!["published", "insdc_open", "in_progress", "sample_acquired", "sample_collected"]), functions: Function::None },
89		"sequencing_status_zoonomia" => Variable { display_name: "sequencing_status_zoonomia", type_of: TypeOf::Keyword(vec!["published", "insdc_open", "in_progress", "sample_acquired", "sample_collected"]), functions: Function::None },
90		"sequencing_status_omg" => Variable { display_name: "sequencing_status_omg", type_of: TypeOf::Keyword(vec!["published", "insdc_open", "in_progress", "sample_acquired", "sample_collected"]), functions: Function::None },
91		"sequencing_status_arg" => Variable { display_name: "sequencing_status_arg", type_of: TypeOf::Keyword(vec!["published", "insdc_open", "in_progress", "sample_acquired", "sample_collected"]), functions: Function::None },
92		"sequencing_status_agi" => Variable { display_name: "sequencing_status_agi", type_of: TypeOf::Keyword(vec!["published", "insdc_open", "in_progress", "sample_acquired", "sample_collected"]), functions: Function::None },
93		"sequencing_status_tsi" => Variable { display_name: "sequencing_status_tsi", type_of: TypeOf::Keyword(vec!["published", "insdc_open", "in_progress", "sample_acquired", "sample_collected"]), functions: Function::None },
94		"sequencing_status_gap" => Variable { display_name: "sequencing_status_gap", type_of: TypeOf::Keyword(vec!["published", "insdc_open", "in_progress", "sample_acquired", "sample_collected"]), functions: Function::None },
95		"sequencing_status_gbr" => Variable { display_name: "sequencing_status_gbr", type_of: TypeOf::Keyword(vec!["published", "insdc_open", "in_progress", "sample_acquired", "sample_collected"]), functions: Function::None },
96		"long_list" => Variable { display_name: "long_list", type_of: TypeOf::Keyword(vec!["africabp", "ag100pest", "agi", "arg", "asg", "b10k", "canbp", "cbp", "ccgp", "cfgp", "dtol", "ebpn", "ein", "endemixit", "erga", "eurofish", "gaga", "gap", "gbr", "giga", "ilebp", "metainvert", "omg", "pgp", "squalomix", "tsi", "vgp", "zoonomia"]), functions: Function::None },
97		"other_priority" => Variable { display_name: "other_priority", type_of: TypeOf::Keyword(vec!["africabp", "ag100pest", "agi", "arg", "asg", "b10k", "canbp", "cbp", "ccgp", "cfgp", "dtol", "ebpn", "ein", "endemixit", "erga", "eurofish", "gaga", "gap", "gbr", "giga", "ilebp", "metainvert", "omg", "pgp", "squalomix", "tsi", "vgp", "zoonomia"]), functions: Function::None },
98		"family_representative" => Variable { display_name: "family_representative", type_of: TypeOf::Keyword(vec!["africabp", "ag100pest", "agi", "arg", "asg", "b10k", "canbp", "cbp", "ccgp", "cfgp", "dtol", "ebpn", "ein", "endemixit", "erga", "eurofish", "gaga", "gap", "gbr", "giga", "ilebp", "metainvert", "omg", "pgp", "squalomix", "tsi", "vgp", "zoonomia"]), functions: Function::None },
99		"sequencing_status_ebp" => Variable { display_name: "sequencing_status_ebp", type_of: TypeOf::None, functions: Function::None },
100        // automated input end taxon
101    );
102}
103
// Variable data for the `assembly` index.
105
106lazy_static! {
107    /// Automatically generated GoaT variable data from a Bash script in the
108    /// `/vars` directory.
109    pub static ref GOAT_ASSEMBLY_VARIABLE_DATA: BTreeMap<&'static str, Variable<'static>> = collection!(
110        // automated input start assembly
111		"assembly_level" => Variable { display_name: "Assembly level", type_of: TypeOf::Keyword(vec!["complete genome", "chromosome", "scaffold", "contig"]), functions: Function::None },
112		"assembly_span" => Variable { display_name: "Assembly span", type_of: TypeOf::Long, functions: Function::None },
113		"assembly_type" => Variable { display_name: "Assembly type", type_of: TypeOf::Keyword(vec![""]), functions: Function::None },
114		"bioproject" => Variable { display_name: "bioproject", type_of: TypeOf::Keyword(vec![""]), functions: Function::None },
115		"biosample" => Variable { display_name: "biosample", type_of: TypeOf::Keyword(vec![""]), functions: Function::None },
116		"chromosome_count" => Variable { display_name: "Chromosome count", type_of: TypeOf::Long, functions: Function::None },
117		"contig_count" => Variable { display_name: "Contig count", type_of: TypeOf::Long, functions: Function::None },
118		"contig_l50" => Variable { display_name: "Contig L50", type_of: TypeOf::Long, functions: Function::None },
119		"contig_n50" => Variable { display_name: "Contig N50", type_of: TypeOf::Long, functions: Function::None },
120		"ebp_metric_date" => Variable { display_name: "EBP metric date", type_of: TypeOf::Date, functions: Function::None },
121		"gene_count" => Variable { display_name: "Gene count", type_of: TypeOf::Integer, functions: Function::None },
122		"isolate" => Variable { display_name: "Isolate", type_of: TypeOf::Keyword(vec![""]), functions: Function::None },
123		"last_updated" => Variable { display_name: "Last updated", type_of: TypeOf::Date, functions: Function::None },
124		"noncoding_gene_count" => Variable { display_name: "Non-coding gene count", type_of: TypeOf::Integer, functions: Function::None },
125		"organelle" => Variable { display_name: "organelle", type_of: TypeOf::None, functions: Function::None },
126		"protein_count" => Variable { display_name: "Protein count", type_of: TypeOf::Integer, functions: Function::None },
127		"pseudogene_count" => Variable { display_name: "Pseudogene count", type_of: TypeOf::Integer, functions: Function::None },
128		"refseq_category" => Variable { display_name: "RefSeq category", type_of: TypeOf::Keyword(vec![""]), functions: Function::None },
129		"sample_sex" => Variable { display_name: "Sample sex", type_of: TypeOf::Keyword(vec![""]), functions: Function::None },
130		"scaffold_count" => Variable { display_name: "Scaffold count", type_of: TypeOf::Long, functions: Function::None },
131		"scaffold_l50" => Variable { display_name: "Scaffold L50", type_of: TypeOf::Long, functions: Function::None },
132		"scaffold_n50" => Variable { display_name: "Scaffold N50", type_of: TypeOf::Long, functions: Function::None },
133		"sequence_count" => Variable { display_name: "Sequence count", type_of: TypeOf::Long, functions: Function::None },
134		"submitter" => Variable { display_name: "Submitter", type_of: TypeOf::Keyword(vec![""]), functions: Function::None },
135		"ungapped_span" => Variable { display_name: "Ungapped span", type_of: TypeOf::Long, functions: Function::None },
136		"btk_nohit" => Variable { display_name: "BTK no hit", type_of: TypeOf::OneDP, functions: Function::None },
137		"btk_target" => Variable { display_name: "BTK_target", type_of: TypeOf::OneDP, functions: Function::None },
138		"busco_completeness" => Variable { display_name: "BUSCO completeness", type_of: TypeOf::OneDP, functions: Function::None },
139		"busco_lineage" => Variable { display_name: "BUSCO lineage", type_of: TypeOf::Keyword(vec![""]), functions: Function::None },
140		"busco_string" => Variable { display_name: "BUSCO string", type_of: TypeOf::Keyword(vec![""]), functions: Function::None },
141		"gc_percent" => Variable { display_name: "gc_percent", type_of: TypeOf::TwoDP, functions: Function::None },
142		"n_percent" => Variable { display_name: "n_percent", type_of: TypeOf::TwoDP, functions: Function::None },
143		"sample_location" => Variable { display_name: "sample_location", type_of: TypeOf::None, functions: Function::None },
144        // automated input end assembly
145    );
146}