1use crate::utils::expression::{Function, TypeOf, Variable};
2use lazy_static::lazy_static;
3use std::collections::BTreeMap;
4
/// Builds a collection from a comma-separated list of `key => value` pairs.
///
/// Every pair is turned into a `(key, value)` tuple; the tuples are placed in
/// an array and handed to [`FromIterator::from_iter`], so the macro can produce
/// any collection that implements `FromIterator<(K, V)>`. The concrete
/// collection type is picked by type inference at the call site. A trailing
/// comma after the last pair is accepted, and an empty invocation yields an
/// empty collection.
macro_rules! collection {
    ($($key:expr => $value:expr),* $(,)?) => {{
        // The absolute path keeps the expansion independent of whatever names
        // are in scope where the macro is invoked.
        ::std::iter::FromIterator::from_iter([$(($key, $value),)*])
    }};
}
15
16lazy_static! {
19 pub static ref GOAT_TAXON_VARIABLE_DATA: BTreeMap<&'static str, Variable<'static>> = collection!(
22 "c_value" => Variable { display_name: "C value", type_of: TypeOf::HalfFloat, functions: Function::Some(vec!["min", "max"]) },
24 "c_value_method" => Variable { display_name: "C value method", type_of: TypeOf::Keyword(vec!["biochemical analysis", "bulk fluorometric assay", "complete genome sequencing", "feulgen densitometry", "feulgen image analysis densitometry", "flow cytometry", "flow karyotyping", "fluorescence fading analysis", "gallocyanin chrom alum densitometry", "methyl green densitometry", "not specified", "static cell fluorometry", "ultraviolet microscopy", "unknown", "biochemical analysis", "feulgen image analysis densitometry", "flow cytometry", "feulgen densitometry", "feulgen densitometry & flow cytometry", "microdensitometry", "pulse field gel electrophoresis", "reassociation kinetics", "whole genome sequencing"]), functions: Function::None },
25 "c_value_cell_type" => Variable { display_name: "C value cell type", type_of: TypeOf::Keyword(vec!["antennae", "antennal gland", "blood cells", "brain", "buccal epithelium", "coelomocytes", "corneal epithelium", "digestive gland", "dorsal fin clip", "egg", "embyro", "epidermis", "exopodite", "fibroblasts", "fin clips", "germarium", "gills", "haemocytes", "heart cells", "individual chromosomes", "intestine", "kidney cells", "legs", "leukocytes", "liver", "lung (culture)", "mantle", "midgut", "muscle cells", "ne", "not specified", "oocytes", "ovaries", "pancreas", "pharynx", "polypide cells in suspension", "red blood cells", "retinal cells", "salivary gland", "somatic cells", "sperm", "spleen", "tentacles", "testes", "thymus", "tissue culture", "various", "ventral hypodermal chord", "whole body", "whole body squash"]), functions: Function::None },
26 "genome_size" => Variable { display_name: "Genome size", type_of: TypeOf::Long, functions: Function::Some(vec!["min", "max"]) },
27 "genome_size_kmer" => Variable { display_name: "Genome size kmer", type_of: TypeOf::Long, functions: Function::Some(vec!["min", "max"]) },
28 "genome_size_draft" => Variable { display_name: "Genome size draft", type_of: TypeOf::Long, functions: Function::Some(vec!["min", "max"]) },
29 "assembly_level" => Variable { display_name: "Assembly level", type_of: TypeOf::Keyword(vec!["complete genome", "chromosome", "scaffold", "contig"]), functions: Function::None },
30 "assembly_span" => Variable { display_name: "Assembly span", type_of: TypeOf::Long, functions: Function::Some(vec!["min", "max"]) },
31 "bioproject" => Variable { display_name: "bioproject", type_of: TypeOf::Keyword(vec![""]), functions: Function::None },
32 "biosample" => Variable { display_name: "biosample", type_of: TypeOf::Keyword(vec![""]), functions: Function::None },
33 "contig_n50" => Variable { display_name: "Contig N50", type_of: TypeOf::Long, functions: Function::Some(vec!["min", "max"]) },
34 "assembly_date" => Variable { display_name: "Last updated", type_of: TypeOf::Date, functions: Function::Some(vec!["min", "max"]) },
35 "scaffold_n50" => Variable { display_name: "Scaffold N50", type_of: TypeOf::Long, functions: Function::Some(vec!["min", "max"]) },
36 "gene_count" => Variable { display_name: "Gene count", type_of: TypeOf::Integer, functions: Function::Some(vec!["min", "max"]) },
37 "ebp_metric_date" => Variable { display_name: "EBP metric date", type_of: TypeOf::Date, functions: Function::Some(vec!["min", "max"]) },
38 "mitochondrion_assembly_span" => Variable { display_name: "mitochondrion span", type_of: TypeOf::Long, functions: Function::None },
39 "mitochondrion_gc_percent" => Variable { display_name: "mitochondrion GC%", type_of: TypeOf::TwoDP, functions: Function::None },
40 "plastid_assembly_span" => Variable { display_name: "plastid span", type_of: TypeOf::Long, functions: Function::None },
41 "plastid_gc_percent" => Variable { display_name: "plastid GC%", type_of: TypeOf::TwoDP, functions: Function::None },
42 "chromosome_number" => Variable { display_name: "Chromosome number", type_of: TypeOf::Short, functions: Function::Some(vec!["min", "max"]) },
43 "haploid_number" => Variable { display_name: "Haploid number", type_of: TypeOf::Short, functions: Function::Some(vec!["min", "max"]) },
44 "sex_determination" => Variable { display_name: "Sex Determination", type_of: TypeOf::Keyword(vec![""]), functions: Function::None },
45 "ploidy" => Variable { display_name: "Ploidy", type_of: TypeOf::Short, functions: Function::Some(vec!["min", "max"]) },
46 "echabs92" => Variable { display_name: "EC Habitats Directive 1992", type_of: TypeOf::Keyword(vec!["echabs92_annex_iib", "echabs92_annex_ivb", "echabs92_annex_iva"]), functions: Function::None },
47 "habreg_2017" => Variable { display_name: "Conservation of Habitats and Species Regulations 2017", type_of: TypeOf::Keyword(vec!["habreg-sch2", "habreg-sch5"]), functions: Function::None },
48 "marhabreg-2017" => Variable { display_name: "Conservation of Offshore Marine Habitats and Species Regulations 2017", type_of: TypeOf::Keyword(vec!["marhabreg-sch1"]), functions: Function::None },
49 "waca_1981" => Variable { display_name: "Wildlife and Countryside Act 1981", type_of: TypeOf::Keyword(vec!["waca-sch1", "waca-sch5"]), functions: Function::None },
50 "isb_wildlife_act_1976" => Variable { display_name: "Irish Statute Book Wildlife Act, 1976", type_of: TypeOf::Keyword(vec!["iwa-nsch3", "iwa-sch5"]), functions: Function::None },
51 "protection_of_badgers_act_1992" => Variable { display_name: "Protection of Badgers Act 1992", type_of: TypeOf::Keyword(vec!["badgers92"]), functions: Function::None },
52 "sample_sex" => Variable { display_name: "Sample sex", type_of: TypeOf::Keyword(vec![""]), functions: Function::None },
53 "sample_location" => Variable { display_name: "location", type_of: TypeOf::None, functions: Function::None },
54 "country_list" => Variable { display_name: "Country list", type_of: TypeOf::Keyword(vec!["gb", "ie"]), functions: Function::None },
55 "btk_nohit" => Variable { display_name: "BTK no hit", type_of: TypeOf::OneDP, functions: Function::None },
56 "btk_target" => Variable { display_name: "BTK_target", type_of: TypeOf::OneDP, functions: Function::None },
57 "busco_completeness" => Variable { display_name: "BUSCO completeness", type_of: TypeOf::OneDP, functions: Function::None },
58 "busco_lineage" => Variable { display_name: "BUSCO lineage", type_of: TypeOf::Keyword(vec![""]), functions: Function::None },
59 "busco_string" => Variable { display_name: "BUSCO string", type_of: TypeOf::Keyword(vec![""]), functions: Function::None },
60 "gc_percent" => Variable { display_name: "GC percent", type_of: TypeOf::OneDP, functions: Function::None },
61 "n_percent" => Variable { display_name: "N percent", type_of: TypeOf::OneDP, functions: Function::None },
62 "odb10_lineage" => Variable { display_name: "Busco_odb10 lineage", type_of: TypeOf::Keyword(vec!["aconoidasida_odb10", "actinopterygii_odb10", "agaricales_odb10", "agaricomycetes_odb10", "alveolata_odb10", "apicomplexa_odb10", "arachnida_odb10", "arthropoda_odb10", "ascomycota_odb10", "aves_odb10", "basidiomycota_odb10", "boletales_odb10", "brassicales_odb10", "capnodiales_odb10", "carnivora_odb10", "cetartiodactyla_odb10", "chaetothyriales_odb10", "chlorophyta_odb10", "coccidia_odb10", "cyprinodontiformes_odb10", "diptera_odb10", "dothideomycetes_odb10", "embryophyta_odb10", "endopterygota_odb10", "euarchontoglires_odb10", "eudicots_odb10", "euglenozoa_odb10", "eukaryota_odb10", "eurotiales_odb10", "eurotiomycetes_odb10", "eutheria_odb10", "fabales_odb10", "fungi_odb10", "glires_odb10", "glomerellales_odb10", "helotiales_odb10", "hemiptera_odb10", "hymenoptera_odb10", "hypocreales_odb10", "insecta_odb10", "laurasiatheria_odb10", "leotiomycetes_odb10", "lepidoptera_odb10", "liliopsida_odb10", "mammalia_odb10", "metazoa_odb10", "microsporidia_odb10", "mollusca_odb10", "mucorales_odb10", "mucoromycota_odb10", "nematoda_odb10", "onygenales_odb10", "passeriformes_odb10", "plasmodium_odb10", "pleosporales_odb10", "poales_odb10", "polyporales_odb10", "primates_odb10", "saccharomycetes_odb10", "sauropsida_odb10", "solanales_odb10", "sordariomycetes_odb10", "stramenopiles_odb10", "tetrapoda_odb10", "tremellomycetes_odb10", "vertebrata_odb10", "viridiplantae_odb10"]), functions: Function::None },
63 "sample_collected" => Variable { display_name: "sample_collected", type_of: TypeOf::Keyword(vec![""]), functions: Function::None },
64 "sample_collected_by" => Variable { display_name: "sample_collected_by", type_of: TypeOf::Keyword(vec!["dalu", "ghc", "mba", "nhm", "nsu", "psu", "qmul", "rbge", "kew", "san", "ubc", "derb", "oxf", "vien"]), functions: Function::None },
65 "sample_acquired" => Variable { display_name: "sample_acquired", type_of: TypeOf::Keyword(vec![""]), functions: Function::None },
66 "in_progress" => Variable { display_name: "in_progress", type_of: TypeOf::Keyword(vec![""]), functions: Function::None },
67 "insdc_open" => Variable { display_name: "insdc_open", type_of: TypeOf::Keyword(vec![""]), functions: Function::None },
68 "published" => Variable { display_name: "published", type_of: TypeOf::Keyword(vec![""]), functions: Function::None },
69 "sequencing_status" => Variable { display_name: "sequencing_status", type_of: TypeOf::Keyword(vec!["published", "insdc_open", "in_progress", "sample_acquired", "sample_collected"]), functions: Function::None },
70 "sequencing_status_ag100pest" => Variable { display_name: "sequencing_status_ag100pest", type_of: TypeOf::Keyword(vec!["published", "insdc_open", "in_progress", "sample_acquired", "sample_collected"]), functions: Function::None },
71 "sequencing_status_asg" => Variable { display_name: "sequencing_status_asg", type_of: TypeOf::Keyword(vec!["published", "insdc_open", "in_progress", "sample_acquired", "sample_collected"]), functions: Function::None },
72 "sequencing_status_b10k" => Variable { display_name: "sequencing_status_b10k", type_of: TypeOf::Keyword(vec!["published", "insdc_open", "in_progress", "sample_acquired", "sample_collected"]), functions: Function::None },
73 "sequencing_status_canbp" => Variable { display_name: "sequencing_status_canbp", type_of: TypeOf::Keyword(vec!["published", "insdc_open", "in_progress", "sample_acquired", "sample_collected"]), functions: Function::None },
74 "sequencing_status_cbp" => Variable { display_name: "sequencing_status_cbp", type_of: TypeOf::Keyword(vec!["published", "insdc_open", "in_progress", "sample_acquired", "sample_collected"]), functions: Function::None },
75 "sequencing_status_ccgp" => Variable { display_name: "sequencing_status_ccgp", type_of: TypeOf::Keyword(vec!["published", "insdc_open", "in_progress", "sample_acquired", "sample_collected"]), functions: Function::None },
76 "sequencing_status_cfgp" => Variable { display_name: "sequencing_status_cfgp", type_of: TypeOf::Keyword(vec!["published", "insdc_open", "in_progress", "sample_acquired", "sample_collected"]), functions: Function::None },
77 "sequencing_status_dtol" => Variable { display_name: "sequencing_status_dtol", type_of: TypeOf::Keyword(vec!["published", "insdc_open", "in_progress", "sample_acquired", "sample_collected"]), functions: Function::None },
78 "sequencing_status_ebpn" => Variable { display_name: "sequencing_status_ebpn", type_of: TypeOf::Keyword(vec!["published", "insdc_open", "in_progress", "sample_acquired", "sample_collected"]), functions: Function::None },
79 "sequencing_status_endemixit" => Variable { display_name: "sequencing_status_endemixit", type_of: TypeOf::Keyword(vec!["published", "insdc_open", "in_progress", "sample_acquired", "sample_collected"]), functions: Function::None },
80 "sequencing_status_erga" => Variable { display_name: "sequencing_status_erga", type_of: TypeOf::Keyword(vec!["published", "insdc_open", "in_progress", "sample_acquired", "sample_collected"]), functions: Function::None },
81 "sequencing_status_eurofish" => Variable { display_name: "sequencing_status_eurofish", type_of: TypeOf::Keyword(vec!["published", "insdc_open", "in_progress", "sample_acquired", "sample_collected"]), functions: Function::None },
82 "sequencing_status_gaga" => Variable { display_name: "sequencing_status_gaga", type_of: TypeOf::Keyword(vec!["published", "insdc_open", "in_progress", "sample_acquired", "sample_collected"]), functions: Function::None },
83 "sequencing_status_giga" => Variable { display_name: "sequencing_status_giga", type_of: TypeOf::Keyword(vec!["published", "insdc_open", "in_progress", "sample_acquired", "sample_collected"]), functions: Function::None },
84 "sequencing_status_ilebp" => Variable { display_name: "sequencing_status_ilebp", type_of: TypeOf::Keyword(vec!["published", "insdc_open", "in_progress", "sample_acquired", "sample_collected"]), functions: Function::None },
85 "sequencing_status_metainvert" => Variable { display_name: "sequencing_status_metainvert", type_of: TypeOf::Keyword(vec!["published", "insdc_open", "in_progress", "sample_acquired", "sample_collected"]), functions: Function::None },
86 "sequencing_status_pgp" => Variable { display_name: "sequencing_status_pgp", type_of: TypeOf::Keyword(vec!["published", "insdc_open", "in_progress", "sample_acquired", "sample_collected"]), functions: Function::None },
87 "sequencing_status_squalomix" => Variable { display_name: "sequencing_status_squalomix", type_of: TypeOf::Keyword(vec!["published", "insdc_open", "in_progress", "sample_acquired", "sample_collected"]), functions: Function::None },
88 "sequencing_status_vgp" => Variable { display_name: "sequencing_status_vgp", type_of: TypeOf::Keyword(vec!["published", "insdc_open", "in_progress", "sample_acquired", "sample_collected"]), functions: Function::None },
89 "sequencing_status_zoonomia" => Variable { display_name: "sequencing_status_zoonomia", type_of: TypeOf::Keyword(vec!["published", "insdc_open", "in_progress", "sample_acquired", "sample_collected"]), functions: Function::None },
90 "sequencing_status_omg" => Variable { display_name: "sequencing_status_omg", type_of: TypeOf::Keyword(vec!["published", "insdc_open", "in_progress", "sample_acquired", "sample_collected"]), functions: Function::None },
91 "sequencing_status_arg" => Variable { display_name: "sequencing_status_arg", type_of: TypeOf::Keyword(vec!["published", "insdc_open", "in_progress", "sample_acquired", "sample_collected"]), functions: Function::None },
92 "sequencing_status_agi" => Variable { display_name: "sequencing_status_agi", type_of: TypeOf::Keyword(vec!["published", "insdc_open", "in_progress", "sample_acquired", "sample_collected"]), functions: Function::None },
93 "sequencing_status_tsi" => Variable { display_name: "sequencing_status_tsi", type_of: TypeOf::Keyword(vec!["published", "insdc_open", "in_progress", "sample_acquired", "sample_collected"]), functions: Function::None },
94 "sequencing_status_gap" => Variable { display_name: "sequencing_status_gap", type_of: TypeOf::Keyword(vec!["published", "insdc_open", "in_progress", "sample_acquired", "sample_collected"]), functions: Function::None },
95 "sequencing_status_gbr" => Variable { display_name: "sequencing_status_gbr", type_of: TypeOf::Keyword(vec!["published", "insdc_open", "in_progress", "sample_acquired", "sample_collected"]), functions: Function::None },
96 "long_list" => Variable { display_name: "long_list", type_of: TypeOf::Keyword(vec!["africabp", "ag100pest", "agi", "arg", "asg", "b10k", "canbp", "cbp", "ccgp", "cfgp", "dtol", "ebpn", "ein", "endemixit", "erga", "eurofish", "gaga", "gap", "gbr", "giga", "ilebp", "metainvert", "omg", "pgp", "squalomix", "tsi", "vgp", "zoonomia"]), functions: Function::None },
97 "other_priority" => Variable { display_name: "other_priority", type_of: TypeOf::Keyword(vec!["africabp", "ag100pest", "agi", "arg", "asg", "b10k", "canbp", "cbp", "ccgp", "cfgp", "dtol", "ebpn", "ein", "endemixit", "erga", "eurofish", "gaga", "gap", "gbr", "giga", "ilebp", "metainvert", "omg", "pgp", "squalomix", "tsi", "vgp", "zoonomia"]), functions: Function::None },
98 "family_representative" => Variable { display_name: "family_representative", type_of: TypeOf::Keyword(vec!["africabp", "ag100pest", "agi", "arg", "asg", "b10k", "canbp", "cbp", "ccgp", "cfgp", "dtol", "ebpn", "ein", "endemixit", "erga", "eurofish", "gaga", "gap", "gbr", "giga", "ilebp", "metainvert", "omg", "pgp", "squalomix", "tsi", "vgp", "zoonomia"]), functions: Function::None },
99 "sequencing_status_ebp" => Variable { display_name: "sequencing_status_ebp", type_of: TypeOf::None, functions: Function::None },
100 );
102}
103
104lazy_static! {
107 pub static ref GOAT_ASSEMBLY_VARIABLE_DATA: BTreeMap<&'static str, Variable<'static>> = collection!(
110 "assembly_level" => Variable { display_name: "Assembly level", type_of: TypeOf::Keyword(vec!["complete genome", "chromosome", "scaffold", "contig"]), functions: Function::None },
112 "assembly_span" => Variable { display_name: "Assembly span", type_of: TypeOf::Long, functions: Function::None },
113 "assembly_type" => Variable { display_name: "Assembly type", type_of: TypeOf::Keyword(vec![""]), functions: Function::None },
114 "bioproject" => Variable { display_name: "bioproject", type_of: TypeOf::Keyword(vec![""]), functions: Function::None },
115 "biosample" => Variable { display_name: "biosample", type_of: TypeOf::Keyword(vec![""]), functions: Function::None },
116 "chromosome_count" => Variable { display_name: "Chromosome count", type_of: TypeOf::Long, functions: Function::None },
117 "contig_count" => Variable { display_name: "Contig count", type_of: TypeOf::Long, functions: Function::None },
118 "contig_l50" => Variable { display_name: "Contig L50", type_of: TypeOf::Long, functions: Function::None },
119 "contig_n50" => Variable { display_name: "Contig N50", type_of: TypeOf::Long, functions: Function::None },
120 "ebp_metric_date" => Variable { display_name: "EBP metric date", type_of: TypeOf::Date, functions: Function::None },
121 "gene_count" => Variable { display_name: "Gene count", type_of: TypeOf::Integer, functions: Function::None },
122 "isolate" => Variable { display_name: "Isolate", type_of: TypeOf::Keyword(vec![""]), functions: Function::None },
123 "last_updated" => Variable { display_name: "Last updated", type_of: TypeOf::Date, functions: Function::None },
124 "noncoding_gene_count" => Variable { display_name: "Non-coding gene count", type_of: TypeOf::Integer, functions: Function::None },
125 "organelle" => Variable { display_name: "organelle", type_of: TypeOf::None, functions: Function::None },
126 "protein_count" => Variable { display_name: "Protein count", type_of: TypeOf::Integer, functions: Function::None },
127 "pseudogene_count" => Variable { display_name: "Pseudogene count", type_of: TypeOf::Integer, functions: Function::None },
128 "refseq_category" => Variable { display_name: "RefSeq category", type_of: TypeOf::Keyword(vec![""]), functions: Function::None },
129 "sample_sex" => Variable { display_name: "Sample sex", type_of: TypeOf::Keyword(vec![""]), functions: Function::None },
130 "scaffold_count" => Variable { display_name: "Scaffold count", type_of: TypeOf::Long, functions: Function::None },
131 "scaffold_l50" => Variable { display_name: "Scaffold L50", type_of: TypeOf::Long, functions: Function::None },
132 "scaffold_n50" => Variable { display_name: "Scaffold N50", type_of: TypeOf::Long, functions: Function::None },
133 "sequence_count" => Variable { display_name: "Sequence count", type_of: TypeOf::Long, functions: Function::None },
134 "submitter" => Variable { display_name: "Submitter", type_of: TypeOf::Keyword(vec![""]), functions: Function::None },
135 "ungapped_span" => Variable { display_name: "Ungapped span", type_of: TypeOf::Long, functions: Function::None },
136 "btk_nohit" => Variable { display_name: "BTK no hit", type_of: TypeOf::OneDP, functions: Function::None },
137 "btk_target" => Variable { display_name: "BTK_target", type_of: TypeOf::OneDP, functions: Function::None },
138 "busco_completeness" => Variable { display_name: "BUSCO completeness", type_of: TypeOf::OneDP, functions: Function::None },
139 "busco_lineage" => Variable { display_name: "BUSCO lineage", type_of: TypeOf::Keyword(vec![""]), functions: Function::None },
140 "busco_string" => Variable { display_name: "BUSCO string", type_of: TypeOf::Keyword(vec![""]), functions: Function::None },
141 "gc_percent" => Variable { display_name: "gc_percent", type_of: TypeOf::TwoDP, functions: Function::None },
142 "n_percent" => Variable { display_name: "n_percent", type_of: TypeOf::TwoDP, functions: Function::None },
143 "sample_location" => Variable { display_name: "sample_location", type_of: TypeOf::None, functions: Function::None },
144 );
146}