goat_cli/lookup/
mod.rs

1//!
2//! Invoked by calling:
3//! `goat-cli taxon/assembly lookup <args>`
4//!
5//! The state of the code here is not great, it's
6//! quite fragmented. Functional, but lacks coherence.
7//!
8//! Probably should be refactored at some point.
9
10use crate::IndexType;
11use anyhow::{bail, Result};
12use futures::StreamExt;
13use reqwest;
14use reqwest::header::ACCEPT;
15use serde_json::Value;
16
17/// The inner structs used in lookup.
18pub mod lookup;
19use lookup::{AssemblyCollector, Collector, Lookups, TaxonCollector};
20
21/// Main entry point for `goat-cli lookup`.
22pub async fn lookup(matches: &clap::ArgMatches, cli: bool, index_type: IndexType) -> Result<()> {
23    let lookups = Lookups::new(matches, index_type)?;
24    let url_vector_api = lookups.make_urls();
25    let print_url = *matches.get_one::<bool>("url").expect("cli default false");
26    let size = *matches.get_one::<u64>("size").expect("cli default = 10");
27
28    if print_url {
29        for (index, (url, _)) in url_vector_api.iter().enumerate() {
30            println!("{}.\tGoaT lookup API URL: {}", index, url);
31        }
32        // don't exit here internally; we'll exit later
33        if cli {
34            std::process::exit(0);
35        }
36    }
37    // so we can make as many concurrent requests
38    let concurrent_requests = url_vector_api.len();
39
40    let fetches = futures::stream::iter(url_vector_api.into_iter().map(|(path, search_query)| async move {
41        // possibly make a again::RetryPolicy
42        // to catch all the values in a *very* large request.
43        let client = reqwest::Client::new();
44
45        match again::retry(|| client.get(&path).header(ACCEPT, "application/json").send()).await {
46            Ok(resp) => match resp.text().await {
47                Ok(body) => {
48                    let v: Value = serde_json::from_str(&body)?;
49                    // print a warning if number of hits > size specified.
50                    let request_size_op = &v["status"]["hits"].as_u64();
51                    match request_size_op {
52                        Some(s) => {
53                            if size < *s {
54                                eprintln!(
55                                "For seach query {}, size specified ({}) was less than the number of results returned, ({}).",
56                                search_query, size, s
57                            )
58                        }
59                    },
60                        None => (),
61                    }
62
63                    // get all the suggestions
64                    let suggestions_text_op = &v["suggestions"].as_array();
65                    // collect into a vec
66                    let mut suggestions_vec = Vec::new();
67                    let suggestions_text = match suggestions_text_op {
68                        Some(suggestions) => {
69                            for el in *suggestions {
70                                let sug_str = el["suggestion"]["text"].as_str();
71                                let sug_string_op = sug_str.map(String::from);
72                                suggestions_vec.push(sug_string_op);
73                            }
74                            Some(suggestions_vec.clone())
75                        }
76                        None => None,
77                    };
78                    // we have all the information to process the results
79                    match index_type {
80                        IndexType::Taxon => Ok(Collector::Taxon(process_taxon_results(v, search_query, suggestions_text))),
81                        IndexType::Assembly => Ok(Collector::Assembly(process_assembly_results(v, search_query, suggestions_text))),
82                    }
83                }
84                Err(e) => bail!("Error reading {}: {}", path, e),
85            },
86            Err(e) => bail!("Error downloading {}: {}", path, e),
87        }
88    }))
89    .buffer_unordered(concurrent_requests)
90    .collect::<Vec<_>>();
91
92    let awaited_fetches = fetches.await;
93
94    for (index, el) in awaited_fetches.into_iter().enumerate() {
95        match el {
96            Ok(e) => {
97                if cli {
98                    match e {
99                        Collector::Taxon(e) => e?.print_result(index)?,
100                        Collector::Assembly(e) => e?.print_result(index)?,
101                    }
102                } else {
103                    // this avenue is for internal use
104                    // where the user could get info about
105                    // bad spelling etc...
106                    bail!("This is not yet implemented.")
107                }
108            }
109            Err(_) => bail!("No results found."),
110        }
111    }
112
113    Ok(())
114}
115
116/// As the taxon and assembly return JSON's are in
117/// different structures, they have to be parsed differently.
118///
119/// Each must return [`Result<Collector, anyhow::Error>`].
120fn process_taxon_results(
121    v: Value,
122    search_query: String,
123    suggestions_text: Option<Vec<Option<String>>>,
124) -> Result<TaxonCollector> {
125    // and the taxon ID
126    // we need to iterate over the array of results.
127    // potentially look at the scores, and keep those over a certain amount
128    // or keep everything. Currently I'm missing synonymous genera.
129
130    let mut taxon_id_vec = Vec::new();
131    let mut taxon_rank_vec = Vec::new();
132    let mut taxon_names_array_vec = Vec::new();
133
134    let results_array = v["results"].as_array();
135    // unwrap safely here
136    if let Some(arr) = results_array {
137        for el in arr {
138            let taxon_id = el["result"]["taxon_id"].as_str();
139            let taxon_rank = el["result"]["taxon_rank"].as_str();
140            let taxon_names_array_op = el["result"]["taxon_names"].as_array();
141
142            let taxon_names_array = match taxon_names_array_op {
143                Some(vec) => {
144                    let mut collect_names = Vec::new();
145                    for el in vec.iter() {
146                        let key = el["name"].as_str().unwrap_or("-");
147                        let value = el["class"].as_str().unwrap_or("-");
148                        // let source = el["source"].as_str().unwrap_or("-");
149                        collect_names.push((key.to_string(), value.to_string()));
150                    }
151                    Some(collect_names)
152                }
153                None => None,
154            };
155
156            // gather results into the vecs
157            taxon_id_vec.push(taxon_id);
158            taxon_rank_vec.push(taxon_rank);
159            taxon_names_array_vec.push(taxon_names_array);
160        }
161    }
162
163    // Vec<Option<&str>> -> Vec<Option<String>>
164    let taxon_id = taxon_id_vec.iter().map(|e| e.map(String::from)).collect();
165    let taxon_rank = taxon_rank_vec.iter().map(|e| e.map(String::from)).collect();
166
167    Ok(TaxonCollector {
168        search: Some(search_query),
169        suggestions: suggestions_text,
170        taxon_id,
171        taxon_names: taxon_names_array_vec,
172        taxon_rank,
173    })
174}
175
176/// The assembly counterpart to the above function.
177fn process_assembly_results(
178    v: Value,
179    search_query: String,
180    suggestions_text: Option<Vec<Option<String>>>,
181) -> Result<AssemblyCollector> {
182    // taxon ID stays the same
183    let mut taxon_id_vec = Vec::new();
184    // there is no taxon rank
185    let mut identifiers_array_vec = Vec::new();
186
187    let results_array = v["results"].as_array();
188    // unwrap safely here
189    if let Some(arr) = results_array {
190        for el in arr {
191            let taxon_id = el["result"]["taxon_id"].as_str();
192            let identifiers_array_op = el["result"]["identifiers"].as_array();
193
194            let identifiers_array = match identifiers_array_op {
195                Some(vec) => {
196                    let mut collect_names = Vec::new();
197                    for el in vec.iter() {
198                        let key = el["identifier"].as_str().unwrap_or("-");
199                        let value = el["class"].as_str().unwrap_or("-");
200                        // let source = el["source"].as_str().unwrap_or("-");
201                        collect_names.push((key.to_string(), value.to_string()));
202                    }
203                    Some(collect_names)
204                }
205                None => None,
206            };
207
208            // gather results into the vecs
209            taxon_id_vec.push(taxon_id);
210            identifiers_array_vec.push(identifiers_array);
211        }
212    }
213
214    // Vec<Option<&str>> -> Vec<Option<String>>
215    let taxon_id = taxon_id_vec.iter().map(|e| e.map(String::from)).collect();
216
217    Ok(AssemblyCollector {
218        search: Some(search_query),
219        suggestions: suggestions_text,
220        taxon_id,
221        identifiers: identifiers_array_vec,
222    })
223}