goat_cli/lookup/
lookup.rs

1use std::path::PathBuf;
2use crate::utils::utils::{
3    lines_from_file, parse_comma_separated, some_kind_of_uppercase_first_letter,
4};
5use crate::{IndexType, GOAT_URL, TAXONOMY, UPPER_CLI_FILE_LIMIT};
6
7use anyhow::{bail, Result};
8
9/// The lookup struct
10#[derive(Clone, Debug)]
11pub struct Lookup {
12    /// the users search
13    pub search: String,
14    /// The size for each search (default = 10)
15    pub size: u64,
16    /// The index type, currently taxon or
17    /// assembly
18    pub index_type: IndexType,
19}
20
21impl Lookup {
22    /// From our lookup struct we can make an individual URL.
23    pub fn make_url(&self) -> String {
24        let mut url = String::new();
25        // add the base
26        url += &GOAT_URL;
27        // add lookup
28        url += "lookup?";
29        // add the search term
30        let search_term = format!("searchTerm={}", self.search);
31        url += &search_term;
32        // add size
33        let size = format!("&size={}", self.size);
34        url += &size;
35        // hardcode the rest for now
36        url += &format!("&result={}&taxonomy={}", self.index_type, &*TAXONOMY);
37        url
38    }
39}
40
41/// A vector of [`Lookup`] structs.
42#[derive(Debug)]
43pub struct Lookups {
44    /// The entries in [`Lookups`].
45    pub entries: Vec<Lookup>,
46}
47
48// throw warnings if there are no hits
49impl Lookups {
50    /// Constructor which takes the CLI args and returns
51    /// `Self`.
52    pub fn new(matches: &clap::ArgMatches, index_type: IndexType) -> Result<Self> {
53        let tax_name_op = matches.get_one::<String>("taxon");
54        let filename_op = matches.get_one::<PathBuf>("file");
55        // safe to unwrap, as default is defined.
56        let no_hits = *matches.get_one::<u64>("size").expect("cli default = 10");
57
58        let tax_name_vector: Vec<String>;
59        match tax_name_op {
60            Some(s) => tax_name_vector = parse_comma_separated(s),
61            None => match filename_op {
62                Some(s) => {
63                    tax_name_vector = lines_from_file(s)?;
64                    // check length of vector and bail if > 1000
65                    if tax_name_vector.len() > *UPPER_CLI_FILE_LIMIT {
66                        bail!(
67                            "Number of taxa specified cannot exceed {}.",
68                            *UPPER_CLI_FILE_LIMIT
69                        )
70                    }
71                }
72                None => bail!("One of -f (--file) or -t (--taxon) should be specified."),
73            },
74        }
75
76        let mut res = Vec::new();
77
78        for el in tax_name_vector {
79            res.push(Lookup {
80                search: el,
81                size: no_hits,
82                index_type,
83            })
84        }
85
86        Ok(Self { entries: res })
87    }
88
89    // make urls, these are slightly different, and simpler than those
90    // made for the main search program
91
92    /// Make URLs calls [`Lookup::make_url`] on each element.
93    pub fn make_urls(&self) -> Vec<(String, String)> {
94        let mut url_vector = Vec::new();
95        for el in &self.entries {
96            let id = el.search.clone();
97            url_vector.push((el.make_url(), id));
98        }
99        url_vector
100    }
101}
102
103/// Took this out of `print_result` as
104fn format_suggestion_string(suggestions: &Vec<Option<String>>) -> Result<()> {
105    let mut suggestion_str = String::new();
106    for el in suggestions {
107        match el {
108            Some(s) => {
109                suggestion_str += &some_kind_of_uppercase_first_letter(&s[..]);
110                suggestion_str += ", ";
111            }
112            None => {}
113        }
114    }
115    // remove last comma
116    if suggestion_str.len() > 2 {
117        suggestion_str.drain(suggestion_str.len() - 2..);
118        bail!("Did you mean: {}?", suggestion_str)
119    } else {
120        bail!("There are no results.")
121    }
122}
123
/// Collect the results from concurrent `goat-cli taxon lookup`
/// queries.
#[derive(Clone, Debug)]
pub struct TaxonCollector {
    /// User search value.
    pub search: Option<String>,
    /// The taxon ids that were fetched.
    /// A single search can return multiple taxon ids.
    pub taxon_id: Vec<Option<String>>,
    /// The taxon ranks, one per returned taxon.
    pub taxon_rank: Vec<Option<String>>,
    /// A vector of optional taxon names.
    ///
    /// Each element is an optional vector of two-element tuples:
    /// - The name of the taxon
    /// - The class of the taxon name
    pub taxon_names: Vec<Option<Vec<(String, String)>>>,
    /// The suggestions vector.
    pub suggestions: Option<Vec<Option<String>>>,
}
145
146impl TaxonCollector {
147    /// Print the result from a collector struct.
148    /// add an index, so we don't repeat headers
149    pub fn print_result(&self, index: usize) -> Result<()> {
150        // if we got a hit
151        match &self.search {
152            Some(search) => {
153                // if we got a suggestion
154                match &self.suggestions {
155                    // we end up here even if there are no *actual* suggestions.
156                    Some(suggestions) => format_suggestion_string(suggestions),
157                    // no suggestion, so we got a hit
158                    None => {
159                        // Vec<Option<String>> -> Option<Vec<String>>
160                        // these vecs should all be the same length?
161                        let taxon_id = self.taxon_id.clone();
162                        let taxon_rank = self.taxon_rank.clone();
163                        let taxon_names = self.taxon_names.clone();
164
165                        let taxon_ids_op: Option<Vec<String>> = taxon_id.into_iter().collect();
166                        let taxon_ranks_op: Option<Vec<String>> = taxon_rank.into_iter().collect();
167                        // same but for nested vec.
168                        let taxon_names_op: Option<Vec<Vec<(String, String)>>> =
169                            taxon_names.into_iter().collect();
170
171                        // print headers for first result only.
172                        if index == 0 {
173                            println!("taxon\trank\tsearch_query\tname\ttype");
174                        }
175                        match taxon_names_op {
176                            Some(n) => {
177                                // get taxon_ids and taxon_ranks
178                                let taxon_ids = match taxon_ids_op {
179                                    Some(t) => t,
180                                    // empty vec
181                                    None => vec![],
182                                };
183                                let taxon_ranks = match taxon_ranks_op {
184                                    Some(t) => t,
185                                    // empty vec
186                                    None => vec![],
187                                };
188                                // zip these vectors together
189                                let zipped_taxon_vectors =
190                                    taxon_ids.iter().zip(taxon_ranks.iter()).zip(n.iter());
191
192                                // this may not be the best way to print
193                                // as everything has to be loaded into mem
194                                // however, each result string should be small.
195                                let mut whole_res_string = String::new();
196
197                                for ((taxon_id, taxon_rank), taxon_ranks) in zipped_taxon_vectors {
198                                    for el in taxon_ranks {
199                                        let row = format!(
200                                            "{}\t{}\t{}\t{}\t{}\n",
201                                            taxon_id, taxon_rank, search, el.0, el.1
202                                        );
203                                        whole_res_string += &row;
204                                    }
205                                }
206                                // remove trailing newline
207                                whole_res_string.pop();
208                                Ok(println!("{}", whole_res_string))
209                            }
210                            None => bail!("There were no taxon names."),
211                        }
212                    }
213                }
214            }
215            None => bail!("No results."),
216        }
217    }
218}
219
/// Collect the results from concurrent assembly lookup queries.
#[derive(Clone, Debug)]
pub struct AssemblyCollector {
    /// User search value.
    pub search: Option<String>,
    /// The taxon ids that were fetched.
    /// A single search can return multiple taxon ids.
    pub taxon_id: Vec<Option<String>>,
    /// The identifiers, which is an enumeration of all
    /// of the identifier:class pairs. This could be a Map.
    pub identifiers: Vec<Option<Vec<(String, String)>>>,
    /// The suggestions vector.
    pub suggestions: Option<Vec<Option<String>>>,
}
235
236impl AssemblyCollector {
237    /// Print the result from a collector struct.
238    /// add an index, so we don't repeat headers
239    pub fn print_result(&self, index: usize) -> Result<()> {
240        // if we got a hit
241        match &self.search {
242            Some(search) => {
243                // if we got a suggestion
244                match &self.suggestions {
245                    // we end up here even if there are no *actual* suggestions.
246                    Some(suggestions) => format_suggestion_string(suggestions),
247                    // no suggestion, so we got a hit
248                    None => {
249                        // Vec<Option<String>> -> Option<Vec<String>>
250                        // these vecs should all be the same length?
251                        let taxon_id = self.taxon_id.clone();
252                        let assembly_identifiers = self.identifiers.clone();
253
254                        let taxon_ids_op: Option<Vec<String>> = taxon_id.into_iter().collect();
255                        // same but for nested vec.
256                        let assembly_identifiers_op: Option<Vec<Vec<(String, String)>>> =
257                            assembly_identifiers.into_iter().collect();
258
259                        // print headers for first result only.
260                        if index == 0 {
261                            println!("taxon\tsearch_query\tidentifier\ttype");
262                        }
263                        match assembly_identifiers_op {
264                            Some(n) => {
265                                // get taxon_ids and taxon_ranks
266                                let taxon_ids = match taxon_ids_op {
267                                    Some(t) => t,
268                                    // empty vec
269                                    None => vec![],
270                                };
271                                // zip these vectors together
272                                let zipped_taxon_vectors = taxon_ids.iter().zip(n.iter());
273
274                                // this may not be the best way to print
275                                // as everything has to be loaded into mem
276                                // however, each result string should be small.
277                                let mut whole_res_string = String::new();
278
279                                for (taxon_id, taxon_ranks) in zipped_taxon_vectors {
280                                    for el in taxon_ranks {
281                                        let row = format!(
282                                            "{}\t{}\t{}\t{}\n",
283                                            taxon_id, search, el.0, el.1
284                                        );
285                                        whole_res_string += &row;
286                                    }
287                                }
288                                // remove trailing newline
289                                whole_res_string.pop();
290                                Ok(println!("{}", whole_res_string))
291                            }
292                            None => bail!("There were no taxon names."),
293                        }
294                    }
295                }
296            }
297            None => bail!("No results."),
298        }
299    }
300}
301
302/// A wrapper so we can return the same from our request.
303/// Otherwise I am going to have to do extensive changes above
304/// which I decided against.
305pub enum Collector {
306    /// The taxon results.
307    Taxon(Result<TaxonCollector>),
308    /// The assembly results.
309    Assembly(Result<AssemblyCollector>),
310}