goat_cli/lookup/lookup.rs
1use std::path::PathBuf;
2use crate::utils::utils::{
3 lines_from_file, parse_comma_separated, some_kind_of_uppercase_first_letter,
4};
5use crate::{IndexType, GOAT_URL, TAXONOMY, UPPER_CLI_FILE_LIMIT};
6
7use anyhow::{bail, Result};
8
9/// The lookup struct
10#[derive(Clone, Debug)]
11pub struct Lookup {
12 /// the users search
13 pub search: String,
14 /// The size for each search (default = 10)
15 pub size: u64,
16 /// The index type, currently taxon or
17 /// assembly
18 pub index_type: IndexType,
19}
20
21impl Lookup {
22 /// From our lookup struct we can make an individual URL.
23 pub fn make_url(&self) -> String {
24 let mut url = String::new();
25 // add the base
26 url += &GOAT_URL;
27 // add lookup
28 url += "lookup?";
29 // add the search term
30 let search_term = format!("searchTerm={}", self.search);
31 url += &search_term;
32 // add size
33 let size = format!("&size={}", self.size);
34 url += &size;
35 // hardcode the rest for now
36 url += &format!("&result={}&taxonomy={}", self.index_type, &*TAXONOMY);
37 url
38 }
39}
40
41/// A vector of [`Lookup`] structs.
42#[derive(Debug)]
43pub struct Lookups {
44 /// The entries in [`Lookups`].
45 pub entries: Vec<Lookup>,
46}
47
48// throw warnings if there are no hits
49impl Lookups {
50 /// Constructor which takes the CLI args and returns
51 /// `Self`.
52 pub fn new(matches: &clap::ArgMatches, index_type: IndexType) -> Result<Self> {
53 let tax_name_op = matches.get_one::<String>("taxon");
54 let filename_op = matches.get_one::<PathBuf>("file");
55 // safe to unwrap, as default is defined.
56 let no_hits = *matches.get_one::<u64>("size").expect("cli default = 10");
57
58 let tax_name_vector: Vec<String>;
59 match tax_name_op {
60 Some(s) => tax_name_vector = parse_comma_separated(s),
61 None => match filename_op {
62 Some(s) => {
63 tax_name_vector = lines_from_file(s)?;
64 // check length of vector and bail if > 1000
65 if tax_name_vector.len() > *UPPER_CLI_FILE_LIMIT {
66 bail!(
67 "Number of taxa specified cannot exceed {}.",
68 *UPPER_CLI_FILE_LIMIT
69 )
70 }
71 }
72 None => bail!("One of -f (--file) or -t (--taxon) should be specified."),
73 },
74 }
75
76 let mut res = Vec::new();
77
78 for el in tax_name_vector {
79 res.push(Lookup {
80 search: el,
81 size: no_hits,
82 index_type,
83 })
84 }
85
86 Ok(Self { entries: res })
87 }
88
89 // make urls, these are slightly different, and simpler than those
90 // made for the main search program
91
92 /// Make URLs calls [`Lookup::make_url`] on each element.
93 pub fn make_urls(&self) -> Vec<(String, String)> {
94 let mut url_vector = Vec::new();
95 for el in &self.entries {
96 let id = el.search.clone();
97 url_vector.push((el.make_url(), id));
98 }
99 url_vector
100 }
101}
102
103/// Took this out of `print_result` as
104fn format_suggestion_string(suggestions: &Vec<Option<String>>) -> Result<()> {
105 let mut suggestion_str = String::new();
106 for el in suggestions {
107 match el {
108 Some(s) => {
109 suggestion_str += &some_kind_of_uppercase_first_letter(&s[..]);
110 suggestion_str += ", ";
111 }
112 None => {}
113 }
114 }
115 // remove last comma
116 if suggestion_str.len() > 2 {
117 suggestion_str.drain(suggestion_str.len() - 2..);
118 bail!("Did you mean: {}?", suggestion_str)
119 } else {
120 bail!("There are no results.")
121 }
122}
123
/// Collect the results from concurrent `goat-cli taxon lookup`
/// queries.
///
/// The `taxon_id`, `taxon_rank` and `taxon_names` vectors are paired up by
/// position when the result is printed.
#[derive(Clone, Debug)]
pub struct TaxonCollector {
    /// User search value.
    pub search: Option<String>,
    /// The taxon ids that were fetched.
    /// A single search can return several taxon ids.
    pub taxon_id: Vec<Option<String>>,
    /// The taxon ranks.
    pub taxon_rank: Vec<Option<String>>,
    /// The names found for each taxon.
    ///
    /// Each entry is an optional list of two-element tuples:
    /// - the name of the taxon
    /// - the class of the taxon name
    pub taxon_names: Vec<Option<Vec<(String, String)>>>,
    /// The suggestions vector. `Some` means suggestions were returned instead
    /// of a hit, even if the vector holds no actual suggestion.
    pub suggestions: Option<Vec<Option<String>>>,
}
145
146impl TaxonCollector {
147 /// Print the result from a collector struct.
148 /// add an index, so we don't repeat headers
149 pub fn print_result(&self, index: usize) -> Result<()> {
150 // if we got a hit
151 match &self.search {
152 Some(search) => {
153 // if we got a suggestion
154 match &self.suggestions {
155 // we end up here even if there are no *actual* suggestions.
156 Some(suggestions) => format_suggestion_string(suggestions),
157 // no suggestion, so we got a hit
158 None => {
159 // Vec<Option<String>> -> Option<Vec<String>>
160 // these vecs should all be the same length?
161 let taxon_id = self.taxon_id.clone();
162 let taxon_rank = self.taxon_rank.clone();
163 let taxon_names = self.taxon_names.clone();
164
165 let taxon_ids_op: Option<Vec<String>> = taxon_id.into_iter().collect();
166 let taxon_ranks_op: Option<Vec<String>> = taxon_rank.into_iter().collect();
167 // same but for nested vec.
168 let taxon_names_op: Option<Vec<Vec<(String, String)>>> =
169 taxon_names.into_iter().collect();
170
171 // print headers for first result only.
172 if index == 0 {
173 println!("taxon\trank\tsearch_query\tname\ttype");
174 }
175 match taxon_names_op {
176 Some(n) => {
177 // get taxon_ids and taxon_ranks
178 let taxon_ids = match taxon_ids_op {
179 Some(t) => t,
180 // empty vec
181 None => vec![],
182 };
183 let taxon_ranks = match taxon_ranks_op {
184 Some(t) => t,
185 // empty vec
186 None => vec![],
187 };
188 // zip these vectors together
189 let zipped_taxon_vectors =
190 taxon_ids.iter().zip(taxon_ranks.iter()).zip(n.iter());
191
192 // this may not be the best way to print
193 // as everything has to be loaded into mem
194 // however, each result string should be small.
195 let mut whole_res_string = String::new();
196
197 for ((taxon_id, taxon_rank), taxon_ranks) in zipped_taxon_vectors {
198 for el in taxon_ranks {
199 let row = format!(
200 "{}\t{}\t{}\t{}\t{}\n",
201 taxon_id, taxon_rank, search, el.0, el.1
202 );
203 whole_res_string += &row;
204 }
205 }
206 // remove trailing newline
207 whole_res_string.pop();
208 Ok(println!("{}", whole_res_string))
209 }
210 None => bail!("There were no taxon names."),
211 }
212 }
213 }
214 }
215 None => bail!("No results."),
216 }
217 }
218}
219
/// Collect the results from concurrent assembly lookup queries.
///
/// The `taxon_id` and `identifiers` vectors are paired up by position when
/// the result is printed.
#[derive(Clone, Debug)]
pub struct AssemblyCollector {
    /// User search value.
    pub search: Option<String>,
    /// The taxon ids that were fetched.
    /// A single search can return several taxon ids.
    pub taxon_id: Vec<Option<String>>,
    /// The identifiers, which is an enumeration of all
    /// of the identifier:class pairs. This could be a Map.
    pub identifiers: Vec<Option<Vec<(String, String)>>>,
    /// The suggestions vector. `Some` means suggestions were returned instead
    /// of a hit, even if the vector holds no actual suggestion.
    pub suggestions: Option<Vec<Option<String>>>,
}
235
236impl AssemblyCollector {
237 /// Print the result from a collector struct.
238 /// add an index, so we don't repeat headers
239 pub fn print_result(&self, index: usize) -> Result<()> {
240 // if we got a hit
241 match &self.search {
242 Some(search) => {
243 // if we got a suggestion
244 match &self.suggestions {
245 // we end up here even if there are no *actual* suggestions.
246 Some(suggestions) => format_suggestion_string(suggestions),
247 // no suggestion, so we got a hit
248 None => {
249 // Vec<Option<String>> -> Option<Vec<String>>
250 // these vecs should all be the same length?
251 let taxon_id = self.taxon_id.clone();
252 let assembly_identifiers = self.identifiers.clone();
253
254 let taxon_ids_op: Option<Vec<String>> = taxon_id.into_iter().collect();
255 // same but for nested vec.
256 let assembly_identifiers_op: Option<Vec<Vec<(String, String)>>> =
257 assembly_identifiers.into_iter().collect();
258
259 // print headers for first result only.
260 if index == 0 {
261 println!("taxon\tsearch_query\tidentifier\ttype");
262 }
263 match assembly_identifiers_op {
264 Some(n) => {
265 // get taxon_ids and taxon_ranks
266 let taxon_ids = match taxon_ids_op {
267 Some(t) => t,
268 // empty vec
269 None => vec![],
270 };
271 // zip these vectors together
272 let zipped_taxon_vectors = taxon_ids.iter().zip(n.iter());
273
274 // this may not be the best way to print
275 // as everything has to be loaded into mem
276 // however, each result string should be small.
277 let mut whole_res_string = String::new();
278
279 for (taxon_id, taxon_ranks) in zipped_taxon_vectors {
280 for el in taxon_ranks {
281 let row = format!(
282 "{}\t{}\t{}\t{}\n",
283 taxon_id, search, el.0, el.1
284 );
285 whole_res_string += &row;
286 }
287 }
288 // remove trailing newline
289 whole_res_string.pop();
290 Ok(println!("{}", whole_res_string))
291 }
292 None => bail!("There were no taxon names."),
293 }
294 }
295 }
296 }
297 None => bail!("No results."),
298 }
299 }
300}
301
302/// A wrapper so we can return the same from our request.
303/// Otherwise I am going to have to do extensive changes above
304/// which I decided against.
305pub enum Collector {
306 /// The taxon results.
307 Taxon(Result<TaxonCollector>),
308 /// The assembly results.
309 Assembly(Result<AssemblyCollector>),
310}