1use std::{
2 fs::File,
3 io::{BufRead, BufReader},
4 path::{Path, PathBuf},
5};
6
7use crate::{
8 utils::expression,
9 utils::variable_data::{GOAT_ASSEMBLY_VARIABLE_DATA, GOAT_TAXON_VARIABLE_DATA},
10 IndexType, UPPER_CLI_FILE_LIMIT,
11};
12use anyhow::{bail, Context, Result};
13use rand::distributions::Alphanumeric;
14use rand::{thread_rng, Rng};
15
16pub fn generate_unique_strings(
21 matches: &clap::ArgMatches,
22 index_type: IndexType,
23) -> Result<Vec<String>> {
24 let tax_name_op = matches.get_one::<String>("taxon");
25 let filename_op = matches.get_one::<PathBuf>("file");
26 let print_expression = matches.get_one::<bool>("print-expression");
30
31 if let Some(p) = print_expression {
32 if *p {
33 match index_type {
34 IndexType::Taxon => expression::print_variable_data(&*GOAT_TAXON_VARIABLE_DATA),
35 IndexType::Assembly => {
36 expression::print_variable_data(&*GOAT_ASSEMBLY_VARIABLE_DATA)
37 }
38 }
39 std::process::exit(0);
40 }
41 }
42
43 let url_vector: Vec<String>;
44 match tax_name_op {
46 Some(s) => {
47 if s.is_empty() {
49 bail!("Empty string found, please specify a taxon.");
50 }
51 url_vector = parse_comma_separated(s);
52 }
53 None => match filename_op {
54 Some(s) => {
55 url_vector = lines_from_file(s)?;
56 if url_vector.len() > *UPPER_CLI_FILE_LIMIT {
58 let limit_string = pretty_print_usize(*UPPER_CLI_FILE_LIMIT);
59 bail!("Number of taxa specified cannot exceed {}.", limit_string)
60 }
61 }
62 None => bail!("One of -f (--file) or -t (--taxon) should be specified."),
63 },
64 }
65
66 let url_vector_len = url_vector.len();
67
68 let mut chars_vec = vec![];
69 for _ in 0..url_vector_len {
70 let mut rng = thread_rng();
71 let chars: String = (0..15).map(|_| rng.sample(Alphanumeric) as char).collect();
72 chars_vec.push(chars.clone());
73 }
74
75 Ok(chars_vec)
76}
77
78pub fn lines_from_file(filename: impl AsRef<Path>) -> Result<Vec<String>> {
81 let file = File::open(&filename)
82 .with_context(|| format!("Could not open {:?}", filename.as_ref().as_os_str()))?;
83 let buf = BufReader::new(file);
84 let buf_res: Result<Vec<String>> = buf
85 .lines()
86 .map(|l| {
87 l.with_context(|| {
88 format!(
89 "Error in mapping buf_lines from {:?}",
90 filename.as_ref().as_os_str()
91 )
92 })
93 })
94 .collect();
95 buf_res
96}
97
/// Split a comma separated list into cleaned, sorted, de-duplicated entries.
///
/// Each entry has its leading and trailing ASCII spaces removed (other
/// whitespace such as tabs is left untouched), after which every double and
/// single quote character is stripped from it. The resulting entries are
/// sorted and exact duplicates are removed.
///
/// An empty input yields a vector holding a single empty string, and empty
/// fields (e.g. from `"a,,b"`) are kept as empty strings.
pub fn parse_comma_separated(taxids: &str) -> Vec<String> {
    let mut entries: Vec<String> = taxids
        .split(',')
        .map(|field| {
            // `trim_matches` removes the whole run of spaces on either side in
            // one go; a hand-rolled loop with a growing offset would also
            // swallow characters that follow the spaces.
            field
                .trim_matches(' ')
                .chars()
                .filter(|c| !matches!(c, '"' | '\''))
                .collect()
        })
        .collect();

    // Sorting first makes equal entries adjacent so `dedup` removes them all.
    entries.sort_unstable();
    entries.dedup();
    entries
}
133
/// Return the given taxonomic rank together with every rank above it.
///
/// Ranks are ordered from `subspecies` up to `superkingdom`. The result
/// starts at `r` and runs to the end of that list. When `r` is not one of
/// the known ranks, a vector containing a single empty string is returned.
pub fn get_rank_vector(r: &str) -> Vec<String> {
    // Ordered from the most specific rank to the most general one.
    const RANKS: [&str; 9] = [
        "subspecies",
        "species",
        "genus",
        "family",
        "order",
        "class",
        "phylum",
        "kingdom",
        "superkingdom",
    ];

    match RANKS.iter().position(|rank| *rank == r) {
        Some(start) => RANKS[start..].iter().map(|rank| rank.to_string()).collect(),
        None => vec![String::new()],
    }
}
154
155pub fn format_tsv_output(awaited_fetches: Vec<Result<String, anyhow::Error>>) -> Result<()> {
158 let mut headers = Vec::new();
161 for el in &awaited_fetches {
162 let tsv = match el {
163 Ok(ref e) => e,
164 Err(e) => bail!("{}", e),
165 };
166 headers.push(tsv.split('\n').next());
167 }
168
169 let header = headers.iter().fold(headers[0], |acc, &item| {
171 let acc = acc?;
172 let item = item?;
173 if item.len() > acc.len() {
174 Some(item)
175 } else {
176 Some(acc)
177 }
178 });
179
180 match header {
181 Some(h) => println!("{}", h),
182 None => bail!("No header found."),
183 }
184
185 for el in awaited_fetches {
186 let tsv = match el {
187 Ok(ref e) => e,
188 Err(e) => bail!("{}", e),
189 };
190
191 let tsv_iter = tsv.split('\n');
192 for row in tsv_iter.skip(1) {
193 println!("{}", row)
194 }
195 }
196
197 Ok(())
198}
199
/// Return `s` with its first character converted to uppercase.
///
/// The uppercase mapping is Unicode aware, so a single character may expand
/// to several (for example `ß` becomes `SS`). The rest of the string is
/// copied unchanged. An empty input yields an empty string.
pub fn some_kind_of_uppercase_first_letter(s: &str) -> String {
    let mut remaining = s.chars();

    if let Some(first) = remaining.next() {
        let mut capitalised = String::with_capacity(s.len());
        capitalised.extend(first.to_uppercase());
        // `as_str` exposes whatever the iterator has not consumed yet.
        capitalised.push_str(remaining.as_str());
        capitalised
    } else {
        String::new()
    }
}
209
/// Format an integer with a comma between every group of three digits,
/// counted from the right (e.g. `1234567` becomes `"1,234,567"`).
pub fn pretty_print_usize(i: usize) -> String {
    let digits = i.to_string();
    let total = digits.len();
    let mut grouped = String::with_capacity(total + total / 3);

    for (position, digit) in digits.chars().enumerate() {
        // A separator goes in front of a digit whenever the number of digits
        // still to be written is a multiple of three, except at the start.
        let remaining = total - position;
        if position != 0 && remaining % 3 == 0 {
            grouped.push(',');
        }
        grouped.push(digit);
    }

    grouped
}
225
226pub fn switch_string_to_url_encoding(string: &str) -> Result<&str> {
230 let res = match string {
231 "!=" => "!%3D",
233 "<" => "%3C",
235 "<=" => "<%3D",
237 "=" => "%3D",
238 "==" => "%3D%3D",
239 ">" => "%3E",
241 ">=" => ">%3D",
243 _ => bail!("Should not reach here."),
244 };
245 Ok(res)
246}
247
248pub fn did_you_mean(possibilities: &[String], tried: &str) -> Option<String> {
251 let mut possible_matches: Vec<_> = possibilities
252 .iter()
253 .map(|word| {
254 let edit_distance = levenshtein_distance(&word.to_lowercase(), &tried.to_lowercase());
255 (edit_distance, word.to_owned())
256 })
257 .collect();
258
259 possible_matches.sort();
260
261 if let Some((_, first)) = possible_matches.into_iter().next() {
262 Some(first)
263 } else {
264 None
265 }
266}
267
/// Compute the Levenshtein edit distance between `a` and `b`.
///
/// The distance is counted in `char`s (Unicode scalar values): the minimum
/// number of single-character insertions, deletions and substitutions that
/// turn one string into the other.
fn levenshtein_distance(a: &str, b: &str) -> usize {
    if a == b {
        return 0;
    }

    let a_chars: Vec<char> = a.chars().collect();
    let b_len = b.chars().count();

    // Against an empty string the distance is the other string's length.
    if a_chars.is_empty() {
        return b_len;
    }
    if b_len == 0 {
        return a_chars.len();
    }

    // `row[i]` holds the distance between the first `i + 1` chars of `a` and
    // the prefix of `b` handled so far; it starts as the row for an empty `b`.
    let mut row: Vec<usize> = (1..=a_chars.len()).collect();
    let mut distance = 0;

    for (b_index, b_char) in b.chars().enumerate() {
        // Cell diagonally up-left of the first column, and the cell to its left.
        let mut diagonal = b_index;
        let mut left = b_index + 1;

        for (cell, a_char) in row.iter_mut().zip(a_chars.iter()) {
            let above = *cell;
            let substitution = diagonal + usize::from(*a_char != b_char);
            let best = substitution.min(above + 1).min(left + 1);

            // The old value of this cell is the next column's diagonal.
            diagonal = above;
            left = best;
            *cell = best;
        }

        distance = left;
    }

    distance
}