1use crate::utils::tax_ranks::TaxRanks;
2use crate::utils::utils::{did_you_mean, switch_string_to_url_encoding};
3
4use anyhow::{bail, ensure, Result};
5use regex::{CaptureMatches, Captures, Regex};
6use std::{collections::BTreeMap, fmt};
7use tabled::{object::Rows, Panel, Width, Modify, Table, Tabled};
8
/// The kind of value a variable accepts on the right-hand side of an
/// expression.
///
/// `TypeOf::check` validates user input against the variant, and the
/// `Display` implementation renders the operators (or keywords) that may be
/// used with a variable of that kind.
#[derive(Tabled)]
pub enum TypeOf<'a> {
    /// Value must parse as an `i64`.
    Long,
    /// Value must parse as an `i16`.
    Short,
    /// Value must parse as an `f32`.
    OneDP,
    /// Value must parse as an `f32`.
    TwoDP,
    /// Value must parse as an `i32`.
    Integer,
    /// Value is a date written as `yyyy-mm-dd`, or just `yyyy`.
    Date,
    /// Value must parse as an `f32`.
    HalfFloat,
    /// Value is drawn from the listed keywords; the list is used to offer
    /// "did you mean" suggestions while parsing an expression.
    Keyword(Vec<&'a str>),
    /// No validation is performed; displayed as a warning not to use the
    /// variable yet.
    None,
}
34
35impl<'a> TypeOf<'a> {
36 fn check(&self, other: &str, variable: &str) -> Result<()> {
38 match self {
41 TypeOf::Long => match other.parse::<i64>() {
42 Ok(_) => (),
43 Err(_) => bail!(format!("For variable \"{variable}\" in the expression, an input error was found. Pass an integer as a value.")),
44 },
45 TypeOf::Short => match other.parse::<i16>() {
46 Ok(_) => (),
47 Err(_) => bail!(format!("For variable \"{variable}\" in the expression, an input error was found. Pass an integer as a value.")),
48 },
49 TypeOf::OneDP => match other.parse::<f32>() {
50 Ok(_) => (),
51 Err(_) => bail!(format!("For variable \"{variable}\" in the expression, an input error was found. Pass a float as a value.")),
52 },
53 TypeOf::TwoDP => match other.parse::<f32>() {
54 Ok(_) => (),
55 Err(_) => bail!(format!("For variable \"{variable}\" in the expression, an input error was found. Pass a float as a value.")),
56 },
57 TypeOf::Integer => match other.parse::<i32>() {
58 Ok(_) => (),
59 Err(_) => bail!(format!("For variable \"{variable}\" in the expression, an input error was found. Pass an integer as a value.")),
60 },
61 TypeOf::Date => {
64 let tokens = other.split('-').collect::<Vec<_>>();
65 ensure!(
66 tokens.len() == 1 || tokens.len() == 3,
67 "Improperly formatted date. Please make sure date is in the format yyyy-mm-dd, or yyyy."
68 )
69 }
70 TypeOf::HalfFloat => match other.parse::<f32>() {
71 Ok(_) => (),
72 Err(_) => bail!(format!("For variable \"{variable}\" in the expression, an input error was found. Pass a float as a value.")),
73 },
74 TypeOf::Keyword(_) => (),
76 TypeOf::None => (),
78 };
79 Ok(())
80 }
81}
82
83impl<'a> fmt::Display for TypeOf<'a> {
84 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
85 match self {
86 TypeOf::None => write!(f, "Please don't use yet! This variable needs fixing."),
88 TypeOf::Long => write!(f, "!=, <, <=, =, ==, >, >="),
89 TypeOf::Short => write!(f, "!=, <, <=, =, ==, >, >="),
90 TypeOf::OneDP => write!(f, "!=, <, <=, =, ==, >, >="),
91 TypeOf::TwoDP => write!(f, "!=, <, <=, =, ==, >, >="),
92 TypeOf::Integer => write!(f, "!=, <, <=, =, ==, >, >="),
93 TypeOf::Date => write!(f, "!=, <, <=, =, ==, >, >="),
94 TypeOf::HalfFloat => write!(f, "!=, <, <=, =, ==, >, >="),
95 TypeOf::Keyword(k) => match k[0] {
96 "" => write!(f, ""),
97 _ => write!(f, "== {}", k.join(", ")),
98 },
99 }
100 }
101}
102
/// The functions that may wrap a variable in an expression, written as
/// `function(variable)`.
pub enum Function<'a> {
    /// The variable cannot be wrapped in any function.
    None,
    /// Names of the functions that can be applied to the variable.
    Some(Vec<&'a str>),
}

/// Renders the function names as a comma-separated list, or as nothing when
/// there are none.
impl<'a> fmt::Display for Function<'a> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let rendered = match self {
            Function::None => String::new(),
            Function::Some(names) => names.join(", "),
        };
        f.write_str(&rendered)
    }
}
119
/// Reference data for one variable that can appear in an expression; also
/// one row of the table printed by `print_variable_data`.
#[derive(Tabled)]
pub struct Variable<'a> {
    /// Human-readable name, shown in the "Display Name" column.
    #[tabled(rename = "Display Name")]
    pub display_name: &'a str,
    /// Kind of value the variable accepts, shown in the
    /// "Operators/Keywords" column.
    #[tabled(rename = "Operators/Keywords")]
    pub type_of: TypeOf<'a>,
    /// Functions that may wrap the variable, shown in the "Function(s)"
    /// column.
    #[tabled(rename = "Function(s)")]
    pub functions: Function<'a>,
}
130
/// Wraps a variable's expression name so it is tabulated under its own
/// "Expression Name" column next to the `Variable` columns.
#[derive(Tabled)]
struct ColHeader(#[tabled(rename = "Expression Name")] &'static str);
134
135pub fn print_variable_data(data: &BTreeMap<&'static str, Variable<'static>>) {
137 println!();
139 let table_data = data
142 .iter()
143 .map(|(e, f)| (ColHeader(e), f))
144 .collect::<Vec<(ColHeader, &Variable)>>();
145 let footer_data = TaxRanks::init();
147
148 let table_string = Table::new(&table_data)
149 .with(Panel::footer(format!("NCBI taxon ranks:\n\n{}", footer_data)))
150 .with(
151 Modify::new(Rows::new(1..table_data.len() - 1))
152 .with(Width::wrap(30).keep_words()),
153 )
154 .with(
156 Modify::new(Rows::new(table_data.len()..))
157 .with(Width::wrap(30 * 4).keep_words()),
158 )
159 .to_string();
160
161 println!("{}", table_string);
162}
163
/// An expression string supplied on the command line, together with the
/// bookkeeping used while parsing it.
pub struct CLIexpression<'a> {
    /// The expression exactly as supplied.
    pub inner: &'a str,
    /// Length of `inner` in bytes.
    pub length: usize,
    /// `inner` split into tokens around the `AND` keyword; empty until the
    /// expression has been split.
    pub expression: Vec<&'a str>,
}
170
171impl<'a> CLIexpression<'a> {
172 pub fn new(string: &'a str) -> Self {
174 Self {
175 inner: string,
176 length: string.len(),
177 expression: Vec::new(),
178 }
179 }
180
181 fn split(&self) -> Self {
183 let mut res_vec = Vec::new();
184 let re = Regex::new("AND").unwrap();
186 let splitter = SplitCaptures::new(&re, self.inner);
187 for state in splitter {
188 let el = match state {
189 SplitState::Unmatched(s) => s,
190 SplitState::Captured(s) => s.get(0).map_or("", |m| m.as_str()),
191 };
192 res_vec.push(el);
193 }
194 Self {
195 inner: self.inner,
196 length: self.length,
197 expression: res_vec,
198 }
199 }
200
201 pub fn parse(
204 &mut self,
205 reference_data: &BTreeMap<&'static str, Variable<'static>>,
206 ) -> Result<String> {
207 let expression_length_limit = 100;
208 if self.length > expression_length_limit {
209 bail!(
210 "The expression query provided is greater than {} chars.",
211 expression_length_limit
212 )
213 }
214 if self.inner.contains("&&") {
215 bail!("Use AND keyword, not && for expression queries.")
216 }
217 if self.inner.contains(" contains") {
218 bail!("Using the \"contains\" keyword is not yet supported.")
219 }
220 if self.inner.contains("||") || self.inner.contains("OR") {
221 bail!("OR (or ||) keyword is not supported.")
222 }
223 if self.inner.contains("tax_name")
224 || self.inner.contains("tax_tree")
225 || self.inner.contains("tax_lineage")
226 {
227 bail!("Set tax_name through -t <taxon_name>, tax_tree by -d flag, and tax_lineage by -l flag.")
228 }
229 let split_vec = &self.split();
230 let exp_vec = &split_vec.expression;
231
232 let mut index = 0;
234 let exp_vec_len = exp_vec.len();
235 let mut expression = String::new();
236 let re = Regex::new(r"!=|<=|<|==|=|>=|>").unwrap();
239 if !re.is_match(self.inner) {
240 bail!("No operators were found in the expression.")
241 }
242
243 expression += "%20AND";
245 let var_vec_check = &reference_data
247 .iter()
248 .map(|(e, _)| *e)
249 .collect::<Vec<&str>>();
250 let var_vec_min_max_check = {
254 let mut collector = Vec::new();
255 for (goat_var, el) in reference_data {
256 match &el.functions {
257 Function::None => (),
258 Function::Some(f) => {
259 for pos in f {
260 let format_pos = format!("{}({})", pos, goat_var);
261 collector.push(format_pos);
262 }
263 }
264 }
265 }
266 collector
267 };
268
269 loop {
273 if index == exp_vec_len {
274 break;
275 }
276 let curr_el = exp_vec[index];
279
280 let mut curr_el_vec = Vec::new();
281 let splitter = SplitCaptures::new(&re, curr_el);
285
286 for state in splitter {
287 match state {
288 SplitState::Unmatched(s) => {
289 curr_el_vec.push(s);
290 }
291 SplitState::Captured(s) => {
292 curr_el_vec.push(s.get(0).map_or("", |m| m.as_str()));
293 }
294 };
295 }
296
297 ensure!(
299 curr_el_vec.len() == 3 || curr_el_vec.len() == 1,
300 "Split vector on single expression is invalid - length = {}. Are the input variables or operands correct?",
301 curr_el_vec.len()
302 );
303 match curr_el_vec.len() {
304 3 => {
305 let variable = &curr_el_vec[0].trim().replace('\"', "").replace('\'', "")[..];
309 let operator = switch_string_to_url_encoding(curr_el_vec[1])?.trim();
310 let value = &curr_el_vec[2].trim().replace('\"', "").replace('\'', "")[..];
311
312 if !var_vec_check.contains(&variable)
313 && !var_vec_min_max_check.contains(&variable.to_string())
314 {
315 let combined_checks = var_vec_check
319 .iter()
320 .map(|e| String::from(*e))
321 .collect::<Vec<String>>()
322 .iter()
323 .chain(
324 var_vec_min_max_check
325 .iter()
326 .map(String::from)
327 .collect::<Vec<String>>()
328 .iter(),
329 )
330 .map(String::from)
331 .collect::<Vec<String>>();
332
333 let var_vec_mean = did_you_mean(&combined_checks, variable);
334
335 if let Some(value) = var_vec_mean {
336 bail!(
337 "In your expression (LHS) you typed \"{}\" - did you mean \"{}\"?",
338 variable,
339 value
340 )
341 }
342 }
343
344 let keyword_enums = match var_vec_min_max_check.contains(&variable.to_string())
347 {
348 true => {
349 let re = Regex::new(r"\((.*?)\)").unwrap();
351 let extract_var =
354 re.captures(variable).unwrap().get(1).unwrap().as_str();
355 &reference_data.get(extract_var).unwrap().type_of
356 }
357 false => &reference_data.get(variable).unwrap().type_of,
358 };
359
360 let url_encoded_variable = variable.replace('(', "%28");
362 let url_encoded_variable = url_encoded_variable.replace(')', "%29");
363
364 match keyword_enums {
366 TypeOf::Keyword(k) => {
367 let value_split_commas = value
370 .split(',')
371 .map(|e| {
372 let trimmed = e.trim();
373 trimmed.replace('!', "")
374 })
375 .collect::<Vec<String>>();
376
377 for val in &value_split_commas {
379 let possibilities =
380 k.iter().map(|e| String::from(*e)).collect::<Vec<_>>();
381 let did_you_mean_str = did_you_mean(&possibilities, val);
382
383 if let Some(value) = did_you_mean_str {
384 if value != *val {
385 bail!("In your expression (RHS) you typed \"{}\" - did you mean \"{}\"?", val, value)
386 }
387 }
388 }
389
390 let parsed_value_split_commas = value
392 .split(',')
393 .map(|e| {
394 let f = e.trim();
396 let f = f.replace('(', "%28");
398 let f = f.replace(')', "%29");
399 let f = f.replace(' ', "%20");
400 f.replace('!', "%21")
401 })
402 .collect::<Vec<String>>();
403 expression += "%20";
405 expression += &url_encoded_variable;
406 expression += "%20";
408 expression += operator;
409 expression += "%20";
410 expression += &parsed_value_split_commas.join("%2C");
411 expression += "%20";
412 expression += "AND%20"
415 }
416 t => {
417 TypeOf::check(t, value, variable)?;
419
420 expression += "%20";
422 expression += &url_encoded_variable;
423 expression += "%20";
425 expression += operator;
426 expression += "%20";
427 expression += value;
428 expression += "%20";
429 expression += "AND%20"
432 }
433 }
434 }
435 1 => (),
436 _ => unreachable!(),
437 }
438
439 index += 1;
440 }
441 match expression.len() - 6 > 0 {
443 true => {
444 expression.drain(expression.len() - 6..);
445 Ok(expression)
446 }
447 false => {
448 bail!("Error in expression format. Expressions must be in the format:\n\t<variable> <operator> <value> AND ...")
449 }
450 }
451 }
452}
453
/// Iterator that splits `text` on the matches of a regex while keeping the
/// matches: it yields the unmatched stretch before each match, then the
/// match's captures, and finally any unmatched tail.
#[derive(Debug)]
struct SplitCaptures<'r, 't> {
    /// Remaining regex matches over `text`.
    finder: CaptureMatches<'r, 't>,
    /// The text being split.
    text: &'t str,
    /// Byte offset just past the most recently found match.
    last: usize,
    /// Captures of a match already found, queued to be yielded next.
    caps: Option<Captures<'t>>,
}
463
464impl<'r, 't> SplitCaptures<'r, 't> {
465 pub fn new(re: &'r Regex, text: &'t str) -> SplitCaptures<'r, 't> {
466 SplitCaptures {
467 finder: re.captures_iter(text),
468 text,
469 last: 0,
470 caps: None,
471 }
472 }
473}
474
/// One item produced by `SplitCaptures`.
#[derive(Debug)]
enum SplitState<'t> {
    /// A stretch of text that the regex did not match.
    Unmatched(&'t str),
    /// The captures of one regex match.
    Captured(Captures<'t>),
}
480
481impl<'r, 't> Iterator for SplitCaptures<'r, 't> {
482 type Item = SplitState<'t>;
483
484 fn next(&mut self) -> Option<SplitState<'t>> {
485 if let Some(caps) = self.caps.take() {
486 return Some(SplitState::Captured(caps));
487 }
488 match self.finder.next() {
489 None => {
490 if self.last >= self.text.len() {
491 None
492 } else {
493 let s = &self.text[self.last..];
494 self.last = self.text.len();
495 Some(SplitState::Unmatched(s))
496 }
497 }
498 Some(caps) => {
499 let m = caps.get(0).unwrap();
500 let unmatched = &self.text[self.last..m.start()];
501 self.last = m.end();
502 self.caps = Some(caps);
503 Some(SplitState::Unmatched(unmatched))
504 }
505 }
506 }
507}