1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
use regex::{Regex, Error};
use crate::enums::{StringBounds, WordBounds};

/// Compile `pattern` into a `Regex`, optionally making it case-insensitive.
///
/// When `case_insensitive` is true and `pattern` does not already begin with `(?`,
/// the inline flag `(?i)` is prepended before compiling. A pattern that starts
/// with `(?` (an inline flag group, a non-capturing group, ...) is compiled
/// unchanged, irrespective of the `case_insensitive` flag.
///
/// # Errors
/// Returns the error reported by `Regex::new` when the final pattern is invalid.
pub fn build_regex(pattern: &str, case_insensitive: bool) -> Result<Regex, Error> {
  // A leading "(?" means the pattern already opens with its own group or flags,
  // so no case-insensitive prefix is added in that case.
  let needs_flag = case_insensitive && !pattern.starts_with("(?");
  let prefix = if needs_flag { "(?i)" } else { "" };
  Regex::new(&[prefix, pattern].concat())
}

// Miscellaneous utility functions that do not belong to structs
/// Push a cleaned copy of an extracted numeric string onto `output`.
///
/// Every trailing `.` and `,` is stripped, in whatever order they occur, so
/// punctuation captured after a number (e.g. `"1,234.56."` or `"42.,"`) is
/// dropped. Dots and commas inside the number are left untouched.
///
/// * `output` - vector that receives the sanitized string
/// * `num_string` - the raw numeric string as extracted
pub(crate) fn add_sanitized_numeric_string(output: &mut Vec<String>, num_string: &str) {
  // Trim both characters in a single pass: trimming dots first and commas
  // second would leave the dot in "42.," because the first trim stops at the comma.
  let cleaned = num_string.trim_end_matches(|c: char| matches!(c, '.' | ','));
  output.push(cleaned.to_string());
}

// internal utility methods

/// Build a regex pattern for `word` using the boundary rule given by `bounds`.
///
/// Delegates directly to `bounds.to_pattern(word)`; how each `WordBounds` option
/// shapes the resulting pattern is defined on `WordBounds` itself.
pub(crate) fn build_word_pattern(word: &str, bounds: WordBounds) -> String {
  bounds.to_pattern(word)
}

/// Build a regex pattern that matches `word` as a whole word only.
///
/// Applies `WordBounds::Both` through `build_word_pattern`. The match may still
/// span several words when `word` itself contains wildcards that allow spaces
/// or punctuation.
pub(crate) fn build_whole_word_pattern(word: &str) -> String {
  let bounds = WordBounds::Both;
  build_word_pattern(word, bounds)
}

/// Build a whole-word pattern that matches any one of several alternatives.
///
/// The entries of `words` are joined with `|` inside a single parenthesised
/// group, and that group is then passed to `build_word_pattern` with
/// `WordBounds::Both`. Each entry is inserted as-is, so it may itself contain
/// regex syntax: e.g. `&["cats?", "dog"]` yields the group `(cats?|dog)`.
/// Intended for use with `build_regex` or pattern_match / pattern_replace.
pub(crate) fn build_optional_whole_word_pattern(words: &[&str]) -> String {
  let alternation = format!("({})", words.join("|"));
  build_word_pattern(&alternation, WordBounds::Both)
}

/// Pair every str in `strs` with the same boolean value.
///
/// Returns a vector of `(str, bool)` tuples in the original order, as used by
/// many multiple-match methods where the boolean element indicates
/// case-sensitivity.
pub(crate) fn strs_to_str_bool_pairs<'a>(strs: &'a [&str], bool_val: bool) -> Vec<(&'a str, bool)> {
  strs
    .iter()
    .map(|&text| (text, bool_val))
    .collect()
}

/// Convert an array of strs to a vector of `StringBounds`, all sharing the same
/// mode and case-sensitivity rule, as used in matched_conditional.
///
/// Only used internally with an integer `mode`:
/// 0 = Start, 1 = End, 2+ = Contains.
///
/// Each item is built with `StringBounds::new(mode, txt, true, case_sensitive)`.
/// NOTE(review): the meaning of the fixed `true` third argument is defined by
/// `StringBounds::new` and is not visible here; confirm against that constructor.
pub(crate) fn strs_to_string_bounds<'a>(strs: &'a [&str], case_sensitive: bool, mode: u8) -> Vec<StringBounds<'a>> {
  strs
    .iter()
    .map(|&text| StringBounds::new(mode, text, true, case_sensitive))
    .collect()
}

/// Convert an array of str/boolean tuples to a vector of `StringBounds`, all
/// sharing the same mode, as used in matched_conditional.
///
/// Only used internally with an integer `mode`:
/// 0 = Start, 1 = End, 2+ = Contains.
///
/// Each item is built with `StringBounds::new(mode, txt, true, flag)`, where
/// `flag` is the boolean of the pair.
/// NOTE(review): the original closure named this boolean `ci`, while the
/// sibling `strs_to_string_bounds` passes `case_sensitive` in the same
/// position; confirm which polarity `StringBounds::new` expects.
pub(crate) fn pairs_to_string_bounds<'a>(pairs: &'a [(&str, bool)], mode: u8) -> Vec<StringBounds<'a>> {
  pairs
    .iter()
    .map(|&(text, flag)| StringBounds::new(mode, text, true, flag))
    .collect()
}