// Utility functions for building regular expressions and word-boundary patterns.
use regex::{Regex, Error};
use crate::enums::WordBounds;
/// Compile `pattern` into a [`Regex`], optionally making it case-insensitive.
///
/// When `case_insensitive` is `true`, the inline flag group `(?i)` is prepended to
/// `pattern` before compilation. The flag is *not* added when `pattern` already
/// starts with `(?` (an inline flag group, a non-capturing group, a named group, ...),
/// irrespective of the `case_insensitive` argument.
///
/// # Errors
///
/// Returns the [`Error`] reported by `Regex::new` when the resulting pattern
/// cannot be compiled.
pub fn build_regex(pattern: &str, case_insensitive: bool) -> Result<Regex, Error> {
    // A leading "(?" is treated as "the caller already controls flags/grouping",
    // so the case-insensitive flag is only added when the pattern does not start with it.
    if case_insensitive && !pattern.starts_with("(?") {
        Regex::new(&["(?i)", pattern].concat())
    } else {
        Regex::new(pattern)
    }
}
// Miscellaneous utility functions that do not belong to structs
/// Append a cleaned-up copy of `num_string` to `output`.
///
/// Every trailing `.` and `,` is stripped from the end of the extracted numeric
/// string, in whatever order they occur (e.g. `"7.,"` and `"7,."` both become `"7"`).
/// Separators inside the number (`"1,234.5"`) are left untouched. A string made up
/// solely of dots and commas is pushed as an empty string.
pub(crate) fn add_sanitized_numeric_string(output: &mut Vec<String>, num_string: &str) {
    // Trim both characters in a single pass: trimming "." and then "," separately
    // would leave a dot behind whenever a comma follows it (e.g. "7.,").
    let sanitized = num_string.trim_end_matches(|c: char| c == '.' || c == ',');
    output.push(sanitized.to_string());
}
// internal utility methods
/// Build a regex pattern string for `word` using the given `WordBounds` option.
///
/// Thin wrapper: the returned string is whatever `bounds.to_pattern(word)` produces.
/// NOTE(review): the boundary markers emitted for each `WordBounds` variant are defined
/// by `WordBounds::to_pattern` in `crate::enums`, not in this file; confirm there.
pub(crate) fn build_word_pattern(word: &str, bounds: WordBounds) -> String {
bounds.to_pattern(word)
}
/// build regex pattern with whole word matches only. Does allow multiple word matches
/// if wildcards allowing spaces or punctuation are in the regex patterm
pub(crate) fn build_whole_word_pattern(word: &str) -> String {
build_word_pattern(word, WordBounds::Both)
}
/// Build a pattern that matches any one of `words`, with `WordBounds::Both` applied.
///
/// The entries are joined with `|` and wrapped in a single parenthesised group, so
/// `&["cat", "dog"]` yields the group `(cat|dog)` before the bounds are applied.
/// Entries are inserted verbatim (not escaped) and may therefore contain regex syntax.
/// Meant to be combined with `build_regex` or the pattern match / replace helpers.
pub(crate) fn build_optional_whole_word_pattern(words: &[&str]) -> String {
    let alternatives = words.join("|");
    let grouped = format!("({})", alternatives);
    build_word_pattern(&grouped, WordBounds::Both)
}