Skip to main content

rumtk_core/
search.rs

1/*
2 * rumtk attempts to implement HL7 and medical protocols for interoperability in medicine.
3 * This toolkit aims to be reliable, simple, performant, and standards compliant.
4 * Copyright (C) 2024  Luis M. Santos, M.D. <lsantos@medicalmasses.com>
5 * Copyright (C) 2025  MedicalMasses L.L.C. <contact@medicalmasses.com>
6 *
7 * This program is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
19 */
20
21pub mod rumtk_search {
22    use crate::cache::{new_cache, LazyRUMCache};
23    use crate::core::RUMResult;
24    use crate::rumtk_cache_fetch;
25    use crate::strings::{rumtk_format, CompactStringExt, RUMString};
26    use crate::types::RUMHashMap;
27    use regex::Regex;
28    use std::fmt::Debug;
29    use std::str::FromStr;
30    /**************************** Globals **************************************/
      /// Module-wide cache of compiled expressions. The search functions in this
      /// module hand `&raw mut re_cache` to `rumtk_cache_fetch!`, keyed by the
      /// expression text, with `compile_regex` as the closure supplied for the key.
      ///
      /// NOTE(review): this is a `static mut`; nothing visible in this file
      /// synchronizes access to it. Confirm that `crate::cache` makes concurrent
      /// use sound.
31    static mut re_cache: RegexCache = new_cache();
32    /**************************** Constants**************************************/
      /// Initial capacity requested for the result collections built in this
      /// module (the capture map and the match lists).
33    const DEFAULT_REGEX_CACHE_PAGE_SIZE: usize = 10;
34    /**************************** Types *****************************************/
      /// Cache type mapping an expression string to its compiled `Regex`.
35    pub type RegexCache = LazyRUMCache<RUMString, Regex>;
      /// Map of capture-group name to captured text, as returned by
      /// `string_search_named_captures`.
36    pub type SearchGroups = RUMHashMap<RUMString, RUMString>;
      /// List of matched or captured strings.
37    pub type CapturedList = Vec<RUMString>;
38
39    /**************************** Traits ****************************************/
40
41    /**************************** Helpers ***************************************/
42    fn compile_regex(expr: &str) -> RUMResult<Regex> {
43        match Regex::new(expr) {
44            Ok(regex) => Ok(regex),
45            Err(e) => Err(rumtk_format!("Invalid regex => {}", e))
46        }
47    }
48
49    ///
50    /// Finds all of the named regex captures and generates a hash table with the results assorted
51    /// into key-value pairs. The keys are the names found in the regex expression. The value is
52    /// the match corresponding to the named capture.
53    ///
54    /// This function returns an instance of SearchGroup which is the hash map.
55    ///
56    pub fn string_search_named_captures(input: &str, expr: &str, default: &str) -> RUMResult<SearchGroups> {
57        let key = RUMString::from(expr);
58        let re: Regex = rumtk_cache_fetch!(&raw mut re_cache, &key, || {compile_regex(expr)})?;
59        let names: Vec<&str> = re
60            .capture_names()
61            .skip(1)
62            .map(|x| x.unwrap_or(""))
63            .collect();
64        let mut clean_names: Vec<&str> = Vec::with_capacity(names.len());
65        let mut groups = SearchGroups::with_capacity(DEFAULT_REGEX_CACHE_PAGE_SIZE);
66
67        for name in &names {
68            if !name.is_empty() {
69                clean_names.push(name);
70            }
71        }
72
73        if clean_names.is_empty() {
74            return Ok(groups);
75        }
76
77        for name in &clean_names {
78            groups.insert(RUMString::from(name.to_string()), RUMString::from(default));
79        }
80
81        for cap in re.captures_iter(input).map(|c| c) {
82            for name in &clean_names {
83                let val = cap.name(name).map_or("", |s| s.as_str());
84                if !val.is_empty() {
85                    groups.insert(RUMString::from(name.to_string()), RUMString::from(val));
86                }
87            }
88        }
89
90        Ok(groups)
91    }
92
93    ///
94    /// Finds all of the regex captures regardless of name status and compile them into a list
95    /// of strings. Elsewhere, this provides a simple way to iterate through the contents that
96    /// were inside a group \(\).
97    ///
98    /// This function returns an instance of CapturedList which is the list of strings.
99    ///
100    pub fn string_search_all_captures(input: &str, expr: &str, default: &str) -> RUMResult<CapturedList> {
101        let key = RUMString::from(expr);
102        let re: Regex = rumtk_cache_fetch!(&raw mut re_cache, &key, || {compile_regex(expr)})?;
103        let mut capture_list = CapturedList::with_capacity(DEFAULT_REGEX_CACHE_PAGE_SIZE);
104
105        for caps in re.captures_iter(input) {
106            for c in caps.iter().skip(1) {
107                let c_str = c.unwrap().as_str();
108                capture_list.push(RUMString::from(c_str));
109            }
110        }
111
112        Ok(capture_list)
113    }
114
115    ///
116    /// Given a string input and a compiled RegEx, look for all matches and put them in a string
117    /// list for easy iteration/access.
118    ///
119    pub fn string_list(input: &str, re: &Regex) -> CapturedList {
120        let mut list: Vec<RUMString> = Vec::with_capacity(DEFAULT_REGEX_CACHE_PAGE_SIZE);
121        for itm in re.find_iter(input) {
122            list.push(RUMString::from(itm.as_str()));
123        }
124        list
125    }
126
127    ///
128    /// Given a string input and a RegEx string,
129    /// ```text
130    ///     - Compile the regex if not done so already.
131    ///     - Do a string search for all regex matches.
132    ///     - Collapse/join the matches into a single output string using join_pattern as the join fragment.
133    /// ```
134    /// Use \" \" in join_pattern if you wish to have spaces in between matches.
135    ///
136    pub fn string_search(input: &str, expr: &str, join_pattern: &str) -> RUMResult<RUMString> {
137        Ok(string_search_list(input, expr)?.join_compact(join_pattern))
138    }
139
140    ///
141    /// Search for pattern and return all matches.
142    ///
143    pub fn string_search_list(input: &str, expr: &str) -> RUMResult<CapturedList> {
144        let key = RUMString::from(expr);
145        let re: Regex = rumtk_cache_fetch!(&raw mut re_cache, &key, || {compile_regex(expr)})?;
146        Ok(string_list(input, &re))
147    }
148
149    ///
150    /// Given a string input and a set of RegEx patterns, find the target value and return it as
151    /// the given target type `T`.
152    ///
153    /// ```
154    /// use rumtk_core::search::rumtk_search::string_find_value;
155    ///
156    /// let haystack = "Range (min \\xe2\\x80\\xa6 max):     0.6 ms \\xe2\\x80\\xa6   2.9 ms    1273 runs";
157    /// let patterns = ["\\d+ runs", "\\d+"];
158    /// let expected = 1273;
159    /// let result = string_find_value::<usize>(haystack, &patterns);
160    ///
161    /// assert_eq!(result, Ok(expected), "Did not find the needle in the haystack or returned the wrong type!");
162    /// ```
163    /// Use \" \" in join_pattern if you wish to have spaces in between matches.
164    ///
165    pub fn string_find_value<T: Default + FromStr>(input: &str, patterns: &[&str]) -> RUMResult<T> {
166        let mut haystack = input;
167        let mut needle = RUMString::default();
168        let mut result = T::default();
169
170        for expr in patterns {
171            needle = string_search(haystack, expr, " ")?;
172            haystack = &needle;
173        }
174
175        result = needle.trim().parse::<T>().unwrap_or_default();
176        Ok(result)
177    }
178
179    ///
180    /// Search for pattern and replace all matches.
181    ///
182    pub fn string_replace_all_matches(input: &str, expr: &str, replacement: &str) -> RUMResult<String> {
183        let matches = string_search_list(input, expr)?;
184        let mut result = String::from(input);
185
186        for pattern in matches.iter() {
187            result = result.as_str().replace(pattern.as_str(), replacement.as_str());
188        }
189
190        Ok(result)
191    }
192}