rumtk_core/
search.rs

1/*
2 * rumtk attempts to implement HL7 and medical protocols for interoperability in medicine.
3 * This toolkit aims to be reliable, simple, performant, and standards compliant.
4 * Copyright (C) 2024  Luis M. Santos, M.D.
5 * Copyright (C) 2025  MedicalMasses L.L.C.
6 *
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 *
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with this library; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
20 */
21
22pub mod rumtk_search {
23    use crate::cache::{new_cache, LazyRUMCache};
24    use crate::rumtk_cache_fetch;
25    use crate::strings::{CompactStringExt, RUMString};
26    use crate::types::RUMHashMap;
27    use regex::Regex;
28    /**************************** Globals **************************************/
29    static mut re_cache: RegexCache = new_cache();
30    /**************************** Constants**************************************/
31    const DEFAULT_REGEX_CACHE_PAGE_SIZE: usize = 10;
32    /**************************** Types *****************************************/
33    pub type RegexCache = LazyRUMCache<RUMString, Regex>;
34    pub type SearchGroups = RUMHashMap<RUMString, RUMString>;
35    pub type CapturedList = Vec<RUMString>;
36
37    /**************************** Traits ****************************************/
38
39    /**************************** Helpers ***************************************/
40    fn compile_regex(expr: &RUMString) -> Regex {
41        Regex::new(expr).unwrap()
42    }
43
44    ///
45    /// Finds all of the named regex captures and generates a hash table with the results assorted
46    /// into key-value pairs. The keys are the names found in the regex expression. The value is
47    /// the match corresponding to the named capture.
48    ///
49    /// This function returns an instance of SearchGroup which is the hash map.
50    ///
51    pub fn string_search_named_captures(input: &str, expr: &str, default: &str) -> SearchGroups {
52        let re = rumtk_cache_fetch!(&raw mut re_cache, &RUMString::from(expr), compile_regex);
53        let names: Vec<&str> = re
54            .capture_names()
55            .skip(1)
56            .map(|x| x.unwrap_or(""))
57            .collect();
58        let mut clean_names: Vec<&str> = Vec::with_capacity(names.len());
59        let mut groups = SearchGroups::with_capacity(DEFAULT_REGEX_CACHE_PAGE_SIZE);
60
61        for name in &names {
62            if !name.is_empty() {
63                clean_names.push(name);
64            }
65        }
66
67        if clean_names.is_empty() {
68            return groups;
69        }
70
71        for name in &clean_names {
72            groups.insert(RUMString::from(name.to_string()), RUMString::from(default));
73        }
74
75        for cap in re.captures_iter(input).map(|c| c) {
76            for name in &clean_names {
77                let val = cap.name(name).map_or("", |s| s.as_str());
78                if !val.is_empty() {
79                    groups.insert(RUMString::from(name.to_string()), RUMString::from(val));
80                }
81            }
82        }
83
84        groups
85    }
86
87    ///
88    /// Finds all of the regex captures regardless of name status and compile them into a list
89    /// of strings. Elsewhere, this provides a simple way to iterate through the contents that
90    /// were inside a group \(\).
91    ///
92    /// This function returns an instance of CapturedList which is the list of strings.
93    ///
94    pub fn string_search_all_captures(input: &str, expr: &str, default: &str) -> CapturedList {
95        let re = rumtk_cache_fetch!(&raw mut re_cache, &RUMString::from(expr), compile_regex);
96        let mut capture_list = CapturedList::with_capacity(DEFAULT_REGEX_CACHE_PAGE_SIZE);
97
98        for caps in re.captures_iter(input) {
99            for c in caps.iter().skip(1) {
100                let c_str = c.unwrap().as_str();
101                capture_list.push(RUMString::from(c_str));
102            }
103        }
104
105        capture_list
106    }
107
108    ///
109    /// Given a string input and a compiled RegEx, look for all matches and put them in a string
110    /// list for easy iteration/access.
111    ///
112    pub fn string_list(input: &str, re: &Regex) -> CapturedList {
113        let mut list: Vec<RUMString> = Vec::with_capacity(DEFAULT_REGEX_CACHE_PAGE_SIZE);
114        for itm in re.find_iter(input) {
115            list.push(RUMString::from(itm.as_str()));
116        }
117        list
118    }
119
120    ///
121    /// Given a string input and a RegEx string,
122    /// ```text
123    ///     - Compile the regex if not done so already.
124    ///     - Do a string search for all regex matches.
125    ///     - Collapse/join the matches into a single output string using join_pattern as the join fragment.
126    /// ```
127    /// Use \" \" in join_pattern if you wish to have spaces in between matches.
128    ///
129    pub fn string_search(input: &str, expr: &str, join_pattern: &str) -> RUMString {
130        let re = rumtk_cache_fetch!(&raw mut re_cache, &RUMString::from(expr), compile_regex);
131        string_list(input, &re).join_compact(join_pattern)
132    }
133}