rumtk_core/search.rs
1/*
2 * rumtk attempts to implement HL7 and medical protocols for interoperability in medicine.
3 * This toolkit aims to be reliable, simple, performant, and standards compliant.
4 * Copyright (C) 2024 Luis M. Santos, M.D. <lsantos@medicalmasses.com>
5 * Copyright (C) 2025 MedicalMasses L.L.C. <contact@medicalmasses.com>
6 *
7 * This program is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program. If not, see <https://www.gnu.org/licenses/>.
19 */
20
21pub mod rumtk_search {
22 use crate::cache::{new_cache, LazyRUMCache};
23 use crate::core::RUMResult;
24 use crate::rumtk_cache_fetch;
25 use crate::strings::{rumtk_format, CompactStringExt, RUMString};
26 use crate::types::RUMHashMap;
27 use regex::Regex;
28 /**************************** Globals **************************************/
29 static mut re_cache: RegexCache = new_cache();
30 /**************************** Constants**************************************/
31 const DEFAULT_REGEX_CACHE_PAGE_SIZE: usize = 10;
32 /**************************** Types *****************************************/
33 pub type RegexCache = LazyRUMCache<RUMString, Regex>;
34 pub type SearchGroups = RUMHashMap<RUMString, RUMString>;
35 pub type CapturedList = Vec<RUMString>;
36
37 /**************************** Traits ****************************************/
38
39 /**************************** Helpers ***************************************/
40 fn compile_regex(expr: &str) -> RUMResult<Regex> {
41 match Regex::new(expr) {
42 Ok(regex) => Ok(regex),
43 Err(e) => Err(rumtk_format!("Invalid regex => {}", e))
44 }
45 }
46
47 ///
48 /// Finds all of the named regex captures and generates a hash table with the results assorted
49 /// into key-value pairs. The keys are the names found in the regex expression. The value is
50 /// the match corresponding to the named capture.
51 ///
52 /// This function returns an instance of SearchGroup which is the hash map.
53 ///
54 pub fn string_search_named_captures(input: &str, expr: &str, default: &str) -> RUMResult<SearchGroups> {
55 let key = RUMString::from(expr);
56 let re: Regex = rumtk_cache_fetch!(&raw mut re_cache, &key, || {compile_regex(expr)})?;
57 let names: Vec<&str> = re
58 .capture_names()
59 .skip(1)
60 .map(|x| x.unwrap_or(""))
61 .collect();
62 let mut clean_names: Vec<&str> = Vec::with_capacity(names.len());
63 let mut groups = SearchGroups::with_capacity(DEFAULT_REGEX_CACHE_PAGE_SIZE);
64
65 for name in &names {
66 if !name.is_empty() {
67 clean_names.push(name);
68 }
69 }
70
71 if clean_names.is_empty() {
72 return Ok(groups);
73 }
74
75 for name in &clean_names {
76 groups.insert(RUMString::from(name.to_string()), RUMString::from(default));
77 }
78
79 for cap in re.captures_iter(input).map(|c| c) {
80 for name in &clean_names {
81 let val = cap.name(name).map_or("", |s| s.as_str());
82 if !val.is_empty() {
83 groups.insert(RUMString::from(name.to_string()), RUMString::from(val));
84 }
85 }
86 }
87
88 Ok(groups)
89 }
90
91 ///
92 /// Finds all of the regex captures regardless of name status and compile them into a list
93 /// of strings. Elsewhere, this provides a simple way to iterate through the contents that
94 /// were inside a group \(\).
95 ///
96 /// This function returns an instance of CapturedList which is the list of strings.
97 ///
98 pub fn string_search_all_captures(input: &str, expr: &str, default: &str) -> RUMResult<CapturedList> {
99 let key = RUMString::from(expr);
100 let re: Regex = rumtk_cache_fetch!(&raw mut re_cache, &key, || {compile_regex(expr)})?;
101 let mut capture_list = CapturedList::with_capacity(DEFAULT_REGEX_CACHE_PAGE_SIZE);
102
103 for caps in re.captures_iter(input) {
104 for c in caps.iter().skip(1) {
105 let c_str = c.unwrap().as_str();
106 capture_list.push(RUMString::from(c_str));
107 }
108 }
109
110 Ok(capture_list)
111 }
112
113 ///
114 /// Given a string input and a compiled RegEx, look for all matches and put them in a string
115 /// list for easy iteration/access.
116 ///
117 pub fn string_list(input: &str, re: &Regex) -> CapturedList {
118 let mut list: Vec<RUMString> = Vec::with_capacity(DEFAULT_REGEX_CACHE_PAGE_SIZE);
119 for itm in re.find_iter(input) {
120 list.push(RUMString::from(itm.as_str()));
121 }
122 list
123 }
124
125 ///
126 /// Given a string input and a RegEx string,
127 /// ```text
128 /// - Compile the regex if not done so already.
129 /// - Do a string search for all regex matches.
130 /// - Collapse/join the matches into a single output string using join_pattern as the join fragment.
131 /// ```
132 /// Use \" \" in join_pattern if you wish to have spaces in between matches.
133 ///
134 pub fn string_search(input: &str, expr: &str, join_pattern: &str) -> RUMResult<RUMString> {
135 let key = RUMString::from(expr);
136 let re: Regex = rumtk_cache_fetch!(&raw mut re_cache, &key, || {compile_regex(expr)})?;
137 Ok(string_list(input, &re).join_compact(join_pattern))
138 }
139}