rumtk_core/search.rs
1/*
2 * rumtk attempts to implement HL7 and medical protocols for interoperability in medicine.
3 * This toolkit aims to be reliable, simple, performant, and standards compliant.
4 * Copyright (C) 2024 Luis M. Santos, M.D.
5 * Copyright (C) 2025 MedicalMasses L.L.C.
6 *
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 *
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with this library; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21
22pub mod rumtk_search {
23 use crate::cache::{new_cache, LazyRUMCache};
24 use crate::rumtk_cache_fetch;
25 use crate::strings::{CompactStringExt, RUMString};
26 use crate::types::RUMHashMap;
27 use regex::Regex;
    /**************************** Globals **************************************/
    /// Module-wide cache of compiled regular expressions. Entries are keyed by the expression
    /// text (`RUMString`) and hold the compiled `Regex`; the search functions in this module
    /// read it through `rumtk_cache_fetch!` using a `&raw mut` pointer.
    ///
    /// NOTE(review): this is a `static mut`, which provides no synchronization by itself.
    /// Whether concurrent callers are safe depends on `LazyRUMCache` / `rumtk_cache_fetch!`,
    /// which are defined elsewhere -- confirm before using from multiple threads.
    static mut re_cache: RegexCache = new_cache();
    /**************************** Constants **************************************/
    /// Initial capacity used for the result collections built by the functions in this
    /// module (capture maps and match lists). The collections can still grow past this size.
    const DEFAULT_REGEX_CACHE_PAGE_SIZE: usize = 10;
    /**************************** Types *****************************************/
    /// Cache type mapping a regex expression string to its compiled `Regex`.
    pub type RegexCache = LazyRUMCache<RUMString, Regex>;
    /// Map from a named capture group to the text captured for it.
    pub type SearchGroups = RUMHashMap<RUMString, RUMString>;
    /// Ordered list of captured or matched strings.
    pub type CapturedList = Vec<RUMString>;
36
37 /**************************** Traits ****************************************/
38
39 /**************************** Helpers ***************************************/
40 fn compile_regex(expr: &RUMString) -> Regex {
41 Regex::new(expr).unwrap()
42 }
43
44 ///
45 /// Finds all of the named regex captures and generates a hash table with the results assorted
46 /// into key-value pairs. The keys are the names found in the regex expression. The value is
47 /// the match corresponding to the named capture.
48 ///
49 /// This function returns an instance of SearchGroup which is the hash map.
50 ///
51 pub fn string_search_named_captures(input: &str, expr: &str, default: &str) -> SearchGroups {
52 let re = rumtk_cache_fetch!(&raw mut re_cache, &RUMString::from(expr), compile_regex);
53 let names: Vec<&str> = re
54 .capture_names()
55 .skip(1)
56 .map(|x| x.unwrap_or(""))
57 .collect();
58 let mut clean_names: Vec<&str> = Vec::with_capacity(names.len());
59 let mut groups = SearchGroups::with_capacity(DEFAULT_REGEX_CACHE_PAGE_SIZE);
60
61 for name in &names {
62 if !name.is_empty() {
63 clean_names.push(name);
64 }
65 }
66
67 if clean_names.is_empty() {
68 return groups;
69 }
70
71 for name in &clean_names {
72 groups.insert(RUMString::from(name.to_string()), RUMString::from(default));
73 }
74
75 for cap in re.captures_iter(input).map(|c| c) {
76 for name in &clean_names {
77 let val = cap.name(name).map_or("", |s| s.as_str());
78 if !val.is_empty() {
79 groups.insert(RUMString::from(name.to_string()), RUMString::from(val));
80 }
81 }
82 }
83
84 groups
85 }
86
87 ///
88 /// Finds all of the regex captures regardless of name status and compile them into a list
89 /// of strings. Elsewhere, this provides a simple way to iterate through the contents that
90 /// were inside a group \(\).
91 ///
92 /// This function returns an instance of CapturedList which is the list of strings.
93 ///
94 pub fn string_search_all_captures(input: &str, expr: &str, default: &str) -> CapturedList {
95 let re = rumtk_cache_fetch!(&raw mut re_cache, &RUMString::from(expr), compile_regex);
96 let mut capture_list = CapturedList::with_capacity(DEFAULT_REGEX_CACHE_PAGE_SIZE);
97
98 for caps in re.captures_iter(input) {
99 for c in caps.iter().skip(1) {
100 let c_str = c.unwrap().as_str();
101 capture_list.push(RUMString::from(c_str));
102 }
103 }
104
105 capture_list
106 }
107
108 ///
109 /// Given a string input and a compiled RegEx, look for all matches and put them in a string
110 /// list for easy iteration/access.
111 ///
112 pub fn string_list(input: &str, re: &Regex) -> CapturedList {
113 let mut list: Vec<RUMString> = Vec::with_capacity(DEFAULT_REGEX_CACHE_PAGE_SIZE);
114 for itm in re.find_iter(input) {
115 list.push(RUMString::from(itm.as_str()));
116 }
117 list
118 }
119
120 ///
121 /// Given a string input and a RegEx string,
122 /// ```text
123 /// - Compile the regex if not done so already.
124 /// - Do a string search for all regex matches.
125 /// - Collapse/join the matches into a single output string using join_pattern as the join fragment.
126 /// ```
127 /// Use \" \" in join_pattern if you wish to have spaces in between matches.
128 ///
129 pub fn string_search(input: &str, expr: &str, join_pattern: &str) -> RUMString {
130 let re = rumtk_cache_fetch!(&raw mut re_cache, &RUMString::from(expr), compile_regex);
131 string_list(input, &re).join_compact(join_pattern)
132 }
133}