rumtk_core/search.rs
1/*
2 * rumtk attempts to implement HL7 and medical protocols for interoperability in medicine.
3 * This toolkit aims to be reliable, simple, performant, and standards compliant.
4 * Copyright (C) 2024 Luis M. Santos, M.D.
5 * Copyright (C) 2025 MedicalMasses L.L.C.
6 *
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 *
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with this library; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21
22pub mod rumtk_search {
23 use crate::cache::{new_cache, AHashMap, LazyRUMCache};
24 use crate::rumtk_cache_fetch;
25 use crate::strings::{CompactStringExt, RUMString};
26 use regex::Regex;
27 /**************************** Globals **************************************/
28 static mut re_cache: RegexCache = new_cache();
29 /**************************** Constants**************************************/
30 const DEFAULT_REGEX_CACHE_PAGE_SIZE: usize = 10;
31 /**************************** Types *****************************************/
32 pub type RegexCache = LazyRUMCache<RUMString, Regex>;
33 pub type SearchGroups = AHashMap<RUMString, RUMString>;
34 pub type CapturedList = Vec<RUMString>;
35
36 /**************************** Traits ****************************************/
37
38 /**************************** Helpers ***************************************/
39 fn compile_regex(expr: &RUMString) -> Regex {
40 Regex::new(expr).unwrap()
41 }
42
43 ///
44 /// Finds all of the named regex captures and generates a hash table with the results assorted
45 /// into key-value pairs. The keys are the names found in the regex expression. The value is
46 /// the match corresponding to the named capture.
47 ///
48 /// This function returns an instance of SearchGroup which is the hash map.
49 ///
50 pub fn string_search_named_captures(input: &str, expr: &str, default: &str) -> SearchGroups {
51 let re = rumtk_cache_fetch!(&raw mut re_cache, &RUMString::from(expr), compile_regex);
52 let names: Vec<&str> = re
53 .capture_names()
54 .skip(1)
55 .map(|x| x.unwrap_or(""))
56 .collect();
57 let mut clean_names: Vec<&str> = Vec::with_capacity(names.len());
58 let mut groups = SearchGroups::with_capacity(DEFAULT_REGEX_CACHE_PAGE_SIZE);
59
60 for name in &names {
61 if !name.is_empty() {
62 clean_names.push(name);
63 }
64 }
65
66 if clean_names.is_empty() {
67 return groups;
68 }
69
70 for name in &clean_names {
71 groups.insert(RUMString::from(name.to_string()), RUMString::from(default));
72 }
73
74 for cap in re.captures_iter(input).map(|c| c) {
75 for name in &clean_names {
76 let val = cap.name(name).map_or("", |s| s.as_str());
77 if !val.is_empty() {
78 groups.insert(RUMString::from(name.to_string()), RUMString::from(val));
79 }
80 }
81 }
82
83 groups
84 }
85
86 ///
87 /// Finds all of the regex captures regardless of name status and compile them into a list
88 /// of strings. Elsewhere, this provides a simple way to iterate through the contents that
89 /// were inside a group \(\).
90 ///
91 /// This function returns an instance of CapturedList which is the list of strings.
92 ///
93 pub fn string_search_all_captures(input: &str, expr: &str, default: &str) -> CapturedList {
94 let re = rumtk_cache_fetch!(&raw mut re_cache, &RUMString::from(expr), compile_regex);
95 let mut capture_list = CapturedList::with_capacity(DEFAULT_REGEX_CACHE_PAGE_SIZE);
96
97 for caps in re.captures_iter(input) {
98 for c in caps.iter().skip(1) {
99 let c_str = c.unwrap().as_str();
100 capture_list.push(RUMString::from(c_str));
101 }
102 }
103
104 capture_list
105 }
106
107 ///
108 /// Given a string input and a compiled RegEx, look for all matches and put them in a string
109 /// list for easy iteration/access.
110 ///
111 pub fn string_list(input: &str, re: &Regex) -> CapturedList {
112 let mut list: Vec<RUMString> = Vec::with_capacity(DEFAULT_REGEX_CACHE_PAGE_SIZE);
113 for itm in re.find_iter(input) {
114 list.push(RUMString::from(itm.as_str()));
115 }
116 list
117 }
118
119 ///
120 /// Given a string input and a RegEx string,
121 /// ```text
122 /// - Compile the regex if not done so already.
123 /// - Do a string search for all regex matches.
124 /// - Collapse/join the matches into a single output string using join_pattern as the join fragment.
125 /// ```
126 /// Use \" \" in join_pattern if you wish to have spaces in between matches.
127 ///
128 pub fn string_search(input: &str, expr: &str, join_pattern: &str) -> RUMString {
129 let re = rumtk_cache_fetch!(&raw mut re_cache, &RUMString::from(expr), compile_regex);
130 string_list(input, &re).join_compact(join_pattern)
131 }
132}