regex_mel/
lib.rs

1#![cfg_attr(docsrs, feature(doc_cfg))]
2#![doc = include_str!("../README.md")]
3
4use melodium_core::*;
5use melodium_macro::{check, mel_function, mel_package, mel_treatment};
6use regex::Regex;
7use std::collections::HashMap;
8use std::sync::Arc;
9use std_mel::data::*;
10
11/// Matches stream of strings against a regex.
12///
13/// Every string coming through the `text` stream is matched against `regex`.
14/// `matches` tells if matching were found or not.
15/// `error` is emitted only if regex contains error.
16///
17/// The regex engine is Unicode-aware. Please refer to [Regex Syntax](https://docs.rs/regex/latest/regex/index.html#syntax)
18/// in documentation for full syntax description.
19#[mel_treatment(
20    input text Stream<string>
21    output matches Stream<bool>
22    output error Block<string>
23)]
24pub async fn matches(#[mel(content(regex))] regex: string) {
25    match Regex::new(&regex) {
26        Ok(regex) => {
27            error.close().await;
28
29            while let Ok(text) = text
30                .recv_many()
31                .await
32                .map(|values| TryInto::<Vec<string>>::try_into(values).unwrap())
33            {
34                check!(
35                    matches
36                        .send_many(
37                            text.into_iter()
38                                .map(|txt| regex.is_match(&txt))
39                                .collect::<VecDeque<_>>()
40                                .into()
41                        )
42                        .await
43                );
44            }
45        }
46        Err(err) => {
47            let _ = error.send_one(err.to_string().into()).await;
48        }
49    }
50}
51
52/// Matches a string against a regex.
53///
54/// `text` is matched against `regex`, returns wether the match were successful or not.
55///
56/// The regex engine is Unicode-aware. Please refer to [Regex Syntax](https://docs.rs/regex/latest/regex/index.html#syntax)
57/// in documentation for full syntax description.
58#[mel_function]
59pub fn matches(text: string, #[mel(content(regex))] regex: string) -> bool {
60    match Regex::new(&regex) {
61        Ok(regex) => regex.is_match(&text),
62        Err(_) => false,
63    }
64}
65
66/// Find in stream of strings according to a regex.
67///
68/// Every string coming through the `text` stream is looked up with `regex`.
69/// `found` contains the found strings (or _none_ if corresonding `text` input do not match).
70/// `error` is emitted only if regex contains error.
71///
72/// The regex syntax is Unicode-aware. Please refer to [Regex Syntax](https://docs.rs/regex/latest/regex/index.html#syntax)
73/// in documentation for full syntax description.
74#[mel_treatment(
75    input text Stream<string>
76    output found Stream<Option<string>>
77    output error Block<string>
78)]
79pub async fn find(#[mel(content(regex))] regex: string) {
80    match Regex::new(&regex) {
81        Ok(regex) => {
82            error.close().await;
83
84            while let Ok(text) = text
85                .recv_many()
86                .await
87                .map(|values| TryInto::<Vec<string>>::try_into(values).unwrap())
88            {
89                let mut vec_found = VecDeque::with_capacity(text.len());
90
91                for text in text {
92                    match regex.find(&text) {
93                        Some(m) => {
94                            vec_found.push_back(Some(m.as_str().to_string()).into());
95                        }
96                        None => {
97                            vec_found.push_back(Value::Option(None));
98                        }
99                    }
100                }
101
102                check!(found.send_many(TransmissionValue::Other(vec_found)).await)
103            }
104        }
105        Err(err) => {
106            let _ = error.send_one(err.to_string().into()).await;
107        }
108    }
109}
110
111/// Find in string according to a regex.
112///
113/// The regex syntax is Unicode-aware. Please refer to [Regex Syntax](https://docs.rs/regex/latest/regex/index.html#syntax)
114/// in documentation for full syntax description.
115#[mel_function]
116pub fn find(text: string, #[mel(content(regex))] regex: string) -> Option<string> {
117    match Regex::new(&regex) {
118        Ok(regex) => regex.find(&text).map(|m| m.as_str().to_string()),
119        Err(_) => None,
120    }
121}
122
123/// Captures groups of text according to a regex.
124///
125/// Every string coming through the `text` stream is passed through `regex`.
126/// `captured` contains the **named** groups contents (or _none_ if group is not captured).
127/// `error` is emitted only if regex contains error.
128///
129/// The regex syntax is Unicode-aware. Please refer to [Regex Syntax](https://docs.rs/regex/latest/regex/index.html#syntax)
130/// in documentation for full syntax description.
131#[mel_treatment(
132    input text Stream<string>
133    output captured Stream<Option<Map>>
134    output error Block<string>
135)]
136pub async fn capture(#[mel(content(regex))] regex: string) {
137    match Regex::new(&regex) {
138        Ok(regex) => {
139            error.close().await;
140
141            while let Ok(text) = text
142                .recv_many()
143                .await
144                .map(|values| TryInto::<Vec<string>>::try_into(values).unwrap())
145            {
146                let mut vec_captured = VecDeque::with_capacity(text.len());
147
148                for text in text {
149                    match regex.captures(&text) {
150                        Some(captures) => {
151                            let mut map_captured = HashMap::new();
152
153                            for name in regex.capture_names() {
154                                if let Some(name) = name {
155                                    if let Some(cap) = captures.name(name) {
156                                        map_captured.insert(
157                                            name.to_string(),
158                                            Value::String(cap.as_str().to_string()),
159                                        );
160                                    }
161                                }
162                            }
163
164                            vec_captured.push_back(Value::Option(Some(Box::new(Value::Data(
165                                Arc::new(Map::new_with(map_captured)),
166                            )))));
167                        }
168                        None => {
169                            vec_captured.push_back(Value::Option(None));
170                        }
171                    }
172                }
173
174                check!(
175                    captured
176                        .send_many(TransmissionValue::Other(vec_captured))
177                        .await
178                )
179            }
180        }
181        Err(err) => {
182            let _ = error.send_one(err.to_string().into()).await;
183        }
184    }
185}
186
187/// Captures groups of text according to a regex.
188///
189/// If match, return a `Map` containing the captured **named** groups.
190///
191/// The regex syntax is Unicode-aware. Please refer to [Regex Syntax](https://docs.rs/regex/latest/regex/index.html#syntax)
192/// in documentation for full syntax description.
193#[mel_function]
194pub fn capture(text: string, #[mel(content(regex))] regex: string) -> Option<Map> {
195    match Regex::new(&regex) {
196        Ok(regex) => match regex.captures(&text) {
197            Some(captures) => {
198                let mut map_captured = HashMap::new();
199
200                for name in regex.capture_names() {
201                    if let Some(name) = name {
202                        if let Some(cap) = captures.name(name) {
203                            map_captured
204                                .insert(name.to_string(), Value::String(cap.as_str().to_string()));
205                        }
206                    }
207                }
208
209                Some(Map::new_with(map_captured))
210            }
211            None => None,
212        },
213        Err(_) => None,
214    }
215}
216
217/// Replace text according to a regex.
218///
219/// Every string coming through the `text` stream is passed through `regex`,
220/// and `replacer` is applied.
221/// `error` is emitted only if regex contains error.
222///
223/// The regex syntax is Unicode-aware. Please refer to [Regex Syntax](https://docs.rs/regex/latest/regex/index.html#syntax)
224/// in documentation for full syntax description.
225#[mel_treatment(
226    input text Stream<string>
227    output replaced Stream<string>
228    output error Block<string>
229)]
230pub async fn replace(#[mel(content(regex))] regex: string, replacer: string) {
231    match Regex::new(&regex) {
232        Ok(regex) => {
233            error.close().await;
234
235            while let Ok(text) = text
236                .recv_many()
237                .await
238                .map(|values| TryInto::<Vec<string>>::try_into(values).unwrap())
239            {
240                let mut vec_replaced = Vec::with_capacity(text.len());
241
242                for text in text {
243                    vec_replaced.push(regex.replace(&text, &replacer).to_string());
244                }
245
246                check!(replaced.send_many(vec_replaced.into()).await);
247            }
248        }
249        Err(err) => {
250            let _ = error.send_one(err.to_string().into()).await;
251        }
252    }
253}
254
255/// Replace text according to a regex and replacer.
256///
257/// Return string with replaced content, or _none_ if an error in regex occured.
258///
259/// The regex syntax is Unicode-aware. Please refer to [Regex Syntax](https://docs.rs/regex/latest/regex/index.html#syntax)
260/// in documentation for full syntax description.
261#[mel_function]
262pub fn replace(
263    text: string,
264    #[mel(content(regex))] regex: string,
265    replacer: string,
266) -> Option<string> {
267    match Regex::new(&regex) {
268        Ok(regex) => Some(regex.replace(&text, &replacer).to_string()),
269        Err(_) => None,
270    }
271}
272
273mel_package!();