regex_mel/lib.rs
1#![cfg_attr(docsrs, feature(doc_cfg))]
2#![doc = include_str!("../README.md")]
3
4use melodium_core::*;
5use melodium_macro::{check, mel_function, mel_package, mel_treatment};
6use regex::Regex;
7use std::collections::HashMap;
8use std::sync::Arc;
9use std_mel::data::string_map::*;
10
11/// Matches stream of strings against a regex.
12///
13/// Every string coming through the `text` stream is matched against `regex`.
14/// `matches` tells if matching were found or not.
15/// `error` is emitted only if regex contains error.
16///
17/// The regex engine is Unicode-aware. Please refer to [Regex Syntax](https://docs.rs/regex/latest/regex/index.html#syntax)
18/// in documentation for full syntax description.
19#[mel_treatment(
20    input text Stream<string>
21    output matches Stream<bool>
22    output error Block<string>
23)]
24pub async fn matches(#[mel(content(regex))] regex: string) {
25    match Regex::new(®ex) {
26        Ok(regex) => {
27            error.close().await;
28
29            while let Ok(text) = text
30                .recv_many()
31                .await
32                .map(|values| TryInto::<Vec<string>>::try_into(values).unwrap())
33            {
34                check!(
35                    matches
36                        .send_many(
37                            text.into_iter()
38                                .map(|txt| regex.is_match(&txt))
39                                .collect::<VecDeque<_>>()
40                                .into()
41                        )
42                        .await
43                );
44            }
45        }
46        Err(err) => {
47            let _ = error.send_one(err.to_string().into()).await;
48        }
49    }
50}
51
52/// Matches a string against a regex.
53///
54/// `text` is matched against `regex`, returns wether the match were successful or not.
55///
56/// The regex engine is Unicode-aware. Please refer to [Regex Syntax](https://docs.rs/regex/latest/regex/index.html#syntax)
57/// in documentation for full syntax description.
58#[mel_function]
59pub fn matches(text: string, #[mel(content(regex))] regex: string) -> bool {
60    match Regex::new(®ex) {
61        Ok(regex) => regex.is_match(&text),
62        Err(_) => false,
63    }
64}
65
66/// Find in stream of strings according to a regex.
67///
68/// Every string coming through the `text` stream is looked up with `regex`.
69/// `found` contains the found strings (or _none_ if corresonding `text` input do not match).
70/// `error` is emitted only if regex contains error.
71///
72/// The regex syntax is Unicode-aware. Please refer to [Regex Syntax](https://docs.rs/regex/latest/regex/index.html#syntax)
73/// in documentation for full syntax description.
74#[mel_treatment(
75    input text Stream<string>
76    output found Stream<Option<string>>
77    output error Block<string>
78)]
79pub async fn find(#[mel(content(regex))] regex: string) {
80    match Regex::new(®ex) {
81        Ok(regex) => {
82            error.close().await;
83
84            while let Ok(text) = text
85                .recv_many()
86                .await
87                .map(|values| TryInto::<Vec<string>>::try_into(values).unwrap())
88            {
89                let mut vec_found = VecDeque::with_capacity(text.len());
90
91                for text in text {
92                    match regex.find(&text) {
93                        Some(m) => {
94                            vec_found.push_back(Some(m.as_str().to_string()).into());
95                        }
96                        None => {
97                            vec_found.push_back(Value::Option(None));
98                        }
99                    }
100                }
101
102                check!(found.send_many(TransmissionValue::Other(vec_found)).await)
103            }
104        }
105        Err(err) => {
106            let _ = error.send_one(err.to_string().into()).await;
107        }
108    }
109}
110
111/// Find in string according to a regex.
112///
113/// The regex syntax is Unicode-aware. Please refer to [Regex Syntax](https://docs.rs/regex/latest/regex/index.html#syntax)
114/// in documentation for full syntax description.
115#[mel_function]
116pub fn find(text: string, #[mel(content(regex))] regex: string) -> Option<string> {
117    match Regex::new(®ex) {
118        Ok(regex) => regex.find(&text).map(|m| m.as_str().to_string()),
119        Err(_) => None,
120    }
121}
122
123/// Captures groups of text according to a regex.
124///
125/// Every string coming through the `text` stream is passed through `regex`.
126/// `captured` contains the **named** groups contents (or _none_ if group is not captured).
127/// `error` is emitted only if regex contains error.
128///
129/// The regex syntax is Unicode-aware. Please refer to [Regex Syntax](https://docs.rs/regex/latest/regex/index.html#syntax)
130/// in documentation for full syntax description.
131#[mel_treatment(
132    input text Stream<string>
133    output captured Stream<Option<StringMap>>
134    output error Block<string>
135)]
136pub async fn capture(#[mel(content(regex))] regex: string) {
137    match Regex::new(®ex) {
138        Ok(regex) => {
139            error.close().await;
140
141            while let Ok(text) = text
142                .recv_many()
143                .await
144                .map(|values| TryInto::<Vec<string>>::try_into(values).unwrap())
145            {
146                let mut vec_captured = VecDeque::with_capacity(text.len());
147
148                for text in text {
149                    match regex.captures(&text) {
150                        Some(captures) => {
151                            let mut map_captured = HashMap::new();
152
153                            for name in regex.capture_names() {
154                                if let Some(name) = name {
155                                    if let Some(cap) = captures.name(name) {
156                                        map_captured
157                                            .insert(name.to_string(), cap.as_str().to_string());
158                                    }
159                                }
160                            }
161
162                            vec_captured.push_back(Value::Option(Some(Box::new(Value::Data(
163                                Arc::new(StringMap::new_with(map_captured)),
164                            )))));
165                        }
166                        None => {
167                            vec_captured.push_back(Value::Option(None));
168                        }
169                    }
170                }
171
172                check!(
173                    captured
174                        .send_many(TransmissionValue::Other(vec_captured))
175                        .await
176                )
177            }
178        }
179        Err(err) => {
180            let _ = error.send_one(err.to_string().into()).await;
181        }
182    }
183}
184
185/// Captures groups of text according to a regex.
186///
187/// If match, return a `StringMap` containing the captured **named** groups.
188///
189/// The regex syntax is Unicode-aware. Please refer to [Regex Syntax](https://docs.rs/regex/latest/regex/index.html#syntax)
190/// in documentation for full syntax description.
191#[mel_function]
192pub fn capture(text: string, #[mel(content(regex))] regex: string) -> Option<StringMap> {
193    match Regex::new(®ex) {
194        Ok(regex) => match regex.captures(&text) {
195            Some(captures) => {
196                let mut map_captured = HashMap::new();
197
198                for name in regex.capture_names() {
199                    if let Some(name) = name {
200                        if let Some(cap) = captures.name(name) {
201                            map_captured.insert(name.to_string(), cap.as_str().to_string());
202                        }
203                    }
204                }
205
206                Some(StringMap::new_with(map_captured))
207            }
208            None => None,
209        },
210        Err(_) => None,
211    }
212}
213
214/// Replace text according to a regex.
215///
216/// Every string coming through the `text` stream is passed through `regex`,
217/// and `replacer` is applied.
218/// `error` is emitted only if regex contains error.
219///
220/// The regex syntax is Unicode-aware. Please refer to [Regex Syntax](https://docs.rs/regex/latest/regex/index.html#syntax)
221/// in documentation for full syntax description.
222#[mel_treatment(
223    input text Stream<string>
224    output replaced Stream<string>
225    output error Block<string>
226)]
227pub async fn replace(#[mel(content(regex))] regex: string, replacer: string) {
228    match Regex::new(®ex) {
229        Ok(regex) => {
230            error.close().await;
231
232            while let Ok(text) = text
233                .recv_many()
234                .await
235                .map(|values| TryInto::<Vec<string>>::try_into(values).unwrap())
236            {
237                let mut vec_replaced = Vec::with_capacity(text.len());
238
239                for text in text {
240                    vec_replaced.push(regex.replace(&text, &replacer).to_string());
241                }
242
243                check!(replaced.send_many(vec_replaced.into()).await);
244            }
245        }
246        Err(err) => {
247            let _ = error.send_one(err.to_string().into()).await;
248        }
249    }
250}
251
252/// Replace text according to a regex and replacer.
253///
254/// Return string with replaced content, or _none_ if an error in regex occured.
255///
256/// The regex syntax is Unicode-aware. Please refer to [Regex Syntax](https://docs.rs/regex/latest/regex/index.html#syntax)
257/// in documentation for full syntax description.
258#[mel_function]
259pub fn replace(
260    text: string,
261    #[mel(content(regex))] regex: string,
262    replacer: string,
263) -> Option<string> {
264    match Regex::new(®ex) {
265        Ok(regex) => Some(regex.replace(&text, &replacer).to_string()),
266        Err(_) => None,
267    }
268}
269
// Package-level macro imported from `melodium_macro`.
// NOTE(review): presumably generates the Mélodium package entry point exposing
// the treatments and functions declared above — confirm against the macro docs.
mel_package!();