streamson_lib/handler/
regex.rs

//! Handler which performs regex conversion on matched data
2//!
3//! # Example
4//! ```
5//! use streamson_lib::{matcher, strategy::{self, Strategy}, handler};
6//! use std::sync::{Arc, Mutex};
7//! use regex;
8//!
9//! let converter =
10//! Arc::new(Mutex::new(handler::Regex::new().add_regex("s/bad/good/g".to_string())));
11//! let matcher = matcher::Simple::new(r#"{"users"}[]{"name"}"#).unwrap();
12//!
13//! let mut convert = strategy::Convert::new();
14//!
15//! // Set the matcher for convert strategy
16//! convert.add_matcher(Box::new(matcher), converter);
17//!
18//! for input in vec![
19//!     br#"{"users": [{"password": "1234", "name": "User1"}, {"#.to_vec(),
//!     br#""password": "0000", "name": "user2"}]}"#.to_vec(),
21//! ] {
22//!     for converted_data in convert.process(&input).unwrap() {
23//!         println!("{:?}", converted_data);
24//!     }
25//! }
26//! ```
27
28use super::Handler;
29use crate::{error, path::Path, streamer::Token};
30use std::{any::Any, str, str::FromStr};
31
/// Converts matched data using sed-style regex replacements
///
/// Incoming chunks are collected in an internal buffer and the
/// replacements are applied once the matched data ends.
#[derive(Debug, Default)]
pub struct Regex {
    /// All replacements (sed strings such as `s/from/to/g`),
    /// kept in the order in which they were added
    replacements: Vec<String>,
    /// Buffer which collects the input until the match ends
    buffer: Vec<u8>,
}
40
41impl Handler for Regex {
42    fn feed(
43        &mut self,
44        data: &[u8],
45        _matcher_idx: usize,
46    ) -> Result<Option<Vec<u8>>, error::Handler> {
47        self.buffer.extend(data);
48        Ok(None)
49    }
50
51    fn end(
52        &mut self,
53        _path: &Path,
54        _matcher_idx: usize,
55        _token: Token,
56    ) -> Result<Option<Vec<u8>>, error::Handler> {
57        let mut output: String = str::from_utf8(&self.buffer)
58            .map_err(|e| error::Handler::new(e.to_string()))?
59            .to_string();
60        output = sedregex::find_and_replace(&output, &self.replacements)
61            .map_err(error::Handler::new)?
62            .to_string();
63
64        // Clear the buffer so it can be reused later
65        self.buffer.clear();
66
67        Ok(Some(output.as_bytes().to_vec()))
68    }
69
70    fn is_converter(&self) -> bool {
71        true
72    }
73
74    fn as_any(&self) -> &dyn Any {
75        self
76    }
77}
78
79impl FromStr for Regex {
80    type Err = error::Handler;
81    fn from_str(input: &str) -> Result<Self, Self::Err> {
82        // Check format
83        sedregex::ReplaceCommand::new(input).map_err(error::Handler::new)?;
84        let mut new = Regex::new();
85        new = new.add_regex(input.to_string());
86        Ok(new)
87    }
88}
89
90impl Regex {
91    /// Creates a new regex converter
92    pub fn new() -> Self {
93        Self::default()
94    }
95
96    /// Adds new regex conversion which will be applied
97    ///
98    /// # Arguments
99    /// * `sedregex` - sed regex used to convert the data
100    pub fn add_regex(mut self, sedregex: String) -> Self {
101        self.replacements.push(sedregex);
102        self
103    }
104}
105
#[cfg(test)]
mod tests {
    use crate::{
        handler,
        matcher::Simple,
        strategy::{Convert, OutputConverter, Strategy},
    };
    use std::sync::{Arc, Mutex};

    /// Only the first occurrence in each matched name is rewritten,
    /// because the sed expression carries no `g` flag.
    #[test]
    fn regex_converter() {
        let handler = Arc::new(Mutex::new(
            handler::Regex::new().add_regex("s/[Uu]ser([0-9]+)/user$1/".to_string()),
        ));
        let name_matcher = Simple::new(r#"[]{"name"}"#).unwrap();

        let mut strategy = Convert::new();
        strategy.add_matcher(Box::new(name_matcher), handler);

        let processed = strategy
            .process(br#"[{"name": "User1 User1"}, {"name": "user2"}]"#)
            .unwrap();

        // Glue all converted chunks back into a single byte stream
        let bytes: Vec<u8> = OutputConverter::new()
            .convert(&processed)
            .into_iter()
            .flat_map(|chunk| chunk.1)
            .collect();

        assert_eq!(
            String::from_utf8(bytes).unwrap(),
            r#"[{"name": "user1 User1"}, {"name": "user2"}]"#
        );
    }
}
141}