1use serde::{Deserialize, Serialize};
4use serde_json::Value;
5
6#[derive(Debug, Clone, Serialize, Deserialize)]
10#[serde(tag = "type", content = "params")]
11pub enum Transformation {
12 Trim,
14
15 Lowercase,
17
18 Uppercase,
20
21 RemoveWhitespace,
23
24 Regex {
26 pattern: String,
27 replacement: String,
28 },
29
30 RegexExtract { pattern: String, group: usize },
32
33 Coerce {
35 target_type: String, },
37
38 Filter { pattern: String },
40
41 NormalizeWhitespace,
43
44 StripHtml,
46
47 DecodeHtml,
49
50 ParseJson,
52
53 #[cfg(feature = "javascript")]
55 JavaScript { code: String },
56}
57
58impl Transformation {
59 fn apply_regex(value: &str, pattern: &str, replacement: &str) -> crate::Result<String> {
60 let re = regex::Regex::new(pattern).map_err(|e| {
61 crate::error::PluginError::InvalidTransformation(format!("Invalid regex: {e}"))
62 })?;
63 Ok(re.replace_all(value, replacement).to_string())
64 }
65
66 fn apply_regex_extract(value: &str, pattern: &str, group: usize) -> crate::Result<String> {
67 let re = regex::Regex::new(pattern).map_err(|e| {
68 crate::error::PluginError::InvalidTransformation(format!("Invalid regex: {e}"))
69 })?;
70 let caps = re.captures(value).ok_or_else(|| {
71 crate::error::PluginError::ExtractionError(format!("No match for pattern: {pattern}"))
72 })?;
73 caps.get(group)
74 .ok_or_else(|| {
75 crate::error::PluginError::ExtractionError(format!(
76 "Capture group {group} not found"
77 ))
78 })
79 .map(|m| m.as_str().to_string())
80 }
81
82 fn apply_coerce(value: &str, target_type: &str) -> crate::Result<String> {
83 match target_type {
84 "string" => Ok(value.to_string()),
85 "number" => {
86 value.parse::<f64>().map_err(|_| {
87 crate::error::PluginError::InvalidTransformation(format!(
88 "Cannot coerce '{value}' to number"
89 ))
90 })?;
91 Ok(value.to_string())
92 }
93 "boolean" => {
94 let b = matches!(value.to_lowercase().as_str(), "true" | "yes" | "1");
95 Ok(b.to_string())
96 }
97 "date" => {
98 chrono::DateTime::parse_from_rfc3339(value).map_err(|_| {
99 crate::error::PluginError::InvalidTransformation(format!(
100 "Invalid date: {value}"
101 ))
102 })?;
103 Ok(value.to_string())
104 }
105 _ => Err(crate::error::PluginError::InvalidTransformation(format!(
106 "Unknown type: {target_type}"
107 ))),
108 }
109 }
110
111 fn apply_filter(value: &str, pattern: &str) -> crate::Result<String> {
112 let re = regex::Regex::new(pattern).map_err(|e| {
113 crate::error::PluginError::InvalidTransformation(format!("Invalid regex: {e}"))
114 })?;
115 if re.is_match(value) {
116 Ok(value.to_string())
117 } else {
118 Err(crate::error::PluginError::ExtractionError(
119 "Value did not match filter pattern".to_string(),
120 ))
121 }
122 }
123
124 pub fn apply(&self, value: &str) -> crate::Result<String> {
135 match self {
136 Self::Trim => Ok(value.trim().to_string()),
137 Self::Lowercase => Ok(value.to_lowercase()),
138 Self::Uppercase => Ok(value.to_uppercase()),
139 Self::RemoveWhitespace => Ok(value.chars().filter(|c| !c.is_whitespace()).collect()),
140 Self::Regex {
141 pattern,
142 replacement,
143 } => Self::apply_regex(value, pattern, replacement),
144 Self::RegexExtract { pattern, group } => {
145 Self::apply_regex_extract(value, pattern, *group)
146 }
147 Self::Coerce { target_type } => Self::apply_coerce(value, target_type),
148 Self::Filter { pattern } => Self::apply_filter(value, pattern),
149 Self::NormalizeWhitespace => Ok(value.split_whitespace().collect::<Vec<_>>().join(" ")),
150 Self::StripHtml => {
151 static RE: std::sync::LazyLock<regex::Regex> = std::sync::LazyLock::new(|| {
152 #[expect(clippy::expect_used, reason = "hardcoded regex pattern is valid")]
153 regex::Regex::new(r"<[^>]+>").expect("valid hardcoded HTML tag pattern")
154 });
155 Ok(RE.replace_all(value, "").to_string())
156 }
157 Self::DecodeHtml => Ok(value
158 .replace("<", "<")
159 .replace(">", ">")
160 .replace("&", "&")
161 .replace(""", "\"")
162 .replace("'", "'")),
163 Self::ParseJson => serde_json::from_str::<Value>(value)
164 .map(|v| v.to_string())
165 .map_err(|e| crate::error::PluginError::InvalidTransformation(e.to_string())),
166 #[cfg(feature = "javascript")]
167 Self::JavaScript { code: _ } => Err(crate::error::PluginError::InvalidTransformation(
168 "JavaScript transformations not yet implemented".to_string(),
169 )),
170 }
171 }
172
173 pub fn apply_chain(transformations: &[Self], mut value: String) -> crate::Result<String> {
181 for transformation in transformations {
182 value = transformation.apply(&value)?;
183 }
184 Ok(value)
185 }
186}
187
188impl std::fmt::Display for Transformation {
189 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
190 match self {
191 Self::Trim => write!(f, "Trim"),
192 Self::Lowercase => write!(f, "Lowercase"),
193 Self::Uppercase => write!(f, "Uppercase"),
194 Self::RemoveWhitespace => write!(f, "RemoveWhitespace"),
195 Self::Regex { pattern, .. } => write!(f, "Regex({pattern})"),
196 Self::RegexExtract { pattern, group } => {
197 write!(f, "RegexExtract({pattern}, group {group})")
198 }
199 Self::Coerce { target_type } => write!(f, "Coerce({target_type})"),
200 Self::Filter { pattern } => write!(f, "Filter({pattern})"),
201 Self::NormalizeWhitespace => write!(f, "NormalizeWhitespace"),
202 Self::StripHtml => write!(f, "StripHtml"),
203 Self::DecodeHtml => write!(f, "DecodeHtml"),
204 Self::ParseJson => write!(f, "ParseJson"),
205 #[cfg(feature = "javascript")]
206 Self::JavaScript { .. } => write!(f, "JavaScript"),
207 }
208 }
209}
210
#[cfg(test)]
mod tests {
    use super::*;

    /// Result type for tests that propagate transformation errors with `?`.
    type TestResult = std::result::Result<(), Box<dyn std::error::Error>>;

    #[test]
    fn test_trim() -> TestResult {
        let t = Transformation::Trim;
        assert_eq!(t.apply("  hello  ")?, "hello");
        Ok(())
    }

    #[test]
    fn test_lowercase() -> TestResult {
        let t = Transformation::Lowercase;
        assert_eq!(t.apply("HELLO")?, "hello");
        Ok(())
    }

    #[test]
    fn test_uppercase() -> TestResult {
        let t = Transformation::Uppercase;
        assert_eq!(t.apply("hello")?, "HELLO");
        Ok(())
    }

    #[test]
    fn test_remove_whitespace() -> TestResult {
        let t = Transformation::RemoveWhitespace;
        assert_eq!(t.apply(" a b\tc\n")?, "abc");
        Ok(())
    }

    #[test]
    fn test_regex_replace() -> TestResult {
        let t = Transformation::Regex {
            pattern: r"(\d{3})-(\d{4})".to_string(),
            replacement: "($1) $2".to_string(),
        };
        assert_eq!(t.apply("123-4567")?, "(123) 4567");
        Ok(())
    }

    #[test]
    fn test_regex_extract() -> TestResult {
        let t = Transformation::RegexExtract {
            pattern: r"Price: \$(\d+\.\d{2})".to_string(),
            group: 1,
        };
        assert_eq!(t.apply("Price: $19.99")?, "19.99");
        Ok(())
    }

    #[test]
    fn test_coerce_number() {
        let t = Transformation::Coerce {
            target_type: "number".to_string(),
        };
        assert!(t.apply("123.45").is_ok());
        assert!(t.apply("not a number").is_err());
    }

    #[test]
    fn test_filter() -> TestResult {
        let t = Transformation::Filter {
            pattern: r"^\d+$".to_string(),
        };
        assert_eq!(t.apply("12345")?, "12345");
        assert!(t.apply("12a45").is_err());
        Ok(())
    }

    #[test]
    fn test_normalize_whitespace() -> TestResult {
        let t = Transformation::NormalizeWhitespace;
        // The input must contain runs of whitespace, otherwise this test only
        // asserts identity and cannot catch a broken normalizer.
        assert_eq!(t.apply("  hello    world \t\n foo  ")?, "hello world foo");
        Ok(())
    }

    #[test]
    fn test_strip_html() -> TestResult {
        let t = Transformation::StripHtml;
        assert_eq!(t.apply("<p>Hello <b>world</b></p>")?, "Hello world");
        Ok(())
    }

    #[test]
    fn test_transformation_chain() -> TestResult {
        let transforms = vec![
            Transformation::StripHtml,
            Transformation::Trim,
            Transformation::NormalizeWhitespace,
        ];
        // Outer padding exercises `Trim`; the inner run of spaces exercises
        // `NormalizeWhitespace`.
        let result =
            Transformation::apply_chain(&transforms, "  <p>hello    world</p>  ".to_string())?;
        assert_eq!(result, "hello world");
        Ok(())
    }
}