1use serde::{Deserialize, Serialize};
4use serde_json::Value;
5
6#[derive(Debug, Clone, Serialize, Deserialize)]
10#[serde(tag = "type", content = "params")]
11pub enum Transformation {
12 Trim,
14
15 Lowercase,
17
18 Uppercase,
20
21 RemoveWhitespace,
23
24 Regex {
26 pattern: String,
27 replacement: String,
28 },
29
30 RegexExtract { pattern: String, group: usize },
32
33 Coerce {
35 target_type: String, },
37
38 Filter { pattern: String },
40
41 NormalizeWhitespace,
43
44 StripHtml,
46
47 DecodeHtml,
49
50 ParseJson,
52
53 #[cfg(feature = "javascript")]
55 JavaScript { code: String },
56}
57
58impl Transformation {
59 fn apply_regex(value: &str, pattern: &str, replacement: &str) -> crate::Result<String> {
60 let re = regex::Regex::new(pattern).map_err(|e| {
61 crate::error::PluginError::InvalidTransformation(format!("Invalid regex: {e}"))
62 })?;
63 Ok(re.replace_all(value, replacement).to_string())
64 }
65
66 fn apply_regex_extract(value: &str, pattern: &str, group: usize) -> crate::Result<String> {
67 let re = regex::Regex::new(pattern).map_err(|e| {
68 crate::error::PluginError::InvalidTransformation(format!("Invalid regex: {e}"))
69 })?;
70 let caps = re.captures(value).ok_or_else(|| {
71 crate::error::PluginError::ExtractionError(format!("No match for pattern: {pattern}"))
72 })?;
73 caps.get(group)
74 .ok_or_else(|| {
75 crate::error::PluginError::ExtractionError(format!(
76 "Capture group {group} not found"
77 ))
78 })
79 .map(|m| m.as_str().to_string())
80 }
81
82 fn apply_coerce(value: &str, target_type: &str) -> crate::Result<String> {
83 match target_type {
84 "string" => Ok(value.to_string()),
85 "number" => {
86 value.parse::<f64>().map_err(|_| {
87 crate::error::PluginError::InvalidTransformation(format!(
88 "Cannot coerce '{value}' to number"
89 ))
90 })?;
91 Ok(value.to_string())
92 }
93 "boolean" => {
94 let b = matches!(value.to_lowercase().as_str(), "true" | "yes" | "1");
95 Ok(b.to_string())
96 }
97 "date" => {
98 chrono::DateTime::parse_from_rfc3339(value).map_err(|_| {
99 crate::error::PluginError::InvalidTransformation(format!(
100 "Invalid date: {value}"
101 ))
102 })?;
103 Ok(value.to_string())
104 }
105 _ => Err(crate::error::PluginError::InvalidTransformation(format!(
106 "Unknown type: {target_type}"
107 ))),
108 }
109 }
110
111 fn apply_filter(value: &str, pattern: &str) -> crate::Result<String> {
112 let re = regex::Regex::new(pattern).map_err(|e| {
113 crate::error::PluginError::InvalidTransformation(format!("Invalid regex: {e}"))
114 })?;
115 if re.is_match(value) {
116 Ok(value.to_string())
117 } else {
118 Err(crate::error::PluginError::ExtractionError(
119 "Value did not match filter pattern".to_string(),
120 ))
121 }
122 }
123
124 pub fn apply(&self, value: &str) -> crate::Result<String> {
126 match self {
127 Self::Trim => Ok(value.trim().to_string()),
128 Self::Lowercase => Ok(value.to_lowercase()),
129 Self::Uppercase => Ok(value.to_uppercase()),
130 Self::RemoveWhitespace => Ok(value.chars().filter(|c| !c.is_whitespace()).collect()),
131 Self::Regex {
132 pattern,
133 replacement,
134 } => Self::apply_regex(value, pattern, replacement),
135 Self::RegexExtract { pattern, group } => {
136 Self::apply_regex_extract(value, pattern, *group)
137 }
138 Self::Coerce { target_type } => Self::apply_coerce(value, target_type),
139 Self::Filter { pattern } => Self::apply_filter(value, pattern),
140 Self::NormalizeWhitespace => Ok(value.split_whitespace().collect::<Vec<_>>().join(" ")),
141 Self::StripHtml => {
142 static RE: std::sync::LazyLock<regex::Regex> = std::sync::LazyLock::new(|| {
143 #[expect(clippy::expect_used, reason = "hardcoded regex pattern is valid")]
144 regex::Regex::new(r"<[^>]+>").expect("valid hardcoded HTML tag pattern")
145 });
146 Ok(RE.replace_all(value, "").to_string())
147 }
148 Self::DecodeHtml => Ok(value
149 .replace("<", "<")
150 .replace(">", ">")
151 .replace("&", "&")
152 .replace(""", "\"")
153 .replace("'", "'")),
154 Self::ParseJson => serde_json::from_str::<Value>(value)
155 .map(|v| v.to_string())
156 .map_err(|e| crate::error::PluginError::InvalidTransformation(e.to_string())),
157 #[cfg(feature = "javascript")]
158 Self::JavaScript { code: _ } => Err(crate::error::PluginError::InvalidTransformation(
159 "JavaScript transformations not yet implemented".to_string(),
160 )),
161 }
162 }
163
164 pub fn apply_chain(transformations: &[Self], mut value: String) -> crate::Result<String> {
166 for transformation in transformations {
167 value = transformation.apply(&value)?;
168 }
169 Ok(value)
170 }
171}
172
173impl std::fmt::Display for Transformation {
174 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
175 match self {
176 Self::Trim => write!(f, "Trim"),
177 Self::Lowercase => write!(f, "Lowercase"),
178 Self::Uppercase => write!(f, "Uppercase"),
179 Self::RemoveWhitespace => write!(f, "RemoveWhitespace"),
180 Self::Regex { pattern, .. } => write!(f, "Regex({pattern})"),
181 Self::RegexExtract { pattern, group } => {
182 write!(f, "RegexExtract({pattern}, group {group})")
183 }
184 Self::Coerce { target_type } => write!(f, "Coerce({target_type})"),
185 Self::Filter { pattern } => write!(f, "Filter({pattern})"),
186 Self::NormalizeWhitespace => write!(f, "NormalizeWhitespace"),
187 Self::StripHtml => write!(f, "StripHtml"),
188 Self::DecodeHtml => write!(f, "DecodeHtml"),
189 Self::ParseJson => write!(f, "ParseJson"),
190 #[cfg(feature = "javascript")]
191 Self::JavaScript { .. } => write!(f, "JavaScript"),
192 }
193 }
194}
195
#[cfg(test)]
mod tests {
    use super::*;

    /// Result type for tests that propagate transformation errors with `?`.
    type TestResult = std::result::Result<(), Box<dyn std::error::Error>>;

    #[test]
    fn test_trim() -> TestResult {
        let t = Transformation::Trim;
        assert_eq!(t.apply("\t hello \n")?, "hello");
        Ok(())
    }

    #[test]
    fn test_lowercase() -> TestResult {
        let t = Transformation::Lowercase;
        assert_eq!(t.apply("HELLO")?, "hello");
        Ok(())
    }

    #[test]
    fn test_uppercase() -> TestResult {
        let t = Transformation::Uppercase;
        assert_eq!(t.apply("hello")?, "HELLO");
        Ok(())
    }

    #[test]
    fn test_remove_whitespace() -> TestResult {
        let t = Transformation::RemoveWhitespace;
        assert_eq!(t.apply(" a\tb\nc ")?, "abc");
        Ok(())
    }

    #[test]
    fn test_regex_replace() -> TestResult {
        let t = Transformation::Regex {
            pattern: r"(\d{3})-(\d{4})".to_string(),
            replacement: "($1) $2".to_string(),
        };
        assert_eq!(t.apply("123-4567")?, "(123) 4567");
        Ok(())
    }

    #[test]
    fn test_regex_invalid_pattern() {
        let t = Transformation::Regex {
            pattern: "(".to_string(),
            replacement: String::new(),
        };
        assert!(t.apply("anything").is_err());
    }

    #[test]
    fn test_regex_extract() -> TestResult {
        let t = Transformation::RegexExtract {
            pattern: r"Price: \$(\d+\.\d{2})".to_string(),
            group: 1,
        };
        assert_eq!(t.apply("Price: $19.99")?, "19.99");
        Ok(())
    }

    #[test]
    fn test_regex_extract_errors() {
        let no_match = Transformation::RegexExtract {
            pattern: r"(\d+)".to_string(),
            group: 1,
        };
        assert!(no_match.apply("abc").is_err());

        // The pattern matches, but it defines no capture group with this index.
        let missing_group = Transformation::RegexExtract {
            pattern: r"(\d+)".to_string(),
            group: 5,
        };
        assert!(missing_group.apply("123").is_err());
    }

    #[test]
    fn test_coerce_number() {
        let t = Transformation::Coerce {
            target_type: "number".to_string(),
        };
        assert!(t.apply("123.45").is_ok());
        assert!(t.apply("not a number").is_err());
    }

    #[test]
    fn test_coerce_boolean() -> TestResult {
        let t = Transformation::Coerce {
            target_type: "boolean".to_string(),
        };
        assert_eq!(t.apply("YES")?, "true");
        assert_eq!(t.apply("1")?, "true");
        assert_eq!(t.apply("no")?, "false");
        // Unrecognised input is treated as false rather than rejected.
        assert_eq!(t.apply("maybe")?, "false");
        Ok(())
    }

    #[test]
    fn test_coerce_date() -> TestResult {
        let t = Transformation::Coerce {
            target_type: "date".to_string(),
        };
        assert_eq!(t.apply("2024-01-15T10:30:00Z")?, "2024-01-15T10:30:00Z");
        assert!(t.apply("2024-01-15").is_err());
        Ok(())
    }

    #[test]
    fn test_coerce_unknown_type() {
        let t = Transformation::Coerce {
            target_type: "uuid".to_string(),
        };
        assert!(t.apply("value").is_err());
    }

    #[test]
    fn test_filter() -> TestResult {
        let t = Transformation::Filter {
            pattern: r"^\d+$".to_string(),
        };
        assert_eq!(t.apply("123")?, "123");
        assert!(t.apply("abc").is_err());
        Ok(())
    }

    #[test]
    fn test_normalize_whitespace() -> TestResult {
        let t = Transformation::NormalizeWhitespace;
        // Escapes keep the repeated and mixed whitespace explicit in the input.
        assert_eq!(t.apply(" hello \t world\n\nfoo ")?, "hello world foo");
        Ok(())
    }

    #[test]
    fn test_strip_html() -> TestResult {
        let t = Transformation::StripHtml;
        assert_eq!(t.apply("<p>Hello <b>world</b></p>")?, "Hello world");
        Ok(())
    }

    #[test]
    fn test_parse_json() -> TestResult {
        let t = Transformation::ParseJson;
        assert_eq!(t.apply("{ \"a\" : [1, 2] }")?, "{\"a\":[1,2]}");
        assert!(t.apply("not json").is_err());
        Ok(())
    }

    #[test]
    fn test_transformation_chain() -> TestResult {
        let transforms = vec![
            Transformation::StripHtml,
            Transformation::Trim,
            Transformation::NormalizeWhitespace,
        ];
        let result =
            Transformation::apply_chain(&transforms, " \t<p>hello \n world</p>\n ".to_string())?;
        assert_eq!(result, "hello world");
        Ok(())
    }

    #[test]
    fn test_empty_chain_returns_input() -> TestResult {
        let result = Transformation::apply_chain(&[], "unchanged".to_string())?;
        assert_eq!(result, "unchanged");
        Ok(())
    }

    #[test]
    fn test_chain_stops_on_first_error() {
        let transforms = vec![
            Transformation::Filter {
                pattern: r"^\d+$".to_string(),
            },
            Transformation::Uppercase,
        ];
        assert!(Transformation::apply_chain(&transforms, "abc".to_string()).is_err());
    }

    #[test]
    fn test_display() {
        assert_eq!(Transformation::Trim.to_string(), "Trim");
        let extract = Transformation::RegexExtract {
            pattern: "a".to_string(),
            group: 1,
        };
        assert_eq!(extract.to_string(), "RegexExtract(a, group 1)");
    }
}