Skip to main content

jpx_core/extensions/
url_fns.rs

1//! URL parsing and manipulation functions.
2
3use std::collections::HashSet;
4
5use form_urlencoded;
6use serde_json::Value;
7
8use crate::functions::{Function, custom_error};
9use crate::interpreter::SearchResult;
10use crate::registry::register_if_enabled;
11use crate::{Context, Runtime, arg, defn};
12
13/// Register URL functions with the runtime, filtered by the enabled set.
14pub fn register_filtered(runtime: &mut Runtime, enabled: &HashSet<&str>) {
15    register_if_enabled(runtime, "url_encode", enabled, Box::new(UrlEncodeFn::new()));
16    register_if_enabled(runtime, "url_decode", enabled, Box::new(UrlDecodeFn::new()));
17    register_if_enabled(runtime, "url_parse", enabled, Box::new(UrlParseFn::new()));
18    register_if_enabled(runtime, "url_build", enabled, Box::new(UrlBuildFn::new()));
19    register_if_enabled(
20        runtime,
21        "query_string_parse",
22        enabled,
23        Box::new(QueryStringParseFn::new()),
24    );
25    register_if_enabled(
26        runtime,
27        "query_string_build",
28        enabled,
29        Box::new(QueryStringBuildFn::new()),
30    );
31}
32
33// =============================================================================
34// url_encode(string) -> string
35// =============================================================================
36
37defn!(UrlEncodeFn, vec![arg!(string)], None);
38
39impl Function for UrlEncodeFn {
40    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
41        self.signature.validate(args, ctx)?;
42
43        let input = args[0].as_str().ok_or_else(|| {
44            crate::JmespathError::from_ctx(
45                ctx,
46                crate::ErrorReason::Parse("Expected string argument".to_owned()),
47            )
48        })?;
49
50        let encoded = urlencoding::encode(input);
51        Ok(Value::String(encoded.into_owned()))
52    }
53}
54
55// =============================================================================
56// url_decode(string) -> string
57// =============================================================================
58
59defn!(UrlDecodeFn, vec![arg!(string)], None);
60
61impl Function for UrlDecodeFn {
62    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
63        self.signature.validate(args, ctx)?;
64
65        let input = args[0].as_str().ok_or_else(|| {
66            crate::JmespathError::from_ctx(
67                ctx,
68                crate::ErrorReason::Parse("Expected string argument".to_owned()),
69            )
70        })?;
71
72        match urlencoding::decode(input) {
73            Ok(decoded) => Ok(Value::String(decoded.into_owned())),
74            Err(_) => Err(crate::JmespathError::from_ctx(
75                ctx,
76                crate::ErrorReason::Parse("Invalid URL-encoded input".to_owned()),
77            )),
78        }
79    }
80}
81
82// =============================================================================
83// url_parse(string) -> object (parse URL into components)
84// =============================================================================
85
86defn!(UrlParseFn, vec![arg!(string)], None);
87
88impl Function for UrlParseFn {
89    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
90        self.signature.validate(args, ctx)?;
91
92        let input = args[0].as_str().ok_or_else(|| {
93            crate::JmespathError::from_ctx(
94                ctx,
95                crate::ErrorReason::Parse("Expected string argument".to_owned()),
96            )
97        })?;
98
99        match url::Url::parse(input) {
100            Ok(parsed) => {
101                let mut result = serde_json::Map::new();
102
103                result.insert(
104                    "scheme".to_string(),
105                    Value::String(parsed.scheme().to_string()),
106                );
107
108                if let Some(host) = parsed.host_str() {
109                    result.insert("host".to_string(), Value::String(host.to_string()));
110                } else {
111                    result.insert("host".to_string(), Value::Null);
112                }
113
114                if let Some(port) = parsed.port() {
115                    result.insert(
116                        "port".to_string(),
117                        Value::Number(serde_json::Number::from(port)),
118                    );
119                } else {
120                    result.insert("port".to_string(), Value::Null);
121                }
122
123                result.insert("path".to_string(), Value::String(parsed.path().to_string()));
124
125                if let Some(query) = parsed.query() {
126                    result.insert("query".to_string(), Value::String(query.to_string()));
127                } else {
128                    result.insert("query".to_string(), Value::Null);
129                }
130
131                if let Some(fragment) = parsed.fragment() {
132                    result.insert("fragment".to_string(), Value::String(fragment.to_string()));
133                } else {
134                    result.insert("fragment".to_string(), Value::Null);
135                }
136
137                if !parsed.username().is_empty() {
138                    result.insert(
139                        "username".to_string(),
140                        Value::String(parsed.username().to_string()),
141                    );
142                }
143
144                if let Some(password) = parsed.password() {
145                    result.insert("password".to_string(), Value::String(password.to_string()));
146                }
147
148                // Add origin field (scheme + host + port)
149                let origin = parsed.origin().ascii_serialization();
150                result.insert("origin".to_string(), Value::String(origin));
151
152                Ok(Value::Object(result))
153            }
154            // Return null for invalid URLs instead of an error
155            Err(_) => Ok(Value::Null),
156        }
157    }
158}
159
160// =============================================================================
161// url_build(object) -> string (build URL from components)
162// =============================================================================
163
164defn!(UrlBuildFn, vec![arg!(object)], None);
165
166impl Function for UrlBuildFn {
167    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
168        self.signature.validate(args, ctx)?;
169
170        let obj = args[0]
171            .as_object()
172            .ok_or_else(|| custom_error(ctx, "Expected object argument"))?;
173
174        let scheme = obj
175            .get("scheme")
176            .and_then(|v| v.as_str())
177            .ok_or_else(|| custom_error(ctx, "url_build: 'scheme' is required"))?;
178
179        let host = obj
180            .get("host")
181            .and_then(|v| v.as_str())
182            .ok_or_else(|| custom_error(ctx, "url_build: 'host' is required"))?;
183
184        let base = format!("{scheme}://{host}");
185        let mut url = url::Url::parse(&base)
186            .map_err(|e| custom_error(ctx, &format!("url_build: invalid scheme/host: {e}")))?;
187
188        if let Some(port) = obj.get("port")
189            && let Some(p) = port.as_u64()
190        {
191            url.set_port(Some(p as u16))
192                .map_err(|()| custom_error(ctx, "url_build: cannot set port on this URL"))?;
193        }
194
195        if let Some(path) = obj.get("path").and_then(|v| v.as_str()) {
196            url.set_path(path);
197        }
198
199        if let Some(query) = obj.get("query").and_then(|v| v.as_str()) {
200            url.set_query(Some(query));
201        }
202
203        if let Some(fragment) = obj.get("fragment").and_then(|v| v.as_str()) {
204            url.set_fragment(Some(fragment));
205        }
206
207        if let Some(username) = obj.get("username").and_then(|v| v.as_str()) {
208            url.set_username(username)
209                .map_err(|()| custom_error(ctx, "url_build: cannot set username on this URL"))?;
210        }
211
212        if let Some(password) = obj.get("password").and_then(|v| v.as_str()) {
213            url.set_password(Some(password))
214                .map_err(|()| custom_error(ctx, "url_build: cannot set password on this URL"))?;
215        }
216
217        Ok(Value::String(url.to_string()))
218    }
219}
220
221// =============================================================================
222// query_string_parse(string) -> object
223// =============================================================================
224
225defn!(QueryStringParseFn, vec![arg!(string)], None);
226
227impl Function for QueryStringParseFn {
228    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
229        self.signature.validate(args, ctx)?;
230
231        let input = args[0]
232            .as_str()
233            .ok_or_else(|| custom_error(ctx, "Expected string argument"))?;
234
235        let mut map = serde_json::Map::new();
236        for (key, value) in form_urlencoded::parse(input.as_bytes()) {
237            map.insert(key.into_owned(), Value::String(value.into_owned()));
238        }
239
240        Ok(Value::Object(map))
241    }
242}
243
244// =============================================================================
245// query_string_build(object) -> string
246// =============================================================================
247
248defn!(QueryStringBuildFn, vec![arg!(object)], None);
249
250impl Function for QueryStringBuildFn {
251    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
252        self.signature.validate(args, ctx)?;
253
254        let obj = args[0]
255            .as_object()
256            .ok_or_else(|| custom_error(ctx, "Expected object argument"))?;
257
258        let mut serializer = form_urlencoded::Serializer::new(String::new());
259        for (key, value) in obj {
260            let val_str = match value {
261                Value::String(s) => s.clone(),
262                Value::Number(n) => n.to_string(),
263                Value::Bool(b) => b.to_string(),
264                Value::Null => "null".to_string(),
265                _ => serde_json::to_string(value).unwrap_or_default(),
266            };
267            serializer.append_pair(key, &val_str);
268        }
269
270        Ok(Value::String(serializer.finish()))
271    }
272}
273
#[cfg(test)]
mod tests {
    use crate::Runtime;
    use serde_json::{Value, json};

    /// Builds a runtime through the builder with `with_standard()` and
    /// `with_all_extensions()` applied.
    fn setup_runtime() -> Runtime {
        Runtime::builder()
            .with_standard()
            .with_all_extensions()
            .build()
    }

    /// Compiles `expression` on `runtime` and evaluates it against `data`,
    /// panicking if either step fails.
    fn eval(runtime: &Runtime, expression: &str, data: &Value) -> Value {
        let compiled = runtime.compile(expression).unwrap();
        compiled.search(data).unwrap()
    }

    /// Reads `key` from the object `value` as a string, panicking when the
    /// value is not an object, the key is absent, or the field is not a string.
    fn str_field<'a>(value: &'a Value, key: &str) -> &'a str {
        value
            .as_object()
            .unwrap()
            .get(key)
            .unwrap()
            .as_str()
            .unwrap()
    }

    #[test]
    fn test_url_encode() {
        let runtime = setup_runtime();
        let encoded = eval(&runtime, "url_encode(@)", &json!("hello world"));
        assert_eq!(encoded.as_str().unwrap(), "hello%20world");
    }

    #[test]
    fn test_url_decode() {
        let runtime = setup_runtime();
        let decoded = eval(&runtime, "url_decode(@)", &json!("hello%20world"));
        assert_eq!(decoded.as_str().unwrap(), "hello world");
    }

    #[test]
    fn test_url_parse() {
        let runtime = setup_runtime();
        let parts = eval(
            &runtime,
            "url_parse(@)",
            &json!("https://example.com:8080/path?query=1#frag"),
        );
        assert_eq!(str_field(&parts, "scheme"), "https");
        assert_eq!(str_field(&parts, "host"), "example.com");
        let port = parts
            .as_object()
            .unwrap()
            .get("port")
            .unwrap()
            .as_f64()
            .unwrap() as u16;
        assert_eq!(port, 8080);
    }

    #[test]
    fn test_url_parse_origin() {
        let runtime = setup_runtime();
        let parts = eval(
            &runtime,
            "url_parse(@)",
            &json!("https://example.com:8080/path"),
        );
        assert_eq!(str_field(&parts, "origin"), "https://example.com:8080");
    }

    #[test]
    fn test_url_parse_invalid_returns_null() {
        let runtime = setup_runtime();
        let parts = eval(&runtime, "url_parse(@)", &json!("not a valid url"));
        assert!(parts.is_null());
    }

    // url_build tests

    #[test]
    fn test_url_build_minimal() {
        let runtime = setup_runtime();
        let built = eval(
            &runtime,
            "url_build(@)",
            &json!({"scheme": "https", "host": "example.com"}),
        );
        assert_eq!(built.as_str().unwrap(), "https://example.com/");
    }

    #[test]
    fn test_url_build_full() {
        let runtime = setup_runtime();
        let components = json!({
            "scheme": "https",
            "host": "example.com",
            "port": 8080,
            "path": "/api/v1",
            "query": "key=value",
            "fragment": "section",
            "username": "user",
            "password": "pass"
        });
        let built = eval(&runtime, "url_build(@)", &components);
        assert_eq!(
            built.as_str().unwrap(),
            "https://user:pass@example.com:8080/api/v1?key=value#section"
        );
    }

    #[test]
    fn test_url_build_roundtrip() {
        let runtime = setup_runtime();
        let original = "https://example.com:8080/path?q=1#frag";
        let parts = eval(&runtime, "url_parse(@)", &json!(original));
        let rebuilt = eval(&runtime, "url_build(@)", &parts);
        assert_eq!(rebuilt.as_str().unwrap(), original);
    }

    // query_string_parse tests

    #[test]
    fn test_query_string_parse_basic() {
        let runtime = setup_runtime();
        let parsed = eval(&runtime, "query_string_parse(@)", &json!("foo=bar&baz=qux"));
        assert_eq!(str_field(&parsed, "foo"), "bar");
        assert_eq!(str_field(&parsed, "baz"), "qux");
    }

    #[test]
    fn test_query_string_parse_encoded() {
        let runtime = setup_runtime();
        let parsed = eval(
            &runtime,
            "query_string_parse(@)",
            &json!("greeting=hello%20world&special=a%2Bb"),
        );
        assert_eq!(str_field(&parsed, "greeting"), "hello world");
        assert_eq!(str_field(&parsed, "special"), "a+b");
    }

    #[test]
    fn test_query_string_parse_empty() {
        let runtime = setup_runtime();
        let parsed = eval(&runtime, "query_string_parse(@)", &json!(""));
        assert!(parsed.as_object().unwrap().is_empty());
    }

    // query_string_build tests

    #[test]
    fn test_query_string_build_basic() {
        let runtime = setup_runtime();
        let built = eval(
            &runtime,
            "query_string_build(@)",
            &json!({"foo": "bar", "baz": "qux"}),
        );
        let qs = built.as_str().unwrap();
        // Substring checks keep the assertions independent of pair order.
        assert!(qs.contains("foo=bar"));
        assert!(qs.contains("baz=qux"));
    }

    #[test]
    fn test_query_string_build_special_chars() {
        let runtime = setup_runtime();
        let built = eval(
            &runtime,
            "query_string_build(@)",
            &json!({"greeting": "hello world", "op": "a+b"}),
        );
        let qs = built.as_str().unwrap();
        assert!(qs.contains("greeting=hello+world"));
        assert!(qs.contains("op=a%2Bb"));
    }

    #[test]
    fn test_query_string_build_empty() {
        let runtime = setup_runtime();
        let built = eval(&runtime, "query_string_build(@)", &json!({}));
        assert_eq!(built.as_str().unwrap(), "");
    }

    #[test]
    fn test_url_encode_special_chars() {
        let runtime = setup_runtime();

        let encoded = eval(&runtime, "url_encode(@)", &json!("a&b=c"));
        assert_eq!(encoded.as_str().unwrap(), "a%26b%3Dc");

        let encoded = eval(&runtime, "url_encode(@)", &json!("foo/bar?baz"));
        assert_eq!(encoded.as_str().unwrap(), "foo%2Fbar%3Fbaz");
    }

    #[test]
    fn test_url_decode_passthrough() {
        let runtime = setup_runtime();

        // Plain text without percent-encoding passes through
        let decoded = eval(&runtime, "url_decode(@)", &json!("hello"));
        assert_eq!(decoded.as_str().unwrap(), "hello");
    }

    #[test]
    fn test_url_encode_decode_roundtrip() {
        let runtime = setup_runtime();
        let encoded = eval(
            &runtime,
            "url_encode(@)",
            &json!("hello world & goodbye=yes"),
        );
        let decoded = eval(&runtime, "url_decode(@)", &encoded);
        assert_eq!(decoded.as_str().unwrap(), "hello world & goodbye=yes");
    }

    #[test]
    fn test_url_parse_no_port() {
        let runtime = setup_runtime();
        let parts = eval(&runtime, "url_parse(@)", &json!("https://example.com/path"));
        assert_eq!(str_field(&parts, "host"), "example.com");
        // The key must be present and explicitly null, not merely missing.
        assert!(parts.as_object().unwrap().get("port").unwrap().is_null());
        assert_eq!(str_field(&parts, "path"), "/path");
    }

    #[test]
    fn test_url_parse_query_and_fragment() {
        let runtime = setup_runtime();
        let parts = eval(
            &runtime,
            "url_parse(@)",
            &json!("https://example.com/path?key=val#section"),
        );
        assert_eq!(str_field(&parts, "query"), "key=val");
        assert_eq!(str_field(&parts, "fragment"), "section");
    }

    #[test]
    fn test_query_string_parse_no_value() {
        let runtime = setup_runtime();
        // Key with no value
        let parsed = eval(&runtime, "query_string_parse(@)", &json!("flag&key=value"));
        assert_eq!(str_field(&parsed, "flag"), "");
        assert_eq!(str_field(&parsed, "key"), "value");
    }

    #[test]
    fn test_query_string_roundtrip() {
        let runtime = setup_runtime();
        let original = json!({"name": "John Doe", "age": "30"});
        let qs = eval(&runtime, "query_string_build(@)", &original);
        let parsed = eval(&runtime, "query_string_parse(@)", &qs);
        assert_eq!(str_field(&parsed, "name"), "John Doe");
        assert_eq!(str_field(&parsed, "age"), "30");
    }
}