Skip to main content

jpx_core/extensions/
encoding.rs

1//! Encoding and decoding functions.
2
3use std::collections::HashSet;
4
5use serde_json::Value;
6
7use crate::functions::Function;
8use crate::interpreter::SearchResult;
9use crate::registry::register_if_enabled;
10use crate::{Context, Runtime, arg, defn};
11
12use base64::{
13    Engine,
14    engine::general_purpose::{STANDARD as BASE64_STANDARD, URL_SAFE_NO_PAD as BASE64_URL_SAFE},
15};
16
17/// Register encoding functions with the runtime, filtered by the enabled set.
18pub fn register_filtered(runtime: &mut Runtime, enabled: &HashSet<&str>) {
19    register_if_enabled(
20        runtime,
21        "base64_encode",
22        enabled,
23        Box::new(Base64EncodeFn::new()),
24    );
25    register_if_enabled(
26        runtime,
27        "base64_decode",
28        enabled,
29        Box::new(Base64DecodeFn::new()),
30    );
31    register_if_enabled(runtime, "hex_encode", enabled, Box::new(HexEncodeFn::new()));
32    register_if_enabled(runtime, "hex_decode", enabled, Box::new(HexDecodeFn::new()));
33    register_if_enabled(runtime, "jwt_decode", enabled, Box::new(JwtDecodeFn::new()));
34    register_if_enabled(runtime, "jwt_header", enabled, Box::new(JwtHeaderFn::new()));
35    register_if_enabled(
36        runtime,
37        "html_escape",
38        enabled,
39        Box::new(HtmlEscapeFn::new()),
40    );
41    register_if_enabled(
42        runtime,
43        "html_unescape",
44        enabled,
45        Box::new(HtmlUnescapeFn::new()),
46    );
47    register_if_enabled(
48        runtime,
49        "shell_escape",
50        enabled,
51        Box::new(ShellEscapeFn::new()),
52    );
53}
54
55// =============================================================================
56// base64_encode(string) -> string
57// =============================================================================
58
59defn!(Base64EncodeFn, vec![arg!(string)], None);
60
61impl Function for Base64EncodeFn {
62    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
63        self.signature.validate(args, ctx)?;
64
65        let input = args[0].as_str().ok_or_else(|| {
66            crate::JmespathError::from_ctx(
67                ctx,
68                crate::ErrorReason::Parse("Expected string argument".to_owned()),
69            )
70        })?;
71
72        let encoded = BASE64_STANDARD.encode(input.as_bytes());
73        Ok(Value::String(encoded))
74    }
75}
76
77// =============================================================================
78// base64_decode(string) -> string
79// =============================================================================
80
81defn!(Base64DecodeFn, vec![arg!(string)], None);
82
83impl Function for Base64DecodeFn {
84    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
85        self.signature.validate(args, ctx)?;
86
87        let input = args[0].as_str().ok_or_else(|| {
88            crate::JmespathError::from_ctx(
89                ctx,
90                crate::ErrorReason::Parse("Expected string argument".to_owned()),
91            )
92        })?;
93
94        match BASE64_STANDARD.decode(input.as_bytes()) {
95            Ok(decoded) => {
96                let s = String::from_utf8(decoded).map_err(|_| {
97                    crate::JmespathError::from_ctx(
98                        ctx,
99                        crate::ErrorReason::Parse("Decoded bytes are not valid UTF-8".to_owned()),
100                    )
101                })?;
102                Ok(Value::String(s))
103            }
104            Err(_) => Err(crate::JmespathError::from_ctx(
105                ctx,
106                crate::ErrorReason::Parse("Invalid base64 input".to_owned()),
107            )),
108        }
109    }
110}
111
112// =============================================================================
113// hex_encode(string) -> string
114// =============================================================================
115
116defn!(HexEncodeFn, vec![arg!(string)], None);
117
118impl Function for HexEncodeFn {
119    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
120        self.signature.validate(args, ctx)?;
121
122        let input = args[0].as_str().ok_or_else(|| {
123            crate::JmespathError::from_ctx(
124                ctx,
125                crate::ErrorReason::Parse("Expected string argument".to_owned()),
126            )
127        })?;
128
129        let encoded = hex::encode(input.as_bytes());
130        Ok(Value::String(encoded))
131    }
132}
133
134// =============================================================================
135// hex_decode(string) -> string
136// =============================================================================
137
138defn!(HexDecodeFn, vec![arg!(string)], None);
139
140impl Function for HexDecodeFn {
141    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
142        self.signature.validate(args, ctx)?;
143
144        let input = args[0].as_str().ok_or_else(|| {
145            crate::JmespathError::from_ctx(
146                ctx,
147                crate::ErrorReason::Parse("Expected string argument".to_owned()),
148            )
149        })?;
150
151        match hex::decode(input) {
152            Ok(decoded) => {
153                // Return null if decoded bytes are not valid UTF-8
154                match String::from_utf8(decoded) {
155                    Ok(s) => Ok(Value::String(s)),
156                    Err(_) => Ok(Value::Null),
157                }
158            }
159            // Return null for invalid hex input
160            Err(_) => Ok(Value::Null),
161        }
162    }
163}
164
165// =============================================================================
166// JWT Helper Functions
167// =============================================================================
168
169/// Decode a base64url-encoded JWT part (header or payload) to JSON
170fn decode_jwt_part(part: &str) -> Option<serde_json::Value> {
171    // JWT uses base64url encoding (no padding)
172    let decoded = BASE64_URL_SAFE.decode(part).ok()?;
173    let json_str = String::from_utf8(decoded).ok()?;
174    serde_json::from_str(&json_str).ok()
175}
176
177// =============================================================================
178// jwt_decode(token) -> object (JWT payload/claims)
179// =============================================================================
180
181defn!(JwtDecodeFn, vec![arg!(string)], None);
182
183impl Function for JwtDecodeFn {
184    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
185        self.signature.validate(args, ctx)?;
186
187        let token = args[0].as_str().ok_or_else(|| {
188            crate::JmespathError::from_ctx(
189                ctx,
190                crate::ErrorReason::Parse("Expected string argument".to_owned()),
191            )
192        })?;
193
194        // JWT format: header.payload.signature
195        let parts: Vec<&str> = token.split('.').collect();
196        if parts.len() != 3 {
197            return Ok(Value::Null);
198        }
199
200        // Decode the payload (second part)
201        match decode_jwt_part(parts[1]) {
202            Some(json) => Ok(json),
203            None => Ok(Value::Null),
204        }
205    }
206}
207
208// =============================================================================
209// jwt_header(token) -> object (JWT header)
210// =============================================================================
211
212defn!(JwtHeaderFn, vec![arg!(string)], None);
213
214impl Function for JwtHeaderFn {
215    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
216        self.signature.validate(args, ctx)?;
217
218        let token = args[0].as_str().ok_or_else(|| {
219            crate::JmespathError::from_ctx(
220                ctx,
221                crate::ErrorReason::Parse("Expected string argument".to_owned()),
222            )
223        })?;
224
225        // JWT format: header.payload.signature
226        let parts: Vec<&str> = token.split('.').collect();
227        if parts.len() != 3 {
228            return Ok(Value::Null);
229        }
230
231        // Decode the header (first part)
232        match decode_jwt_part(parts[0]) {
233            Some(json) => Ok(json),
234            None => Ok(Value::Null),
235        }
236    }
237}
238
239// =============================================================================
240// html_escape(string) -> string
241// =============================================================================
242
243defn!(HtmlEscapeFn, vec![arg!(string)], None);
244
245impl Function for HtmlEscapeFn {
246    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
247        self.signature.validate(args, ctx)?;
248
249        let s = args[0].as_str().ok_or_else(|| {
250            crate::JmespathError::from_ctx(
251                ctx,
252                crate::ErrorReason::Parse("Expected string argument".to_owned()),
253            )
254        })?;
255
256        let escaped = s
257            .replace('&', "&amp;")
258            .replace('<', "&lt;")
259            .replace('>', "&gt;")
260            .replace('"', "&quot;")
261            .replace('\'', "&#x27;");
262
263        Ok(Value::String(escaped))
264    }
265}
266
267// =============================================================================
268// html_unescape(string) -> string
269// =============================================================================
270
271defn!(HtmlUnescapeFn, vec![arg!(string)], None);
272
273impl Function for HtmlUnescapeFn {
274    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
275        self.signature.validate(args, ctx)?;
276
277        let s = args[0].as_str().ok_or_else(|| {
278            crate::JmespathError::from_ctx(
279                ctx,
280                crate::ErrorReason::Parse("Expected string argument".to_owned()),
281            )
282        })?;
283
284        // Order matters: decode &amp; last to avoid double-decoding
285        let unescaped = s
286            .replace("&#x27;", "'")
287            .replace("&#39;", "'")
288            .replace("&apos;", "'")
289            .replace("&quot;", "\"")
290            .replace("&gt;", ">")
291            .replace("&lt;", "<")
292            .replace("&amp;", "&");
293
294        Ok(Value::String(unescaped))
295    }
296}
297
298// =============================================================================
299// shell_escape(string) -> string
300// =============================================================================
301
302defn!(ShellEscapeFn, vec![arg!(string)], None);
303
304impl Function for ShellEscapeFn {
305    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
306        self.signature.validate(args, ctx)?;
307
308        let s = args[0].as_str().ok_or_else(|| {
309            crate::JmespathError::from_ctx(
310                ctx,
311                crate::ErrorReason::Parse("Expected string argument".to_owned()),
312            )
313        })?;
314
315        // Shell escaping: wrap in single quotes and escape internal single quotes
316        // The pattern is: replace ' with '\'' (end quote, escaped quote, start quote)
317        let escaped = format!("'{}'", s.replace('\'', "'\\''"));
318
319        Ok(Value::String(escaped))
320    }
321}
322
#[cfg(test)]
mod tests {
    use crate::Runtime;
    use serde_json::json;

    /// Build a runtime with the standard functions and all extensions enabled.
    fn setup_runtime() -> Runtime {
        Runtime::builder()
            .with_standard()
            .with_all_extensions()
            .build()
    }

    /// Compile `expression` on a fresh runtime and evaluate it against `input`.
    ///
    /// Panics if compilation or evaluation fails, which fails the calling test.
    fn eval(expression: &str, input: serde_json::Value) -> serde_json::Value {
        let runtime = setup_runtime();
        let compiled = runtime.compile(expression).unwrap();
        compiled.search(&input).unwrap()
    }

    // =========================================================================
    // base64 / hex function tests
    // =========================================================================

    #[test]
    fn test_base64_encode() {
        assert_eq!(eval("base64_encode(@)", json!("hello")), json!("aGVsbG8="));
    }

    #[test]
    fn test_base64_decode() {
        assert_eq!(eval("base64_decode(@)", json!("aGVsbG8=")), json!("hello"));
    }

    #[test]
    fn test_hex_encode() {
        assert_eq!(eval("hex_encode(@)", json!("hello")), json!("68656c6c6f"));
    }

    #[test]
    fn test_hex_decode() {
        assert_eq!(eval("hex_decode(@)", json!("68656c6c6f")), json!("hello"));
    }

    #[test]
    fn test_hex_decode_invalid_returns_null() {
        assert_eq!(eval("hex_decode(@)", json!("invalid")), json!(null));
    }

    #[test]
    fn test_hex_decode_odd_length_returns_null() {
        assert_eq!(eval("hex_decode(@)", json!("123")), json!(null));
    }

    // =========================================================================
    // JWT function tests
    // =========================================================================

    // Test JWT from jwt.io: {"sub": "1234567890", "name": "John Doe", "iat": 1516239022}
    const TEST_JWT: &str = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiaWF0IjoxNTE2MjM5MDIyfQ.SflKxwRJSMeKKF2QT4fwpMeJf36POk6yJV_adQssw5c";

    #[test]
    fn test_jwt_decode_payload() {
        let claims = eval("jwt_decode(@)", json!(TEST_JWT));

        // The payload must decode to an object carrying the expected claims.
        assert_eq!(claims["sub"], json!("1234567890"));
        assert_eq!(claims["name"], json!("John Doe"));
        assert_eq!(claims["iat"], json!(1516239022));
    }

    #[test]
    fn test_jwt_decode_extract_claim() {
        assert_eq!(
            eval("jwt_decode(@).sub", json!(TEST_JWT)),
            json!("1234567890")
        );
    }

    #[test]
    fn test_jwt_header() {
        let header = eval("jwt_header(@)", json!(TEST_JWT));

        // Header fields of the fixture token.
        assert_eq!(header["alg"], json!("HS256"));
        assert_eq!(header["typ"], json!("JWT"));
    }

    #[test]
    fn test_jwt_header_extract_alg() {
        assert_eq!(eval("jwt_header(@).alg", json!(TEST_JWT)), json!("HS256"));
    }

    #[test]
    fn test_jwt_decode_invalid_format() {
        // First input has no dots at all; second has only two parts.
        for malformed in ["not-a-jwt", "part1.part2"] {
            assert_eq!(eval("jwt_decode(@)", json!(malformed)), json!(null));
        }
    }

    #[test]
    fn test_jwt_decode_invalid_base64() {
        // Three parts, but none of them is valid base64.
        assert_eq!(eval("jwt_decode(@)", json!("!!!.@@@.###")), json!(null));
    }

    #[test]
    fn test_jwt_decode_invalid_json() {
        // Payload is valid base64 ("not json" encoded) but not valid JSON.
        assert_eq!(
            eval("jwt_decode(@)", json!("eyJhbGciOiJIUzI1NiJ9.bm90IGpzb24.sig")),
            json!(null)
        );
    }

    // =========================================================================
    // HTML escaping tests
    // =========================================================================

    #[test]
    fn test_html_escape_basic() {
        assert_eq!(
            eval(
                "html_escape(@)",
                json!("<div class=\"test\">Hello & goodbye</div>")
            ),
            json!("&lt;div class=&quot;test&quot;&gt;Hello &amp; goodbye&lt;/div&gt;")
        );
    }

    #[test]
    fn test_html_escape_quotes() {
        assert_eq!(
            eval("html_escape(@)", json!("It's a \"test\"")),
            json!("It&#x27;s a &quot;test&quot;")
        );
    }

    #[test]
    fn test_html_escape_no_change() {
        assert_eq!(
            eval("html_escape(@)", json!("Hello World")),
            json!("Hello World")
        );
    }

    #[test]
    fn test_html_unescape_basic() {
        assert_eq!(
            eval(
                "html_unescape(@)",
                json!("&lt;div class=&quot;test&quot;&gt;Hello &amp; goodbye&lt;/div&gt;")
            ),
            json!("<div class=\"test\">Hello & goodbye</div>")
        );
    }

    #[test]
    fn test_html_unescape_quotes() {
        assert_eq!(
            eval("html_unescape(@)", json!("It&#x27;s a &quot;test&quot;")),
            json!("It's a \"test\"")
        );
    }

    #[test]
    fn test_html_roundtrip() {
        let original = "<script>alert('xss')</script>";
        let escaped = eval("html_escape(@)", json!(original));
        let roundtrip = eval("html_unescape(@)", escaped);
        assert_eq!(roundtrip, json!(original));
    }

    // =========================================================================
    // Shell escaping tests
    // =========================================================================

    #[test]
    fn test_shell_escape_simple() {
        assert_eq!(
            eval("shell_escape(@)", json!("hello world")),
            json!("'hello world'")
        );
    }

    #[test]
    fn test_shell_escape_with_single_quote() {
        assert_eq!(
            eval("shell_escape(@)", json!("it's here")),
            json!("'it'\\''s here'")
        );
    }

    #[test]
    fn test_shell_escape_special_chars() {
        // Metacharacters must end up inside the single-quoted span.
        assert_eq!(
            eval("shell_escape(@)", json!("$HOME; rm -rf /")),
            json!("'$HOME; rm -rf /'")
        );
    }

    #[test]
    fn test_shell_escape_empty() {
        assert_eq!(eval("shell_escape(@)", json!("")), json!("''"));
    }

    #[test]
    fn test_shell_escape_multiple_quotes() {
        assert_eq!(
            eval("shell_escape(@)", json!("don't say 'hello'")),
            json!("'don'\\''t say '\\''hello'\\'''")
        );
    }
}