Skip to main content

jpx_core/extensions/
validation.rs

1//! Data validation functions.
2
3use std::collections::HashSet;
4use std::sync::LazyLock;
5
6use regex::Regex;
7use serde_json::Value;
8
9use crate::functions::Function;
10use crate::interpreter::SearchResult;
11use crate::registry::register_if_enabled;
12use crate::{Context, Runtime, arg, defn};
13
14// Constant validation patterns, compiled once instead of on every call (these
15// were previously recompiled per invocation, e.g. once per element in a map).
16static EMAIL_RE: LazyLock<Regex> =
17    LazyLock::new(|| Regex::new(r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$").unwrap());
18static URL_RE: LazyLock<Regex> =
19    LazyLock::new(|| Regex::new(r"^https?://[^\s/$.?#].[^\s]*$").unwrap());
20static UUID_RE: LazyLock<Regex> = LazyLock::new(|| {
21    Regex::new(r"^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$")
22        .unwrap()
23});
24static PHONE_RE: LazyLock<Regex> =
25    LazyLock::new(|| Regex::new(r"^\+?[\d\s\-\(\)\.]{7,}$").unwrap());
26
27/// Register validation functions filtered by the enabled set.
28pub fn register_filtered(runtime: &mut Runtime, enabled: &HashSet<&str>) {
29    register_if_enabled(runtime, "is_email", enabled, Box::new(IsEmailFn::new()));
30    register_if_enabled(runtime, "is_url", enabled, Box::new(IsUrlFn::new()));
31    register_if_enabled(runtime, "is_uuid", enabled, Box::new(IsUuidFn::new()));
32    register_if_enabled(runtime, "is_phone", enabled, Box::new(IsPhoneFn::new()));
33    register_if_enabled(runtime, "is_ipv4", enabled, Box::new(IsIpv4Fn::new()));
34    register_if_enabled(runtime, "is_ipv6", enabled, Box::new(IsIpv6Fn::new()));
35    register_if_enabled(runtime, "luhn_check", enabled, Box::new(LuhnCheckFn::new()));
36    register_if_enabled(
37        runtime,
38        "is_credit_card",
39        enabled,
40        Box::new(IsCreditCardFn::new()),
41    );
42    register_if_enabled(runtime, "is_jwt", enabled, Box::new(IsJwtFn::new()));
43    register_if_enabled(
44        runtime,
45        "is_iso_date",
46        enabled,
47        Box::new(IsIsoDateFn::new()),
48    );
49    register_if_enabled(runtime, "is_json", enabled, Box::new(IsJsonFn::new()));
50    register_if_enabled(runtime, "is_base64", enabled, Box::new(IsBase64Fn::new()));
51    register_if_enabled(runtime, "is_hex", enabled, Box::new(IsHexFn::new()));
52}
53
54// =============================================================================
55// is_email(string) -> boolean
56// =============================================================================
57
58defn!(IsEmailFn, vec![arg!(string)], None);
59
60impl Function for IsEmailFn {
61    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
62        self.signature.validate(args, ctx)?;
63
64        let s = args[0].as_str().unwrap();
65
66        Ok(Value::Bool(EMAIL_RE.is_match(s)))
67    }
68}
69
70// =============================================================================
71// is_url(string) -> boolean
72// =============================================================================
73
74defn!(IsUrlFn, vec![arg!(string)], None);
75
76impl Function for IsUrlFn {
77    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
78        self.signature.validate(args, ctx)?;
79
80        let s = args[0].as_str().unwrap();
81
82        Ok(Value::Bool(URL_RE.is_match(s)))
83    }
84}
85
86// =============================================================================
87// is_uuid(string) -> boolean
88// =============================================================================
89
90defn!(IsUuidFn, vec![arg!(string)], None);
91
92impl Function for IsUuidFn {
93    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
94        self.signature.validate(args, ctx)?;
95
96        let s = args[0].as_str().unwrap();
97
98        Ok(Value::Bool(UUID_RE.is_match(s)))
99    }
100}
101
102// =============================================================================
103// is_ipv4(string) -> boolean
104// =============================================================================
105
106defn!(IsIpv4Fn, vec![arg!(string)], None);
107
108impl Function for IsIpv4Fn {
109    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
110        self.signature.validate(args, ctx)?;
111
112        let s = args[0].as_str().unwrap();
113
114        let is_valid = s.parse::<std::net::Ipv4Addr>().is_ok();
115        Ok(Value::Bool(is_valid))
116    }
117}
118
119// =============================================================================
120// is_ipv6(string) -> boolean
121// =============================================================================
122
123defn!(IsIpv6Fn, vec![arg!(string)], None);
124
125impl Function for IsIpv6Fn {
126    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
127        self.signature.validate(args, ctx)?;
128
129        let s = args[0].as_str().unwrap();
130
131        let is_valid = s.parse::<std::net::Ipv6Addr>().is_ok();
132        Ok(Value::Bool(is_valid))
133    }
134}
135
136// =============================================================================
137// luhn_check(string) -> boolean
138// =============================================================================
139
140defn!(LuhnCheckFn, vec![arg!(string)], None);
141
142impl Function for LuhnCheckFn {
143    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
144        self.signature.validate(args, ctx)?;
145
146        let s = args[0].as_str().unwrap();
147
148        Ok(Value::Bool(luhn_validate(s)))
149    }
150}
151
/// Checks a digit string against the Luhn checksum.
///
/// Spaces and dashes are treated as grouping separators and skipped. Any other
/// character that is not an ASCII digit makes the input invalid, as does an
/// input that contains no digits at all.
///
/// Returns `true` when the checksum over the digits is a multiple of ten.
fn luhn_validate(s: &str) -> bool {
    let mut sum: u32 = 0;
    let mut seen_digit = false;
    // Luhn doubles every second digit counting from the rightmost one, which
    // itself is left undoubled; hence the reverse walk starting with `false`.
    let mut double = false;

    for c in s.chars().rev() {
        // Only the documented separators are ignored; they must not advance
        // the doubling state.
        if matches!(c, ' ' | '-') {
            continue;
        }

        // Anything else that is not a decimal digit invalidates the input
        // instead of being silently dropped.
        let Some(digit) = c.to_digit(10) else {
            return false;
        };

        let contribution = if double {
            let doubled = digit * 2;
            // Equivalent to summing the two decimal digits of `doubled`.
            if doubled > 9 { doubled - 9 } else { doubled }
        } else {
            digit
        };

        sum += contribution;
        double = !double;
        seen_digit = true;
    }

    seen_digit && sum % 10 == 0
}
181
182// =============================================================================
183// is_credit_card(string) -> boolean
184// =============================================================================
185
186defn!(IsCreditCardFn, vec![arg!(string)], None);
187
188impl Function for IsCreditCardFn {
189    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
190        self.signature.validate(args, ctx)?;
191
192        let s = args[0].as_str().unwrap();
193
194        // Remove spaces and dashes
195        let digits: String = s.chars().filter(|c| c.is_ascii_digit()).collect();
196
197        // Credit cards are typically 13-19 digits
198        if digits.len() < 13 || digits.len() > 19 {
199            return Ok(Value::Bool(false));
200        }
201
202        // Must pass Luhn check
203        Ok(Value::Bool(luhn_validate(&digits)))
204    }
205}
206
207// =============================================================================
208// is_phone(string) -> boolean
209// =============================================================================
210
211defn!(IsPhoneFn, vec![arg!(string)], None);
212
213impl Function for IsPhoneFn {
214    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
215        self.signature.validate(args, ctx)?;
216
217        let s = args[0].as_str().unwrap();
218
219        // Basic phone pattern: optional + followed by digits, spaces, dashes, parens
220        // Minimum 7 digits for a valid phone number
221        if !PHONE_RE.is_match(s) {
222            return Ok(Value::Bool(false));
223        }
224
225        // Count actual digits - need at least 7
226        let digit_count = s.chars().filter(|c| c.is_ascii_digit()).count();
227        Ok(Value::Bool((7..=15).contains(&digit_count)))
228    }
229}
230
231// =============================================================================
232// is_jwt(string) -> boolean
233// =============================================================================
234
235defn!(IsJwtFn, vec![arg!(string)], None);
236
237impl Function for IsJwtFn {
238    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
239        self.signature.validate(args, ctx)?;
240
241        let s = args[0].as_str().unwrap();
242
243        // JWT has 3 base64url-encoded parts separated by dots
244        let parts: Vec<&str> = s.split('.').collect();
245        if parts.len() != 3 {
246            return Ok(Value::Bool(false));
247        }
248
249        // Check each part is valid base64url (alphanumeric, -, _, no padding required)
250        let is_valid = parts.iter().all(|part| {
251            !part.is_empty()
252                && part
253                    .chars()
254                    .all(|c| c.is_ascii_alphanumeric() || c == '-' || c == '_' || c == '=')
255        });
256
257        Ok(Value::Bool(is_valid))
258    }
259}
260
261// =============================================================================
262// is_iso_date(string) -> boolean
263// =============================================================================
264
265defn!(IsIsoDateFn, vec![arg!(string)], None);
266
267impl Function for IsIsoDateFn {
268    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
269        self.signature.validate(args, ctx)?;
270
271        let s = args[0].as_str().unwrap();
272
273        // Try parsing as RFC3339 (subset of ISO 8601)
274        if chrono::DateTime::parse_from_rfc3339(s).is_ok() {
275            return Ok(Value::Bool(true));
276        }
277
278        // Try parsing as date only (YYYY-MM-DD)
279        if chrono::NaiveDate::parse_from_str(s, "%Y-%m-%d").is_ok() {
280            return Ok(Value::Bool(true));
281        }
282
283        // Try parsing as datetime without timezone
284        if chrono::NaiveDateTime::parse_from_str(s, "%Y-%m-%dT%H:%M:%S").is_ok() {
285            return Ok(Value::Bool(true));
286        }
287
288        Ok(Value::Bool(false))
289    }
290}
291
292// =============================================================================
293// is_json(string) -> boolean
294// =============================================================================
295
296defn!(IsJsonFn, vec![arg!(string)], None);
297
298impl Function for IsJsonFn {
299    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
300        self.signature.validate(args, ctx)?;
301
302        let s = args[0].as_str().unwrap();
303
304        let is_valid = serde_json::from_str::<serde_json::Value>(s).is_ok();
305        Ok(Value::Bool(is_valid))
306    }
307}
308
309// =============================================================================
310// is_base64(string) -> boolean
311// =============================================================================
312
313defn!(IsBase64Fn, vec![arg!(string)], None);
314
315impl Function for IsBase64Fn {
316    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
317        self.signature.validate(args, ctx)?;
318
319        let s = args[0].as_str().unwrap();
320
321        use base64::{Engine, engine::general_purpose::STANDARD};
322        let is_valid = STANDARD.decode(s).is_ok();
323        Ok(Value::Bool(is_valid))
324    }
325}
326
327// =============================================================================
328// is_hex(string) -> boolean
329// =============================================================================
330
331defn!(IsHexFn, vec![arg!(string)], None);
332
333impl Function for IsHexFn {
334    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
335        self.signature.validate(args, ctx)?;
336
337        let s = args[0].as_str().unwrap();
338
339        // Must be non-empty and all hex chars
340        let is_valid = !s.is_empty() && s.chars().all(|c| c.is_ascii_hexdigit());
341        Ok(Value::Bool(is_valid))
342    }
343}
344
#[cfg(test)]
mod tests {
    use crate::Runtime;
    use serde_json::json;

    /// Builds a runtime with the standard functions and all extensions.
    fn setup_runtime() -> Runtime {
        Runtime::builder()
            .with_standard()
            .with_all_extensions()
            .build()
    }

    /// Compiles `expression` once and checks it against every `(input, expected)`
    /// pair, where each input is passed to the expression as a JSON string.
    fn assert_cases(expression: &str, cases: &[(&str, bool)]) {
        let runtime = setup_runtime();
        let expr = runtime.compile(expression).unwrap();

        for &(input, expected) in cases {
            let data = json!(input);
            let result = expr.search(&data).unwrap();
            assert_eq!(result, json!(expected), "{expression} with input {input:?}");
        }
    }

    #[test]
    fn test_is_ipv4() {
        assert_cases("is_ipv4(@)", &[("192.168.1.1", true), ("not an ip", false)]);
    }

    #[test]
    fn test_is_ipv6() {
        assert_cases(
            "is_ipv6(@)",
            &[("::1", true), ("2001:db8::1", true), ("not an ip", false)],
        );
    }

    #[test]
    fn test_is_email() {
        assert_cases(
            "is_email(@)",
            &[("test@example.com", true), ("not-an-email", false)],
        );
    }

    #[test]
    fn test_is_url() {
        assert_cases(
            "is_url(@)",
            &[
                ("http://example.com", true),
                ("https://example.com/path?q=1", true),
                ("ftp://example.com", false),
                ("https://", false),
                ("not a url", false),
            ],
        );
    }

    #[test]
    fn test_is_uuid() {
        assert_cases(
            "is_uuid(@)",
            &[
                ("550e8400-e29b-41d4-a716-446655440000", true),
                ("550E8400-E29B-41D4-A716-446655440000", true),
                // Dashes are required by the pattern.
                ("550e8400e29b41d4a716446655440000", false),
                ("not-a-uuid", false),
            ],
        );
    }

    #[test]
    fn test_luhn_check_valid() {
        // Valid Luhn number
        assert_cases("luhn_check(@)", &[("79927398713", true)]);
    }

    #[test]
    fn test_luhn_check_invalid() {
        assert_cases("luhn_check(@)", &[("79927398710", false)]);
    }

    #[test]
    fn test_is_credit_card_valid() {
        // Test Visa number (passes Luhn)
        assert_cases("is_credit_card(@)", &[("4111111111111111", true)]);
    }

    #[test]
    fn test_is_credit_card_invalid() {
        // Right length, wrong checksum
        assert_cases("is_credit_card(@)", &[("1234567890123456", false)]);
    }

    #[test]
    fn test_is_credit_card_too_short() {
        assert_cases("is_credit_card(@)", &[("123456", false)]);
    }

    #[test]
    fn test_is_phone_valid() {
        assert_cases(
            "is_phone(@)",
            &[("+1-555-123-4567", true), ("(555) 123-4567", true)],
        );
    }

    #[test]
    fn test_is_phone_invalid() {
        assert_cases("is_phone(@)", &[("123", false)]);
    }

    #[test]
    fn test_is_jwt_valid() {
        assert_cases(
            "is_jwt(@)",
            &[(
                "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.dozjgNryP4J3jVmNHl0w5N_XgL0n3I9PlFUP0THsR8U",
                true,
            )],
        );
    }

    #[test]
    fn test_is_jwt_invalid() {
        assert_cases(
            "is_jwt(@)",
            &[
                // Only two parts - invalid
                ("only.twoparts", false),
                // Contains invalid characters for base64url
                ("abc.def!ghi.jkl", false),
            ],
        );
    }

    #[test]
    fn test_is_iso_date_valid() {
        assert_cases(
            "is_iso_date(@)",
            &[("2023-12-13T15:30:00Z", true), ("2023-12-13", true)],
        );
    }

    #[test]
    fn test_is_iso_date_invalid() {
        assert_cases("is_iso_date(@)", &[("12/13/2023", false)]);
    }

    #[test]
    fn test_is_json_valid() {
        assert_cases("is_json(@)", &[(r#"{"a": 1, "b": [2, 3]}"#, true)]);
    }

    #[test]
    fn test_is_json_invalid() {
        assert_cases("is_json(@)", &[("not json", false)]);
    }

    #[test]
    fn test_is_base64_valid() {
        assert_cases("is_base64(@)", &[("SGVsbG8gV29ybGQ=", true)]);
    }

    #[test]
    fn test_is_base64_invalid() {
        assert_cases("is_base64(@)", &[("not valid base64!!!", false)]);
    }

    #[test]
    fn test_is_hex_valid() {
        assert_cases(
            "is_hex(@)",
            &[("deadbeef", true), ("ABCDEF0123456789", true)],
        );
    }

    #[test]
    fn test_is_hex_invalid() {
        assert_cases("is_hex(@)", &[("not hex!", false), ("", false)]);
    }
}