Skip to main content

jpx_core/extensions/
validation.rs

1//! Data validation functions.
2
3use std::collections::HashSet;
4
5use regex::Regex;
6use serde_json::Value;
7
8use crate::functions::Function;
9use crate::interpreter::SearchResult;
10use crate::registry::register_if_enabled;
11use crate::{Context, Runtime, arg, defn};
12
13/// Register validation functions filtered by the enabled set.
14pub fn register_filtered(runtime: &mut Runtime, enabled: &HashSet<&str>) {
15    register_if_enabled(runtime, "is_email", enabled, Box::new(IsEmailFn::new()));
16    register_if_enabled(runtime, "is_url", enabled, Box::new(IsUrlFn::new()));
17    register_if_enabled(runtime, "is_uuid", enabled, Box::new(IsUuidFn::new()));
18    register_if_enabled(runtime, "is_phone", enabled, Box::new(IsPhoneFn::new()));
19    register_if_enabled(runtime, "is_ipv4", enabled, Box::new(IsIpv4Fn::new()));
20    register_if_enabled(runtime, "is_ipv6", enabled, Box::new(IsIpv6Fn::new()));
21    register_if_enabled(runtime, "luhn_check", enabled, Box::new(LuhnCheckFn::new()));
22    register_if_enabled(
23        runtime,
24        "is_credit_card",
25        enabled,
26        Box::new(IsCreditCardFn::new()),
27    );
28    register_if_enabled(runtime, "is_jwt", enabled, Box::new(IsJwtFn::new()));
29    register_if_enabled(
30        runtime,
31        "is_iso_date",
32        enabled,
33        Box::new(IsIsoDateFn::new()),
34    );
35    register_if_enabled(runtime, "is_json", enabled, Box::new(IsJsonFn::new()));
36    register_if_enabled(runtime, "is_base64", enabled, Box::new(IsBase64Fn::new()));
37    register_if_enabled(runtime, "is_hex", enabled, Box::new(IsHexFn::new()));
38}
39
40// =============================================================================
41// is_email(string) -> boolean
42// =============================================================================
43
44defn!(IsEmailFn, vec![arg!(string)], None);
45
46impl Function for IsEmailFn {
47    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
48        self.signature.validate(args, ctx)?;
49
50        let s = args[0].as_str().unwrap();
51
52        let email_re = Regex::new(r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$").unwrap();
53        Ok(Value::Bool(email_re.is_match(s)))
54    }
55}
56
57// =============================================================================
58// is_url(string) -> boolean
59// =============================================================================
60
61defn!(IsUrlFn, vec![arg!(string)], None);
62
63impl Function for IsUrlFn {
64    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
65        self.signature.validate(args, ctx)?;
66
67        let s = args[0].as_str().unwrap();
68
69        let url_re = Regex::new(r"^https?://[^\s/$.?#].[^\s]*$").unwrap();
70        Ok(Value::Bool(url_re.is_match(s)))
71    }
72}
73
74// =============================================================================
75// is_uuid(string) -> boolean
76// =============================================================================
77
78defn!(IsUuidFn, vec![arg!(string)], None);
79
80impl Function for IsUuidFn {
81    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
82        self.signature.validate(args, ctx)?;
83
84        let s = args[0].as_str().unwrap();
85
86        let uuid_re = Regex::new(
87            r"^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$",
88        )
89        .unwrap();
90        Ok(Value::Bool(uuid_re.is_match(s)))
91    }
92}
93
94// =============================================================================
95// is_ipv4(string) -> boolean
96// =============================================================================
97
98defn!(IsIpv4Fn, vec![arg!(string)], None);
99
100impl Function for IsIpv4Fn {
101    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
102        self.signature.validate(args, ctx)?;
103
104        let s = args[0].as_str().unwrap();
105
106        let is_valid = s.parse::<std::net::Ipv4Addr>().is_ok();
107        Ok(Value::Bool(is_valid))
108    }
109}
110
111// =============================================================================
112// is_ipv6(string) -> boolean
113// =============================================================================
114
115defn!(IsIpv6Fn, vec![arg!(string)], None);
116
117impl Function for IsIpv6Fn {
118    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
119        self.signature.validate(args, ctx)?;
120
121        let s = args[0].as_str().unwrap();
122
123        let is_valid = s.parse::<std::net::Ipv6Addr>().is_ok();
124        Ok(Value::Bool(is_valid))
125    }
126}
127
128// =============================================================================
129// luhn_check(string) -> boolean
130// =============================================================================
131
132defn!(LuhnCheckFn, vec![arg!(string)], None);
133
134impl Function for LuhnCheckFn {
135    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
136        self.signature.validate(args, ctx)?;
137
138        let s = args[0].as_str().unwrap();
139
140        Ok(Value::Bool(luhn_validate(s)))
141    }
142}
143
/// Checks a number written as text against the Luhn checksum.
///
/// Spaces and dashes are treated as grouping separators and skipped. Any
/// other non-digit character makes the input invalid, as does an input that
/// contains no digits at all.
///
/// Returns `true` only when at least one digit was seen and the Luhn sum is
/// a multiple of ten.
fn luhn_validate(s: &str) -> bool {
    let mut sum: u32 = 0;
    let mut double = false;
    let mut saw_digit = false;

    // Walk from the rightmost character: the check digit is taken as-is and
    // every second digit to its left is doubled.
    for c in s.chars().rev() {
        // Grouping separators carry no value and do not affect the
        // alternation between doubled and plain digits.
        if c == ' ' || c == '-' {
            continue;
        }

        let mut d = match c.to_digit(10) {
            Some(digit) => digit,
            // Letters, punctuation, etc. are not part of a number.
            None => return false,
        };

        if double {
            d *= 2;
            // Equivalent to summing the two decimal digits of the product.
            if d > 9 {
                d -= 9;
            }
        }

        sum += d;
        double = !double;
        saw_digit = true;
    }

    saw_digit && sum % 10 == 0
}
173
174// =============================================================================
175// is_credit_card(string) -> boolean
176// =============================================================================
177
178defn!(IsCreditCardFn, vec![arg!(string)], None);
179
180impl Function for IsCreditCardFn {
181    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
182        self.signature.validate(args, ctx)?;
183
184        let s = args[0].as_str().unwrap();
185
186        // Remove spaces and dashes
187        let digits: String = s.chars().filter(|c| c.is_ascii_digit()).collect();
188
189        // Credit cards are typically 13-19 digits
190        if digits.len() < 13 || digits.len() > 19 {
191            return Ok(Value::Bool(false));
192        }
193
194        // Must pass Luhn check
195        Ok(Value::Bool(luhn_validate(&digits)))
196    }
197}
198
199// =============================================================================
200// is_phone(string) -> boolean
201// =============================================================================
202
203defn!(IsPhoneFn, vec![arg!(string)], None);
204
205impl Function for IsPhoneFn {
206    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
207        self.signature.validate(args, ctx)?;
208
209        let s = args[0].as_str().unwrap();
210
211        // Basic phone pattern: optional + followed by digits, spaces, dashes, parens
212        // Minimum 7 digits for a valid phone number
213        let phone_re = Regex::new(r"^\+?[\d\s\-\(\)\.]{7,}$").unwrap();
214        if !phone_re.is_match(s) {
215            return Ok(Value::Bool(false));
216        }
217
218        // Count actual digits - need at least 7
219        let digit_count = s.chars().filter(|c| c.is_ascii_digit()).count();
220        Ok(Value::Bool((7..=15).contains(&digit_count)))
221    }
222}
223
224// =============================================================================
225// is_jwt(string) -> boolean
226// =============================================================================
227
228defn!(IsJwtFn, vec![arg!(string)], None);
229
230impl Function for IsJwtFn {
231    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
232        self.signature.validate(args, ctx)?;
233
234        let s = args[0].as_str().unwrap();
235
236        // JWT has 3 base64url-encoded parts separated by dots
237        let parts: Vec<&str> = s.split('.').collect();
238        if parts.len() != 3 {
239            return Ok(Value::Bool(false));
240        }
241
242        // Check each part is valid base64url (alphanumeric, -, _, no padding required)
243        let is_valid = parts.iter().all(|part| {
244            !part.is_empty()
245                && part
246                    .chars()
247                    .all(|c| c.is_ascii_alphanumeric() || c == '-' || c == '_' || c == '=')
248        });
249
250        Ok(Value::Bool(is_valid))
251    }
252}
253
254// =============================================================================
255// is_iso_date(string) -> boolean
256// =============================================================================
257
258defn!(IsIsoDateFn, vec![arg!(string)], None);
259
260impl Function for IsIsoDateFn {
261    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
262        self.signature.validate(args, ctx)?;
263
264        let s = args[0].as_str().unwrap();
265
266        // Try parsing as RFC3339 (subset of ISO 8601)
267        if chrono::DateTime::parse_from_rfc3339(s).is_ok() {
268            return Ok(Value::Bool(true));
269        }
270
271        // Try parsing as date only (YYYY-MM-DD)
272        if chrono::NaiveDate::parse_from_str(s, "%Y-%m-%d").is_ok() {
273            return Ok(Value::Bool(true));
274        }
275
276        // Try parsing as datetime without timezone
277        if chrono::NaiveDateTime::parse_from_str(s, "%Y-%m-%dT%H:%M:%S").is_ok() {
278            return Ok(Value::Bool(true));
279        }
280
281        Ok(Value::Bool(false))
282    }
283}
284
285// =============================================================================
286// is_json(string) -> boolean
287// =============================================================================
288
289defn!(IsJsonFn, vec![arg!(string)], None);
290
291impl Function for IsJsonFn {
292    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
293        self.signature.validate(args, ctx)?;
294
295        let s = args[0].as_str().unwrap();
296
297        let is_valid = serde_json::from_str::<serde_json::Value>(s).is_ok();
298        Ok(Value::Bool(is_valid))
299    }
300}
301
302// =============================================================================
303// is_base64(string) -> boolean
304// =============================================================================
305
306defn!(IsBase64Fn, vec![arg!(string)], None);
307
308impl Function for IsBase64Fn {
309    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
310        self.signature.validate(args, ctx)?;
311
312        let s = args[0].as_str().unwrap();
313
314        use base64::{Engine, engine::general_purpose::STANDARD};
315        let is_valid = STANDARD.decode(s).is_ok();
316        Ok(Value::Bool(is_valid))
317    }
318}
319
320// =============================================================================
321// is_hex(string) -> boolean
322// =============================================================================
323
324defn!(IsHexFn, vec![arg!(string)], None);
325
326impl Function for IsHexFn {
327    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
328        self.signature.validate(args, ctx)?;
329
330        let s = args[0].as_str().unwrap();
331
332        // Must be non-empty and all hex chars
333        let is_valid = !s.is_empty() && s.chars().all(|c| c.is_ascii_hexdigit());
334        Ok(Value::Bool(is_valid))
335    }
336}
337
#[cfg(test)]
mod tests {
    use crate::Runtime;
    use serde_json::json;

    /// Builds a runtime with the standard functions and all extensions.
    fn setup_runtime() -> Runtime {
        Runtime::builder()
            .with_standard()
            .with_all_extensions()
            .build()
    }

    /// Compiles `expression` once, then evaluates it against each string
    /// input and asserts that the result is the paired boolean.
    fn assert_outcomes(expression: &str, cases: &[(&str, bool)]) {
        let runtime = setup_runtime();
        let compiled = runtime.compile(expression).unwrap();

        for &(input, expected) in cases {
            let data = json!(input);
            let result = compiled.search(&data).unwrap();
            assert_eq!(
                result,
                json!(expected),
                "{expression} evaluated on {input:?}"
            );
        }
    }

    #[test]
    fn test_is_ipv4() {
        assert_outcomes(
            "is_ipv4(@)",
            &[("192.168.1.1", true), ("not an ip", false)],
        );
    }

    #[test]
    fn test_is_ipv6() {
        assert_outcomes("is_ipv6(@)", &[("::1", true), ("2001:db8::1", true)]);
    }

    #[test]
    fn test_is_email() {
        assert_outcomes(
            "is_email(@)",
            &[("test@example.com", true), ("not-an-email", false)],
        );
    }

    #[test]
    fn test_luhn_check_valid() {
        // Valid Luhn number
        assert_outcomes("luhn_check(@)", &[("79927398713", true)]);
    }

    #[test]
    fn test_luhn_check_invalid() {
        assert_outcomes("luhn_check(@)", &[("79927398710", false)]);
    }

    #[test]
    fn test_is_credit_card_valid() {
        // Test Visa number (passes Luhn)
        assert_outcomes("is_credit_card(@)", &[("4111111111111111", true)]);
    }

    #[test]
    fn test_is_credit_card_invalid() {
        // Invalid number
        assert_outcomes("is_credit_card(@)", &[("1234567890123456", false)]);
    }

    #[test]
    fn test_is_credit_card_too_short() {
        assert_outcomes("is_credit_card(@)", &[("123456", false)]);
    }

    #[test]
    fn test_is_phone_valid() {
        assert_outcomes(
            "is_phone(@)",
            &[("+1-555-123-4567", true), ("(555) 123-4567", true)],
        );
    }

    #[test]
    fn test_is_phone_invalid() {
        assert_outcomes("is_phone(@)", &[("123", false)]);
    }

    #[test]
    fn test_is_jwt_valid() {
        assert_outcomes(
            "is_jwt(@)",
            &[(
                "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.dozjgNryP4J3jVmNHl0w5N_XgL0n3I9PlFUP0THsR8U",
                true,
            )],
        );
    }

    #[test]
    fn test_is_jwt_invalid() {
        assert_outcomes(
            "is_jwt(@)",
            &[
                // Only two parts - invalid
                ("only.twoparts", false),
                // Contains invalid characters for base64url
                ("abc.def!ghi.jkl", false),
            ],
        );
    }

    #[test]
    fn test_is_iso_date_valid() {
        assert_outcomes(
            "is_iso_date(@)",
            &[("2023-12-13T15:30:00Z", true), ("2023-12-13", true)],
        );
    }

    #[test]
    fn test_is_iso_date_invalid() {
        assert_outcomes("is_iso_date(@)", &[("12/13/2023", false)]);
    }

    #[test]
    fn test_is_json_valid() {
        assert_outcomes("is_json(@)", &[(r#"{"a": 1, "b": [2, 3]}"#, true)]);
    }

    #[test]
    fn test_is_json_invalid() {
        assert_outcomes("is_json(@)", &[("not json", false)]);
    }

    #[test]
    fn test_is_base64_valid() {
        assert_outcomes("is_base64(@)", &[("SGVsbG8gV29ybGQ=", true)]);
    }

    #[test]
    fn test_is_base64_invalid() {
        assert_outcomes("is_base64(@)", &[("not valid base64!!!", false)]);
    }

    #[test]
    fn test_is_hex_valid() {
        assert_outcomes(
            "is_hex(@)",
            &[("deadbeef", true), ("ABCDEF0123456789", true)],
        );
    }

    #[test]
    fn test_is_hex_invalid() {
        assert_outcomes("is_hex(@)", &[("not hex!", false), ("", false)]);
    }
}