boon/
formats.rs

1use std::{
2    collections::HashMap,
3    error::Error,
4    net::{Ipv4Addr, Ipv6Addr},
5};
6
7use once_cell::sync::Lazy;
8use percent_encoding::percent_decode_str;
9use serde_json::Value;
10use url::Url;
11
12use crate::ecma;
13
14/// Defines format for `format` keyword.
15#[derive(Clone, Copy)]
16pub struct Format {
17    /// Name of the format
18    pub name: &'static str,
19
20    /// validates given value.
21    pub func: fn(v: &Value) -> Result<(), Box<dyn Error>>,
22}
23
24pub(crate) static FORMATS: Lazy<HashMap<&'static str, Format>> = Lazy::new(|| {
25    let mut m = HashMap::<&'static str, Format>::new();
26    let mut register = |name, func| m.insert(name, Format { name, func });
27    register("regex", validate_regex);
28    register("ipv4", validate_ipv4);
29    register("ipv6", validate_ipv6);
30    register("hostname", validate_hostname);
31    register("idn-hostname", validate_idn_hostname);
32    register("email", validate_email);
33    register("idn-email", validate_idn_email);
34    register("date", validate_date);
35    register("time", validate_time);
36    register("date-time", validate_date_time);
37    register("duration", validate_duration);
38    register("period", validate_period);
39    register("json-pointer", validate_json_pointer);
40    register("relative-json-pointer", validate_relative_json_pointer);
41    register("uuid", validate_uuid);
42    register("uri", validate_uri);
43    register("iri", validate_iri);
44    register("uri-reference", validate_uri_reference);
45    register("iri-reference", validate_iri_reference);
46    register("uri-template", validate_uri_template);
47    m
48});
49
50fn validate_regex(v: &Value) -> Result<(), Box<dyn Error>> {
51    let Value::String(s) = v else {
52        return Ok(());
53    };
54    ecma::convert(s).map(|_| ())
55}
56
57fn validate_ipv4(v: &Value) -> Result<(), Box<dyn Error>> {
58    let Value::String(s) = v else {
59        return Ok(());
60    };
61    s.parse::<Ipv4Addr>()?;
62    Ok(())
63}
64
65fn validate_ipv6(v: &Value) -> Result<(), Box<dyn Error>> {
66    let Value::String(s) = v else {
67        return Ok(());
68    };
69    s.parse::<Ipv6Addr>()?;
70    Ok(())
71}
72
73fn validate_date(v: &Value) -> Result<(), Box<dyn Error>> {
74    let Value::String(s) = v else {
75        return Ok(());
76    };
77    check_date(s)
78}
79
/// Returns `true` when byte offset `index` is a char boundary of `s` and the
/// character starting there is `ch`.
fn matches_char(s: &str, index: usize, ch: char) -> bool {
    // Guard first: slicing at a non-boundary (or past the end) would panic.
    if !s.is_char_boundary(index) {
        return false;
    }
    let tail = &s[index..];
    tail.starts_with(ch)
}
83
/// Checks that `s` is an RFC 3339 `full-date` of the exact form `yyyy-mm-dd`.
///
/// Each field must consist solely of ASCII digits; signed fields such as
/// `+020` or `+1` (which `str::parse::<usize>` would accept) are rejected.
/// The month must be 1..=12 and the day must exist in that month, taking
/// leap years into account.
///
/// # Errors
/// Returns a message describing the first violated rule.
// see https://datatracker.ietf.org/doc/html/rfc3339#section-5.6
fn check_date(s: &str) -> Result<(), Box<dyn Error>> {
    // yyyy-mm-dd
    let bytes = s.as_bytes();
    if bytes.len() != 10 {
        return Err("must be 10 characters long".into());
    }
    // An ASCII '-' byte can only occur at a char boundary, so the string
    // slices taken below are always valid.
    if bytes[4] != b'-' || bytes[7] != b'-' {
        return Err("missing hyphen in correct place".into());
    }

    // Parse a field only if it is made of ASCII digits: this excludes the
    // leading '+' that `parse::<usize>()` tolerates.
    let field = |text: &str| -> Option<usize> {
        if text.bytes().all(|b| b.is_ascii_digit()) {
            text.parse().ok()
        } else {
            None
        }
    };
    let (Some(y), Some(m), Some(d)) = (field(&s[..4]), field(&s[5..7]), field(&s[8..])) else {
        return Err("non-positive year/month/day".into());
    };

    if !matches!(m, 1..=12) {
        return Err(format!("{m} months in year").into());
    }
    if !matches!(d, 1..=31) {
        return Err(format!("{d} days in month").into());
    }

    match m {
        2 => {
            let is_leap = y % 4 == 0 && (y % 100 != 0 || y % 400 == 0);
            let feb_days = if is_leap { 29 } else { 28 };
            if d > feb_days {
                return Err(format!("february has {feb_days} days only").into());
            }
        }
        4 | 6 | 9 | 11 => {
            if d > 30 {
                return Err("month has 30 days only".into());
            }
        }
        _ => {}
    }
    Ok(())
}
125
126fn validate_time(v: &Value) -> Result<(), Box<dyn Error>> {
127    let Value::String(s) = v else {
128        return Ok(());
129    };
130    check_time(s)
131}
132
/// Checks that `str` is an RFC 3339 `full-time`:
/// `hh:mm:ss[.fraction]` followed by `Z`, `z` or a `±hh:mm` offset.
///
/// Every numeric field must be exactly two ASCII digits, so signed fields
/// such as `+1` (which `str::parse::<usize>` would accept) are rejected.
/// A seconds value of 60 is accepted only when the time, shifted by the
/// offset, is 23:59.
///
/// # Errors
/// Returns a message describing the first violated rule.
// see https://datatracker.ietf.org/doc/html/rfc3339#section-5.6
fn check_time(mut str: &str) -> Result<(), Box<dyn Error>> {
    /// Parses a field consisting of exactly two ASCII digits.
    fn two_digits(field: &[u8]) -> Option<usize> {
        match field {
            [hi, lo] if hi.is_ascii_digit() && lo.is_ascii_digit() => {
                Some(usize::from(*hi - b'0') * 10 + usize::from(*lo - b'0'))
            }
            _ => None,
        }
    }

    // min: hh:mm:ssZ
    if str.len() < 9 {
        return Err("less than 9 characters long".into());
    }
    let bytes = str.as_bytes();
    if bytes[2] != b':' || bytes[5] != b':' {
        return Err("missing colon in correct place".into());
    }

    // parse hh:mm:ss
    if !str.is_char_boundary(8) {
        return Err("contains non-ascii char".into());
    }
    let (Some(mut h), Some(mut m), Some(s)) = (
        two_digits(&bytes[..2]),
        two_digits(&bytes[3..5]),
        two_digits(&bytes[6..8]),
    ) else {
        return Err("non-positive hour/min/sec".into());
    };
    if h > 23 || m > 59 || s > 60 {
        return Err("hour/min/sec out of range".into());
    }
    str = &str[8..];

    // parse sec-frac if present
    if let Some(rem) = str.strip_prefix('.') {
        let n_digits = rem.chars().take_while(char::is_ascii_digit).count();
        if n_digits == 0 {
            return Err("no digits in second fraction".into());
        }
        // ASCII digits are one byte each, so the count is also a byte offset.
        str = &rem[n_digits..];
    }

    if str != "z" && str != "Z" {
        // parse time-numoffset: ±hh:mm
        let offset = str.as_bytes();
        if offset.len() != 6 {
            return Err("offset must be 6 characters long".into());
        }
        // The sign is inverted because the offset is removed from the
        // local time to obtain the time used for the leap-second check.
        let sign: isize = match offset[0] {
            b'+' => -1,
            b'-' => 1,
            _ => return Err("offset must begin with plus/minus".into()),
        };
        if offset[3] != b':' {
            return Err("missing colon in offset at correct place".into());
        }
        let (Some(zh), Some(zm)) = (two_digits(&offset[1..3]), two_digits(&offset[4..])) else {
            return Err("non-positive hour/min in offset".into());
        };
        if zh > 23 || zm > 59 {
            return Err("hour/min in offset out of range".into());
        }

        // apply timezone
        let mut hm = (h * 60 + m) as isize + sign * (zh * 60 + zm) as isize;
        if hm < 0 {
            hm += 24 * 60;
            debug_assert!(hm >= 0);
        }
        let hm = hm as usize;
        (h, m) = (hm / 60, hm % 60);
    }

    // check leap second
    if !(s < 60 || (h == 23 && m == 59)) {
        return Err("invalid leap second".into());
    }
    Ok(())
}
205
206fn validate_date_time(v: &Value) -> Result<(), Box<dyn Error>> {
207    let Value::String(s) = v else {
208        return Ok(());
209    };
210    check_date_time(s)
211}
212
213fn check_date_time(s: &str) -> Result<(), Box<dyn Error>> {
214    // min: yyyy-mm-ddThh:mm:ssZ
215    if s.len() < 20 {
216        Err("less than 20 characters long")?;
217    }
218    if !s.is_char_boundary(10) || !s[10..].starts_with(['t', 'T']) {
219        Err("11th character must be t or T")?;
220    }
221    if let Err(e) = check_date(&s[..10]) {
222        Err(format!("invalid date element: {e}"))?;
223    }
224    if let Err(e) = check_time(&s[11..]) {
225        Err(format!("invalid time element: {e}"))?;
226    }
227    Ok(())
228}
229
230fn validate_duration(v: &Value) -> Result<(), Box<dyn Error>> {
231    let Value::String(s) = v else {
232        return Ok(());
233    };
234    check_duration(s)
235}
236
/// Checks a duration string: `P` followed either by `<digits>W`, or by
/// date elements (`Y`, `M`, `D`) and, after a `T`, time elements
/// (`H`, `M`, `S`). Units within a section must appear in that order.
// see https://datatracker.ietf.org/doc/html/rfc3339#appendix-A
fn check_duration(s: &str) -> Result<(), Box<dyn Error>> {
    // must start with 'P'
    let body = match s.strip_prefix('P') {
        Some(rest) => rest,
        None => return Err("must start with P".into()),
    };
    if body.is_empty() {
        return Err("nothing after P".into());
    }

    // dur-week: weeks cannot be combined with any other unit
    if let Some(weeks) = body.strip_suffix('W') {
        return if weeks.is_empty() {
            Err("no number in week".into())
        } else if weeks.chars().any(|c| !c.is_ascii_digit()) {
            Err("invalid week".into())
        } else {
            Ok(())
        };
    }

    // Allowed unit designators for the date section and the time section.
    const DESIGNATORS: [&str; 2] = ["YMD", "HMS"];
    for (section, mut rest) in body.split('T').enumerate() {
        if section != 0 && rest.is_empty() {
            return Err("no time elements".into());
        }
        let Some(&all_units) = DESIGNATORS.get(section) else {
            return Err("more than one T".into());
        };

        // Units still permitted; shrinks as units are consumed, which
        // enforces their order.
        let mut remaining_units = all_units;
        while !rest.is_empty() {
            let digits = rest.chars().take_while(char::is_ascii_digit).count();
            if digits == 0 {
                return Err("missing number".into());
            }
            rest = &rest[digits..];

            let Some(unit) = rest.chars().next() else {
                return Err("missing unit".into());
            };
            match remaining_units.find(unit) {
                Some(pos) => remaining_units = &remaining_units[pos + 1..],
                None if all_units.contains(unit) => {
                    return Err(format!("unit {unit} out of order").into());
                }
                None => return Err(format!("invalid unit {unit}").into()),
            }
            // `unit` matched an ASCII designator, so it is one byte long.
            rest = &rest[1..];
        }
    }

    Ok(())
}
289
290// see https://datatracker.ietf.org/doc/html/rfc3339#appendix-A
291fn validate_period(v: &Value) -> Result<(), Box<dyn Error>> {
292    let Value::String(s) = v else {
293        return Ok(());
294    };
295
296    let Some(slash) = s.find('/') else {
297        Err("missing slash")?
298    };
299
300    let (start, end) = (&s[..slash], &s[slash + 1..]);
301    if start.starts_with('P') {
302        if let Err(e) = check_duration(start) {
303            Err(format!("invalid start duration: {e}"))?
304        }
305        if let Err(e) = check_date_time(end) {
306            Err(format!("invalid end date-time: {e}"))?
307        }
308    } else {
309        if let Err(e) = check_date_time(start) {
310            Err(format!("invalid start date-time: {e}"))?
311        }
312        if end.starts_with('P') {
313            if let Err(e) = check_duration(end) {
314                Err(format!("invalid end duration: {e}"))?;
315            }
316        } else if let Err(e) = check_date_time(end) {
317            Err(format!("invalid end date-time: {e}"))?;
318        }
319    }
320    Ok(())
321}
322
323fn validate_hostname(v: &Value) -> Result<(), Box<dyn Error>> {
324    let Value::String(s) = v else {
325        return Ok(());
326    };
327    check_hostname(s)
328}
329
/// Checks hostname restrictions: at most 253 bytes (ignoring one trailing
/// dot), dot-separated labels of 1 to 63 bytes, each made of ASCII letters,
/// digits and hyphens, and neither starting nor ending with a hyphen.
// see https://en.wikipedia.org/wiki/Hostname#Restrictions_on_valid_host_names
fn check_hostname(s: &str) -> Result<(), Box<dyn Error>> {
    // A single trailing dot is not counted towards the length limit.
    let host = match s.strip_suffix('.') {
        Some(trimmed) => trimmed,
        None => s,
    };
    if host.len() > 253 {
        return Err("more than 253 characters long".into());
    }

    let allowed = |c: char| matches!(c, 'a'..='z' | 'A'..='Z' | '0'..='9' | '-');

    for label in host.split('.') {
        if label.is_empty() || label.len() > 63 {
            return Err("label must be 1 to 63 characters long".into());
        }
        if label.starts_with('-') {
            return Err("label starts with hyphen".into());
        }
        if label.ends_with('-') {
            return Err("label ends with hyphen".into());
        }
        for ch in label.chars() {
            if !allowed(ch) {
                return Err(format!("invalid character {ch:?}").into());
            }
        }
    }

    Ok(())
}
366
367fn validate_idn_hostname(v: &Value) -> Result<(), Box<dyn Error>> {
368    let Value::String(s) = v else {
369        return Ok(());
370    };
371    check_idn_hostname(s)
372}
373
374fn check_idn_hostname(s: &str) -> Result<(), Box<dyn Error>> {
375    let s = idna::domain_to_ascii_strict(s)?;
376    let unicode = idna::domain_to_unicode(&s).0;
377
378    // see https://www.rfc-editor.org/rfc/rfc5892#section-2.6
379    {
380        static DISALLOWED: [char; 10] = [
381            '\u{0640}', //  ARABIC TATWEEL
382            '\u{07FA}', //  NKO LAJANYALAN
383            '\u{302E}', //  HANGUL SINGLE DOT TONE MARK
384            '\u{302F}', //  HANGUL DOUBLE DOT TONE MARK
385            '\u{3031}', //  VERTICAL KANA REPEAT MARK
386            '\u{3032}', //  VERTICAL KANA REPEAT WITH VOICED SOUND MARK
387            '\u{3033}', //  VERTICAL KANA REPEAT MARK UPPER HALF
388            '\u{3034}', //  VERTICAL KANA REPEAT WITH VOICED SOUND MARK UPPER HA
389            '\u{3035}', //  VERTICAL KANA REPEAT MARK LOWER HALF
390            '\u{303B}', //  VERTICAL IDEOGRAPHIC ITERATION MARK
391        ];
392        if unicode.contains(DISALLOWED) {
393            Err("contains disallowed character")?;
394        }
395    }
396
397    // unicode string must not contain "--" in 3rd and 4th position
398    // and must not start and end with a '-'
399    // see https://www.rfc-editor.org/rfc/rfc5891#section-4.2.3.1
400    {
401        let count: usize = unicode
402            .chars()
403            .skip(2)
404            .take(2)
405            .map(|c| if c == '-' { 1 } else { 0 })
406            .sum();
407        if count == 2 {
408            Err("unicode string must not contain '--' in 3rd and 4th position")?;
409        }
410    }
411
412    // MIDDLE DOT is allowed between 'l' characters only
413    // see https://www.rfc-editor.org/rfc/rfc5892#appendix-A.3
414    {
415        let middle_dot = '\u{00b7}';
416        let mut s = unicode.as_str();
417        while let Some(i) = s.find(middle_dot) {
418            let prefix = &s[..i];
419            let suffix = &s[i + middle_dot.len_utf8()..];
420            if !prefix.ends_with('l') || !suffix.ends_with('l') {
421                Err("MIDDLE DOT is allowed between 'l' characters only")?;
422            }
423            s = suffix;
424        }
425    }
426
427    // Greek KERAIA must be followed by Greek character
428    // see https://www.rfc-editor.org/rfc/rfc5892#appendix-A.4
429    {
430        let keralia = '\u{0375}';
431        let greek = '\u{0370}'..='\u{03FF}';
432        let mut s = unicode.as_str();
433        while let Some(i) = s.find(keralia) {
434            let suffix = &s[i + keralia.len_utf8()..];
435            if !suffix.starts_with(|c| greek.contains(&c)) {
436                Err("Greek KERAIA must be followed by Greek character")?;
437            }
438            s = suffix;
439        }
440    }
441
442    // Hebrew GERESH must be preceded by Hebrew character
443    // see https://www.rfc-editor.org/rfc/rfc5892#appendix-A.5
444    //
445    // Hebrew GERSHAYIM must be preceded by Hebrew character
446    // see https://www.rfc-editor.org/rfc/rfc5892#appendix-A.6
447    {
448        let geresh = '\u{05F3}';
449        let gereshayim = '\u{05F4}';
450        let hebrew = '\u{0590}'..='\u{05FF}';
451        for ch in [geresh, gereshayim] {
452            let mut s = unicode.as_str();
453            while let Some(i) = s.find(ch) {
454                let prefix = &s[..i];
455                let suffix = &s[i + ch.len_utf8()..];
456                if !prefix.ends_with(|c| hebrew.contains(&c)) {
457                    if i == 0 {
458                        Err("Hebrew GERESH must be preceded by Hebrew character")?;
459                    } else {
460                        Err("Hebrew GERESHYIM must be preceded by Hebrew character")?;
461                    }
462                }
463                s = suffix;
464            }
465        }
466    }
467
468    // KATAKANA MIDDLE DOT must be with Hiragana, Katakana, or Han
469    // see https://www.rfc-editor.org/rfc/rfc5892#appendix-A.7
470    {
471        let katakana_middle_dot = '\u{30FB}';
472        let hiragana = '\u{3040}'..='\u{309F}';
473        let katakana = '\u{30A0}'..='\u{30FF}';
474        let han = '\u{4E00}'..='\u{9FFF}'; // https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block): is this range correct??
475        if unicode.contains(katakana_middle_dot) {
476            if unicode.contains(|c| hiragana.contains(&c))
477                || unicode.contains(|c| c != katakana_middle_dot && katakana.contains(&c))
478                || unicode.contains(|c| han.contains(&c))
479            {
480                // ok
481            } else {
482                Err("KATAKANA MIDDLE DOT must be with Hiragana, Katakana, or Han")?;
483            }
484        }
485    }
486
487    // ARABIC-INDIC DIGITS and Extended Arabic-Indic Digits cannot be mixed
488    // see https://www.rfc-editor.org/rfc/rfc5892#appendix-A.8
489    // see https://www.rfc-editor.org/rfc/rfc5892#appendix-A.9
490    {
491        let arabic_indic_digits = '\u{0660}'..='\u{0669}';
492        let extended_arabic_indic_digits = '\u{06F0}'..='\u{06F9}';
493        if unicode.contains(|c| arabic_indic_digits.contains(&c))
494            && unicode.contains(|c| extended_arabic_indic_digits.contains(&c))
495        {
496            Err("ARABIC-INDIC DIGITS and Extended Arabic-Indic Digits cannot be mixed")?;
497        }
498    }
499
500    // ZERO WIDTH JOINER must be preceded by Virama
501    // see https://www.rfc-editor.org/rfc/rfc5892#appendix-A.2
502    {
503        let zero_width_jointer = '\u{200D}';
504        static VIRAMA: [char; 61] = [
505            '\u{094D}',
506            '\u{09CD}',
507            '\u{0A4D}',
508            '\u{0ACD}',
509            '\u{0B4D}',
510            '\u{0BCD}',
511            '\u{0C4D}',
512            '\u{0CCD}',
513            '\u{0D3B}',
514            '\u{0D3C}',
515            '\u{0D4D}',
516            '\u{0DCA}',
517            '\u{0E3A}',
518            '\u{0EBA}',
519            '\u{0F84}',
520            '\u{1039}',
521            '\u{103A}',
522            '\u{1714}',
523            '\u{1734}',
524            '\u{17D2}',
525            '\u{1A60}',
526            '\u{1B44}',
527            '\u{1BAA}',
528            '\u{1BAB}',
529            '\u{1BF2}',
530            '\u{1BF3}',
531            '\u{2D7F}',
532            '\u{A806}',
533            '\u{A82C}',
534            '\u{A8C4}',
535            '\u{A953}',
536            '\u{A9C0}',
537            '\u{AAF6}',
538            '\u{ABED}',
539            '\u{10A3F}',
540            '\u{11046}',
541            '\u{1107F}',
542            '\u{110B9}',
543            '\u{11133}',
544            '\u{11134}',
545            '\u{111C0}',
546            '\u{11235}',
547            '\u{112EA}',
548            '\u{1134D}',
549            '\u{11442}',
550            '\u{114C2}',
551            '\u{115BF}',
552            '\u{1163F}',
553            '\u{116B6}',
554            '\u{1172B}',
555            '\u{11839}',
556            '\u{1193D}',
557            '\u{1193E}',
558            '\u{119E0}',
559            '\u{11A34}',
560            '\u{11A47}',
561            '\u{11A99}',
562            '\u{11C3F}',
563            '\u{11D44}',
564            '\u{11D45}',
565            '\u{11D97}',
566        ]; // https://www.compart.com/en/unicode/combining/9
567        let mut s = unicode.as_str();
568        while let Some(i) = s.find(zero_width_jointer) {
569            let prefix = &s[..i];
570            let suffix = &s[i + zero_width_jointer.len_utf8()..];
571            if !prefix.ends_with(VIRAMA) {
572                Err("ZERO WIDTH JOINER must be preceded by Virama")?;
573            }
574            s = suffix;
575        }
576    }
577
578    check_hostname(&s)
579}
580
581fn validate_email(v: &Value) -> Result<(), Box<dyn Error>> {
582    let Value::String(s) = v else {
583        return Ok(());
584    };
585    check_email(s)
586}
587
588// see https://en.wikipedia.org/wiki/Email_address
589fn check_email(s: &str) -> Result<(), Box<dyn Error>> {
590    // entire email address to be no more than 254 characters long
591    if s.len() > 254 {
592        Err("more than 254 characters long")?
593    }
594
595    // email address is generally recognized as having two parts joined with an at-sign
596    let Some(at) = s.rfind('@') else {
597        Err("missing @")?
598    };
599    let (local, domain) = (&s[..at], &s[at + 1..]);
600
601    // local part may be up to 64 characters long
602    if local.len() > 64 {
603        Err("local part more than 64 characters long")?
604    }
605
606    if local.len() > 1 && local.starts_with('"') && local.ends_with('"') {
607        // quoted
608        let local = &local[1..local.len() - 1];
609        if local.contains(['\\', '"']) {
610            Err("backslash and quote not allowed within quoted local part")?
611        }
612    } else {
613        // unquoted
614
615        if local.starts_with('.') {
616            Err("starts with dot")?
617        }
618        if local.ends_with('.') {
619            Err("ends with dot")?
620        }
621
622        // consecutive dots not allowed
623        if local.contains("..") {
624            Err("consecutive dots")?
625        }
626
627        // check allowd chars
628        if let Some(ch) = local
629            .chars()
630            .find(|c| !(c.is_ascii_alphanumeric() || ".!#$%&'*+-/=?^_`{|}~".contains(*c)))
631        {
632            Err(format!("invalid character {ch:?}"))?
633        }
634    }
635
636    // domain if enclosed in brackets, must match an IP address
637    if domain.starts_with('[') && domain.ends_with(']') {
638        let s = &domain[1..domain.len() - 1];
639        if let Some(s) = s.strip_prefix("IPv6:") {
640            if let Err(e) = s.parse::<Ipv6Addr>() {
641                Err(format!("invalid ipv6 address: {e}"))?
642            }
643            return Ok(());
644        }
645        if let Err(e) = s.parse::<Ipv4Addr>() {
646            Err(format!("invalid ipv4 address: {e}"))?
647        }
648        return Ok(());
649    }
650
651    // domain must match the requirements for a hostname
652    if let Err(e) = check_hostname(domain) {
653        Err(format!("invalid domain: {e}"))?
654    }
655
656    Ok(())
657}
658
659fn validate_idn_email(v: &Value) -> Result<(), Box<dyn Error>> {
660    let Value::String(s) = v else {
661        return Ok(());
662    };
663
664    let Some(at) = s.rfind('@') else {
665        Err("missing @")?
666    };
667    let (local, domain) = (&s[..at], &s[at + 1..]);
668
669    let local = idna::domain_to_ascii_strict(local)?;
670    let domain = idna::domain_to_ascii_strict(domain)?;
671    if let Err(e) = check_idn_hostname(&domain) {
672        Err(format!("invalid domain: {e}"))?
673    }
674    check_email(&format!("{local}@{domain}"))
675}
676
677fn validate_json_pointer(v: &Value) -> Result<(), Box<dyn Error>> {
678    let Value::String(s) = v else {
679        return Ok(());
680    };
681    check_json_pointer(s)
682}
683
/// Checks a JSON Pointer: either empty, or a sequence of `/`-prefixed
/// reference tokens in which every `~` is followed by `0` or `1`.
// see https://www.rfc-editor.org/rfc/rfc6901#section-3
fn check_json_pointer(s: &str) -> Result<(), Box<dyn Error>> {
    if s.is_empty() {
        return Ok(());
    }
    if !s.starts_with('/') {
        return Err("not starting with slash".into());
    }

    // The first split item is the empty text before the leading slash.
    for token in s.split('/').skip(1) {
        // `true` right after a '~' whose escape digit has not been seen yet.
        let mut pending_escape = false;
        for ch in token.chars() {
            if pending_escape {
                if !matches!(ch, '0' | '1') {
                    return Err("~ must be followed by 0 or 1".into());
                }
                pending_escape = false;
            } else if ch == '~' {
                pending_escape = true;
            } else if !matches!(ch, '\x00'..='\x2E' | '\x30'..='\x7D' | '\x7F'..='\u{10FFFF}') {
                return Err("contains disallowed character".into());
            }
        }
        // token ended directly after a '~'
        if pending_escape {
            return Err("~ must be followed by 0 or 1".into());
        }
    }
    Ok(())
}
706
707// see https://tools.ietf.org/html/draft-handrews-relative-json-pointer-01#section-3
708fn validate_relative_json_pointer(v: &Value) -> Result<(), Box<dyn Error>> {
709    let Value::String(s) = v else {
710        return Ok(());
711    };
712
713    // start with non-negative-integer
714    let num_digits = s.chars().take_while(char::is_ascii_digit).count();
715    if num_digits == 0 {
716        Err("must start with non-negative integer")?;
717    }
718    if num_digits > 1 && s.starts_with('0') {
719        Err("starts with zero")?;
720    }
721    let s = &s[num_digits..];
722
723    // followed by either json-pointer or '#'
724    if s == "#" {
725        return Ok(());
726    }
727    if let Err(e) = check_json_pointer(s) {
728        Err(format!("invalid json-pointer element: {e}"))?;
729    }
730    Ok(())
731}
732
733// see https://datatracker.ietf.org/doc/html/rfc4122#page-4
734fn validate_uuid(v: &Value) -> Result<(), Box<dyn Error>> {
735    let Value::String(s) = v else {
736        return Ok(());
737    };
738
739    static HEX_GROUPS: [usize; 5] = [8, 4, 4, 4, 12];
740    let mut i = 0;
741    for group in s.split('-') {
742        if i >= HEX_GROUPS.len() {
743            Err("more than 5 elements")?;
744        }
745        if group.len() != HEX_GROUPS[i] {
746            Err(format!(
747                "element {} must be {} characters long",
748                i + 1,
749                HEX_GROUPS[i]
750            ))?;
751        }
752        if let Some(ch) = group.chars().find(|c| !c.is_ascii_hexdigit()) {
753            Err(format!("non-hex character {ch:?}"))?;
754        }
755        i += 1;
756    }
757    if i != HEX_GROUPS.len() {
758        Err("must have 5 elements")?;
759    }
760    Ok(())
761}
762
763fn validate_uri(v: &Value) -> Result<(), Box<dyn Error>> {
764    let Value::String(s) = v else {
765        return Ok(());
766    };
767    if fluent_uri::UriRef::parse(s.as_str())?.scheme().is_none() {
768        Err("relative url")?;
769    };
770    Ok(())
771}
772
773fn validate_iri(v: &Value) -> Result<(), Box<dyn Error>> {
774    let Value::String(s) = v else {
775        return Ok(());
776    };
777    match Url::parse(s) {
778        Ok(_) => Ok(()),
779        Err(url::ParseError::RelativeUrlWithoutBase) => Err("relative url")?,
780        Err(e) => Err(e)?,
781    }
782}
783
784static TEMP_URL: Lazy<Url> = Lazy::new(|| Url::parse("http://temp.com").unwrap());
785
786fn parse_uri_reference(s: &str) -> Result<Url, Box<dyn Error>> {
787    if s.contains('\\') {
788        Err("contains \\\\")?;
789    }
790    Ok(TEMP_URL.join(s)?)
791}
792
793fn validate_uri_reference(v: &Value) -> Result<(), Box<dyn Error>> {
794    let Value::String(s) = v else {
795        return Ok(());
796    };
797    fluent_uri::UriRef::parse(s.as_str())?;
798    Ok(())
799}
800
801fn validate_iri_reference(v: &Value) -> Result<(), Box<dyn Error>> {
802    let Value::String(s) = v else {
803        return Ok(());
804    };
805    parse_uri_reference(s)?;
806    Ok(())
807}
808
809fn validate_uri_template(v: &Value) -> Result<(), Box<dyn Error>> {
810    let Value::String(s) = v else {
811        return Ok(());
812    };
813
814    let url = parse_uri_reference(s)?;
815
816    let path = url.path();
817    // path we got has curly bases percent encoded
818    let path = percent_decode_str(path).decode_utf8()?;
819
820    // ensure curly brackets are not nested and balanced
821    for part in path.as_ref().split('/') {
822        let mut want = true;
823        for got in part
824            .chars()
825            .filter(|c| matches!(c, '{' | '}'))
826            .map(|c| c == '{')
827        {
828            if got != want {
829                Err("nested curly braces")?;
830            }
831            want = !want;
832        }
833        if !want {
834            Err("no matching closing brace")?
835        }
836    }
837    Ok(())
838}