1use std::{
2 collections::HashMap,
3 error::Error,
4 net::{Ipv4Addr, Ipv6Addr},
5};
6
7use once_cell::sync::Lazy;
8use percent_encoding::percent_decode_str;
9use serde_json::Value;
10use url::Url;
11
12use crate::ecma;
13
14#[derive(Clone, Copy)]
16pub struct Format {
17 pub name: &'static str,
19
20 pub func: fn(v: &Value) -> Result<(), Box<dyn Error>>,
22}
23
24pub(crate) static FORMATS: Lazy<HashMap<&'static str, Format>> = Lazy::new(|| {
25 let mut m = HashMap::<&'static str, Format>::new();
26 let mut register = |name, func| m.insert(name, Format { name, func });
27 register("regex", validate_regex);
28 register("ipv4", validate_ipv4);
29 register("ipv6", validate_ipv6);
30 register("hostname", validate_hostname);
31 register("idn-hostname", validate_idn_hostname);
32 register("email", validate_email);
33 register("idn-email", validate_idn_email);
34 register("date", validate_date);
35 register("time", validate_time);
36 register("date-time", validate_date_time);
37 register("duration", validate_duration);
38 register("period", validate_period);
39 register("json-pointer", validate_json_pointer);
40 register("relative-json-pointer", validate_relative_json_pointer);
41 register("uuid", validate_uuid);
42 register("uri", validate_uri);
43 register("iri", validate_iri);
44 register("uri-reference", validate_uri_reference);
45 register("iri-reference", validate_iri_reference);
46 register("uri-template", validate_uri_template);
47 m
48});
49
50fn validate_regex(v: &Value) -> Result<(), Box<dyn Error>> {
51 let Value::String(s) = v else {
52 return Ok(());
53 };
54 ecma::convert(s).map(|_| ())
55}
56
57fn validate_ipv4(v: &Value) -> Result<(), Box<dyn Error>> {
58 let Value::String(s) = v else {
59 return Ok(());
60 };
61 s.parse::<Ipv4Addr>()?;
62 Ok(())
63}
64
65fn validate_ipv6(v: &Value) -> Result<(), Box<dyn Error>> {
66 let Value::String(s) = v else {
67 return Ok(());
68 };
69 s.parse::<Ipv6Addr>()?;
70 Ok(())
71}
72
73fn validate_date(v: &Value) -> Result<(), Box<dyn Error>> {
74 let Value::String(s) = v else {
75 return Ok(());
76 };
77 check_date(s)
78}
79
/// Returns `true` when byte offset `index` of `s` is a character boundary
/// and the character starting there is `ch`.
fn matches_char(s: &str, index: usize, ch: char) -> bool {
    s.is_char_boundary(index) && s[index..].starts_with(ch)
}

/// Checks that `s` has the shape `yyyy-mm-dd` and names an existing calendar day.
///
/// # Errors
///
/// Returns a message when the length is not 10, the hyphens are misplaced,
/// any field contains something other than ASCII digits, or the
/// month/day is out of range (leap years are taken into account for February).
fn check_date(s: &str) -> Result<(), Box<dyn Error>> {
    if s.len() != 10 {
        Err("must be 10 characters long")?;
    }
    if !matches_char(s, 4, '-') || !matches_char(s, 7, '-') {
        Err("missing hyphen in correct place")?;
    }

    // `str::parse::<usize>` alone also accepts a leading `+` (e.g. "+1"),
    // so every field is required to consist of ASCII digits only.
    let field = |t: &str| -> Option<usize> {
        if t.bytes().all(|b| b.is_ascii_digit()) {
            t.parse().ok()
        } else {
            None
        }
    };
    let mut ymd = s.splitn(3, '-').filter_map(field);
    let (Some(y), Some(m), Some(d)) = (ymd.next(), ymd.next(), ymd.next()) else {
        Err("non-positive year/month/day")?
    };

    if !matches!(m, 1..=12) {
        Err(format!("{m} months in year"))?;
    }
    if !matches!(d, 1..=31) {
        Err(format!("{d} days in month"))?;
    }

    match m {
        2 => {
            let leap = y % 4 == 0 && (y % 100 != 0 || y % 400 == 0);
            let feb_days = if leap { 29 } else { 28 };
            if d > feb_days {
                Err(format!("february has {feb_days} days only"))?;
            }
        }
        4 | 6 | 9 | 11 => {
            if d > 30 {
                Err("month has 30 days only")?;
            }
        }
        _ => {}
    }
    Ok(())
}
125
126fn validate_time(v: &Value) -> Result<(), Box<dyn Error>> {
127 let Value::String(s) = v else {
128 return Ok(());
129 };
130 check_time(s)
131}
132
/// Checks a time of the shape `hh:mm:ss[.fraction](z|Z|+hh:mm|-hh:mm)`.
///
/// A seconds value of 60 is only accepted when the time, shifted by the
/// offset, falls on 23:59.
///
/// # Errors
///
/// Returns a message describing the first problem found: too short,
/// misplaced colons, non-digit fields, out-of-range values, a malformed
/// fraction or offset, or an invalid leap second.
fn check_time(mut str: &str) -> Result<(), Box<dyn Error>> {
    /// Parses a field made of ASCII digits only; `str::parse::<usize>` alone
    /// would also accept a leading `+`.
    fn digits(t: &str) -> Option<usize> {
        if t.bytes().all(|b| b.is_ascii_digit()) {
            t.parse().ok()
        } else {
            None
        }
    }

    // shortest accepted form is `hh:mm:ssZ`
    if str.len() < 9 {
        Err("less than 9 characters long")?
    }
    // an ASCII byte is always a character boundary, so comparing bytes is enough
    let head = str.as_bytes();
    if head[2] != b':' || head[5] != b':' {
        Err("missing colon in correct place")?
    }

    if !str.is_char_boundary(8) {
        Err("contains non-ascii char")?
    }
    let mut hms = str[..8].splitn(3, ':').filter_map(digits);
    let (Some(mut h), Some(mut m), Some(s)) = (hms.next(), hms.next(), hms.next()) else {
        Err("non-positive hour/min/sec")?
    };
    if h > 23 || m > 59 || s > 60 {
        Err("hour/min/sec out of range")?
    }
    str = &str[8..];

    // optional fraction of a second
    if let Some(rem) = str.strip_prefix('.') {
        let n_digits = rem.chars().take_while(char::is_ascii_digit).count();
        if n_digits == 0 {
            Err("no digits in second fraction")?;
        }
        str = &rem[n_digits..];
    }

    if str != "z" && str != "Z" {
        if str.len() != 6 {
            Err("offset must be 6 characters long")?;
        }
        // a `+` offset is subtracted and a `-` offset is added to the local time
        let sign: isize = match str.chars().next() {
            Some('+') => -1,
            Some('-') => 1,
            _ => return Err("offset must begin with plus/minus".into()),
        };
        str = &str[1..];
        if str.as_bytes()[2] != b':' {
            Err("missing colon in offset at correct place")?
        }

        let mut zhm = str.splitn(2, ':').filter_map(digits);
        let (Some(zh), Some(zm)) = (zhm.next(), zhm.next()) else {
            Err("non-positive hour/min in offset")?
        };
        if zh > 23 || zm > 59 {
            Err("hour/min in offset out of range")?
        }

        // shift hour/minute by the offset, wrapping into the previous day if needed
        let mut hm = (h * 60 + m) as isize + sign * (zh * 60 + zm) as isize;
        if hm < 0 {
            hm += 24 * 60;
            debug_assert!(hm >= 0);
        }
        let hm = hm as usize;
        (h, m) = (hm / 60, hm % 60);
    }

    // second 60 is only allowed at 23:59 after applying the offset
    if !(s < 60 || (h == 23 && m == 59)) {
        Err("invalid leap second")?
    }
    Ok(())
}
205
206fn validate_date_time(v: &Value) -> Result<(), Box<dyn Error>> {
207 let Value::String(s) = v else {
208 return Ok(());
209 };
210 check_date_time(s)
211}
212
213fn check_date_time(s: &str) -> Result<(), Box<dyn Error>> {
214 if s.len() < 20 {
216 Err("less than 20 characters long")?;
217 }
218 if !s.is_char_boundary(10) || !s[10..].starts_with(['t', 'T']) {
219 Err("11th character must be t or T")?;
220 }
221 if let Err(e) = check_date(&s[..10]) {
222 Err(format!("invalid date element: {e}"))?;
223 }
224 if let Err(e) = check_time(&s[11..]) {
225 Err(format!("invalid time element: {e}"))?;
226 }
227 Ok(())
228}
229
230fn validate_duration(v: &Value) -> Result<(), Box<dyn Error>> {
231 let Value::String(s) = v else {
232 return Ok(());
233 };
234 check_duration(s)
235}
236
/// Checks a duration: `P` followed either by `<digits>W`, or by a date part
/// (units `Y`, `M`, `D` in that order) and an optional `T`-separated time part
/// (units `H`, `M`, `S` in that order).
///
/// # Errors
///
/// Returns a message naming the first violation found.
fn check_duration(s: &str) -> Result<(), Box<dyn Error>> {
    let body = match s.strip_prefix('P') {
        Some(rest) => rest,
        None => return Err("must start with P".into()),
    };
    if body.is_empty() {
        return Err("nothing after P".into());
    }

    // week form: digits followed by a single trailing W
    if let Some(weeks) = body.strip_suffix('W') {
        if weeks.is_empty() {
            return Err("no number in week".into());
        }
        if weeks.chars().any(|c| !c.is_ascii_digit()) {
            return Err("invalid week".into());
        }
        return Ok(());
    }

    // units allowed before the T and after it, in required order
    const UNITS: [&str; 2] = ["YMD", "HMS"];
    for (section, mut rest) in body.split('T').enumerate() {
        if section != 0 && rest.is_empty() {
            return Err("no time elements".into());
        }
        let Some(&allowed) = UNITS.get(section) else {
            return Err("more than one T".into());
        };

        // units still permitted; shrinks as units are consumed to enforce ordering
        let mut pending = allowed;
        while !rest.is_empty() {
            let digit_count = rest.chars().take_while(char::is_ascii_digit).count();
            if digit_count == 0 {
                return Err("missing number".into());
            }
            rest = &rest[digit_count..];

            let Some(unit) = rest.chars().next() else {
                return Err("missing unit".into());
            };
            match pending.find(unit) {
                Some(pos) => pending = &pending[pos + 1..],
                None if allowed.contains(unit) => {
                    return Err(format!("unit {unit} out of order").into());
                }
                None => return Err(format!("invalid unit {unit}").into()),
            }
            rest = &rest[1..];
        }
    }

    Ok(())
}
289
290fn validate_period(v: &Value) -> Result<(), Box<dyn Error>> {
292 let Value::String(s) = v else {
293 return Ok(());
294 };
295
296 let Some(slash) = s.find('/') else {
297 Err("missing slash")?
298 };
299
300 let (start, end) = (&s[..slash], &s[slash + 1..]);
301 if start.starts_with('P') {
302 if let Err(e) = check_duration(start) {
303 Err(format!("invalid start duration: {e}"))?
304 }
305 if let Err(e) = check_date_time(end) {
306 Err(format!("invalid end date-time: {e}"))?
307 }
308 } else {
309 if let Err(e) = check_date_time(start) {
310 Err(format!("invalid start date-time: {e}"))?
311 }
312 if end.starts_with('P') {
313 if let Err(e) = check_duration(end) {
314 Err(format!("invalid end duration: {e}"))?;
315 }
316 } else if let Err(e) = check_date_time(end) {
317 Err(format!("invalid end date-time: {e}"))?;
318 }
319 }
320 Ok(())
321}
322
323fn validate_hostname(v: &Value) -> Result<(), Box<dyn Error>> {
324 let Value::String(s) = v else {
325 return Ok(());
326 };
327 check_hostname(s)
328}
329
/// Checks an ASCII hostname.
///
/// One trailing dot is ignored. What remains must be at most 253 bytes and
/// consist of dot-separated labels of 1 to 63 bytes, made of ASCII letters,
/// digits and hyphens, with no hyphen at either end of a label.
fn check_hostname(mut s: &str) -> Result<(), Box<dyn Error>> {
    if let Some(without_dot) = s.strip_suffix('.') {
        s = without_dot;
    }
    if s.len() > 253 {
        return Err("more than 253 characters long".into());
    }

    for label in s.split('.') {
        let len = label.len();
        if len == 0 || len > 63 {
            return Err("label must be 1 to 63 characters long".into());
        }
        if label.starts_with('-') {
            return Err("label starts with hyphen".into());
        }
        if label.ends_with('-') {
            return Err("label ends with hyphen".into());
        }
        for ch in label.chars() {
            if !(ch.is_ascii_alphanumeric() || ch == '-') {
                return Err(format!("invalid character {ch:?}").into());
            }
        }
    }

    Ok(())
}
366
367fn validate_idn_hostname(v: &Value) -> Result<(), Box<dyn Error>> {
368 let Value::String(s) = v else {
369 return Ok(());
370 };
371 check_idn_hostname(s)
372}
373
374fn check_idn_hostname(s: &str) -> Result<(), Box<dyn Error>> {
375 let s = idna::domain_to_ascii_strict(s)?;
376 let unicode = idna::domain_to_unicode(&s).0;
377
378 {
380 static DISALLOWED: [char; 10] = [
381 '\u{0640}', '\u{07FA}', '\u{302E}', '\u{302F}', '\u{3031}', '\u{3032}', '\u{3033}', '\u{3034}', '\u{3035}', '\u{303B}', ];
392 if unicode.contains(DISALLOWED) {
393 Err("contains disallowed character")?;
394 }
395 }
396
397 {
401 let count: usize = unicode
402 .chars()
403 .skip(2)
404 .take(2)
405 .map(|c| if c == '-' { 1 } else { 0 })
406 .sum();
407 if count == 2 {
408 Err("unicode string must not contain '--' in 3rd and 4th position")?;
409 }
410 }
411
412 {
415 let middle_dot = '\u{00b7}';
416 let mut s = unicode.as_str();
417 while let Some(i) = s.find(middle_dot) {
418 let prefix = &s[..i];
419 let suffix = &s[i + middle_dot.len_utf8()..];
420 if !prefix.ends_with('l') || !suffix.ends_with('l') {
421 Err("MIDDLE DOT is allowed between 'l' characters only")?;
422 }
423 s = suffix;
424 }
425 }
426
427 {
430 let keralia = '\u{0375}';
431 let greek = '\u{0370}'..='\u{03FF}';
432 let mut s = unicode.as_str();
433 while let Some(i) = s.find(keralia) {
434 let suffix = &s[i + keralia.len_utf8()..];
435 if !suffix.starts_with(|c| greek.contains(&c)) {
436 Err("Greek KERAIA must be followed by Greek character")?;
437 }
438 s = suffix;
439 }
440 }
441
442 {
448 let geresh = '\u{05F3}';
449 let gereshayim = '\u{05F4}';
450 let hebrew = '\u{0590}'..='\u{05FF}';
451 for ch in [geresh, gereshayim] {
452 let mut s = unicode.as_str();
453 while let Some(i) = s.find(ch) {
454 let prefix = &s[..i];
455 let suffix = &s[i + ch.len_utf8()..];
456 if !prefix.ends_with(|c| hebrew.contains(&c)) {
457 if i == 0 {
458 Err("Hebrew GERESH must be preceded by Hebrew character")?;
459 } else {
460 Err("Hebrew GERESHYIM must be preceded by Hebrew character")?;
461 }
462 }
463 s = suffix;
464 }
465 }
466 }
467
468 {
471 let katakana_middle_dot = '\u{30FB}';
472 let hiragana = '\u{3040}'..='\u{309F}';
473 let katakana = '\u{30A0}'..='\u{30FF}';
474 let han = '\u{4E00}'..='\u{9FFF}'; if unicode.contains(katakana_middle_dot) {
476 if unicode.contains(|c| hiragana.contains(&c))
477 || unicode.contains(|c| c != katakana_middle_dot && katakana.contains(&c))
478 || unicode.contains(|c| han.contains(&c))
479 {
480 } else {
482 Err("KATAKANA MIDDLE DOT must be with Hiragana, Katakana, or Han")?;
483 }
484 }
485 }
486
487 {
491 let arabic_indic_digits = '\u{0660}'..='\u{0669}';
492 let extended_arabic_indic_digits = '\u{06F0}'..='\u{06F9}';
493 if unicode.contains(|c| arabic_indic_digits.contains(&c))
494 && unicode.contains(|c| extended_arabic_indic_digits.contains(&c))
495 {
496 Err("ARABIC-INDIC DIGITS and Extended Arabic-Indic Digits cannot be mixed")?;
497 }
498 }
499
500 {
503 let zero_width_jointer = '\u{200D}';
504 static VIRAMA: [char; 61] = [
505 '\u{094D}',
506 '\u{09CD}',
507 '\u{0A4D}',
508 '\u{0ACD}',
509 '\u{0B4D}',
510 '\u{0BCD}',
511 '\u{0C4D}',
512 '\u{0CCD}',
513 '\u{0D3B}',
514 '\u{0D3C}',
515 '\u{0D4D}',
516 '\u{0DCA}',
517 '\u{0E3A}',
518 '\u{0EBA}',
519 '\u{0F84}',
520 '\u{1039}',
521 '\u{103A}',
522 '\u{1714}',
523 '\u{1734}',
524 '\u{17D2}',
525 '\u{1A60}',
526 '\u{1B44}',
527 '\u{1BAA}',
528 '\u{1BAB}',
529 '\u{1BF2}',
530 '\u{1BF3}',
531 '\u{2D7F}',
532 '\u{A806}',
533 '\u{A82C}',
534 '\u{A8C4}',
535 '\u{A953}',
536 '\u{A9C0}',
537 '\u{AAF6}',
538 '\u{ABED}',
539 '\u{10A3F}',
540 '\u{11046}',
541 '\u{1107F}',
542 '\u{110B9}',
543 '\u{11133}',
544 '\u{11134}',
545 '\u{111C0}',
546 '\u{11235}',
547 '\u{112EA}',
548 '\u{1134D}',
549 '\u{11442}',
550 '\u{114C2}',
551 '\u{115BF}',
552 '\u{1163F}',
553 '\u{116B6}',
554 '\u{1172B}',
555 '\u{11839}',
556 '\u{1193D}',
557 '\u{1193E}',
558 '\u{119E0}',
559 '\u{11A34}',
560 '\u{11A47}',
561 '\u{11A99}',
562 '\u{11C3F}',
563 '\u{11D44}',
564 '\u{11D45}',
565 '\u{11D97}',
566 ]; let mut s = unicode.as_str();
568 while let Some(i) = s.find(zero_width_jointer) {
569 let prefix = &s[..i];
570 let suffix = &s[i + zero_width_jointer.len_utf8()..];
571 if !prefix.ends_with(VIRAMA) {
572 Err("ZERO WIDTH JOINER must be preceded by Virama")?;
573 }
574 s = suffix;
575 }
576 }
577
578 check_hostname(&s)
579}
580
581fn validate_email(v: &Value) -> Result<(), Box<dyn Error>> {
582 let Value::String(s) = v else {
583 return Ok(());
584 };
585 check_email(s)
586}
587
588fn check_email(s: &str) -> Result<(), Box<dyn Error>> {
590 if s.len() > 254 {
592 Err("more than 254 characters long")?
593 }
594
595 let Some(at) = s.rfind('@') else {
597 Err("missing @")?
598 };
599 let (local, domain) = (&s[..at], &s[at + 1..]);
600
601 if local.len() > 64 {
603 Err("local part more than 64 characters long")?
604 }
605
606 if local.len() > 1 && local.starts_with('"') && local.ends_with('"') {
607 let local = &local[1..local.len() - 1];
609 if local.contains(['\\', '"']) {
610 Err("backslash and quote not allowed within quoted local part")?
611 }
612 } else {
613 if local.starts_with('.') {
616 Err("starts with dot")?
617 }
618 if local.ends_with('.') {
619 Err("ends with dot")?
620 }
621
622 if local.contains("..") {
624 Err("consecutive dots")?
625 }
626
627 if let Some(ch) = local
629 .chars()
630 .find(|c| !(c.is_ascii_alphanumeric() || ".!#$%&'*+-/=?^_`{|}~".contains(*c)))
631 {
632 Err(format!("invalid character {ch:?}"))?
633 }
634 }
635
636 if domain.starts_with('[') && domain.ends_with(']') {
638 let s = &domain[1..domain.len() - 1];
639 if let Some(s) = s.strip_prefix("IPv6:") {
640 if let Err(e) = s.parse::<Ipv6Addr>() {
641 Err(format!("invalid ipv6 address: {e}"))?
642 }
643 return Ok(());
644 }
645 if let Err(e) = s.parse::<Ipv4Addr>() {
646 Err(format!("invalid ipv4 address: {e}"))?
647 }
648 return Ok(());
649 }
650
651 if let Err(e) = check_hostname(domain) {
653 Err(format!("invalid domain: {e}"))?
654 }
655
656 Ok(())
657}
658
659fn validate_idn_email(v: &Value) -> Result<(), Box<dyn Error>> {
660 let Value::String(s) = v else {
661 return Ok(());
662 };
663
664 let Some(at) = s.rfind('@') else {
665 Err("missing @")?
666 };
667 let (local, domain) = (&s[..at], &s[at + 1..]);
668
669 let local = idna::domain_to_ascii_strict(local)?;
670 let domain = idna::domain_to_ascii_strict(domain)?;
671 if let Err(e) = check_idn_hostname(&domain) {
672 Err(format!("invalid domain: {e}"))?
673 }
674 check_email(&format!("{local}@{domain}"))
675}
676
677fn validate_json_pointer(v: &Value) -> Result<(), Box<dyn Error>> {
678 let Value::String(s) = v else {
679 return Ok(());
680 };
681 check_json_pointer(s)
682}
683
/// Checks a JSON pointer: either empty, or a sequence of `/`-prefixed tokens
/// in which every `~` is followed by `0` or `1`.
fn check_json_pointer(s: &str) -> Result<(), Box<dyn Error>> {
    let Some(tokens) = s.strip_prefix('/') else {
        return if s.is_empty() {
            Ok(())
        } else {
            Err("not starting with slash".into())
        };
    };

    for token in tokens.split('/') {
        let mut rest = token.chars();
        while let Some(ch) = rest.next() {
            match ch {
                // escape sequence: consume the character after the tilde as well
                '~' => {
                    if !matches!(rest.next(), Some('0' | '1')) {
                        return Err("~ must be followed by 0 or 1".into());
                    }
                }
                // tokens come from splitting on '/', so this arm is only a safeguard
                '/' => return Err("contains disallowed character".into()),
                _ => {}
            }
        }
    }
    Ok(())
}
706
707fn validate_relative_json_pointer(v: &Value) -> Result<(), Box<dyn Error>> {
709 let Value::String(s) = v else {
710 return Ok(());
711 };
712
713 let num_digits = s.chars().take_while(char::is_ascii_digit).count();
715 if num_digits == 0 {
716 Err("must start with non-negative integer")?;
717 }
718 if num_digits > 1 && s.starts_with('0') {
719 Err("starts with zero")?;
720 }
721 let s = &s[num_digits..];
722
723 if s == "#" {
725 return Ok(());
726 }
727 if let Err(e) = check_json_pointer(s) {
728 Err(format!("invalid json-pointer element: {e}"))?;
729 }
730 Ok(())
731}
732
733fn validate_uuid(v: &Value) -> Result<(), Box<dyn Error>> {
735 let Value::String(s) = v else {
736 return Ok(());
737 };
738
739 static HEX_GROUPS: [usize; 5] = [8, 4, 4, 4, 12];
740 let mut i = 0;
741 for group in s.split('-') {
742 if i >= HEX_GROUPS.len() {
743 Err("more than 5 elements")?;
744 }
745 if group.len() != HEX_GROUPS[i] {
746 Err(format!(
747 "element {} must be {} characters long",
748 i + 1,
749 HEX_GROUPS[i]
750 ))?;
751 }
752 if let Some(ch) = group.chars().find(|c| !c.is_ascii_hexdigit()) {
753 Err(format!("non-hex character {ch:?}"))?;
754 }
755 i += 1;
756 }
757 if i != HEX_GROUPS.len() {
758 Err("must have 5 elements")?;
759 }
760 Ok(())
761}
762
763fn validate_uri(v: &Value) -> Result<(), Box<dyn Error>> {
764 let Value::String(s) = v else {
765 return Ok(());
766 };
767 if fluent_uri::UriRef::parse(s.as_str())?.scheme().is_none() {
768 Err("relative url")?;
769 };
770 Ok(())
771}
772
773fn validate_iri(v: &Value) -> Result<(), Box<dyn Error>> {
774 let Value::String(s) = v else {
775 return Ok(());
776 };
777 match Url::parse(s) {
778 Ok(_) => Ok(()),
779 Err(url::ParseError::RelativeUrlWithoutBase) => Err("relative url")?,
780 Err(e) => Err(e)?,
781 }
782}
783
784static TEMP_URL: Lazy<Url> = Lazy::new(|| Url::parse("http://temp.com").unwrap());
785
786fn parse_uri_reference(s: &str) -> Result<Url, Box<dyn Error>> {
787 if s.contains('\\') {
788 Err("contains \\\\")?;
789 }
790 Ok(TEMP_URL.join(s)?)
791}
792
793fn validate_uri_reference(v: &Value) -> Result<(), Box<dyn Error>> {
794 let Value::String(s) = v else {
795 return Ok(());
796 };
797 fluent_uri::UriRef::parse(s.as_str())?;
798 Ok(())
799}
800
801fn validate_iri_reference(v: &Value) -> Result<(), Box<dyn Error>> {
802 let Value::String(s) = v else {
803 return Ok(());
804 };
805 parse_uri_reference(s)?;
806 Ok(())
807}
808
809fn validate_uri_template(v: &Value) -> Result<(), Box<dyn Error>> {
810 let Value::String(s) = v else {
811 return Ok(());
812 };
813
814 let url = parse_uri_reference(s)?;
815
816 let path = url.path();
817 let path = percent_decode_str(path).decode_utf8()?;
819
820 for part in path.as_ref().split('/') {
822 let mut want = true;
823 for got in part
824 .chars()
825 .filter(|c| matches!(c, '{' | '}'))
826 .map(|c| c == '{')
827 {
828 if got != want {
829 Err("nested curly braces")?;
830 }
831 want = !want;
832 }
833 if !want {
834 Err("no matching closing brace")?
835 }
836 }
837 Ok(())
838}