1use once_cell::sync::Lazy;
2use std::str;
3use unicode_categories::UnicodeCategories;
4
5use crate::ctype::isalpha;
6
7pub fn check_domain(data: &[u8], allow_short: bool) -> Option<usize> {
8 let mut np = 0;
9 let mut uscore1 = 0;
10 let mut uscore2 = 0;
11
12 if data.is_empty() {
14 return None;
15 }
16
17 for (i, c) in unsafe { str::from_utf8_unchecked(data) }.char_indices() {
18 if c == '_' {
19 uscore2 += 1;
20 } else if c == '.' {
21 uscore1 = uscore2;
22 uscore2 = 0;
23 np += 1;
24 } else if !is_valid_hostchar(c) && c != '-' {
25 if uscore1 == 0 && uscore2 == 0 && (allow_short || np > 0) {
26 return Some(i);
27 }
28 return None;
29 }
30 }
31
32 if (uscore1 > 0 || uscore2 > 0) && np <= 10 {
33 None
34 } else if allow_short || np > 0 {
35 Some(data.len())
36 } else {
37 None
38 }
39}
40
41fn is_valid_hostchar(ch: char) -> bool {
42 !ch.is_whitespace() && !ch.is_punctuation()
43}
44
45pub fn autolink_delim(data: &[u8], mut link_end: usize) -> usize {
47 static LINK_END_ASSORTMENT: Lazy<[bool; 256]> = Lazy::new(|| {
48 let mut sc = [false; 256];
49 for c in &[
50 b'?', b'!', b'.', b',', b':', b'*', b'_', b'~', b'\'', b'"', b'[', b']',
51 ] {
52 sc[*c as usize] = true;
53 }
54 sc
55 });
56
57 for (i, &b) in data.iter().enumerate().take(link_end) {
58 if b == b'<' {
59 link_end = i;
60 break;
61 }
62 }
63
64 while link_end > 0 {
65 let cclose = data[link_end - 1];
66
67 let copen = if cclose == b')' { Some(b'(') } else { None };
68
69 if LINK_END_ASSORTMENT[cclose as usize] {
70 link_end -= 1;
71 } else if cclose == b';' {
72 let mut new_end = link_end - 2;
73
74 while new_end > 0 && isalpha(data[new_end]) {
75 new_end -= 1;
76 }
77
78 if new_end < link_end - 2 && data[new_end] == b'&' {
79 link_end = new_end;
80 } else {
81 link_end -= 1;
82 }
83 } else if let Some(copen) = copen {
84 let mut opening = 0;
85 let mut closing = 0;
86 for &b in data.iter().take(link_end) {
87 if b == copen {
88 opening += 1;
89 } else if b == cclose {
90 closing += 1;
91 }
92 }
93
94 if closing <= opening {
95 break;
96 }
97
98 link_end -= 1;
99 } else {
100 break;
101 }
102 }
103
104 link_end
105}