gfm_autolinks/
utils.rs

1use once_cell::sync::Lazy;
2use std::str;
3use unicode_categories::UnicodeCategories;
4
5use crate::ctype::isalpha;
6
7pub fn check_domain(data: &[u8], allow_short: bool) -> Option<usize> {
8    let mut np = 0;
9    let mut uscore1 = 0;
10    let mut uscore2 = 0;
11
12    // don't allow empty domain names
13    if data.is_empty() {
14        return None;
15    }
16
17    for (i, c) in unsafe { str::from_utf8_unchecked(data) }.char_indices() {
18        if c == '_' {
19            uscore2 += 1;
20        } else if c == '.' {
21            uscore1 = uscore2;
22            uscore2 = 0;
23            np += 1;
24        } else if !is_valid_hostchar(c) && c != '-' {
25            if uscore1 == 0 && uscore2 == 0 && (allow_short || np > 0) {
26                return Some(i);
27            }
28            return None;
29        }
30    }
31
32    if (uscore1 > 0 || uscore2 > 0) && np <= 10 {
33        None
34    } else if allow_short || np > 0 {
35        Some(data.len())
36    } else {
37        None
38    }
39}
40
41fn is_valid_hostchar(ch: char) -> bool {
42    !ch.is_whitespace() && !ch.is_punctuation()
43}
44
45/// Ensure URL is correctly terminated by a delimiter.
46pub fn autolink_delim(data: &[u8], mut link_end: usize) -> usize {
47    static LINK_END_ASSORTMENT: Lazy<[bool; 256]> = Lazy::new(|| {
48        let mut sc = [false; 256];
49        for c in &[
50            b'?', b'!', b'.', b',', b':', b'*', b'_', b'~', b'\'', b'"', b'[', b']',
51        ] {
52            sc[*c as usize] = true;
53        }
54        sc
55    });
56
57    for (i, &b) in data.iter().enumerate().take(link_end) {
58        if b == b'<' {
59            link_end = i;
60            break;
61        }
62    }
63
64    while link_end > 0 {
65        let cclose = data[link_end - 1];
66
67        let copen = if cclose == b')' { Some(b'(') } else { None };
68
69        if LINK_END_ASSORTMENT[cclose as usize] {
70            link_end -= 1;
71        } else if cclose == b';' {
72            let mut new_end = link_end - 2;
73
74            while new_end > 0 && isalpha(data[new_end]) {
75                new_end -= 1;
76            }
77
78            if new_end < link_end - 2 && data[new_end] == b'&' {
79                link_end = new_end;
80            } else {
81                link_end -= 1;
82            }
83        } else if let Some(copen) = copen {
84            let mut opening = 0;
85            let mut closing = 0;
86            for &b in data.iter().take(link_end) {
87                if b == copen {
88                    opening += 1;
89                } else if b == cclose {
90                    closing += 1;
91                }
92            }
93
94            if closing <= opening {
95                break;
96            }
97
98            link_end -= 1;
99        } else {
100            break;
101        }
102    }
103
104    link_end
105}