1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
//! Extract links and fragments from html documents
pub(crate) mod html5ever;
pub(crate) mod html5gum;
mod srcset;

use linkify::{LinkFinder, LinkKind};

/// Check if the given URL is an email link.
///
/// This operates on the raw URL strings, not the linkified version because it
/// gets used in the HTML extractors, which parse the HTML attributes directly
/// and return the raw strings.
///
/// Note that `LinkFinder::links()` is lazy and traverses the input in `O(n)`,
/// so there should be no big performance penalty for calling this function.
pub(crate) fn is_email_link(input: &str) -> bool {
    let mut findings = LinkFinder::new().kinds(&[LinkKind::Email]).links(input);
    let email = match findings.next() {
        None => return false,
        Some(email) => email.as_str(),
    };

    // Email needs to match the full string.
    // Strip the "mailto:" prefix if it exists.
    input.strip_prefix("mailto:").unwrap_or(input) == email
}

/// Check if the given element is in the list of preformatted ("verbatim") tags.
///
/// These will be excluded from link checking by default.
// Including the <script> tag is debatable, but the alternative is to
// have a separate list of tags which need a separate config setting and that
// seems worse.
pub(crate) fn is_verbatim_elem(name: &str) -> bool {
    matches!(
        name,
        "code"
            | "kbd"
            | "listing"
            | "noscript"
            | "plaintext"
            | "pre"
            | "samp"
            | "script"
            | "textarea"
            | "var"
            | "xmp"
    )
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_is_email_link() {
        assert!(is_email_link("mailto:steve@apple.com"));
        assert!(!is_email_link("mailto:steve@apple.com in a sentence"));

        assert!(is_email_link("foo@example.org"));
        assert!(!is_email_link("foo@example.org in sentence"));
        assert!(!is_email_link("https://example.org"));
    }

    #[test]
    fn test_verbatim_matching() {
        assert!(is_verbatim_elem("pre"));
        assert!(is_verbatim_elem("code"));
        assert!(is_verbatim_elem("listing"));
        assert!(is_verbatim_elem("script"));
    }
}