markdown_that/parser/
linkfmt.rs

1//! Link validator and formatter
2
3use regex::Regex;
4use std::fmt::Debug;
5use std::sync::LazyLock;
6
/// Validates link urls and converts them between machine-readable and
/// human-readable forms.
///
/// Implementors must also be `Debug + Send + Sync`.
pub trait LinkFormatter: Debug + Send + Sync {
    /// Validate the link url.
    ///
    /// Returns `Some(())` if the url is allowed and `None` if it is
    /// considered a security risk.
    fn validate_link(&self, url: &str) -> Option<()>;

    /// Encode the link url to a machine-readable format,
    /// which includes url-encoding, punycode, etc.
    fn normalize_link(&self, url: &str) -> String;

    /// Decode the link url to a human-readable format.
    fn normalize_link_text(&self, url: &str) -> String;
}
19
/// Default link validator and formatter for markdown-it.
///
/// This validator can prohibit more than is really needed to prevent XSS.
/// It's a tradeoff to keep the code simple and to be secure by default.
///
/// If you need a different setup, override the validator method as you wish,
/// or replace it with a placeholder function and use an external sanitizer.
#[derive(Default, Debug)]
pub struct MDLinkFormatter;
30
31impl MDLinkFormatter {
32    pub fn new() -> Self {
33        Self
34    }
35}
36
37impl LinkFormatter for MDLinkFormatter {
38    fn validate_link(&self, url: &str) -> Option<()> {
39        // url should be normalized at this point, and existing entities are decoded
40        static BAD_PROTO_RE: LazyLock<Regex> =
41            LazyLock::new(|| Regex::new(r#"(?i)^(vbscript|javascript|file|data):"#).unwrap());
42
43        static GOOD_DATA_RE: LazyLock<Regex> =
44            LazyLock::new(|| Regex::new(r#"(?i)^data:image/(gif|png|jpeg|webp);"#).unwrap());
45
46        if !BAD_PROTO_RE.is_match(url) || GOOD_DATA_RE.is_match(url) {
47            Some(())
48        } else {
49            None
50        }
51    }
52
53    fn normalize_link(&self, url: &str) -> String {
54        markdown_that_url::urlencode::encode(
55            url,
56            markdown_that_url::urlencode::ENCODE_DEFAULT_CHARS,
57            true,
58        )
59        .into()
60    }
61
62    fn normalize_link_text(&self, url: &str) -> String {
63        url.to_owned()
64    }
65}
66
#[cfg(test)]
mod tests {
    use super::{LinkFormatter, MDLinkFormatter};

    /// Returns `true` when the default formatter lets `url` through.
    fn accepts(url: &str) -> bool {
        MDLinkFormatter::new().validate_link(url).is_some()
    }

    #[test]
    fn should_allow_normal_urls() {
        for url in ["http://example.org", "HTTPS://example.org"] {
            assert!(accepts(url), "expected {url:?} to be allowed");
        }
    }

    #[test]
    fn should_allow_plain_text() {
        // A scheme name without the colon, or not at the very start, is harmless.
        for url in ["javascript", "/javascript:link"] {
            assert!(accepts(url), "expected {url:?} to be allowed");
        }
    }

    #[test]
    fn should_not_allow_some_protocols() {
        let rejected = [
            "javascript:alert(1)",
            "JAVASCRIPT:alert(1)",
            "vbscript:alert(1)",
            "VbScript:alert(1)",
            "file:///123",
        ];
        for url in rejected {
            assert!(!accepts(url), "expected {url:?} to be rejected");
        }
    }

    #[test]
    fn should_not_allow_data_url_except_whitelisted() {
        let gif =
            "data:image/gif;base64,R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7";
        let html = "data:text/html;base64,PHNjcmlwdD5hbGVydCgnWFNTJyk8L3NjcmlwdD4K";

        assert!(accepts(gif));
        assert!(!accepts(html));
    }
}