markdown_it/parser/
linkfmt.rs

1//! Link validator and formatter
2
3use once_cell::sync::Lazy;
4use regex::Regex;
5use std::fmt::Debug;
6
/// Policy object that decides which link urls are acceptable and how
/// they are rewritten for output.
pub trait LinkFormatter: Debug + Send + Sync {
    /// Check whether `url` may be used as a link target.
    ///
    /// Returns `Some(())` when the url is allowed and `None` when it is
    /// considered a security risk.
    fn validate_link(&self, url: &str) -> Option<()>;

    /// Encode `url` into a machine-readable format
    /// (url-encoding, punycode, etc.).
    fn normalize_link(&self, url: &str) -> String;

    /// Decode `url` into a human-readable format.
    fn normalize_link_text(&self, url: &str) -> String;
}
19
/// The stock link validator and formatter used by markdown-it.
///
/// To prevent XSS this validator may prohibit more urls than is strictly
/// needed; that is a tradeoff made to keep the code simple and to be
/// secure by default.
///
/// If you need a different setup, override the validator method as you
/// wish, or replace it with a dummy function and use an external sanitizer.
#[derive(Debug, Default)]
pub struct MDLinkFormatter;
30
31impl MDLinkFormatter {
32    pub fn new() -> Self {
33        Self
34    }
35}
36
37impl LinkFormatter for MDLinkFormatter {
38    fn validate_link(&self, url: &str) -> Option<()> {
39        // url should be normalized at this point, and existing entities are decoded
40        static BAD_PROTO_RE : Lazy<Regex> = Lazy::new(||
41            Regex::new(r#"(?i)^(vbscript|javascript|file|data):"#).unwrap()
42        );
43
44        static GOOD_DATA_RE : Lazy<Regex> = Lazy::new(||
45            Regex::new(r#"(?i)^data:image/(gif|png|jpeg|webp);"#).unwrap()
46        );
47
48        if !BAD_PROTO_RE.is_match(url) || GOOD_DATA_RE.is_match(url) {
49            Some(())
50        } else {
51            None
52        }
53    }
54
55    fn normalize_link(&self, url: &str) -> String {
56        mdurl::urlencode::encode(url, mdurl::urlencode::ENCODE_DEFAULT_CHARS, true).into()
57    }
58
59    fn normalize_link_text(&self, url: &str) -> String {
60        url.to_owned()
61    }
62}
63
64
#[cfg(test)]
mod tests {
    use super::{LinkFormatter, MDLinkFormatter};

    /// Assert that the default formatter accepts every url in `urls`.
    fn assert_allowed(urls: &[&str]) {
        let formatter = MDLinkFormatter::new();
        for url in urls {
            assert!(
                formatter.validate_link(url).is_some(),
                "expected url to be allowed: {}",
                url
            );
        }
    }

    /// Assert that the default formatter rejects every url in `urls`.
    fn assert_rejected(urls: &[&str]) {
        let formatter = MDLinkFormatter::new();
        for url in urls {
            assert!(
                formatter.validate_link(url).is_none(),
                "expected url to be rejected: {}",
                url
            );
        }
    }

    #[test]
    fn should_allow_normal_urls() {
        assert_allowed(&["http://example.org", "HTTPS://example.org"]);
    }

    #[test]
    fn should_allow_plain_text() {
        // A scheme name only counts when it opens the url and is followed by `:`.
        assert_allowed(&["javascript", "/javascript:link"]);
    }

    #[test]
    fn should_not_allow_some_protocols() {
        assert_rejected(&[
            "javascript:alert(1)",
            "JAVASCRIPT:alert(1)",
            "vbscript:alert(1)",
            "VbScript:alert(1)",
            "file:///123",
        ]);
    }

    #[test]
    fn should_not_allow_data_url_except_whitelisted() {
        assert_allowed(&[
            "data:image/gif;base64,R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7",
        ]);
        assert_rejected(&[
            "data:text/html;base64,PHNjcmlwdD5hbGVydCgnWFNTJyk8L3NjcmlwdD4K",
        ]);
    }
}