1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
use derivative::Derivative;
use mdurl::AsciiSet;
use once_cell::sync::Lazy;
use regex::Regex;
use crate::Node;
use crate::common::TypeKey;
use crate::common::ruler::Ruler;
use crate::common::sourcemap::SourcePos;
use crate::parser::block::{self, BlockParser};
use crate::parser::inline::{self, InlineParser};
use crate::parser::extset::MarkdownItExtSet;
use crate::parser::core::Root;
use crate::parser::core::*;

/// Function-pointer type of the rules stored in `MarkdownIt`'s ruler.
/// `MarkdownIt::parse` calls each one with the AST root node (mutably) and
/// the parser instance itself.
type RuleFn = fn (&mut Node, &MarkdownIt);

#[derive(Derivative)]
#[derivative(Debug)]
/// Main parser struct, created once and reused for parsing multiple documents.
///
/// `Debug` is derived through `derivative` so that the function-pointer
/// fields below can be left out of the debug output.
pub struct MarkdownIt {
    /// Block-level tokenizer.
    pub block: BlockParser,

    /// Inline-level tokenizer.
    pub inline: InlineParser,

    #[doc(hidden)]
    #[derivative(Debug="ignore")]
    // Callback that decides whether a link URL is acceptable (`true` = accept).
    // The `Default` impl installs this module's `validate_link` function.
    // TODO: move this somewhere
    pub validate_link: fn (&str) -> bool,

    #[doc(hidden)]
    #[derivative(Debug="ignore")]
    // Callback that produces the normalized form of a link URL.
    // The `Default` impl installs this module's `normalize_link` function.
    // TODO: move this somewhere
    pub normalize_link: fn (&str) -> String,

    #[doc(hidden)]
    #[derivative(Debug="ignore")]
    // Callback that produces the normalized form of a link's text.
    // The `Default` impl installs this module's `normalize_link_text` function.
    // TODO: move this somewhere
    pub normalize_link_text: fn (&str) -> String,

    /// Storage for custom data used in plugins.
    pub ext: MarkdownItExtSet,

    /// Maximum depth of the generated AST, exists to prevent recursion
    /// (if markdown source reaches this depth, deeply nested structures
    /// will be parsed as plain text).
    ///
    /// The `Default` impl sets this to 100.
    pub max_nesting: u32,

    /// Rules executed by `parse`, keyed by the `TypeKey` of the rule type.
    ruler: Ruler<TypeKey, RuleFn>,
}

////////////////////////////////////////////////////////////////////////////////
// This validator may prohibit more than is strictly needed to prevent XSS.
// That is a deliberate tradeoff: it keeps the code simple and makes the parser
// secure by default.
//
// If you need a different setup, override the validator function as you wish,
// or replace it with a dummy function and use an external sanitizer.
//
/// Lazily compiled pattern for URL schemes that are rejected by default.
/// Case-insensitive (`(?i)`) and anchored at the start of the string.
static BAD_PROTO_RE: Lazy<Regex> = Lazy::new(|| {
    let pattern = r#"(?i)^(vbscript|javascript|file|data):"#;
    Regex::new(pattern).unwrap()
});

/// Lazily compiled pattern for the `data:` URLs that remain allowed: those
/// declaring a gif, png, jpeg or webp image type. Case-insensitive (`(?i)`)
/// and anchored at the start of the string.
static GOOD_DATA_RE: Lazy<Regex> = Lazy::new(|| {
    let pattern = r#"(?i)^data:image/(gif|png|jpeg|webp);"#;
    Regex::new(pattern).unwrap()
});

/// Default link validator.
///
/// Returns `true` when `str` may be used as a link target: either it does not
/// start with a scheme matched by `BAD_PROTO_RE`, or it is a `data:` URL
/// matched by `GOOD_DATA_RE`. Leading and trailing whitespace is ignored when
/// testing the scheme.
fn validate_link(str: &str) -> bool {
    // Both patterns are anchored at the start of the string, so without
    // trimming a value such as " javascript:alert(1)" would slip past the
    // scheme check even though URL consumers discard that whitespace.
    let url = str.trim();
    !BAD_PROTO_RE.is_match(url) || GOOD_DATA_RE.is_match(url)
}

/// Default link normalizer: runs `str` through `mdurl::encode` and returns
/// the resulting owned string.
fn normalize_link(str: &str) -> String {
    // Character set passed to the encoder.
    // NOTE(review): presumably the characters left unescaped — confirm against mdurl docs.
    const ENCODER_SET: AsciiSet = AsciiSet::from(r#";/?:@&=+$,-_.!~*'()#"#);

    let encoded = mdurl::encode(str, ENCODER_SET, true);
    encoded
}

/// Default link-text normalizer: returns an owned copy of `str`, unchanged.
fn normalize_link_text(str: &str) -> String {
    String::from(str)
}

impl MarkdownIt {
    /// Creates a parser using the `Default` configuration.
    pub fn new() -> Self {
        Default::default()
    }

    /// Parses `src` and returns the root node of the resulting AST.
    ///
    /// A `Root` node owning a copy of the source is created with a source map
    /// spanning the whole input, then every rule held by the ruler is applied
    /// to it in turn.
    pub fn parse(&self, src: &str) -> Node {
        let source_len = src.len();
        let mut root = Node::new(Root::new(String::from(src)));
        root.srcmap = Some(SourcePos::new(0, source_len));

        for core_rule in self.ruler.iter() {
            core_rule(&mut root, self);
            // Rules may rewrite the tree, but must leave a `Root` at the top.
            debug_assert!(root.is::<Root>(), "root node of the AST must always be Root");
        }

        root
    }

    /// Adds rule `T` to the ruler under `T`'s type key, with `T::run` as the
    /// function to call, and returns a `RuleBuilder` wrapping the new entry.
    pub fn add_rule<T>(&mut self) -> RuleBuilder<RuleFn>
    where
        T: CoreRule,
    {
        let key = TypeKey::of::<T>();
        RuleBuilder::new(self.ruler.add(key, T::run))
    }

    /// Reports whether the ruler contains an entry for `T`'s type key.
    pub fn has_rule<T>(&mut self) -> bool
    where
        T: CoreRule,
    {
        let key = TypeKey::of::<T>();
        self.ruler.contains(key)
    }

    /// Removes from the ruler whatever is registered under `T`'s type key.
    pub fn remove_rule<T>(&mut self)
    where
        T: CoreRule,
    {
        let key = TypeKey::of::<T>();
        self.ruler.remove(key);
    }
}

impl Default for MarkdownIt {
    /// Builds a parser with fresh block/inline parsers, this module's default
    /// link callbacks, an empty extension set and ruler, and a nesting limit
    /// of 100; the instance is then handed to `block::builtin::add` and
    /// `inline::builtin::add` before being returned.
    fn default() -> Self {
        const DEFAULT_MAX_NESTING: u32 = 100;

        let mut parser = Self {
            block: BlockParser::new(),
            inline: InlineParser::new(),
            validate_link,
            normalize_link,
            normalize_link_text,
            ext: MarkdownItExtSet::new(),
            max_nesting: DEFAULT_MAX_NESTING,
            ruler: Ruler::new(),
        };

        // Block builtins first, then inline builtins.
        block::builtin::add(&mut parser);
        inline::builtin::add(&mut parser);

        parser
    }
}