markdown_it/plugins/html/
html_block.rs

1//! HTML block syntax from CommonMark
2//!
3//! <https://spec.commonmark.org/0.30/#html-blocks>
4use once_cell::sync::Lazy;
5use regex::Regex;
6
7use super::utils::blocks::*;
8use super::utils::regexps::*;
9use crate::parser::block::{BlockRule, BlockState};
10use crate::{MarkdownIt, Node, NodeValue, Renderer};
11
12#[derive(Debug)]
13pub struct HtmlBlock {
14    pub content: String,
15}
16
17impl NodeValue for HtmlBlock {
18    fn render(&self, _: &Node, fmt: &mut dyn Renderer) {
19        fmt.cr();
20        fmt.text_raw(&self.content);
21        fmt.cr();
22    }
23}
24
25pub fn add(md: &mut MarkdownIt) {
26    md.block.add_rule::<HtmlBlockScanner>();
27}
28
29struct HTMLSequence {
30    open: Regex,
31    close: Regex,
32    can_terminate_paragraph: bool,
33}
34
35impl HTMLSequence {
36    pub fn new(open: Regex, close: Regex, can_terminate_paragraph: bool) -> Self {
37        Self { open, close, can_terminate_paragraph }
38    }
39}
40
41// An array of opening and corresponding closing sequences for html tags,
42// last argument defines whether it can terminate a paragraph or not
43//
44static HTML_SEQUENCES : Lazy<[HTMLSequence; 7]> = Lazy::new(|| {
45    let block_names = HTML_BLOCKS.join("|");
46    let open_close_tag_re = HTML_OPEN_CLOSE_TAG_RE.as_str();
47
48    [
49        HTMLSequence::new(
50            Regex::new(r#"(?i)^<(script|pre|style|textarea)(\s|>|$)"#).unwrap(),
51            Regex::new(r#"(?i)</(script|pre|style|textarea)>"#).unwrap(),
52            true
53        ),
54
55        HTMLSequence::new(
56            Regex::new(r#"^<!--"#).unwrap(),
57            Regex::new(r#"-->"#).unwrap(),
58            true
59        ),
60
61        HTMLSequence::new(
62            Regex::new(r#"^<\?"#).unwrap(),
63            Regex::new(r#"\?>"#).unwrap(),
64            true
65        ),
66
67        HTMLSequence::new(
68            Regex::new(r#"^<![A-Z]"#).unwrap(),
69            Regex::new(r#">"#).unwrap(),
70            true
71        ),
72
73        HTMLSequence::new(
74            Regex::new(r#"^<!\[CDATA\["#).unwrap(),
75            Regex::new(r#"\]\]>"#).unwrap(),
76            true
77        ),
78
79        HTMLSequence::new(
80            Regex::new(&format!("(?i)^</?({block_names})(\\s|/?>|$)")).unwrap(),
81            Regex::new(r#"^$"#).unwrap(),
82            true
83        ),
84
85        HTMLSequence::new(
86            Regex::new(&format!("{open_close_tag_re}\\s*$")).unwrap(),
87            Regex::new(r#"^$"#).unwrap(),
88            false
89        ),
90    ]
91});
92
93#[doc(hidden)]
94pub struct HtmlBlockScanner;
95
96impl HtmlBlockScanner {
97    fn get_sequence(state: &mut BlockState) -> Option<&'static HTMLSequence> {
98
99        if state.line_indent(state.line) >= state.md.max_indent { return None; }
100
101        let line_text = state.get_line(state.line);
102        let Some('<') = line_text.chars().next() else { return None; };
103
104        let mut sequence = None;
105        for seq in HTML_SEQUENCES.iter() {
106            if seq.open.is_match(line_text) {
107                sequence = Some(seq);
108                break;
109            }
110        }
111
112        sequence
113    }
114}
115
116impl BlockRule for HtmlBlockScanner {
117    fn check(state: &mut BlockState) -> Option<()> {
118        let sequence = Self::get_sequence(state)?;
119        if !sequence.can_terminate_paragraph { return None; }
120        Some(())
121    }
122
123    fn run(state: &mut BlockState) -> Option<(Node, usize)> {
124        let sequence = Self::get_sequence(state)?;
125
126        let line_text = state.get_line(state.line);
127        let start_line = state.line;
128        let mut next_line = state.line + 1;
129
130        // If we are here - we detected HTML block.
131        // Let's roll down till block end.
132        if !sequence.close.is_match(line_text) {
133            while next_line < state.line_max {
134                if state.line_indent(next_line) < 0 { break; }
135
136                let line_text = state.get_line(next_line);
137
138                if sequence.close.is_match(line_text) {
139                    if !line_text.is_empty() { next_line += 1; }
140                    break;
141                }
142
143                next_line += 1;
144            }
145        }
146
147        let (content, _) = state.get_lines(start_line, next_line, state.blk_indent, true);
148        let node = Node::new(HtmlBlock { content });
149        Some((node, next_line - state.line))
150    }
151}