markdown_that/plugins/html/
html_block.rs

1//! HTML block syntax from CommonMark
2//!
3//! <https://spec.commonmark.org/0.30/#html-blocks>
4
5use regex::Regex;
6use std::sync::LazyLock;
7
8use super::utils::blocks::*;
9use super::utils::regexps::*;
10use crate::parser::block::{BlockRule, BlockState};
11use crate::{MarkdownThat, Node, NodeValue, Renderer};
12
/// AST node produced by the HTML block rule in this module.
#[derive(Debug)]
pub struct HtmlBlock {
    /// Source text of the block; handed to the renderer through `text_raw`.
    pub content: String,
}
17
18impl NodeValue for HtmlBlock {
19    fn render(&self, _: &Node, fmt: &mut dyn Renderer) {
20        fmt.cr();
21        fmt.text_raw(&self.content);
22        fmt.cr();
23    }
24}
25
/// Registers the HTML block rule ([`HtmlBlockScanner`]) on the block parser of `md`.
pub fn add(md: &mut MarkdownThat) {
    md.block.add_rule::<HtmlBlockScanner>();
}
29
/// One kind of HTML block: how it starts, how it ends, and whether it may
/// interrupt a paragraph.
struct HTMLSequence {
    /// Tested against the first line of a candidate block.
    open: Regex,
    /// Tested against the first and each following line to find the block's end.
    close: Regex,
    /// When `false`, `HtmlBlockScanner::check` rejects the block.
    can_terminate_paragraph: bool,
}
35
36impl HTMLSequence {
37    pub fn new(open: Regex, close: Regex, can_terminate_paragraph: bool) -> Self {
38        Self {
39            open,
40            close,
41            can_terminate_paragraph,
42        }
43    }
44}
45
46// An array of opening and corresponding closing sequences for html tags,
47// last argument defines whether it can terminate a paragraph or not
48//
49static HTML_SEQUENCES: LazyLock<[HTMLSequence; 7]> = LazyLock::new(|| {
50    let block_names = HTML_BLOCKS.join("|");
51    let open_close_tag_re = HTML_OPEN_CLOSE_TAG_RE.as_str();
52
53    [
54        HTMLSequence::new(
55            Regex::new(r#"(?i)^<(script|pre|style|textarea)(\s|>|$)"#).unwrap(),
56            Regex::new(r#"(?i)</(script|pre|style|textarea)>"#).unwrap(),
57            true,
58        ),
59        HTMLSequence::new(
60            Regex::new(r#"^<!--"#).unwrap(),
61            Regex::new(r#"-->"#).unwrap(),
62            true,
63        ),
64        HTMLSequence::new(
65            Regex::new(r#"^<\?"#).unwrap(),
66            Regex::new(r#"\?>"#).unwrap(),
67            true,
68        ),
69        HTMLSequence::new(
70            Regex::new(r#"^<![A-Z]"#).unwrap(),
71            Regex::new(r#">"#).unwrap(),
72            true,
73        ),
74        HTMLSequence::new(
75            Regex::new(r#"^<!\[CDATA\["#).unwrap(),
76            Regex::new(r#"\]\]>"#).unwrap(),
77            true,
78        ),
79        HTMLSequence::new(
80            Regex::new(&format!("(?i)^</?({block_names})(\\s|/?>|$)")).unwrap(),
81            Regex::new(r#"^$"#).unwrap(),
82            true,
83        ),
84        HTMLSequence::new(
85            Regex::new(&format!("{open_close_tag_re}\\s*$")).unwrap(),
86            Regex::new(r#"^$"#).unwrap(),
87            false,
88        ),
89    ]
90});
91
/// Block rule that recognises HTML blocks; registered by [`add`].
#[doc(hidden)]
pub struct HtmlBlockScanner;
94
95impl HtmlBlockScanner {
96    fn get_sequence(state: &mut BlockState) -> Option<&'static HTMLSequence> {
97        if state.line_indent(state.line) >= state.md.max_indent {
98            return None;
99        }
100
101        let line_text = state.get_line(state.line);
102        let Some('<') = line_text.chars().next() else {
103            return None;
104        };
105
106        let mut sequence = None;
107        for seq in HTML_SEQUENCES.iter() {
108            if seq.open.is_match(line_text) {
109                sequence = Some(seq);
110                break;
111            }
112        }
113
114        sequence
115    }
116}
117
118impl BlockRule for HtmlBlockScanner {
119    fn check(state: &mut BlockState) -> Option<()> {
120        let sequence = Self::get_sequence(state)?;
121        if !sequence.can_terminate_paragraph {
122            return None;
123        }
124        Some(())
125    }
126
127    fn run(state: &mut BlockState) -> Option<(Node, usize)> {
128        let sequence = Self::get_sequence(state)?;
129
130        let line_text = state.get_line(state.line);
131        let start_line = state.line;
132        let mut next_line = state.line + 1;
133
134        // If we are here - we detected HTML block.
135        // Let's roll down till block end.
136        if !sequence.close.is_match(line_text) {
137            while next_line < state.line_max {
138                if state.line_indent(next_line) < 0 {
139                    break;
140                }
141
142                let line_text = state.get_line(next_line);
143
144                if sequence.close.is_match(line_text) {
145                    if !line_text.is_empty() {
146                        next_line += 1;
147                    }
148                    break;
149                }
150
151                next_line += 1;
152            }
153        }
154
155        let (content, _) = state.get_lines(start_line, next_line, state.blk_indent, true);
156        let node = Node::new(HtmlBlock { content });
157        Some((node, next_line - state.line))
158    }
159}