markdown_that/plugins/html/
html_block.rs1use regex::Regex;
6use std::sync::LazyLock;
7
8use super::utils::blocks::*;
9use super::utils::regexps::*;
10use crate::parser::block::{BlockRule, BlockState};
11use crate::{MarkdownThat, Node, NodeValue, Renderer};
12
/// Node value that carries one block of HTML source text.
///
/// When rendered, `content` is handed to the renderer through `text_raw`.
#[derive(Debug)]
pub struct HtmlBlock {
    /// Text of the block as collected by the block rule.
    pub content: String,
}
17
18impl NodeValue for HtmlBlock {
19 fn render(&self, _: &Node, fmt: &mut dyn Renderer) {
20 fmt.cr();
21 fmt.text_raw(&self.content);
22 fmt.cr();
23 }
24}
25
26pub fn add(md: &mut MarkdownThat) {
27 md.block.add_rule::<HtmlBlockScanner>();
28}
29
30struct HTMLSequence {
31 open: Regex,
32 close: Regex,
33 can_terminate_paragraph: bool,
34}
35
36impl HTMLSequence {
37 pub fn new(open: Regex, close: Regex, can_terminate_paragraph: bool) -> Self {
38 Self {
39 open,
40 close,
41 can_terminate_paragraph,
42 }
43 }
44}
45
46static HTML_SEQUENCES: LazyLock<[HTMLSequence; 7]> = LazyLock::new(|| {
50 let block_names = HTML_BLOCKS.join("|");
51 let open_close_tag_re = HTML_OPEN_CLOSE_TAG_RE.as_str();
52
53 [
54 HTMLSequence::new(
55 Regex::new(r#"(?i)^<(script|pre|style|textarea)(\s|>|$)"#).unwrap(),
56 Regex::new(r#"(?i)</(script|pre|style|textarea)>"#).unwrap(),
57 true,
58 ),
59 HTMLSequence::new(
60 Regex::new(r#"^<!--"#).unwrap(),
61 Regex::new(r#"-->"#).unwrap(),
62 true,
63 ),
64 HTMLSequence::new(
65 Regex::new(r#"^<\?"#).unwrap(),
66 Regex::new(r#"\?>"#).unwrap(),
67 true,
68 ),
69 HTMLSequence::new(
70 Regex::new(r#"^<![A-Z]"#).unwrap(),
71 Regex::new(r#">"#).unwrap(),
72 true,
73 ),
74 HTMLSequence::new(
75 Regex::new(r#"^<!\[CDATA\["#).unwrap(),
76 Regex::new(r#"\]\]>"#).unwrap(),
77 true,
78 ),
79 HTMLSequence::new(
80 Regex::new(&format!("(?i)^</?({block_names})(\\s|/?>|$)")).unwrap(),
81 Regex::new(r#"^$"#).unwrap(),
82 true,
83 ),
84 HTMLSequence::new(
85 Regex::new(&format!("{open_close_tag_re}\\s*$")).unwrap(),
86 Regex::new(r#"^$"#).unwrap(),
87 false,
88 ),
89 ]
90});
91
/// Block rule type registered by [`add`]; the scanning logic lives in its `BlockRule` impl.
#[doc(hidden)]
pub struct HtmlBlockScanner;
94
95impl HtmlBlockScanner {
96 fn get_sequence(state: &mut BlockState) -> Option<&'static HTMLSequence> {
97 if state.line_indent(state.line) >= state.md.max_indent {
98 return None;
99 }
100
101 let line_text = state.get_line(state.line);
102 let Some('<') = line_text.chars().next() else {
103 return None;
104 };
105
106 let mut sequence = None;
107 for seq in HTML_SEQUENCES.iter() {
108 if seq.open.is_match(line_text) {
109 sequence = Some(seq);
110 break;
111 }
112 }
113
114 sequence
115 }
116}
117
118impl BlockRule for HtmlBlockScanner {
119 fn check(state: &mut BlockState) -> Option<()> {
120 let sequence = Self::get_sequence(state)?;
121 if !sequence.can_terminate_paragraph {
122 return None;
123 }
124 Some(())
125 }
126
127 fn run(state: &mut BlockState) -> Option<(Node, usize)> {
128 let sequence = Self::get_sequence(state)?;
129
130 let line_text = state.get_line(state.line);
131 let start_line = state.line;
132 let mut next_line = state.line + 1;
133
134 if !sequence.close.is_match(line_text) {
137 while next_line < state.line_max {
138 if state.line_indent(next_line) < 0 {
139 break;
140 }
141
142 let line_text = state.get_line(next_line);
143
144 if sequence.close.is_match(line_text) {
145 if !line_text.is_empty() {
146 next_line += 1;
147 }
148 break;
149 }
150
151 next_line += 1;
152 }
153 }
154
155 let (content, _) = state.get_lines(start_line, next_line, state.blk_indent, true);
156 let node = Node::new(HtmlBlock { content });
157 Some((node, next_line - state.line))
158 }
159}