rumdl_lib/utils/
mkdocs_html_markdown.rs1use regex::Regex;
12use std::sync::LazyLock;
13
14static MARKDOWN_HTML_OPEN: LazyLock<Regex> = LazyLock::new(|| {
21 Regex::new(
22 r#"(?i)^(\s*)<(div|section|article|aside|details|figure|footer|header|main|nav)\b[^>]*\bmarkdown\b[^>]*>"#,
23 )
24 .unwrap()
25});
26
27fn is_markdown_html_start(line: &str) -> bool {
29 MARKDOWN_HTML_OPEN.is_match(line)
30}
31
32fn get_tag_name(line: &str) -> Option<String> {
34 MARKDOWN_HTML_OPEN
35 .captures(line)
36 .map(|caps| caps.get(2).map(|m| m.as_str().to_lowercase()).unwrap_or_default())
37}
38
39#[derive(Debug, Default)]
41pub struct MarkdownHtmlTracker {
42 tag_stack: Vec<(String, usize)>,
44 depth: usize,
46}
47
48impl MarkdownHtmlTracker {
49 pub fn new() -> Self {
50 Self::default()
51 }
52
53 pub fn process_line(&mut self, line: &str) -> bool {
58 let trimmed = line.trim();
59
60 if is_markdown_html_start(line) {
62 if let Some(tag) = get_tag_name(line) {
63 self.depth += 1;
64 self.tag_stack.push((tag.clone(), self.depth));
65
66 if self.count_closes(line, &tag) > 0 {
68 self.depth -= 1;
69 self.tag_stack.pop();
70 }
71 }
72 return true;
73 }
74
75 if !self.tag_stack.is_empty() {
77 for (tag, _) in self.tag_stack.clone() {
79 let opens = self.count_opens(trimmed, &tag);
80 let closes = self.count_closes(trimmed, &tag);
81
82 self.depth += opens;
83
84 for _ in 0..closes {
85 if self.depth > 0 {
86 self.depth -= 1;
87 }
88 }
89 }
90
91 while let Some((_, start_depth)) = self.tag_stack.last() {
93 if self.depth < *start_depth {
94 self.tag_stack.pop();
95 } else {
96 break;
97 }
98 }
99
100 return true;
103 }
104
105 false
106 }
107
108 fn count_opens(&self, line: &str, tag: &str) -> usize {
110 let line_lower = line.to_lowercase();
111 let open_pattern = format!("<{}", tag.to_lowercase());
112 let mut count = 0;
113 let mut search_start = 0;
114
115 while let Some(pos) = line_lower[search_start..].find(&open_pattern) {
116 let abs_pos = search_start + pos;
117 let after_tag = abs_pos + open_pattern.len();
118
119 if after_tag >= line_lower.len()
121 || line_lower[after_tag..].starts_with(|c: char| c.is_whitespace() || c == '>' || c == '/')
122 {
123 count += 1;
124 }
125 search_start = after_tag;
126 }
127 count
128 }
129
130 fn count_closes(&self, line: &str, tag: &str) -> usize {
132 let line_lower = line.to_lowercase();
133 let close_pattern = format!("</{}", tag.to_lowercase());
134 let mut count = 0;
135 let mut search_start = 0;
136
137 while let Some(pos) = line_lower[search_start..].find(&close_pattern) {
138 let abs_pos = search_start + pos;
139 let after_tag = abs_pos + close_pattern.len();
140
141 if let Some(rest) = line_lower.get(after_tag..)
143 && rest.trim_start().starts_with('>')
144 {
145 count += 1;
146 }
147 search_start = after_tag;
148 }
149 count
150 }
151
152 pub fn is_inside(&self) -> bool {
154 !self.tag_stack.is_empty()
155 }
156
157 pub fn reset(&mut self) {
159 self.tag_stack.clear();
160 self.depth = 0;
161 }
162}
163
#[cfg(test)]
mod tests {
    use super::*;

    /// The opener regex accepts block-level tags carrying a `markdown`
    /// attribute and rejects everything else.
    #[test]
    fn test_markdown_html_detection() {
        let openers = [
            // Attribute forms
            "<div markdown>",
            "<div class=\"grid cards\" markdown>",
            "<div markdown=\"1\">",
            "<div markdown=\"block\">",
            // Attribute order
            "<div markdown class=\"test\">",
            "<div id=\"foo\" markdown>",
            // Case-insensitivity
            "<DIV markdown>",
            "<Div Markdown>",
            // Leading whitespace
            " <div markdown>",
            " <div class=\"grid\" markdown>",
            // Other block-level tags
            "<section markdown>",
            "<article markdown>",
            "<details markdown>",
        ];
        for line in openers {
            assert!(is_markdown_html_start(line), "expected an opener: {line:?}");
        }

        let non_openers = [
            "<div class=\"test\">",
            "<span markdown>",
            "text with markdown word",
            "<div>markdown</div>",
        ];
        for line in non_openers {
            assert!(!is_markdown_html_start(line), "unexpected opener: {line:?}");
        }
    }

    /// A block spans from its opener up to and including its closing tag.
    #[test]
    fn test_tracker_basic() {
        let mut tracker = MarkdownHtmlTracker::new();

        assert!(!tracker.is_inside());

        assert!(tracker.process_line("<div class=\"grid cards\" markdown>"));
        assert!(tracker.is_inside());

        assert!(tracker.process_line("- Content here"));
        assert!(tracker.is_inside());

        assert!(tracker.process_line(" ---"));
        assert!(tracker.is_inside());

        tracker.process_line("</div>");
        assert!(!tracker.is_inside());
    }

    /// Lines outside of any block are reported as such and leave the tracker
    /// untouched.
    #[test]
    fn test_lines_outside_blocks_are_not_tracked() {
        let mut tracker = MarkdownHtmlTracker::new();

        assert!(!tracker.process_line("plain paragraph"));
        assert!(!tracker.process_line("<div class=\"plain\">"));
        assert!(!tracker.process_line("</div>"));
        assert!(!tracker.is_inside());

        assert!(tracker.process_line("<div markdown>"));
        tracker.process_line("</div>");
        assert!(!tracker.process_line("after the block"));
    }

    /// A balanced nested element on one line does not close the outer block.
    #[test]
    fn test_tracker_nested() {
        let mut tracker = MarkdownHtmlTracker::new();

        tracker.process_line("<div markdown>");
        assert!(tracker.is_inside());

        tracker.process_line("<div>nested</div>");
        assert!(tracker.is_inside());

        tracker.process_line("</div>");
        assert!(!tracker.is_inside());
    }

    /// The Material for MkDocs "grid cards" layout is tracked line by line.
    #[test]
    fn test_grid_cards_pattern() {
        let content = [
            "<div class=\"grid cards\" markdown>",
            "",
            "- :zap:{ .lg .middle } **Built for speed**",
            "",
            "    ---",
            "",
            "    Written in Rust.",
            "",
            "</div>",
        ]
        .join("\n");

        let mut tracker = MarkdownHtmlTracker::new();
        let mut inside_lines = Vec::new();

        for (i, line) in content.lines().enumerate() {
            if tracker.process_line(line) {
                inside_lines.push(i);
            }
        }

        assert!(inside_lines.contains(&0));
        assert!(inside_lines.contains(&2));
        assert!(inside_lines.contains(&4));
        assert!(inside_lines.contains(&6));
        assert!(!tracker.is_inside());
    }

    /// A block opened and closed on the same line is reported but not kept open.
    #[test]
    fn test_same_line_open_close() {
        let mut tracker = MarkdownHtmlTracker::new();

        let result = tracker.process_line("<div markdown>content</div>");
        assert!(result);
        assert!(!tracker.is_inside());
    }

    /// Independent blocks that follow each other are tracked separately.
    #[test]
    fn test_multiple_sequential_blocks() {
        let mut tracker = MarkdownHtmlTracker::new();

        assert!(tracker.process_line("<div markdown>"));
        assert!(tracker.is_inside());
        assert!(tracker.process_line("Content 1"));
        tracker.process_line("</div>");
        assert!(!tracker.is_inside());

        assert!(tracker.process_line("<section markdown>"));
        assert!(tracker.is_inside());
        assert!(tracker.process_line("Content 2"));
        tracker.process_line("</section>");
        assert!(!tracker.is_inside());
    }

    /// A plain nested element with the same tag needs its own closing tag.
    #[test]
    fn test_deeply_nested_same_tag() {
        let mut tracker = MarkdownHtmlTracker::new();

        assert!(tracker.process_line("<div markdown>"));
        assert!(tracker.is_inside());

        assert!(tracker.process_line("<div class=\"inner\">"));
        assert!(tracker.is_inside());

        assert!(tracker.process_line("</div>"));
        assert!(tracker.is_inside());

        tracker.process_line("</div>");
        assert!(!tracker.is_inside());
    }

    /// Nested elements with a different tag do not affect the outer block.
    #[test]
    fn test_deeply_nested_different_tags() {
        let mut tracker = MarkdownHtmlTracker::new();

        assert!(tracker.process_line("<article markdown>"));
        assert!(tracker.is_inside());

        assert!(tracker.process_line("<section>"));
        assert!(tracker.is_inside());

        assert!(tracker.process_line("</section>"));
        assert!(tracker.is_inside());

        tracker.process_line("</article>");
        assert!(!tracker.is_inside());
    }

    /// Several closing tags on one line are all taken into account.
    #[test]
    fn test_multiple_closes_same_line() {
        let mut tracker = MarkdownHtmlTracker::new();

        assert!(tracker.process_line("<div markdown>"));
        assert!(tracker.process_line("<div>inner</div></div>"));
        assert!(!tracker.is_inside());
    }

    /// Opening tags only count when the tag name ends at a boundary.
    #[test]
    fn test_count_opens_boundary_check() {
        let tracker = MarkdownHtmlTracker::new();

        assert_eq!(tracker.count_opens("<div>", "div"), 1);
        assert_eq!(tracker.count_opens("<div class='x'>", "div"), 1);
        assert_eq!(tracker.count_opens("<DIV>", "div"), 1);
        assert_eq!(tracker.count_opens("<div/><div>", "div"), 2);
        // A tag whose attributes continue on the next line still opens.
        assert_eq!(tracker.count_opens("<div", "div"), 1);

        // Longer tag names and closing tags are not opening `div`s.
        assert_eq!(tracker.count_opens("<divider>", "div"), 0);
        assert_eq!(tracker.count_opens("<dividend>", "div"), 0);
        assert_eq!(tracker.count_opens("</div>", "div"), 0);
    }

    /// Closing tags are matched case-insensitively and tolerate whitespace
    /// before the `>`.
    #[test]
    fn test_count_closes_variations() {
        let tracker = MarkdownHtmlTracker::new();

        assert_eq!(tracker.count_closes("</div>", "div"), 1);
        assert_eq!(tracker.count_closes("</DIV>", "div"), 1);
        assert_eq!(tracker.count_closes("</div >", "div"), 1);
        assert_eq!(tracker.count_closes("</div  >", "div"), 1);
        assert_eq!(tracker.count_closes("</div></div>", "div"), 2);
        assert_eq!(tracker.count_closes("text</div>more</div>end", "div"), 2);

        // Longer tag names and unterminated closing tags do not count.
        assert_eq!(tracker.count_closes("</divider>", "div"), 0);
        assert_eq!(tracker.count_closes("</div", "div"), 0);
    }

    /// `reset` returns the tracker to its initial, reusable state.
    #[test]
    fn test_reset() {
        let mut tracker = MarkdownHtmlTracker::new();

        tracker.process_line("<div markdown>");
        assert!(tracker.is_inside());

        tracker.reset();
        assert!(!tracker.is_inside());

        tracker.process_line("<section markdown>");
        assert!(tracker.is_inside());
    }
}