// rumdl_lib/utils/mkdocs_html_markdown.rs
use regex::Regex;
12use std::sync::LazyLock;
13
14static MARKDOWN_HTML_OPEN: LazyLock<Regex> = LazyLock::new(|| {
21 Regex::new(
22 r#"(?i)^(\s*)<(div|section|article|aside|details|figure|footer|header|main|nav)\b[^>]*\bmarkdown\b[^>]*>"#,
23 )
24 .unwrap()
25});
26
27fn is_markdown_html_start(line: &str) -> bool {
29 MARKDOWN_HTML_OPEN.is_match(line)
30}
31
32fn get_tag_name(line: &str) -> Option<String> {
34 MARKDOWN_HTML_OPEN
35 .captures(line)
36 .map(|caps| caps.get(2).map(|m| m.as_str().to_lowercase()).unwrap_or_default())
37}
38
39#[derive(Debug, Default)]
41pub struct MarkdownHtmlTracker {
42 tag_stack: Vec<(String, usize)>,
44 depth: usize,
46}
47
48impl MarkdownHtmlTracker {
49 pub fn new() -> Self {
50 Self::default()
51 }
52
53 pub fn process_line(&mut self, line: &str) -> bool {
58 let trimmed = line.trim();
59
60 if is_markdown_html_start(line) {
62 if let Some(tag) = get_tag_name(line) {
63 self.depth += 1;
64 self.tag_stack.push((tag.clone(), self.depth));
65
66 let line_lower = line.to_lowercase();
68 if Self::count_closes_lowered(&line_lower, &tag) > 0 {
69 self.depth -= 1;
70 self.tag_stack.pop();
71 }
72 }
73 return true;
74 }
75
76 if !self.tag_stack.is_empty() {
78 let line_lower = trimmed.to_lowercase();
80
81 let tags: Vec<String> = self.tag_stack.iter().map(|(tag, _)| tag.clone()).collect();
83 for tag in &tags {
84 let opens = Self::count_opens_lowered(&line_lower, tag);
85 let closes = Self::count_closes_lowered(&line_lower, tag);
86
87 self.depth += opens;
88
89 for _ in 0..closes {
90 if self.depth > 0 {
91 self.depth -= 1;
92 }
93 }
94 }
95
96 while let Some((_, start_depth)) = self.tag_stack.last() {
98 if self.depth < *start_depth {
99 self.tag_stack.pop();
100 } else {
101 break;
102 }
103 }
104
105 return true;
108 }
109
110 false
111 }
112
113 fn count_opens_lowered(line_lower: &str, tag: &str) -> usize {
116 let open_pattern = format!("<{tag}");
117 let mut count = 0;
118 let mut search_start = 0;
119
120 while let Some(pos) = line_lower[search_start..].find(&open_pattern) {
121 let abs_pos = search_start + pos;
122 let after_tag = abs_pos + open_pattern.len();
123
124 if after_tag >= line_lower.len()
126 || line_lower[after_tag..].starts_with(|c: char| c.is_whitespace() || c == '>' || c == '/')
127 {
128 count += 1;
129 }
130 search_start = after_tag;
131 }
132 count
133 }
134
135 fn count_closes_lowered(line_lower: &str, tag: &str) -> usize {
138 let close_pattern = format!("</{tag}");
139 let mut count = 0;
140 let mut search_start = 0;
141
142 while let Some(pos) = line_lower[search_start..].find(&close_pattern) {
143 let abs_pos = search_start + pos;
144 let after_tag = abs_pos + close_pattern.len();
145
146 if let Some(rest) = line_lower.get(after_tag..)
148 && rest.trim_start().starts_with('>')
149 {
150 count += 1;
151 }
152 search_start = after_tag;
153 }
154 count
155 }
156
157 pub fn is_inside(&self) -> bool {
159 !self.tag_stack.is_empty()
160 }
161
162 pub fn reset(&mut self) {
164 self.tag_stack.clear();
165 self.depth = 0;
166 }
167}
168
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_markdown_html_detection() {
        // Bare attribute and attribute with a value.
        assert!(is_markdown_html_start("<div markdown>"));
        assert!(is_markdown_html_start("<div class=\"grid cards\" markdown>"));
        assert!(is_markdown_html_start("<div markdown=\"1\">"));
        assert!(is_markdown_html_start("<div markdown=\"block\">"));

        // Attribute position does not matter.
        assert!(is_markdown_html_start("<div markdown class=\"test\">"));
        assert!(is_markdown_html_start("<div id=\"foo\" markdown>"));

        // Matching is case-insensitive.
        assert!(is_markdown_html_start("<DIV markdown>"));
        assert!(is_markdown_html_start("<Div Markdown>"));

        // Leading indentation is allowed.
        assert!(is_markdown_html_start("  <div markdown>"));
        assert!(is_markdown_html_start("    <div class=\"grid\" markdown>"));
        assert!(is_markdown_html_start("\t<div markdown>"));

        // Every supported block-level tag is recognised.
        for tag in [
            "div", "section", "article", "aside", "details", "figure", "footer", "header", "main",
            "nav",
        ] {
            assert!(
                is_markdown_html_start(&format!("<{tag} markdown>")),
                "<{tag} markdown> should be detected"
            );
        }

        // Negative cases.
        assert!(!is_markdown_html_start("<div class=\"test\">"));
        assert!(!is_markdown_html_start("<span markdown>"));
        assert!(!is_markdown_html_start("text with markdown word"));
        assert!(!is_markdown_html_start("<div>markdown</div>"));
    }

    #[test]
    fn test_get_tag_name() {
        assert_eq!(get_tag_name("<div markdown>").as_deref(), Some("div"));
        assert_eq!(get_tag_name("<DIV markdown>").as_deref(), Some("div"));
        assert_eq!(
            get_tag_name("  <Section class=\"x\" Markdown>").as_deref(),
            Some("section")
        );
        assert_eq!(get_tag_name("<div>"), None);
        assert_eq!(get_tag_name("<span markdown>"), None);
    }

    #[test]
    fn test_process_line_outside_block() {
        let mut tracker = MarkdownHtmlTracker::new();

        assert!(!tracker.process_line("plain text"));
        assert!(!tracker.process_line("<div>plain html</div>"));
        assert!(!tracker.process_line("</div>"));
        assert!(!tracker.is_inside());
    }

    #[test]
    fn test_tracker_basic() {
        let mut tracker = MarkdownHtmlTracker::new();

        assert!(!tracker.is_inside());

        assert!(tracker.process_line("<div class=\"grid cards\" markdown>"));
        assert!(tracker.is_inside());

        assert!(tracker.process_line("- Content here"));
        assert!(tracker.is_inside());

        assert!(tracker.process_line("    ---"));
        assert!(tracker.is_inside());

        // The closing line still belongs to the block.
        assert!(tracker.process_line("</div>"));
        assert!(!tracker.is_inside());

        assert!(!tracker.process_line("after the block"));
    }

    #[test]
    fn test_tracker_nested() {
        let mut tracker = MarkdownHtmlTracker::new();

        assert!(tracker.process_line("<div markdown>"));
        assert!(tracker.is_inside());

        assert!(tracker.process_line("<div>nested</div>"));
        assert!(tracker.is_inside());

        assert!(tracker.process_line("</div>"));
        assert!(!tracker.is_inside());
    }

    #[test]
    fn test_grid_cards_pattern() {
        let content = r#"<div class="grid cards" markdown>

- :zap:{ .lg .middle } **Built for speed**

    ---

    Written in Rust.

</div>"#;

        let mut tracker = MarkdownHtmlTracker::new();
        let inside_lines: Vec<usize> = content
            .lines()
            .enumerate()
            .filter_map(|(i, line)| tracker.process_line(line).then_some(i))
            .collect();

        assert!(inside_lines.contains(&0));
        assert!(inside_lines.contains(&2));
        assert!(inside_lines.contains(&4));
        assert!(inside_lines.contains(&6));
        // Blank lines and the closing tag are part of the block as well.
        assert_eq!(inside_lines, (0..content.lines().count()).collect::<Vec<_>>());
        assert!(!tracker.is_inside());
    }

    #[test]
    fn test_same_line_open_close() {
        let mut tracker = MarkdownHtmlTracker::new();

        let result = tracker.process_line("<div markdown>content</div>");

        assert!(result);
        assert!(!tracker.is_inside());
    }

    #[test]
    fn test_multiple_sequential_blocks() {
        let mut tracker = MarkdownHtmlTracker::new();

        assert!(tracker.process_line("<div markdown>"));
        assert!(tracker.is_inside());
        assert!(tracker.process_line("Content 1"));
        assert!(tracker.process_line("</div>"));
        assert!(!tracker.is_inside());

        assert!(tracker.process_line("<section markdown>"));
        assert!(tracker.is_inside());
        assert!(tracker.process_line("Content 2"));
        assert!(tracker.process_line("</section>"));
        assert!(!tracker.is_inside());
    }

    #[test]
    fn test_mixed_case_tags() {
        let mut tracker = MarkdownHtmlTracker::new();

        assert!(tracker.process_line("<DIV markdown>"));
        assert!(tracker.is_inside());
        assert!(tracker.process_line("</Div>"));
        assert!(!tracker.is_inside());
    }

    #[test]
    fn test_deeply_nested_same_tag() {
        let mut tracker = MarkdownHtmlTracker::new();

        assert!(tracker.process_line("<div markdown>"));
        assert!(tracker.is_inside());

        assert!(tracker.process_line("<div class=\"inner\">"));
        assert!(tracker.is_inside());

        // Closes only the inner, non-markdown div.
        assert!(tracker.process_line("</div>"));
        assert!(tracker.is_inside());

        assert!(tracker.process_line("</div>"));
        assert!(!tracker.is_inside());
    }

    #[test]
    fn test_deeply_nested_different_tags() {
        let mut tracker = MarkdownHtmlTracker::new();

        assert!(tracker.process_line("<article markdown>"));
        assert!(tracker.is_inside());

        assert!(tracker.process_line("<section>"));
        assert!(tracker.is_inside());

        assert!(tracker.process_line("</section>"));
        assert!(tracker.is_inside());

        assert!(tracker.process_line("</article>"));
        assert!(!tracker.is_inside());
    }

    #[test]
    fn test_multiple_closes_same_line() {
        let mut tracker = MarkdownHtmlTracker::new();

        assert!(tracker.process_line("<div markdown>"));
        assert!(tracker.process_line("<div>inner</div></div>"));
        assert!(!tracker.is_inside());
    }

    #[test]
    fn test_count_opens_boundary_check() {
        // Tag name followed by `>`, whitespace, `/`, or end of line.
        assert_eq!(MarkdownHtmlTracker::count_opens_lowered("<div>", "div"), 1);
        assert_eq!(MarkdownHtmlTracker::count_opens_lowered("<div class='x'>", "div"), 1);
        assert_eq!(MarkdownHtmlTracker::count_opens_lowered("<div\tclass='x'>", "div"), 1);
        assert_eq!(MarkdownHtmlTracker::count_opens_lowered("<div/><div>", "div"), 2);
        assert_eq!(MarkdownHtmlTracker::count_opens_lowered("<div", "div"), 1);

        // Longer tag names sharing the prefix are not counted.
        assert_eq!(MarkdownHtmlTracker::count_opens_lowered("<divider>", "div"), 0);
        assert_eq!(MarkdownHtmlTracker::count_opens_lowered("<dividend>", "div"), 0);

        // Closing tags are not opening tags.
        assert_eq!(MarkdownHtmlTracker::count_opens_lowered("</div>", "div"), 0);

        // Callers are expected to lowercase the line first.
        assert_eq!(
            MarkdownHtmlTracker::count_opens_lowered(&"<DIV>".to_lowercase(), "div"),
            1
        );
    }

    #[test]
    fn test_count_closes_variations() {
        assert_eq!(MarkdownHtmlTracker::count_closes_lowered("</div>", "div"), 1);
        assert_eq!(
            MarkdownHtmlTracker::count_closes_lowered(&"</DIV>".to_lowercase(), "div"),
            1
        );

        // Whitespace before `>` is tolerated.
        assert_eq!(MarkdownHtmlTracker::count_closes_lowered("</div >", "div"), 1);
        assert_eq!(MarkdownHtmlTracker::count_closes_lowered("</div  >", "div"), 1);
        assert_eq!(MarkdownHtmlTracker::count_closes_lowered("</div\t>", "div"), 1);

        assert_eq!(MarkdownHtmlTracker::count_closes_lowered("</div></div>", "div"), 2);
        assert_eq!(
            MarkdownHtmlTracker::count_closes_lowered("text</div>more</div>end", "div"),
            2
        );

        // Not a closing `div`.
        assert_eq!(MarkdownHtmlTracker::count_closes_lowered("</divider>", "div"), 0);
        assert_eq!(MarkdownHtmlTracker::count_closes_lowered("</div", "div"), 0);
        assert_eq!(MarkdownHtmlTracker::count_closes_lowered("<div>", "div"), 0);
    }

    #[test]
    fn test_reset() {
        let mut tracker = MarkdownHtmlTracker::new();

        tracker.process_line("<div markdown>");
        assert!(tracker.is_inside());

        tracker.reset();
        assert!(!tracker.is_inside());
        assert!(!tracker.process_line("no longer inside"));

        // The tracker is fully usable again after a reset.
        assert!(tracker.process_line("<section markdown>"));
        assert!(tracker.is_inside());
        assert!(tracker.process_line("</section>"));
        assert!(!tracker.is_inside());
    }
}