use regex::Regex;
use std::sync::LazyLock;
/// Lazily compiled, case-insensitive matcher for a line that opens an HTML
/// block carrying the word `markdown` inside its opening tag.
///
/// The line may start with whitespace (capture group 1) and must continue with
/// `<` and one of the listed element names (capture group 2). The rest of the
/// opening tag, up to the first `>`, has to contain `markdown` as a whole word.
///
/// Because only a word boundary is required, `markdown` inside an attribute
/// value (for example `class="markdown-body"`) matches as well.
static MARKDOWN_HTML_OPEN: LazyLock<Regex> = LazyLock::new(|| {
    let pattern = r#"(?i)^(\s*)<(div|section|article|aside|details|figure|footer|header|main|nav)\b[^>]*\bmarkdown\b[^>]*>"#;
    // The pattern is a fixed literal, so compilation cannot fail at runtime.
    Regex::new(pattern).unwrap()
});
/// Reports whether `line` begins with an opening tag that `MARKDOWN_HTML_OPEN`
/// recognises as a markdown-enabled HTML block.
fn is_markdown_html_start(line: &str) -> bool {
    let opener = &*MARKDOWN_HTML_OPEN;
    opener.is_match(line)
}
/// Extracts the lowercased element name from a markdown-enabled opening tag.
///
/// Returns `None` when `line` does not match `MARKDOWN_HTML_OPEN`. An empty
/// string is produced only if the tag-name group did not take part in the
/// match.
fn get_tag_name(line: &str) -> Option<String> {
    let caps = MARKDOWN_HTML_OPEN.captures(line)?;
    // Group 2 is the element name; group 1 is the leading whitespace.
    let name = caps
        .get(2)
        .map_or_else(String::new, |m| m.as_str().to_lowercase());
    Some(name)
}
/// Line-by-line tracker for HTML blocks whose opening tag carries a
/// `markdown` attribute.
///
/// Lines are fed in document order to `process_line`; `is_inside` then
/// reports whether a tracked block is still open.
#[derive(Debug, Default)]
pub struct MarkdownHtmlTracker {
/// Open markdown-enabled blocks, innermost last. Each entry holds the
/// lowercased tag name and the value `depth` had when that block was opened;
/// the entry is removed once `depth` falls below that value.
tag_stack: Vec<(String, usize)>,
/// Running counter used to balance tags: raised when a tracked block opens
/// and for further opening tags of a tracked element name, lowered (never
/// below zero) for the matching closing tags.
depth: usize,
}
impl MarkdownHtmlTracker {
    /// Creates a tracker that is not inside any block.
    pub fn new() -> Self {
        Self::default()
    }

    /// Consumes the next line of the document and reports whether it belongs
    /// to a markdown-enabled HTML block.
    ///
    /// Returns `true` for a line that opens such a block and for every line
    /// processed while a block is open, including the line whose closing tag
    /// ends it. Returns `false` for lines outside any tracked block.
    ///
    /// Tags are balanced per line by counting, not by position: only opening
    /// and closing tags whose name equals that of a currently tracked block
    /// influence the nesting depth.
    pub fn process_line(&mut self, line: &str) -> bool {
        if is_markdown_html_start(line) {
            if let Some(tag) = get_tag_name(line) {
                // A closing tag for the same element anywhere on the opener
                // line is taken to end the block immediately, so the block is
                // only recorded when no such closing tag is present.
                let closed_on_same_line =
                    Self::count_closes_lowered(&line.to_lowercase(), &tag) > 0;
                if !closed_on_same_line {
                    self.depth += 1;
                    self.tag_stack.push((tag, self.depth));
                }
            }
            return true;
        }

        if self.tag_stack.is_empty() {
            return false;
        }

        let line_lower = line.trim().to_lowercase();

        // Several open blocks can share one element name (a `<div markdown>`
        // nested in another one). Each distinct name must be counted only
        // once, otherwise a single `</div>` would lower the depth once per
        // stack entry and close the outer block together with the inner one.
        let mut seen: Vec<&str> = Vec::with_capacity(self.tag_stack.len());
        let mut depth = self.depth;
        for (tag, _) in &self.tag_stack {
            if seen.contains(&tag.as_str()) {
                continue;
            }
            seen.push(tag.as_str());
            depth += Self::count_opens_lowered(&line_lower, tag);
            // Stray closing tags must not drive the depth below zero.
            depth = depth.saturating_sub(Self::count_closes_lowered(&line_lower, tag));
        }
        self.depth = depth;

        // Every block that was opened at a depth above the current one has
        // been closed by this line.
        while self
            .tag_stack
            .last()
            .is_some_and(|&(_, start_depth)| depth < start_depth)
        {
            self.tag_stack.pop();
        }

        true
    }

    /// Counts opening tags named `tag` in `line_lower`.
    ///
    /// Both arguments are expected to be lowercase already. A `<tag` occurrence
    /// counts only when it ends the line or is followed by whitespace, `>` or
    /// `/`, so `<divider>` is not mistaken for `<div>`.
    fn count_opens_lowered(line_lower: &str, tag: &str) -> usize {
        let open_pattern = format!("<{tag}");
        line_lower
            .match_indices(open_pattern.as_str())
            .filter(|&(pos, _)| {
                let rest = &line_lower[pos + open_pattern.len()..];
                rest.is_empty()
                    || rest.starts_with(|c: char| c.is_whitespace() || c == '>' || c == '/')
            })
            .count()
    }

    /// Counts closing tags named `tag` in `line_lower`.
    ///
    /// Both arguments are expected to be lowercase already. Whitespace between
    /// the tag name and the closing `>` is tolerated (`</div >`).
    fn count_closes_lowered(line_lower: &str, tag: &str) -> usize {
        let close_pattern = format!("</{tag}");
        line_lower
            .match_indices(close_pattern.as_str())
            .filter(|&(pos, _)| {
                line_lower[pos + close_pattern.len()..]
                    .trim_start()
                    .starts_with('>')
            })
            .count()
    }

    /// Returns `true` while at least one markdown-enabled block is open.
    pub fn is_inside(&self) -> bool {
        !self.tag_stack.is_empty()
    }

    /// Forgets all open blocks and returns the tracker to its initial state.
    pub fn reset(&mut self) {
        self.tag_stack.clear();
        self.depth = 0;
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// The opener check accepts supported elements carrying a `markdown`
    /// attribute and rejects everything else.
    #[test]
    fn test_markdown_html_detection() {
        // Attribute in various positions and forms.
        assert!(is_markdown_html_start("<div markdown>"));
        assert!(is_markdown_html_start("<div class=\"grid cards\" markdown>"));
        assert!(is_markdown_html_start("<div markdown=\"1\">"));
        assert!(is_markdown_html_start("<div markdown=\"block\">"));
        assert!(is_markdown_html_start("<div markdown class=\"test\">"));
        assert!(is_markdown_html_start("<div id=\"foo\" markdown>"));

        // Matching is case-insensitive.
        assert!(is_markdown_html_start("<DIV markdown>"));
        assert!(is_markdown_html_start("<Div Markdown>"));

        // Leading whitespace is allowed.
        assert!(is_markdown_html_start(" <div markdown>"));
        assert!(is_markdown_html_start(" <div class=\"grid\" markdown>"));

        // Other supported block elements.
        assert!(is_markdown_html_start("<section markdown>"));
        assert!(is_markdown_html_start("<article markdown>"));
        assert!(is_markdown_html_start("<details markdown>"));

        // No attribute, unsupported element, plain text, or the word only in
        // the element content.
        assert!(!is_markdown_html_start("<div class=\"test\">"));
        assert!(!is_markdown_html_start("<span markdown>"));
        assert!(!is_markdown_html_start("text with markdown word"));
        assert!(!is_markdown_html_start("<div>markdown</div>"));
    }

    /// A block stays open across content lines and ends at its closing tag.
    #[test]
    fn test_tracker_basic() {
        let mut tracker = MarkdownHtmlTracker::new();
        assert!(!tracker.is_inside());

        assert!(tracker.process_line("<div class=\"grid cards\" markdown>"));
        assert!(tracker.is_inside());

        assert!(tracker.process_line("- Content here"));
        assert!(tracker.is_inside());

        assert!(tracker.process_line(" ---"));
        assert!(tracker.is_inside());

        tracker.process_line("</div>");
        assert!(!tracker.is_inside());
    }

    /// A plain element opened and closed on one line leaves the block open.
    #[test]
    fn test_tracker_nested() {
        let mut tracker = MarkdownHtmlTracker::new();

        tracker.process_line("<div markdown>");
        assert!(tracker.is_inside());

        tracker.process_line("<div>nested</div>");
        assert!(tracker.is_inside());

        tracker.process_line("</div>");
        assert!(!tracker.is_inside());
    }

    /// Every line of a grid-cards block, blank separators included, is
    /// reported as inside the block.
    #[test]
    fn test_grid_cards_pattern() {
        // The blank lines are part of the fixture: the content lines sit at
        // the even indices 0, 2, 4, 6 and 8.
        let content = r#"<div class="grid cards" markdown>

- :zap:{ .lg .middle } **Built for speed**

    ---

    Written in Rust.

</div>"#;

        let mut tracker = MarkdownHtmlTracker::new();
        let mut inside_lines = Vec::new();
        for (i, line) in content.lines().enumerate() {
            if tracker.process_line(line) {
                inside_lines.push(i);
            }
        }

        let all_lines: Vec<usize> = (0..content.lines().count()).collect();
        assert_eq!(all_lines.len(), 9);
        assert_eq!(inside_lines, all_lines);
        assert!(!tracker.is_inside());
    }

    /// A block opened and closed on the same line is reported but not kept.
    #[test]
    fn test_same_line_open_close() {
        let mut tracker = MarkdownHtmlTracker::new();
        let result = tracker.process_line("<div markdown>content</div>");
        assert!(result);
        assert!(!tracker.is_inside());
    }

    /// Consecutive blocks with different elements are tracked independently.
    #[test]
    fn test_multiple_sequential_blocks() {
        let mut tracker = MarkdownHtmlTracker::new();

        assert!(tracker.process_line("<div markdown>"));
        assert!(tracker.is_inside());
        assert!(tracker.process_line("Content 1"));
        tracker.process_line("</div>");
        assert!(!tracker.is_inside());

        assert!(tracker.process_line("<section markdown>"));
        assert!(tracker.is_inside());
        assert!(tracker.process_line("Content 2"));
        tracker.process_line("</section>");
        assert!(!tracker.is_inside());
    }

    /// A plain nested element of the same name must be closed before the
    /// outer block ends.
    #[test]
    fn test_deeply_nested_same_tag() {
        let mut tracker = MarkdownHtmlTracker::new();

        assert!(tracker.process_line("<div markdown>"));
        assert!(tracker.is_inside());

        assert!(tracker.process_line("<div class=\"inner\">"));
        assert!(tracker.is_inside());

        assert!(tracker.process_line("</div>"));
        assert!(tracker.is_inside());

        tracker.process_line("</div>");
        assert!(!tracker.is_inside());
    }

    /// Elements with a different name do not affect the tracked block.
    #[test]
    fn test_deeply_nested_different_tags() {
        let mut tracker = MarkdownHtmlTracker::new();

        assert!(tracker.process_line("<article markdown>"));
        assert!(tracker.is_inside());

        assert!(tracker.process_line("<section>"));
        assert!(tracker.is_inside());

        assert!(tracker.process_line("</section>"));
        assert!(tracker.is_inside());

        tracker.process_line("</article>");
        assert!(!tracker.is_inside());
    }

    /// Two closing tags on one line close the nested element and the block.
    #[test]
    fn test_multiple_closes_same_line() {
        let mut tracker = MarkdownHtmlTracker::new();
        assert!(tracker.process_line("<div markdown>"));
        assert!(tracker.process_line("<div>inner</div></div>"));
        assert!(!tracker.is_inside());
    }

    /// Opening tags are counted only when the name ends at a tag boundary.
    #[test]
    fn test_count_opens_boundary_check() {
        assert_eq!(MarkdownHtmlTracker::count_opens_lowered("<div>", "div"), 1);
        assert_eq!(MarkdownHtmlTracker::count_opens_lowered("<div class='x'>", "div"), 1);
        // Tag name at the very end of the line.
        assert_eq!(MarkdownHtmlTracker::count_opens_lowered("<div", "div"), 1);
        assert_eq!(MarkdownHtmlTracker::count_opens_lowered("<div/><div>", "div"), 2);

        // Longer element names that merely start with the tag do not count.
        assert_eq!(MarkdownHtmlTracker::count_opens_lowered("<divider>", "div"), 0);
        assert_eq!(MarkdownHtmlTracker::count_opens_lowered("<dividend>", "div"), 0);

        // Callers lowercase the line first.
        assert_eq!(
            MarkdownHtmlTracker::count_opens_lowered(&"<DIV>".to_lowercase(), "div"),
            1
        );
    }

    /// Closing tags tolerate whitespace before `>` and are counted per
    /// occurrence.
    #[test]
    fn test_count_closes_variations() {
        assert_eq!(MarkdownHtmlTracker::count_closes_lowered("</div>", "div"), 1);
        assert_eq!(
            MarkdownHtmlTracker::count_closes_lowered(&"</DIV>".to_lowercase(), "div"),
            1
        );
        assert_eq!(MarkdownHtmlTracker::count_closes_lowered("</div >", "div"), 1);
        assert_eq!(MarkdownHtmlTracker::count_closes_lowered("</div\t>", "div"), 1);
        assert_eq!(MarkdownHtmlTracker::count_closes_lowered("</div></div>", "div"), 2);
        assert_eq!(
            MarkdownHtmlTracker::count_closes_lowered("text</div>more</div>end", "div"),
            2
        );
    }

    /// `reset` discards open blocks and the tracker remains usable.
    #[test]
    fn test_reset() {
        let mut tracker = MarkdownHtmlTracker::new();

        tracker.process_line("<div markdown>");
        assert!(tracker.is_inside());

        tracker.reset();
        assert!(!tracker.is_inside());

        tracker.process_line("<section markdown>");
        assert!(tracker.is_inside());
    }
}