use pulldown_cmark::{Event, HeadingLevel, Options, Parser, Tag, TagEnd};
use super::document::{Directive, Document, EmphasisSpan, Inline, ListItem, Paragraph, Section};
use crate::types::SourceFile;
#[must_use]
pub fn parse_markdown(text: &str, source: SourceFile) -> Document {
let options = Options::ENABLE_TABLES
| Options::ENABLE_FOOTNOTES
| Options::ENABLE_STRIKETHROUGH
| Options::ENABLE_TASKLISTS;
let mut sections: Vec<Section> = Vec::new();
let mut directives: Vec<Directive> = Vec::new();
let mut pending_directive_rules: Vec<String> = Vec::new();
let mut open_blocks: Vec<(String, u32)> = Vec::new();
let mut list_items: Vec<ListItem> = Vec::new();
let mut list_depth: u32 = 0;
let mut current_title: Option<String> = None;
let mut current_depth: u32 = 0;
let mut current_heading_line: Option<u32> = None;
let mut current_paragraphs: Vec<Paragraph> = Vec::new();
let mut in_heading: Option<HeadingLevel> = None;
let mut in_paragraph = false;
let mut in_code = false;
let mut list_item_depth: u32 = 0;
let mut pending_item_start: Option<u32> = None;
let mut buf = String::new();
let mut paragraph_start_line: u32 = 1;
let mut inline_stack: Vec<Vec<Inline>> = Vec::new();
let mut lazy_inline_active = false;
let mut emphasis_opens: Vec<(u32, u32)> = Vec::new();
let offsets: Vec<(Event, std::ops::Range<usize>)> =
Parser::new_ext(text, options).into_offset_iter().collect();
for (event, range) in offsets {
if !pending_directive_rules.is_empty() && is_content_block_start(&event) {
let target_line = offset_to_line(text, range.start);
for rule_id in std::mem::take(&mut pending_directive_rules) {
directives.push(Directive::new(rule_id, target_line));
}
}
if let Some(line) = pending_item_start {
match &event {
Event::Start(Tag::Paragraph | Tag::Item | Tag::List(_))
| Event::End(TagEnd::Item | TagEnd::List(_)) => {
pending_item_start = None;
},
_ => {
in_paragraph = true;
paragraph_start_line = line;
buf.clear();
inline_stack.clear();
lazy_inline_active = false;
emphasis_opens.clear();
pending_item_start = None;
},
}
}
match event {
Event::Start(Tag::Heading { level, .. }) => {
finish_paragraph(
&mut in_paragraph,
list_item_depth,
&mut buf,
&mut inline_stack,
&mut current_paragraphs,
paragraph_start_line,
);
finish_section(
&mut sections,
&mut current_title,
&mut current_depth,
&mut current_heading_line,
&mut current_paragraphs,
);
in_heading = Some(level);
current_heading_line = Some(offset_to_line(text, range.start));
buf.clear();
},
Event::End(TagEnd::Heading(level)) => {
current_title = Some(buf.trim().to_string()).filter(|s| !s.is_empty());
current_depth = heading_depth(level);
current_paragraphs = Vec::new();
in_heading = None;
buf.clear();
},
Event::Start(Tag::Paragraph) => {
in_paragraph = true;
buf.clear();
inline_stack.clear();
lazy_inline_active = false;
emphasis_opens.clear();
paragraph_start_line = offset_to_line(text, range.start);
},
Event::End(TagEnd::Paragraph) => {
finish_paragraph(
&mut in_paragraph,
list_item_depth,
&mut buf,
&mut inline_stack,
&mut current_paragraphs,
paragraph_start_line,
);
},
Event::Start(Tag::List(_)) => {
list_depth = list_depth.saturating_add(1);
},
Event::End(TagEnd::List(_)) => {
list_depth = list_depth.saturating_sub(1);
},
Event::Start(Tag::Item) => {
finish_paragraph(
&mut in_paragraph,
list_item_depth,
&mut buf,
&mut inline_stack,
&mut current_paragraphs,
paragraph_start_line,
);
list_item_depth = list_item_depth.saturating_add(1);
let item_line = offset_to_line(text, range.start);
list_items.push(ListItem::new(list_depth.max(1), item_line));
pending_item_start = Some(item_line);
},
Event::End(TagEnd::Item) => {
finish_paragraph(
&mut in_paragraph,
list_item_depth,
&mut buf,
&mut inline_stack,
&mut current_paragraphs,
paragraph_start_line,
);
list_item_depth = list_item_depth.saturating_sub(1);
pending_item_start = None;
},
Event::Start(Tag::CodeBlock(_)) => {
in_code = true;
},
Event::End(TagEnd::CodeBlock) => {
in_code = false;
},
Event::Code(_) => {
},
Event::Start(Tag::Emphasis) if in_paragraph => {
if !lazy_inline_active {
let mut bottom = Vec::with_capacity(2);
if !buf.is_empty() {
bottom.push(Inline::Text(buf.clone()));
}
inline_stack.push(bottom);
lazy_inline_active = true;
}
let (line, col) = offset_to_line_col(text, range.start);
emphasis_opens.push((line, col));
inline_stack.push(Vec::new());
},
Event::End(TagEnd::Emphasis) if in_paragraph => {
if let Some(children) = inline_stack.pop() {
let (start_line, start_column) =
emphasis_opens.pop().unwrap_or((paragraph_start_line, 1));
let span = EmphasisSpan {
children,
start_line,
start_column,
};
if let Some(parent) = inline_stack.last_mut() {
parent.push(Inline::Emphasis(span));
}
}
},
Event::Html(s) | Event::InlineHtml(s) => {
if (in_heading.is_some() || in_paragraph) && html_is_br_tag(&s) {
buf.push('\n');
if in_paragraph && lazy_inline_active {
push_inline_text(&mut inline_stack, "\n");
}
}
let block_line = offset_to_line(text, range.start);
for (parsed, line) in parse_all_directives_in_html(&s, block_line) {
match parsed {
ParsedDirective::LineForm { rule_id } => {
pending_directive_rules.push(rule_id);
},
ParsedDirective::BlockOpen { rule_id } => {
open_blocks.push((rule_id, line));
},
ParsedDirective::BlockClose { rule_id: Some(id) } => {
if let Some(pos) = open_blocks.iter().rposition(|(r, _)| r == &id) {
let (rule_id, start) = open_blocks.remove(pos);
directives.push(Directive::block(rule_id, start, line));
}
},
ParsedDirective::BlockClose { rule_id: None } => {
for (rule_id, start) in std::mem::take(&mut open_blocks) {
directives.push(Directive::block(rule_id, start, line));
}
},
}
}
},
Event::Text(s) => {
if in_code {
continue;
}
if in_heading.is_some() || in_paragraph {
buf.push_str(&s);
}
if in_paragraph && lazy_inline_active {
push_inline_text(&mut inline_stack, &s);
}
},
Event::SoftBreak if in_heading.is_some() || in_paragraph => {
buf.push(' ');
if in_paragraph && lazy_inline_active {
push_inline_text(&mut inline_stack, " ");
}
},
Event::HardBreak if in_heading.is_some() || in_paragraph => {
buf.push('\n');
if in_paragraph && lazy_inline_active {
push_inline_text(&mut inline_stack, "\n");
}
},
_ => {},
}
}
finish_paragraph(
&mut in_paragraph,
list_item_depth,
&mut buf,
&mut inline_stack,
&mut current_paragraphs,
paragraph_start_line,
);
finish_section(
&mut sections,
&mut current_title,
&mut current_depth,
&mut current_heading_line,
&mut current_paragraphs,
);
if sections.is_empty() {
sections.push(Section::new(None, 0, Vec::new()));
}
if !open_blocks.is_empty() {
let last_line = offset_to_line(text, text.len().saturating_sub(1));
for (rule_id, start) in open_blocks {
directives.push(Directive::block(rule_id, start, last_line.max(start)));
}
}
Document::with_metadata(source, sections, directives, list_items)
}
/// Returns `true` when `event` opens a paragraph, a heading or a list item —
/// the blocks a pending `disable-next-line` directive is attached to.
fn is_content_block_start(event: &Event<'_>) -> bool {
    match event {
        Event::Start(tag) => matches!(tag, Tag::Paragraph | Tag::Heading { .. } | Tag::Item),
        _ => false,
    }
}
/// A lint directive recognised inside an HTML comment.
#[derive(Debug, Clone, PartialEq, Eq)]
enum ParsedDirective {
    /// `<!-- lucid-lint disable-next-line RULE -->`
    LineForm { rule_id: String },
    /// `<!-- lucid-lint-disable RULE -->`
    BlockOpen { rule_id: String },
    /// `<!-- lucid-lint-enable [RULE] -->`; `None` means no rule was named.
    BlockClose { rule_id: Option<String> },
}

/// Finds every directive comment in `html` and pairs it with its line number.
///
/// `block_start_line` is the line on which `html` begins; each directive's
/// line is that value plus the number of newlines preceding the comment's
/// `<!--`. Comments that are not valid directives are skipped, and scanning
/// stops at the first `<!--` that has no closing `-->`.
fn parse_all_directives_in_html(html: &str, block_start_line: u32) -> Vec<(ParsedDirective, u32)> {
    let mut out = Vec::new();
    let mut cursor = 0usize;
    // Running newline count for `html[..counted]`, so each directive only
    // scans the bytes added since the previous one instead of the whole prefix.
    let mut counted = 0usize;
    let mut newlines = 0usize;
    while let Some(open_rel) = html[cursor..].find("<!--") {
        let open = cursor + open_rel;
        let Some(close_rel) = html[open..].find("-->") else {
            break;
        };
        let close = open + close_rel + 3;
        if let Some(parsed) = parse_single_directive(&html[open..close]) {
            // A plain filter/count keeps this free of extra dependencies.
            #[allow(clippy::naive_bytecount)]
            let fresh = html.as_bytes()[counted..open]
                .iter()
                .filter(|&&b| b == b'\n')
                .count();
            newlines += fresh;
            counted = open;
            // Saturate rather than wrap back to the block's first line.
            let delta = u32::try_from(newlines).unwrap_or(u32::MAX);
            out.push((parsed, block_start_line.saturating_add(delta)));
        }
        cursor = close;
    }
    out
}

/// Parses one complete HTML comment (`<!-- … -->`) as a directive.
///
/// Recognised forms, where `RULE` must satisfy [`is_valid_rule_id`] and be
/// non-empty:
///
/// * `lucid-lint-disable RULE` → [`ParsedDirective::BlockOpen`]
/// * `lucid-lint-enable` or `lucid-lint-enable RULE` → [`ParsedDirective::BlockClose`]
/// * `lucid-lint disable-next-line RULE` → [`ParsedDirective::LineForm`]
///
/// The keyword must be followed by whitespace or the end of the comment, so
/// look-alikes such as `lucid-lint-enabled` are not directives. Returns `None`
/// for anything else.
fn parse_single_directive(html: &str) -> Option<ParsedDirective> {
    let inner = html
        .trim()
        .strip_prefix("<!--")?
        .strip_suffix("-->")?
        .trim();

    if let Some(rest) = inner.strip_prefix("lucid-lint-disable") {
        let rule_id = rest.strip_prefix(char::is_whitespace)?.trim();
        if rule_id.is_empty() || !is_valid_rule_id(rule_id) {
            return None;
        }
        return Some(ParsedDirective::BlockOpen {
            rule_id: rule_id.to_string(),
        });
    }

    if let Some(rest) = inner.strip_prefix("lucid-lint-enable") {
        // Same word-boundary rule as the `disable` form: without it,
        // `lucid-lint-enabled` would close the scope of a rule named `d`.
        if !rest.is_empty() && !rest.starts_with(char::is_whitespace) {
            return None;
        }
        let rule_id = rest.trim();
        if rule_id.is_empty() {
            return Some(ParsedDirective::BlockClose { rule_id: None });
        }
        if !is_valid_rule_id(rule_id) {
            return None;
        }
        return Some(ParsedDirective::BlockClose {
            rule_id: Some(rule_id.to_string()),
        });
    }

    let rest = inner.strip_prefix("lucid-lint")?;
    let rest = rest.strip_prefix(char::is_whitespace)?.trim_start();
    let rest = rest.strip_prefix("disable-next-line")?;
    let rule_id = rest.strip_prefix(char::is_whitespace)?.trim();
    if rule_id.is_empty() || !is_valid_rule_id(rule_id) {
        return None;
    }
    Some(ParsedDirective::LineForm {
        rule_id: rule_id.to_string(),
    })
}

/// Checks that `s` consists only of ASCII lowercase letters, ASCII digits,
/// `-` and `.`.
///
/// The empty string passes this check; callers reject it separately.
fn is_valid_rule_id(s: &str) -> bool {
    s.chars()
        .all(|c| c.is_ascii_lowercase() || c.is_ascii_digit() || c == '-' || c == '.')
}
/// Closes the paragraph being accumulated, if there is one.
///
/// Does nothing unless `*in_paragraph` is set. Otherwise any emphasis frames
/// still open on `inline_stack` are folded into their parents, the bottom
/// frame becomes the paragraph's inline tree, and — when the trimmed buffer is
/// non-empty — a [`Paragraph`] is appended to `paragraphs`. The paragraph is
/// flagged as coming from a list item when `list_item_depth > 0`. The buffer,
/// the stack and the `in_paragraph` flag are reset either way.
fn finish_paragraph(
    in_paragraph: &mut bool,
    list_item_depth: u32,
    buf: &mut String,
    inline_stack: &mut Vec<Vec<Inline>>,
    paragraphs: &mut Vec<Paragraph>,
    start_line: u32,
) {
    if !*in_paragraph {
        return;
    }
    *in_paragraph = false;

    // Merge unclosed emphasis frames downwards until only the bottom remains.
    while inline_stack.len() > 1 {
        let orphaned = inline_stack.pop().unwrap_or_default();
        if let Some(below) = inline_stack.last_mut() {
            below.extend(orphaned);
        }
    }
    let tree = inline_stack.pop().unwrap_or_default();

    let trimmed = buf.trim();
    if !trimmed.is_empty() {
        let body = trimmed.to_string();
        paragraphs.push(match list_item_depth {
            0 => Paragraph::with_inline(body, start_line, tree),
            _ => Paragraph::from_list_item_with_inline(body, start_line, tree),
        });
    }
    buf.clear();
}
/// Appends `s` to the top frame of `inline_stack`.
///
/// Adjacent text is coalesced: if the frame already ends in an
/// [`Inline::Text`] node the string is appended to it, otherwise a new text
/// node is pushed. Empty strings and an empty stack are ignored.
fn push_inline_text(inline_stack: &mut [Vec<Inline>], s: &str) {
    if s.is_empty() {
        return;
    }
    if let Some(top) = inline_stack.last_mut() {
        match top.last_mut() {
            Some(Inline::Text(tail)) => tail.push_str(s),
            _ => top.push(Inline::Text(s.to_string())),
        }
    }
}
/// Converts a byte `offset` into `text` to a 1-based `(line, column)` pair.
///
/// The column counts characters (not bytes) from the start of the line.
/// Offsets past the end of `text` are clamped to its length, and an offset
/// that falls inside a multi-byte character is treated as pointing just past
/// that character's first byte instead of panicking. Values that do not fit
/// in a `u32` saturate to `u32::MAX`.
fn offset_to_line_col(text: &str, offset: usize) -> (u32, u32) {
    let capped = offset.min(text.len());
    let prefix = &text.as_bytes()[..capped];
    let line_start = prefix
        .iter()
        .rposition(|&b| b == b'\n')
        .map_or(0, |p| p + 1);
    let line = offset_to_line(text, capped);
    // Every character contributes exactly one non-continuation byte, so this
    // equals `chars().count()` on a boundary and stays panic-free off one.
    let chars_before = prefix[line_start..]
        .iter()
        .filter(|&&b| (b & 0xC0) != 0x80)
        .count();
    let col = u32::try_from(chars_before.saturating_add(1)).unwrap_or(u32::MAX);
    (line, col)
}
/// Pushes the section under construction onto `sections`, if it has content.
///
/// A section is emitted when it has a title or at least one paragraph. The
/// title, heading line and paragraphs are moved out of their slots and
/// `*depth` is reset to 0. When there is neither a title nor a paragraph
/// nothing is touched.
fn finish_section(
    sections: &mut Vec<Section>,
    title: &mut Option<String>,
    depth: &mut u32,
    heading_line: &mut Option<u32>,
    paragraphs: &mut Vec<Paragraph>,
) {
    if title.is_none() && paragraphs.is_empty() {
        return;
    }
    let level = *depth;
    let heading = title.take();
    let body = std::mem::take(paragraphs);
    let finished = if let Some(line) = heading_line.take() {
        Section::with_heading_line(heading, level, line, body)
    } else {
        Section::new(heading, level, body)
    };
    sections.push(finished);
    *depth = 0;
}
/// Returns `true` when `s` is a bare `<br>` tag.
///
/// Surrounding whitespace, letter case and trailing slashes (`<br/>`,
/// `<br />`) are tolerated; a tag carrying attributes is not matched.
fn html_is_br_tag(s: &str) -> bool {
    s.trim()
        .strip_prefix('<')
        .and_then(|rest| rest.strip_suffix('>'))
        .is_some_and(|name| name.trim_end_matches('/').trim().eq_ignore_ascii_case("br"))
}
const fn heading_depth(level: HeadingLevel) -> u32 {
match level {
HeadingLevel::H1 => 1,
HeadingLevel::H2 => 2,
HeadingLevel::H3 => 3,
HeadingLevel::H4 => 4,
HeadingLevel::H5 => 5,
HeadingLevel::H6 => 6,
}
}
/// Converts a byte `offset` into `text` to a 1-based line number.
///
/// The result is one more than the number of `\n` bytes strictly before
/// `offset`. Offsets past the end are treated as the end of `text`, and a
/// count that does not fit in a `u32` saturates to `u32::MAX`.
fn offset_to_line(text: &str, offset: usize) -> u32 {
    // `take` stops at the end of the input, which clamps oversized offsets.
    #[allow(clippy::naive_bytecount)]
    let newline_count = text
        .bytes()
        .take(offset)
        .filter(|&byte| byte == b'\n')
        .count();
    u32::try_from(newline_count + 1).unwrap_or(u32::MAX)
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn parses_simple_markdown() {
let md = "# Title\n\nFirst paragraph.\n\n## Sub\n\nSecond paragraph.";
let doc = parse_markdown(md, SourceFile::Anonymous);
assert_eq!(doc.sections.len(), 2);
assert_eq!(doc.sections[0].title.as_deref(), Some("Title"));
assert_eq!(doc.sections[0].paragraphs.len(), 1);
assert_eq!(doc.sections[1].title.as_deref(), Some("Sub"));
assert_eq!(doc.sections[1].paragraphs.len(), 1);
}
#[test]
fn preserves_inline_emphasis_text() {
let md = "Some *emphasized* and **strong** and `code` text.";
let doc = parse_markdown(md, SourceFile::Anonymous);
let para = &doc.sections[0].paragraphs[0].text;
assert!(para.contains("emphasized"));
assert!(para.contains("strong"));
assert!(!para.contains("code"));
}
#[test]
fn excludes_fenced_code_blocks() {
let md = "Before.\n\n```\nignored code\n```\n\nAfter.";
let doc = parse_markdown(md, SourceFile::Anonymous);
let texts: Vec<_> = doc.sections[0]
.paragraphs
.iter()
.map(|p| p.text.clone())
.collect();
assert!(!texts.iter().any(|t| t.contains("ignored code")));
assert!(texts.iter().any(|t| t == "Before."));
assert!(texts.iter().any(|t| t == "After."));
}
#[test]
fn extracts_heading_hierarchy() {
let md = "# H1\n\nIntro.\n\n## H2\n\nSubcontent.\n\n### H3\n\nDeep.";
let doc = parse_markdown(md, SourceFile::Anonymous);
let depths: Vec<u32> = doc.sections.iter().map(|s| s.depth).collect();
assert_eq!(depths, vec![1, 2, 3]);
}
#[test]
fn br_tag_inside_paragraph_is_a_hard_break() {
for variant in ["<br>", "<br/>", "<br />", "<BR>", "<Br />"] {
let md = format!("Lead.{variant}Tail.");
let doc = parse_markdown(&md, SourceFile::Anonymous);
let para = &doc.sections[0].paragraphs[0].text;
assert!(
para.contains("Lead.\nTail."),
"variant {variant:?} produced {para:?}"
);
}
}
#[test]
fn html_comment_directives_do_not_inject_newlines() {
let md = "Lead. <!-- lucid-lint-disable rhythm.foo --> Tail.";
let doc = parse_markdown(md, SourceFile::Anonymous);
let para = &doc.sections[0].paragraphs[0].text;
assert!(!para.contains('\n'), "got {para:?}");
}
#[test]
fn handles_empty_markdown() {
let doc = parse_markdown("", SourceFile::Anonymous);
assert!(doc.sections.is_empty() || doc.sections[0].paragraphs.is_empty());
}
#[test]
fn handles_markdown_with_no_headings() {
let md = "Just a paragraph.\n\nAnd another.";
let doc = parse_markdown(md, SourceFile::Anonymous);
assert_eq!(doc.sections.len(), 1);
assert_eq!(doc.sections[0].title, None);
assert_eq!(doc.sections[0].paragraphs.len(), 2);
}
#[test]
fn extracts_disable_next_line_directive() {
let md = "Intro.\n\n<!-- lucid-lint disable-next-line structure.sentence-too-long -->\n\
A long sentence that will be suppressed.\n";
let doc = parse_markdown(md, SourceFile::Anonymous);
assert_eq!(doc.directives.len(), 1);
assert_eq!(doc.directives[0].rule_id, "structure.sentence-too-long");
assert_eq!(doc.directives[0].start_line, 4);
assert_eq!(doc.directives[0].end_line, 4);
}
#[test]
fn ignores_non_directive_html_comments() {
let md = "Intro.\n\n<!-- just a regular comment -->\n\nAfter.";
let doc = parse_markdown(md, SourceFile::Anonymous);
assert!(doc.directives.is_empty());
}
#[test]
fn rejects_directive_with_invalid_rule_id() {
let md = "<!-- lucid-lint disable-next-line Bad_Rule -->\nText.\n";
let doc = parse_markdown(md, SourceFile::Anonymous);
assert!(doc.directives.is_empty());
}
#[test]
fn directive_without_following_content_is_dropped() {
let md = "Body.\n\n<!-- lucid-lint disable-next-line structure.sentence-too-long -->\n";
let doc = parse_markdown(md, SourceFile::Anonymous);
assert!(doc.directives.is_empty());
}
#[test]
fn extracts_block_disable_and_enable_directive() {
let md = "Intro.\n\n\
<!-- lucid-lint-disable structure.sentence-too-long -->\n\n\
Inside block.\n\n\
More inside.\n\n\
<!-- lucid-lint-enable -->\n\n\
After.\n";
let doc = parse_markdown(md, SourceFile::Anonymous);
assert_eq!(doc.directives.len(), 1);
let d = &doc.directives[0];
assert_eq!(d.rule_id, "structure.sentence-too-long");
assert_eq!(d.start_line, 3);
assert_eq!(d.end_line, 9);
assert!(d.covers(5));
assert!(d.covers(7));
assert!(!d.covers(11));
}
#[test]
fn block_enable_with_rule_id_closes_matching_scope_only() {
let md = "<!-- lucid-lint-disable structure.sentence-too-long -->\n\n\
<!-- lucid-lint-disable lexicon.weasel-words -->\n\n\
Between.\n\n\
<!-- lucid-lint-enable structure.sentence-too-long -->\n\n\
After.\n\n\
<!-- lucid-lint-enable -->\n";
let doc = parse_markdown(md, SourceFile::Anonymous);
assert_eq!(doc.directives.len(), 2);
let sentence = doc
.directives
.iter()
.find(|d| d.rule_id == "structure.sentence-too-long")
.expect("sentence-too-long directive");
let weasel = doc
.directives
.iter()
.find(|d| d.rule_id == "lexicon.weasel-words")
.expect("weasel-words directive");
assert!(sentence.end_line < weasel.end_line);
}
#[test]
fn unterminated_block_disable_extends_to_end_of_document() {
let md = "Intro.\n\n\
<!-- lucid-lint-disable structure.sentence-too-long -->\n\n\
Body.\n";
let doc = parse_markdown(md, SourceFile::Anonymous);
assert_eq!(doc.directives.len(), 1);
let d = &doc.directives[0];
assert_eq!(d.rule_id, "structure.sentence-too-long");
assert!(d.end_line >= d.start_line);
assert!(d.covers(5));
}
#[test]
fn enable_with_no_matching_open_scope_is_ignored() {
let md = "<!-- lucid-lint-enable structure.sentence-too-long -->\n\nText.\n";
let doc = parse_markdown(md, SourceFile::Anonymous);
assert!(doc.directives.is_empty());
}
#[test]
fn block_directive_with_invalid_rule_id_is_rejected() {
let md = "<!-- lucid-lint-disable Bad_Rule -->\n\nBody.\n\n\
<!-- lucid-lint-enable -->\n";
let doc = parse_markdown(md, SourceFile::Anonymous);
assert!(doc.directives.is_empty());
}
#[test]
fn tight_list_item_emits_a_paragraph() {
let md = "- One bullet, five commas: a, b, c, d, e, f.\n";
let doc = parse_markdown(md, SourceFile::Anonymous);
let paras: Vec<_> = doc
.sections
.iter()
.flat_map(|s| s.paragraphs.iter())
.collect();
assert_eq!(paras.len(), 1, "got {paras:?}");
assert!(paras[0].text.contains("five commas"));
assert!(paras[0].from_list_item);
}
#[test]
fn loose_list_item_paragraphs_are_marked_from_list_item() {
let md = "- First item, comma-heavy: a, b, c, d, e.\n\n\
- Second item, also comma-heavy: f, g, h, i, j.\n";
let doc = parse_markdown(md, SourceFile::Anonymous);
let paras: Vec<_> = doc
.sections
.iter()
.flat_map(|s| s.paragraphs.iter())
.collect();
assert_eq!(paras.len(), 2);
assert!(paras.iter().all(|p| p.from_list_item));
}
#[test]
fn body_paragraphs_are_not_marked_from_list_item() {
let md = "A regular body paragraph.\n\nAnother one.\n";
let doc = parse_markdown(md, SourceFile::Anonymous);
let paras: Vec<_> = doc
.sections
.iter()
.flat_map(|s| s.paragraphs.iter())
.collect();
assert_eq!(paras.len(), 2);
assert!(paras.iter().all(|p| !p.from_list_item));
}
#[test]
fn nested_list_emits_one_paragraph_per_item() {
let md = "- outer item, with three commas: a, b, c.\n \
- inner item, also three: d, e, f.\n";
let doc = parse_markdown(md, SourceFile::Anonymous);
let paras: Vec<_> = doc
.sections
.iter()
.flat_map(|s| s.paragraphs.iter())
.collect();
assert_eq!(paras.len(), 2, "got {paras:?}");
assert!(paras.iter().all(|p| p.from_list_item));
assert!(paras[0].text.contains("outer item"));
assert!(paras[1].text.contains("inner item"));
}
#[test]
fn empty_list_item_produces_no_paragraph() {
let md = "- \n- still empty\n";
let doc = parse_markdown(md, SourceFile::Anonymous);
let paras: Vec<_> = doc
.sections
.iter()
.flat_map(|s| s.paragraphs.iter())
.collect();
assert_eq!(paras.len(), 1);
assert!(paras[0].text.contains("still empty"));
}
#[test]
fn preserves_paragraph_start_line() {
let md = "Line 1.\n\nLine 3.\n\nLine 5.";
let doc = parse_markdown(md, SourceFile::Anonymous);
let lines: Vec<u32> = doc.sections[0]
.paragraphs
.iter()
.map(|p| p.start_line)
.collect();
assert_eq!(lines.len(), 3);
assert!(lines[0] <= lines[1] && lines[1] <= lines[2]);
}
fn paragraph_inline(md: &str) -> Vec<Inline> {
let doc = parse_markdown(md, SourceFile::Anonymous);
doc.sections
.into_iter()
.flat_map(|s| s.paragraphs)
.next()
.map(|p| p.inline)
.unwrap_or_default()
}
#[test]
fn paragraph_without_emphasis_has_empty_inline() {
let inline = paragraph_inline("Plain prose, nothing fancy.");
assert!(inline.is_empty(), "got {inline:?}");
}
#[test]
fn emphasis_span_is_captured() {
let inline = paragraph_inline("Some *italic words* in the middle.");
assert_eq!(inline.len(), 3, "got {inline:?}");
assert_eq!(inline[0], Inline::Text("Some ".to_string()));
let span = match &inline[1] {
Inline::Emphasis(s) => s,
Inline::Text(t) => unreachable!("expected Emphasis at index 1, got Text({t:?})"),
};
assert_eq!(
span.children,
vec![Inline::Text("italic words".to_string())]
);
assert_eq!(span.start_line, 1);
assert_eq!(span.start_column, 6);
assert_eq!(inline[2], Inline::Text(" in the middle.".to_string()));
}
#[test]
fn underscore_emphasis_is_captured_too() {
let inline = paragraph_inline("An _underscore italic_ here.");
let has_emphasis = inline
.iter()
.any(|n| matches!(n, Inline::Emphasis(s) if s.children == vec![Inline::Text("underscore italic".to_string())]));
assert!(has_emphasis, "got {inline:?}");
}
#[test]
fn strong_does_not_create_an_emphasis_node() {
let inline = paragraph_inline("Some **bold words** here.");
assert!(
inline.iter().all(|n| !matches!(n, Inline::Emphasis(_))),
"got {inline:?}"
);
assert!(inline.is_empty(), "got {inline:?}");
}
#[test]
fn nested_emphasis_is_preserved() {
let inline = paragraph_inline("Outer *one _two_ three* end.");
let outer = inline
.iter()
.find_map(|n| match n {
Inline::Emphasis(s) => Some(s),
Inline::Text(_) => None,
})
.expect("outer emphasis present");
let inner = outer
.children
.iter()
.find_map(|n| match n {
Inline::Emphasis(s) => Some(s),
Inline::Text(_) => None,
})
.expect("inner emphasis present");
assert_eq!(inner.children, vec![Inline::Text("two".to_string())]);
}
#[test]
fn multiple_emphases_in_one_paragraph_are_all_captured() {
let inline = paragraph_inline("First *one* then *two* then *three*.");
let count = inline
.iter()
.filter(|n| matches!(n, Inline::Emphasis(_)))
.count();
assert_eq!(count, 3, "got {inline:?}");
}
#[test]
fn emphasis_in_code_block_does_not_appear_in_inline_tree() {
let md = "Before.\n\n```\nignored *not italic*\n```\n\nAfter.";
let doc = parse_markdown(md, SourceFile::Anonymous);
for para in doc.sections.iter().flat_map(|s| s.paragraphs.iter()) {
assert!(
para.inline
.iter()
.all(|n| !matches!(n, Inline::Emphasis(_))),
"code-block emphasis leaked into {para:?}"
);
}
}
#[test]
fn inline_tree_is_empty_for_plain_text_input() {
let doc = super::super::parse_plain("A plain *paragraph* of text.", SourceFile::Anonymous);
let para = &doc.sections[0].paragraphs[0];
assert!(para.inline.is_empty(), "got {:?}", para.inline);
}
/// Flattening the inline tree of a paragraph with nested emphasis must
/// reproduce the paragraph's plain text exactly.
#[test]
fn inline_tree_text_concatenation_matches_paragraph_text() {
    let md = "Before *italic with _nested_ inside* and after.";
    let doc = parse_markdown(md, SourceFile::Anonymous);
    let para = &doc.sections[0].paragraphs[0];
    // Uses the module-level helper instead of a private copy of `flatten`.
    let flat = flatten_to_string(&para.inline);
    assert_eq!(flat, para.text, "tree {:?}", para.inline);
}
#[test]
fn emphasis_position_points_at_opening_delimiter_on_later_line() {
let md = "Intro paragraph.\n\nFollow-up paragraph with *important* word.\n";
let doc = parse_markdown(md, SourceFile::Anonymous);
let span = doc
.sections
.iter()
.flat_map(|s| s.paragraphs.iter())
.flat_map(|p| p.inline.iter())
.find_map(|n| match n {
Inline::Emphasis(s) => Some(s),
Inline::Text(_) => None,
})
.expect("emphasis present");
assert_eq!(span.start_line, 3);
assert_eq!(span.start_column, 26);
}
fn flatten(nodes: &[Inline], out: &mut String) {
for node in nodes {
match node {
Inline::Text(t) => out.push_str(t),
Inline::Emphasis(span) => flatten(&span.children, out),
}
}
}
fn flatten_to_string(nodes: &[Inline]) -> String {
let mut s = String::new();
flatten(nodes, &mut s);
s
}
fn proptest_md_input() -> impl proptest::strategy::Strategy<Value = String> {
use proptest::prelude::*;
prop::collection::vec(proptest_segment(), 1..6).prop_map(|segs| segs.join(" "))
}
fn proptest_segment() -> impl proptest::strategy::Strategy<Value = String> {
use proptest::prelude::*;
prop_oneof![
"[a-z]{1,8}( [a-z]{1,8}){0,5}".prop_map(String::from),
"[a-z]{1,8}( [a-z]{1,8}){0,3}".prop_map(|s| format!("*{s}*")),
"[a-z]{1,8}( [a-z]{1,8}){0,3}".prop_map(|s| format!("_{s}_")),
]
}
fn proptest_plain_text() -> impl proptest::strategy::Strategy<Value = String> {
use proptest::prelude::*;
"[a-z ]{0,80}".prop_map(String::from)
}
proptest::proptest! {
#![proptest_config(proptest::prelude::ProptestConfig {
cases: 256,
..proptest::prelude::ProptestConfig::default()
})]
#[test]
fn prop_flatten_inline_equals_paragraph_text(
input in proptest_md_input()
) {
let doc = parse_markdown(&input, SourceFile::Anonymous);
for para in doc.sections.iter().flat_map(|s| s.paragraphs.iter()) {
if !para.inline.is_empty() {
let flat = flatten_to_string(¶.inline);
proptest::prop_assert_eq!(&flat, ¶.text);
}
}
}
#[test]
fn prop_inline_empty_iff_no_emphasis(
input in proptest_md_input()
) {
let doc = parse_markdown(&input, SourceFile::Anonymous);
for para in doc.sections.iter().flat_map(|s| s.paragraphs.iter()) {
let has_emphasis_in_tree = para
.inline
.iter()
.any(|n| matches!(n, Inline::Emphasis(_)));
proptest::prop_assert_eq!(
!para.inline.is_empty(),
has_emphasis_in_tree,
"para.inline non-emptiness {} disagrees with emphasis presence {} for text {:?}",
!para.inline.is_empty(),
has_emphasis_in_tree,
para.text
);
}
}
#[test]
fn prop_no_delimiters_implies_no_emphasis(
input in proptest_plain_text()
) {
proptest::prop_assume!(!input.contains('*') && !input.contains('_'));
let doc = parse_markdown(&input, SourceFile::Anonymous);
for para in doc.sections.iter().flat_map(|s| s.paragraphs.iter()) {
let any_emphasis = para
.inline
.iter()
.any(|n| matches!(n, Inline::Emphasis(_)));
proptest::prop_assert!(!any_emphasis, "got {:?}", para.inline);
}
}
#[test]
fn prop_emphasis_text_is_substring_of_paragraph(
input in proptest_md_input()
) {
for para in parse_markdown(&input, SourceFile::Anonymous)
.sections
.iter()
.flat_map(|s| s.paragraphs.iter())
{
for node in ¶.inline {
if let Inline::Emphasis(span) = node {
let inner = flatten_to_string(&span.children);
if !inner.is_empty() {
proptest::prop_assert!(
para.text.contains(&inner),
"emphasis {:?} not in paragraph {:?}",
inner,
para.text
);
}
}
}
}
}
#[test]
fn prop_emphasis_position_within_paragraph(
input in proptest_md_input()
) {
for para in parse_markdown(&input, SourceFile::Anonymous)
.sections
.iter()
.flat_map(|s| s.paragraphs.iter())
{
let nl = u32::try_from(para.text.matches('\n').count())
.unwrap_or(u32::MAX);
let max_line = para.start_line.saturating_add(nl);
for node in ¶.inline {
if let Inline::Emphasis(span) = node {
proptest::prop_assert!(
span.start_line >= para.start_line
&& span.start_line <= max_line,
"span line {} outside [{}, {}] for para {:?}",
span.start_line,
para.start_line,
max_line,
para.text
);
proptest::prop_assert!(
span.start_column >= 1,
"span column {} is not 1-based",
span.start_column
);
}
}
}
}
#[test]
fn prop_parse_plain_has_empty_inline(
input in proptest_plain_text()
) {
let doc = super::super::parse_plain(&input, SourceFile::Anonymous);
for para in doc.sections.iter().flat_map(|s| s.paragraphs.iter()) {
proptest::prop_assert!(
para.inline.is_empty(),
"got {:?}",
para.inline
);
}
}
}
fn snapshot_inline(md: &str) -> Vec<Vec<Inline>> {
parse_markdown(md, SourceFile::Anonymous)
.sections
.into_iter()
.flat_map(|s| s.paragraphs)
.map(|p| p.inline)
.collect()
}
#[test]
fn snapshot_en_plain_paragraph() {
let trees = snapshot_inline("Plain prose, nothing fancy here.");
insta::assert_debug_snapshot!(trees);
}
#[test]
fn snapshot_en_single_emphasis() {
let trees = snapshot_inline("Some *italic words* in the middle of prose.");
insta::assert_debug_snapshot!(trees);
}
#[test]
fn snapshot_en_strong_does_not_create_emphasis() {
let trees = snapshot_inline("A line with **strong words** but no italic.");
insta::assert_debug_snapshot!(trees);
}
#[test]
fn snapshot_en_multi_paragraph_mixed() {
let md = "First paragraph, plain.\n\n\
Second has *italic* in it.\n\n\
Third has **bold** and *italic both*.";
let trees = snapshot_inline(md);
insta::assert_debug_snapshot!(trees);
}
#[test]
fn snapshot_en_nested_emphasis() {
let trees = snapshot_inline("Outer *one _two_ three* end.");
insta::assert_debug_snapshot!(trees);
}
#[test]
fn snapshot_en_tight_list_item_with_emphasis() {
let trees = snapshot_inline("- bullet with *italic phrase* inside.\n");
insta::assert_debug_snapshot!(trees);
}
#[test]
fn snapshot_en_code_block_excluded() {
let md = "Before italics.\n\n\
```\n\
not *italic* here\n\
```\n\n\
After *real italics* end.";
let trees = snapshot_inline(md);
insta::assert_debug_snapshot!(trees);
}
#[test]
fn snapshot_fr_single_emphasis() {
let trees = snapshot_inline("Une phrase avec *des mots en italique* au milieu.");
insta::assert_debug_snapshot!(trees);
}
#[test]
fn snapshot_fr_nested_emphasis_with_accents() {
let trees = snapshot_inline("Élève *attentif _très_ concentré* ici.");
insta::assert_debug_snapshot!(trees);
}
#[test]
fn snapshot_fr_multi_paragraph_with_lists() {
let md = "Paragraphe simple.\n\n\
- item avec *italique court* ici\n\
- autre item avec **gras** et _souligné_\n\n\
Conclusion en *italique final*.";
let trees = snapshot_inline(md);
insta::assert_debug_snapshot!(trees);
}
#[test]
fn emphasis_inside_heading_does_not_bleed_into_next_paragraph() {
let md = "# Heading with *italic title*\n\nA body paragraph, no emphasis here.";
let doc = parse_markdown(md, SourceFile::Anonymous);
let para = doc
.sections
.iter()
.flat_map(|s| s.paragraphs.iter())
.next()
.expect("body paragraph present");
assert_eq!(para.text, "A body paragraph, no emphasis here.");
assert!(
para.inline.is_empty(),
"heading emphasis leaked into body paragraph: {:?}",
para.inline
);
}
#[test]
fn emphasis_inside_tight_list_item_is_captured() {
let md = "- bullet with *italic phrase* inside.\n";
let doc = parse_markdown(md, SourceFile::Anonymous);
let para = doc
.sections
.iter()
.flat_map(|s| s.paragraphs.iter())
.next()
.expect("paragraph synthesized");
assert!(para.from_list_item);
let has_emphasis = para.inline.iter().any(|n| matches!(n, Inline::Emphasis(_)));
assert!(has_emphasis, "got {:?}", para.inline);
}
}