use pulldown_cmark::{Alignment, CodeBlockKind, Event, HeadingLevel, Options, Parser, Tag, TagEnd};
pub fn format(input: &str) -> String {
if input.trim().is_empty() {
return String::new();
}
let mut state = FormatterState::new();
let events: Vec<Event<'_>> = Parser::new_ext(input, mk_options()).collect();
let lookahead: Vec<bool> = (0..events.len())
.map(|i| matches!(events.get(i + 1), Some(Event::Start(Tag::List(None)))))
.collect();
for (event, next_is_ul) in events.into_iter().zip(lookahead) {
state.next_is_unordered_list = next_is_ul;
state.process(event);
}
state.finish()
}
/// Returns the parser extensions the formatter understands: tables,
/// footnotes, strikethrough and task lists.
fn mk_options() -> Options {
    // ENABLE_HEADING_ATTRIBUTES is intentionally not enabled. With it on, the
    // parser strips a trailing `{#id .class}` from the heading text and reports
    // it only through the fields of `Tag::Heading`, which the heading arm of
    // `on_start` ignores - the attributes would vanish from the output. With it
    // off, the braces stay part of the heading text and are written back
    // verbatim.
    Options::ENABLE_TABLES
        | Options::ENABLE_FOOTNOTES
        | Options::ENABLE_STRIKETHROUGH
        | Options::ENABLE_TASKLISTS
}
/// Mutable state carried through one `format` run.
struct FormatterState {
    // ---- Output and inline buffering ----
    /// Formatted document text accumulated so far.
    out: String,
    /// Set once a block has been written; `emit_blank_if_needed` turns it into
    /// a single separator line before the next block and clears it.
    needs_blank: bool,
    /// Inline content (text, emphasis delimiters, links, code spans) buffered
    /// until the enclosing block, list item or table cell ends.
    inline: String,
    /// While set, text events are appended to `out` directly instead of being
    /// buffered in `inline`.
    in_code_block: bool,
    /// `(destination, title)` for every link or image that has been opened
    /// but not yet closed.
    link_stack: Vec<(String, String)>,

    // ---- Container nesting ----
    /// Number of block quotes currently open.
    bq_depth: usize,
    /// Number of lists currently open.
    list_depth: usize,
    /// One entry per open list: `None` for a bullet list, `Some(n)` for an
    /// ordered list whose next item will be numbered `n`.
    list_starts: Vec<Option<u64>>,
    /// Set when an item opens and cleared when a paragraph starts or ends
    /// inside it; while set, the item's text is flushed from `inline` when the
    /// item ends or a nested list begins.
    in_tight_item: bool,
    /// Written by `format` before each event: true when the event that follows
    /// the current one opens an unordered list.
    next_is_unordered_list: bool,

    // ---- Table being collected ----
    /// Column alignments of the table currently being read.
    table_alignments: Vec<Alignment>,
    /// Rendered cells of the header row.
    table_head_cells: Vec<String>,
    /// Rendered cells of each body row, in document order.
    table_data_rows: Vec<Vec<String>>,
    /// Cells collected so far for the row currently being read.
    current_row_cells: Vec<String>,
    /// True between the start and end of the table head.
    in_table_head: bool,
}
impl FormatterState {
fn new() -> Self {
Self {
out: String::new(),
needs_blank: false,
list_depth: 0,
list_starts: Vec::new(),
in_tight_item: false,
bq_depth: 0,
inline: String::new(),
in_code_block: false,
link_stack: Vec::new(),
next_is_unordered_list: false,
table_alignments: Vec::new(),
table_head_cells: Vec::new(),
table_data_rows: Vec::new(),
current_row_cells: Vec::new(),
in_table_head: false,
}
}
fn process(&mut self, event: Event<'_>) {
match event {
Event::Start(tag) => self.on_start(tag),
Event::End(tag) => self.on_end(tag),
Event::Text(t) => self.on_text(&t),
Event::Code(c) => {
let max_run = c.chars().fold((0usize, 0usize), |(max, cur), ch| {
if ch == '`' {
(max.max(cur + 1), cur + 1)
} else {
(max, 0)
}
});
let delim = "`".repeat(max_run.0 + 1);
self.inline.push_str(&delim);
if c.starts_with('`') || c.ends_with('`') {
self.inline.push(' ');
}
self.inline.push_str(&c);
if c.starts_with('`') || c.ends_with('`') {
self.inline.push(' ');
}
self.inline.push_str(&delim);
}
Event::Html(h) => {
self.emit_blank_if_needed();
self.out.push_str(&h);
if !self.out.ends_with('\n') {
self.out.push('\n');
}
self.needs_blank = true;
}
Event::InlineHtml(h) => {
self.inline.push_str(&h);
}
Event::SoftBreak => {
self.inline.push('\n');
}
Event::HardBreak => {
self.inline.push_str("\\\n");
}
Event::Rule => {
self.emit_blank_if_needed();
self.write_bq_prefix();
self.out.push_str("---\n");
self.needs_blank = true;
}
Event::FootnoteReference(label) => {
self.inline.push_str(&format!("[^{}]", label));
}
Event::TaskListMarker(checked) => {
if checked {
self.inline.push_str("[x] ");
} else {
self.inline.push_str("[ ] ");
}
}
_ => {}
}
}
/// Handles the opening of a block or inline container.
///
/// Block-level tags settle the pending blank line and write whatever opens the
/// block (code fence, list marker, footnote label); inline tags append their
/// opening delimiter to `inline`; table tags reset the table buffers. Tags
/// without an arm are ignored.
fn on_start(&mut self, tag: Tag<'_>) {
    match tag {
        Tag::Paragraph => {
            // Inside a list the separator is not emitted at paragraph start.
            if self.list_depth == 0 {
                self.emit_blank_if_needed();
            }
            self.in_tight_item = false;
        }
        Tag::Heading { .. } => self.emit_blank_if_needed(),
        Tag::CodeBlock(kind) => {
            self.emit_blank_if_needed();
            self.write_bq_prefix();
            // Every code block is written as a backtick fence; indented
            // blocks get a fence without an info string.
            self.out.push_str("```");
            if let CodeBlockKind::Fenced(lang) = kind {
                self.out.push_str(&lang);
            }
            self.out.push('\n');
            self.in_code_block = true;
        }
        Tag::List(start) => {
            if self.list_depth == 0 {
                self.emit_blank_if_needed();
            } else {
                self.needs_blank = false;
                // A nested list begins while the parent item's text is still
                // buffered: write that text out first so it ends up on the
                // parent's marker line.
                if self.in_tight_item && !self.inline.is_empty() {
                    let text = std::mem::take(&mut self.inline);
                    let prefix = " ".repeat(self.list_depth);
                    self.flush_inline_text(&text, &prefix);
                    self.in_tight_item = false;
                }
            }
            self.list_depth += 1;
            self.list_starts.push(start);
        }
        Tag::Item => {
            if self.list_depth > 0 {
                self.emit_blank_if_needed();
            }
            self.in_tight_item = true;
            let indent = self.marker_indent();
            let marker = match self.list_starts.last_mut() {
                Some(Some(n)) => {
                    let numbered = format!("{}{}. ", indent, n);
                    *n += 1;
                    numbered
                }
                _ => format!("{}- ", indent),
            };
            self.write_bq_prefix();
            self.out.push_str(&marker);
        }
        Tag::Emphasis => self.inline.push('*'),
        Tag::Strong => self.inline.push_str("**"),
        Tag::Strikethrough => self.inline.push_str("~~"),
        Tag::Link {
            dest_url, title, ..
        } => {
            // Destination and title are only needed when the link closes.
            self.link_stack
                .push((dest_url.into_string(), title.into_string()));
            self.inline.push('[');
        }
        Tag::Image {
            dest_url, title, ..
        } => {
            self.link_stack
                .push((dest_url.into_string(), title.into_string()));
            self.inline.push_str("![");
        }
        Tag::BlockQuote(_) => {
            self.emit_blank_if_needed();
            self.bq_depth += 1;
        }
        Tag::FootnoteDefinition(label) => {
            self.emit_blank_if_needed();
            self.write_bq_prefix();
            self.out.push_str("[^");
            self.out.push_str(&label);
            self.out.push_str("]: ");
        }
        Tag::Table(alignments) => {
            self.emit_blank_if_needed();
            self.table_alignments = alignments;
            self.table_head_cells = Vec::new();
            self.table_data_rows = Vec::new();
            self.current_row_cells = Vec::new();
            self.in_table_head = false;
        }
        Tag::TableHead => self.in_table_head = true,
        Tag::TableRow => self.current_row_cells = Vec::new(),
        // Table cells need no work on open; their content is collected when
        // the cell closes.
        Tag::TableCell => {}
        _ => {}
    }
}

/// Indentation for the marker line of an item in the innermost open list.
///
/// A nested item must start in the content column of every item that encloses
/// it, so the indent is the sum of the enclosing markers' widths: 2 columns
/// for `- `, and the digit count plus 2 for `N. `. A fixed indent per level is
/// too narrow under ordered markers and lets the nested list escape its parent
/// when the output is parsed again.
fn marker_indent(&self) -> String {
    let enclosing = match self.list_starts.split_last() {
        Some((_, enclosing)) => enclosing,
        None => return String::new(),
    };
    let width: usize = enclosing
        .iter()
        .map(|start| match start {
            // The counter was already advanced past the item that encloses
            // us, so that item was numbered `next - 1`.
            Some(next) => next.saturating_sub(1).to_string().len() + 2,
            None => 2,
        })
        .sum();
    " ".repeat(width)
}
/// Handles the closing of a block or inline container.
///
/// Block-level ends move the buffered inline text (or the collected table)
/// into `out` and request a blank line before the next block; inline ends
/// append their closing delimiter to `inline`. Ends without an arm are ignored.
fn on_end(&mut self, tag: TagEnd) {
    match tag {
        TagEnd::Paragraph => {
            let text = std::mem::take(&mut self.inline);
            if self.list_depth == 0 {
                self.write_bq_prefix();
            } else if self.out.ends_with('\n') {
                // Not the first block of its list item (that one continues
                // the marker line). It needs a blank line and the item's
                // content indentation, otherwise it is read back as a
                // continuation of the previous paragraph.
                self.needs_blank = true;
                self.emit_blank_if_needed();
                self.write_bq_prefix();
                let indent = self.item_content_indent();
                self.out.push_str(&indent);
            }
            let prefix = " ".repeat(self.list_depth);
            self.flush_inline_text(&text, &prefix);
            self.needs_blank = true;
            self.in_tight_item = false;
        }
        TagEnd::Heading(level) => {
            let text = std::mem::take(&mut self.inline);
            // An ATX heading is a single line. A setext heading may span
            // several lines, so line breaks inside it are folded into spaces.
            let mut title = String::with_capacity(text.len());
            let mut lines = text.split('\n').peekable();
            while let Some(line) = lines.next() {
                if lines.peek().is_none() {
                    title.push_str(line);
                    break;
                }
                // Literal backslashes are buffered doubled, so an odd run of
                // trailing backslashes ends in a hard-break marker; drop it.
                let trailing = line.len() - line.trim_end_matches('\\').len();
                let kept = if trailing % 2 == 1 {
                    &line[..line.len() - 1]
                } else {
                    line
                };
                title.push_str(kept);
                title.push(' ');
            }
            self.write_bq_prefix();
            self.out
                .push_str(&"#".repeat(heading_to_u8(level) as usize));
            self.out.push(' ');
            self.out.push_str(&title);
            self.out.push('\n');
            self.needs_blank = true;
        }
        TagEnd::CodeBlock => {
            // The closing fence must start on its own line.
            if !self.out.ends_with('\n') {
                self.out.push('\n');
            }
            self.write_bq_prefix();
            self.out.push_str("```\n");
            self.in_code_block = false;
            self.needs_blank = true;
        }
        TagEnd::List(_) => {
            self.list_depth = self.list_depth.saturating_sub(1);
            self.list_starts.pop();
            if self.list_depth == 0 {
                // Every bullet is written as `-`, so two adjacent lists
                // would fuse; an empty HTML comment keeps them apart.
                if self.next_is_unordered_list {
                    self.out.push_str("\n<!---->\n");
                }
                self.needs_blank = true;
            }
        }
        TagEnd::Item => {
            if self.in_tight_item {
                let text = std::mem::take(&mut self.inline);
                if !text.is_empty() {
                    let prefix = " ".repeat(self.list_depth);
                    self.flush_inline_text(&text, &prefix);
                }
                self.in_tight_item = false;
            }
            // An item without content leaves only its marker on the line;
            // end the line so the next marker does not land on the same one.
            if !self.out.ends_with('\n') {
                self.out.push('\n');
            }
        }
        TagEnd::Emphasis => self.inline.push('*'),
        TagEnd::Strong => self.inline.push_str("**"),
        TagEnd::Strikethrough => self.inline.push_str("~~"),
        TagEnd::Link | TagEnd::Image => self.close_link(),
        TagEnd::BlockQuote(_) => {
            self.bq_depth = self.bq_depth.saturating_sub(1);
            self.needs_blank = true;
        }
        TagEnd::FootnoteDefinition => {
            // Flushing an empty buffer would still write an empty line.
            let text = std::mem::take(&mut self.inline);
            if !text.is_empty() {
                self.flush_inline_text(&text, "");
            }
            self.needs_blank = true;
        }
        TagEnd::TableCell => {
            // A bare `|` in the cell text would be read back as a column
            // separator.
            let cell = std::mem::take(&mut self.inline).replace('|', "\\|");
            self.current_row_cells.push(cell);
        }
        TagEnd::TableHead => {
            if self.table_head_cells.is_empty() {
                self.table_head_cells = std::mem::take(&mut self.current_row_cells);
            }
            self.in_table_head = false;
        }
        TagEnd::TableRow => {
            let row = std::mem::take(&mut self.current_row_cells);
            if self.in_table_head {
                self.table_head_cells = row;
            } else {
                self.table_data_rows.push(row);
            }
        }
        TagEnd::Table => {
            let head = std::mem::take(&mut self.table_head_cells);
            let rows = std::mem::take(&mut self.table_data_rows);
            let aligns = std::mem::take(&mut self.table_alignments);
            let separators: Vec<&str> = aligns
                .iter()
                .map(|a| match a {
                    Alignment::Left => ":---",
                    Alignment::Right => "---:",
                    Alignment::Center => ":---:",
                    Alignment::None => "---",
                })
                .collect();
            // Header, delimiter row, then the body rows, all written as
            // `| a | b |`.
            let mut table_lines = Vec::with_capacity(rows.len() + 2);
            table_lines.push(head.join(" | "));
            table_lines.push(separators.join(" | "));
            table_lines.extend(rows.iter().map(|row| row.join(" | ")));
            for line in &table_lines {
                self.write_bq_prefix();
                self.out.push_str("| ");
                self.out.push_str(line);
                self.out.push_str(" |\n");
            }
            self.needs_blank = true;
        }
        _ => {}
    }
}

/// Closes the innermost open link or image by appending `](dest)` or
/// `](dest "title")` to `inline`. Does nothing when no link is open.
///
/// The destination is written bare when that is safe and in `<...>` when it
/// contains whitespace or control characters, starts with `<`, or has
/// unbalanced parentheses. Quotes and backslashes in the title are escaped so
/// the title cannot end early.
fn close_link(&mut self) {
    let (dest, title) = match self.link_stack.pop() {
        Some(entry) => entry,
        None => return,
    };
    let mut open_parens = 0usize;
    let mut balanced = true;
    for ch in dest.chars() {
        match ch {
            '(' => open_parens += 1,
            ')' if open_parens == 0 => {
                balanced = false;
                break;
            }
            ')' => open_parens -= 1,
            _ => {}
        }
    }
    let needs_angle_brackets = !balanced
        || open_parens != 0
        || dest.starts_with('<')
        || dest.chars().any(|ch| ch.is_whitespace() || ch.is_control());

    self.inline.push_str("](");
    if needs_angle_brackets {
        self.inline.push('<');
        for ch in dest.chars() {
            if matches!(ch, '<' | '>' | '\\') {
                self.inline.push('\\');
            }
            self.inline.push(ch);
        }
        self.inline.push('>');
    } else {
        self.inline.push_str(&dest);
    }
    if !title.is_empty() {
        self.inline.push_str(" \"");
        for ch in title.chars() {
            if matches!(ch, '"' | '\\') {
                self.inline.push('\\');
            }
            self.inline.push(ch);
        }
        self.inline.push('"');
    }
    self.inline.push(')');
}

/// Indentation that places a line in the content column of the innermost open
/// list item: the summed marker widths of all open lists (2 columns for `- `,
/// digit count plus 2 for `N. `).
fn item_content_indent(&self) -> String {
    let width: usize = self
        .list_starts
        .iter()
        .map(|start| match start {
            // The counter already points at the next item, so the current
            // item was numbered `next - 1`.
            Some(next) => next.saturating_sub(1).to_string().len() + 2,
            None => 2,
        })
        .sum();
    " ".repeat(width)
}
/// Records a run of text.
///
/// Inside a code block the text is copied to `out` unchanged. Anywhere else it
/// is appended to `inline` with every backslash doubled.
fn on_text(&mut self, text: &str) {
    if self.in_code_block {
        self.out.push_str(text);
        return;
    }
    // Splitting on the backslash and re-joining with a doubled one rewrites
    // each backslash to two and leaves every other character untouched.
    let mut pieces = text.split('\\');
    if let Some(head) = pieces.next() {
        self.inline.push_str(head);
    }
    for piece in pieces {
        self.inline.push_str("\\\\");
        self.inline.push_str(piece);
    }
}
/// Writes the pending separator line, if one was requested, and clears the
/// request.
///
/// Nothing is written while `out` is still empty. Inside block quotes the
/// separator line consists of one `>` per open quote.
fn emit_blank_if_needed(&mut self) {
    let pending = std::mem::replace(&mut self.needs_blank, false);
    if !pending || self.out.is_empty() {
        return;
    }
    for _ in 0..self.bq_depth {
        self.out.push('>');
    }
    self.out.push('\n');
}
/// Appends one `> ` to `out` for every block quote that is currently open.
fn write_bq_prefix(&mut self) {
    let prefix = "> ".repeat(self.bq_depth);
    self.out.push_str(&prefix);
}
fn flush_inline_text(&mut self, text: &str, continuation_prefix: &str) {
let bq = "> ".repeat(self.bq_depth);
let mut lines = text.split('\n').peekable();
if let Some(first) = lines.next() {
if self.bq_depth > 0 && (self.out.ends_with('\n') || self.out.is_empty()) {
self.out.push_str(&bq);
}
if first.starts_with('>') {
self.out.push('\\');
}
self.out.push_str(first);
self.out.push('\n');
}
while let Some(line) = lines.next() {
if lines.peek().is_none() && line.is_empty() {
break;
}
self.out.push_str(continuation_prefix);
self.out.push_str(&bq);
if line.starts_with('>') {
self.out.push('\\');
}
self.out.push_str(line);
self.out.push('\n');
}
}
/// Consumes the state and returns the finished document.
///
/// Trailing whitespace is removed from every line and trailing newlines are
/// collapsed so that non-empty output ends in exactly one newline. Output
/// without any content yields an empty string.
fn finish(mut self) -> String {
    let raw = std::mem::take(&mut self.out);
    let mut cleaned = String::with_capacity(raw.len());
    for (idx, line) in raw.lines().enumerate() {
        if idx > 0 {
            cleaned.push('\n');
        }
        cleaned.push_str(line.trim_end());
    }
    let body = cleaned.trim_end_matches('\n');
    if body.is_empty() {
        String::new()
    } else {
        format!("{}\n", body)
    }
}
}
fn heading_to_u8(level: HeadingLevel) -> u8 {
match level {
HeadingLevel::H1 => 1,
HeadingLevel::H2 => 2,
HeadingLevel::H3 => 3,
HeadingLevel::H4 => 4,
HeadingLevel::H5 => 5,
HeadingLevel::H6 => 6,
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that formatting `input` produces exactly `expected`.
    #[track_caller]
    fn assert_formats_to(input: &str, expected: &str) {
        assert_eq!(format(input), expected);
    }

    /// Asserts that formatting the formatted form of `input` changes nothing.
    #[track_caller]
    fn assert_idempotent(input: &str) {
        let once = format(input);
        let twice = format(&once);
        assert_eq!(once, twice);
    }

    // ---- Basic blocks and inlines ----

    #[test]
    fn test_empty_input() {
        assert_formats_to("", "");
        assert_formats_to(" ", "");
        assert_formats_to("\n\n", "");
    }

    #[test]
    fn test_simple_paragraph() {
        assert_formats_to("Hello, world.", "Hello, world.\n");
    }

    #[test]
    fn test_atx_heading() {
        assert_formats_to("# Heading 1", "# Heading 1\n");
        assert_formats_to("## Heading 2", "## Heading 2\n");
        assert_formats_to("###### Heading 6", "###### Heading 6\n");
    }

    #[test]
    fn test_heading_and_paragraph() {
        assert_formats_to("# Title\n\nSome text.", "# Title\n\nSome text.\n");
    }

    #[test]
    fn test_multiple_paragraphs() {
        assert_formats_to(
            "First paragraph.\n\nSecond paragraph.",
            "First paragraph.\n\nSecond paragraph.\n",
        );
    }

    #[test]
    fn test_fenced_code_block() {
        assert_formats_to("```rust\nlet x = 1;\n```", "```rust\nlet x = 1;\n```\n");
    }

    #[test]
    fn test_code_block_no_lang() {
        assert_formats_to("```\ncode here\n```", "```\ncode here\n```\n");
    }

    #[test]
    fn test_horizontal_rule() {
        for rule in ["---", "***", "___"] {
            assert_formats_to(rule, "---\n");
        }
    }

    #[test]
    fn test_unordered_list() {
        assert_formats_to(
            "- Item 1\n- Item 2\n- Item 3",
            "- Item 1\n- Item 2\n- Item 3\n",
        );
    }

    #[test]
    fn test_ordered_list() {
        assert_formats_to(
            "1. First\n2. Second\n3. Third",
            "1. First\n2. Second\n3. Third\n",
        );
    }

    #[test]
    fn test_bold_italic_inline() {
        assert_formats_to("**bold** and *italic*", "**bold** and *italic*\n");
    }

    #[test]
    fn test_inline_code() {
        assert_formats_to("Use `foo()` here.", "Use `foo()` here.\n");
    }

    #[test]
    fn test_link() {
        assert_formats_to(
            "[text](https://example.com)",
            "[text](https://example.com)\n",
        );
    }

    #[test]
    fn test_image() {
        assert_formats_to("![alt](image.png)", "![alt](image.png)\n");
    }

    // ---- Blank-line handling ----

    #[test]
    fn test_blank_line_between_heading_and_code() {
        assert_formats_to(
            "# Heading\n\n```\ncode\n```",
            "# Heading\n\n```\ncode\n```\n",
        );
    }

    #[test]
    fn test_blank_line_between_list_and_paragraph() {
        assert_formats_to("- item\n\nAfter list.", "- item\n\nAfter list.\n");
    }

    #[test]
    fn test_trailing_newline_normalised() {
        assert_formats_to("text\n\n\n", "text\n");
        assert_formats_to("text", "text\n");
    }

    #[test]
    fn test_nested_list() {
        assert_formats_to(
            "- Item 1\n - Nested\n- Item 2",
            "- Item 1\n - Nested\n- Item 2\n",
        );
    }

    #[test]
    fn test_strikethrough() {
        assert_formats_to("~~struck~~", "~~struck~~\n");
    }

    // ---- Normalisation of alternative syntaxes ----

    #[test]
    fn test_setext_h1_to_atx() {
        assert_formats_to("Heading 1\n=========", "# Heading 1\n");
    }

    #[test]
    fn test_setext_h2_to_atx() {
        assert_formats_to("Heading 2\n---------", "## Heading 2\n");
    }

    #[test]
    fn test_closed_atx_stripped() {
        assert_formats_to("## Heading ##", "## Heading\n");
        assert_formats_to("# Title #", "# Title\n");
    }

    #[test]
    fn test_multiple_spaces_after_hash_collapsed() {
        assert_formats_to("# Heading", "# Heading\n");
        assert_formats_to("## Wide", "## Wide\n");
    }

    #[test]
    fn test_multiple_blank_lines_collapsed() {
        assert_formats_to("First.\n\n\n\nSecond.", "First.\n\nSecond.\n");
    }

    #[test]
    fn test_asterisk_list_to_dash() {
        assert_formats_to("* Item 1\n* Item 2", "- Item 1\n- Item 2\n");
    }

    #[test]
    fn test_plus_list_to_dash() {
        assert_formats_to("+ Item 1\n+ Item 2", "- Item 1\n- Item 2\n");
    }

    #[test]
    fn test_underscore_italic_to_asterisk() {
        assert_formats_to("_italic_", "*italic*\n");
    }

    #[test]
    fn test_double_underscore_bold_to_asterisk() {
        assert_formats_to("__bold__", "**bold**\n");
    }

    #[test]
    fn test_tilde_fence_to_backtick() {
        assert_formats_to("~~~rust\ncode\n~~~", "```rust\ncode\n```\n");
    }

    #[test]
    fn test_tilde_fence_no_lang() {
        assert_formats_to("~~~\ncode\n~~~", "```\ncode\n```\n");
    }

    #[test]
    fn test_all_hr_styles_to_dashes() {
        for rule in ["***", "___", "* * *", "- - -", "_ _ _"] {
            assert_formats_to(rule, "---\n");
        }
    }

    // ---- Tables ----

    #[test]
    fn test_simple_table() {
        assert_formats_to(
            "| A | B |\n| --- | --- |\n| 1 | 2 |\n| 3 | 4 |\n",
            "| A | B |\n| --- | --- |\n| 1 | 2 |\n| 3 | 4 |\n",
        );
    }

    #[test]
    fn test_table_no_leading_pipes() {
        assert_formats_to(
            "A | B\n--- | ---\n1 | 2\n",
            "| A | B |\n| --- | --- |\n| 1 | 2 |\n",
        );
    }

    #[test]
    fn test_table_idempotent() {
        assert_idempotent("| A | B |\n| --- | --- |\n| 1 | 2 |\n");
    }

    #[test]
    fn test_table_with_inline_formatting() {
        assert_formats_to(
            "| **bold** | `code` |\n| --- | --- |\n| *em* | plain |\n",
            "| **bold** | `code` |\n| --- | --- |\n| *em* | plain |\n",
        );
    }

    #[test]
    fn test_table_followed_by_paragraph() {
        assert_formats_to(
            "| A | B |\n| --- | --- |\n| 1 | 2 |\n\nSome text.\n",
            "| A | B |\n| --- | --- |\n| 1 | 2 |\n\nSome text.\n",
        );
    }

    // ---- Idempotence ----

    #[test]
    fn test_idempotent_paragraph() {
        assert_idempotent("Hello, world.");
    }

    #[test]
    fn test_idempotent_headings() {
        assert_idempotent("# H1\n\n## H2");
    }

    #[test]
    fn test_idempotent_list_tight() {
        assert_idempotent("* a\n* b\n* c");
    }

    #[test]
    fn test_idempotent_list_loose() {
        assert_idempotent("- a\n\n- b\n\n- c");
    }

    #[test]
    fn test_idempotent_code_block() {
        assert_idempotent("~~~python\nx = 1\n~~~");
    }

    #[test]
    fn test_idempotent_setext() {
        assert_idempotent("Title\n=====\n\nSome text.");
    }
}