use std::collections::HashMap;
use crate::detect::FormatType;
use crate::error::Result;
use crate::model::{
Block, CellAlignment, Document, HeadingLevel, Paragraph, RevisionType, Table, TextRun,
};
use super::heading_analyzer::{HeadingAnalyzer, HeadingDecision};
use super::options::{RenderOptions, RevisionHandling, SectionMarkerStyle};
/// Lookup table from a resource id to the filename used when linking it
/// (populated by `build_resource_map` from `Resource::suggested_filename`).
type ResourceMap = HashMap<String, String>;
/// Style-tagged headings whose trimmed text has more characters than this are
/// rendered as ordinary paragraphs instead of headings.
const MAX_HEADING_TEXT_LENGTH: usize = 80;
/// Characters that, when they are the first character of a style-tagged
/// heading's trimmed text, make the paragraph render as body text rather than
/// as a heading.
const LIST_MARKERS: &[char] = &[
'-', '*', '>', '※', '○', '•', '●', '◦', '◎', '□', '■', '▪', '▫', '◇', '◆', '☐', '☑', '☒', '✓', '✗',
'ㅇ', 'ㆍ', '·', '∙', '→', '←', '↔', '⇒', '⇐', '⇔', '►', '▶', '▷', '◀', '◁', '▻',
];
/// Renders `doc` as Markdown according to `options`.
///
/// If `options.heading_config` is set, heading levels are decided by the
/// heading analyzer; otherwise the standard, style-driven renderer is used.
pub fn to_markdown(doc: &Document, options: &RenderOptions) -> Result<String> {
    match &options.heading_config {
        Some(config) => to_markdown_with_analyzer(doc, options, config),
        None => to_markdown_standard(doc, options),
    }
}
fn build_resource_map(doc: &Document) -> ResourceMap {
doc.resources
.iter()
.map(|(id, resource)| (id.clone(), resource.suggested_filename(id)))
.collect()
}
/// Appends a one-line blockquote of the form `> *Label: a | b*` to `output`.
///
/// Each paragraph is rendered on its own; paragraphs that render to an empty
/// string are dropped, and nothing is written when none remain.
fn render_header_footer(
    label: &str,
    paragraphs: &[Paragraph],
    options: &RenderOptions,
    resource_map: &ResourceMap,
    output: &mut String,
) {
    let mut joined = String::new();
    for paragraph in paragraphs {
        let rendered = render_paragraph(paragraph, options, None, resource_map);
        if rendered.is_empty() {
            continue;
        }
        if !joined.is_empty() {
            joined.push_str(" | ");
        }
        joined.push_str(&rendered);
    }
    if joined.is_empty() {
        return;
    }
    output.push_str("> *");
    output.push_str(label);
    output.push_str(": ");
    output.push_str(&joined);
    output.push_str("*\n\n");
}
/// Returns `prefix` followed by the filename registered for `resource_id`,
/// falling back to the raw `resource_id` when the map has no entry for it.
fn resolve_image_path(resource_id: &str, resource_map: &ResourceMap, prefix: &str) -> String {
    let filename = match resource_map.get(resource_id) {
        Some(mapped) => mapped.as_str(),
        None => resource_id,
    };
    let mut path = String::with_capacity(prefix.len() + filename.len());
    path.push_str(prefix);
    path.push_str(filename);
    path
}
/// Builds the HTML-comment marker placed in front of a section.
///
/// Returns an empty string when `style` is `SectionMarkerStyle::None` or the
/// format is DOCX. For PPTX the unit word is `slide`, for XLSX it is `sheet`.
/// The number is 1-based (`idx + 1`), and a non-empty `name` is appended
/// after a colon.
fn section_marker(
    format: FormatType,
    style: SectionMarkerStyle,
    idx: usize,
    name: Option<&str>,
) -> String {
    if style == SectionMarkerStyle::None {
        return String::new();
    }
    let ordinal = idx + 1;
    let unit = match format {
        FormatType::Pptx => "slide",
        FormatType::Xlsx => "sheet",
        FormatType::Docx => return String::new(),
    };
    match name {
        Some(label) if !label.is_empty() => format!("<!-- {} {}: {} -->", unit, ordinal, label),
        _ => format!("<!-- {} {} -->", unit, ordinal),
    }
}
/// Renders `doc` to Markdown using each paragraph's own heading style.
///
/// Output order per section: optional section marker, `## name` (preceded by
/// a `---` rule for every section after the first), optional header, the
/// content blocks, optional footer, then speaker-style notes as a blockquote.
/// The accumulated text is optionally passed through `cleanup::clean_text`,
/// then blank lines are collapsed and the result is trimmed.
fn to_markdown_standard(doc: &Document, options: &RenderOptions) -> Result<String> {
    let mut output = String::new();
    let resource_map = build_resource_map(doc);

    if options.include_frontmatter {
        output.push_str(&render_frontmatter(doc));
    }

    for (i, section) in doc.sections.iter().enumerate() {
        let marker = section_marker(
            doc.format,
            options.section_markers,
            i,
            section.name.as_deref(),
        );
        if !marker.is_empty() {
            output.push_str(&marker);
            output.push_str("\n\n");
        }

        if let Some(ref name) = section.name {
            if i > 0 {
                output.push_str("\n---\n\n");
            }
            output.push_str(&format!("## {}\n\n", name));
        }

        if options.include_headers_footers {
            if let Some(ref header) = section.header {
                render_header_footer("Header", header, options, &resource_map, &mut output);
            }
        }

        let blocks = section.content.as_slice();
        for (block_idx, block) in blocks.iter().enumerate() {
            match block {
                Block::Paragraph(para) => {
                    let md = render_paragraph(para, options, None, &resource_map);
                    if !md.is_empty() || options.include_empty_paragraphs {
                        output.push_str(&md);
                        // Consecutive list items stay "tight": a single newline
                        // between them even when paragraph spacing is enabled.
                        let tight = para.list_info.is_some()
                            && next_block_continues_list(blocks, block_idx);
                        if !tight && options.paragraph_spacing {
                            output.push_str("\n\n");
                        } else {
                            output.push('\n');
                        }
                    }
                }
                Block::Table(table) => {
                    output.push_str(&render_table(table, options, &resource_map));
                    output.push_str("\n\n");
                }
                Block::PageBreak => {
                    if options.emit_page_breaks {
                        output.push_str("\n---\n\n");
                    }
                }
                Block::SectionBreak => {
                    output.push_str("\n---\n\n");
                }
                Block::Image {
                    resource_id,
                    alt_text,
                    ..
                } => {
                    let alt = alt_text.as_deref().unwrap_or("image");
                    let path =
                        resolve_image_path(resource_id, &resource_map, &options.image_path_prefix);
                    // Standard Markdown image syntax: ![alt](path)
                    output.push_str(&format!("![{}]({})\n\n", alt, path));
                }
            }
        }

        if options.include_headers_footers {
            if let Some(ref footer) = section.footer {
                render_header_footer("Footer", footer, options, &resource_map, &mut output);
            }
        }

        if let Some(ref notes) = section.notes {
            if !notes.is_empty() {
                output.push_str("\n> **Notes:**\n");
                for note in notes {
                    let text = render_paragraph(note, options, None, &resource_map);
                    if !text.is_empty() {
                        output.push_str(&format!("> {}\n", text));
                    }
                }
                output.push('\n');
            }
        }
    }

    let processed = match options.cleanup {
        Some(ref cleanup) => super::cleanup::clean_text(&output, cleanup),
        None => output,
    };
    let result = super::cleanup::collapse_blank_lines(&processed)
        .trim()
        .to_string();
    Ok(result)
}
/// Renders `doc` to Markdown, taking heading decisions from a
/// `HeadingAnalyzer` built from `config` instead of from paragraph styles.
///
/// The analyzer's result is indexed first by section and then by the running
/// count of paragraph blocks within that section (tables, breaks and images
/// do not advance the count). A paragraph with no matching decision is
/// rendered with `None`, which makes `render_paragraph` fall back to its
/// style-based heading detection. Everything else mirrors the standard
/// renderer, including the final cleanup and blank-line collapsing.
fn to_markdown_with_analyzer(
    doc: &Document,
    options: &RenderOptions,
    config: &super::heading_analyzer::HeadingConfig,
) -> Result<String> {
    let mut analyzer = HeadingAnalyzer::new(config.clone());
    let decisions = analyzer.analyze(doc);
    let mut output = String::new();
    let resource_map = build_resource_map(doc);

    if options.include_frontmatter {
        output.push_str(&render_frontmatter(doc));
    }

    for (section_idx, section) in doc.sections.iter().enumerate() {
        let marker = section_marker(
            doc.format,
            options.section_markers,
            section_idx,
            section.name.as_deref(),
        );
        if !marker.is_empty() {
            output.push_str(&marker);
            output.push_str("\n\n");
        }

        if let Some(ref name) = section.name {
            if section_idx > 0 {
                output.push_str("\n---\n\n");
            }
            output.push_str(&format!("## {}\n\n", name));
        }

        if options.include_headers_footers {
            if let Some(ref header) = section.header {
                render_header_footer("Header", header, options, &resource_map, &mut output);
            }
        }

        let section_decisions = decisions.get(section_idx);
        let blocks = section.content.as_slice();
        // Index into `section_decisions`; advances once per paragraph block,
        // whether or not the paragraph produced any output.
        let mut para_idx = 0;
        for (block_idx, block) in blocks.iter().enumerate() {
            match block {
                Block::Paragraph(para) => {
                    let decision = section_decisions.and_then(|d| d.get(para_idx)).copied();
                    let md = render_paragraph(para, options, decision, &resource_map);
                    if !md.is_empty() || options.include_empty_paragraphs {
                        output.push_str(&md);
                        // Consecutive list items stay "tight": a single newline
                        // between them even when paragraph spacing is enabled.
                        let tight = para.list_info.is_some()
                            && next_block_continues_list(blocks, block_idx);
                        if !tight && options.paragraph_spacing {
                            output.push_str("\n\n");
                        } else {
                            output.push('\n');
                        }
                    }
                    para_idx += 1;
                }
                Block::Table(table) => {
                    output.push_str(&render_table(table, options, &resource_map));
                    output.push_str("\n\n");
                }
                Block::PageBreak => {
                    if options.emit_page_breaks {
                        output.push_str("\n---\n\n");
                    }
                }
                Block::SectionBreak => {
                    output.push_str("\n---\n\n");
                }
                Block::Image {
                    resource_id,
                    alt_text,
                    ..
                } => {
                    let alt = alt_text.as_deref().unwrap_or("image");
                    let path =
                        resolve_image_path(resource_id, &resource_map, &options.image_path_prefix);
                    // Standard Markdown image syntax: ![alt](path)
                    output.push_str(&format!("![{}]({})\n\n", alt, path));
                }
            }
        }

        if options.include_headers_footers {
            if let Some(ref footer) = section.footer {
                render_header_footer("Footer", footer, options, &resource_map, &mut output);
            }
        }

        if let Some(ref notes) = section.notes {
            if !notes.is_empty() {
                output.push_str("\n> **Notes:**\n");
                for note in notes {
                    let text = render_paragraph(note, options, None, &resource_map);
                    if !text.is_empty() {
                        output.push_str(&format!("> {}\n", text));
                    }
                }
                output.push('\n');
            }
        }
    }

    let processed = match options.cleanup {
        Some(ref cleanup) => super::cleanup::clean_text(&output, cleanup),
        None => output,
    };
    let result = super::cleanup::collapse_blank_lines(&processed)
        .trim()
        .to_string();
    Ok(result)
}
fn render_frontmatter(doc: &Document) -> String {
let mut fm = String::from("---\n");
let meta = &doc.metadata;
if let Some(ref title) = meta.title {
fm.push_str(&format!("title: \"{}\"\n", escape_yaml(title)));
}
if let Some(ref author) = meta.author {
fm.push_str(&format!("author: \"{}\"\n", escape_yaml(author)));
}
if let Some(ref subject) = meta.subject {
fm.push_str(&format!("subject: \"{}\"\n", escape_yaml(subject)));
}
if let Some(ref created) = meta.created {
fm.push_str(&format!("created: \"{}\"\n", created));
}
if let Some(ref modified) = meta.modified {
fm.push_str(&format!("modified: \"{}\"\n", modified));
}
if let Some(page_count) = meta.page_count {
let label = if doc
.sections
.first()
.and_then(|s| s.name.as_ref())
.is_some_and(|n| n.starts_with("Slide"))
{
"slides"
} else if doc
.sections
.first()
.and_then(|s| s.name.as_ref())
.is_some_and(|n| n.starts_with("Sheet"))
{
"sheets"
} else {
"pages"
};
fm.push_str(&format!("{}: {}\n", label, page_count));
}
if let Some(word_count) = meta.word_count {
fm.push_str(&format!("words: {}\n", word_count));
}
if !meta.keywords.is_empty() {
fm.push_str("keywords:\n");
for keyword in &meta.keywords {
fm.push_str(&format!(" - \"{}\"\n", escape_yaml(keyword)));
}
}
if let Some(ref app) = meta.application {
fm.push_str(&format!("application: \"{}\"\n", escape_yaml(app)));
}
fm.push_str("---\n\n");
fm
}
/// Escapes `s` for use inside a YAML double-quoted scalar.
///
/// Backslashes and double quotes are backslash-escaped. Line breaks, tabs and
/// other control characters are written as YAML escape sequences (`\n`, `\r`,
/// `\t`, `\uXXXX`) so that a value containing a newline cannot spill onto a
/// new line and be mistaken for a `---` document marker or another key.
fn escape_yaml(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    for c in s.chars() {
        match c {
            '\\' => out.push_str("\\\\"),
            '"' => out.push_str("\\\""),
            '\n' => out.push_str("\\n"),
            '\r' => out.push_str("\\r"),
            '\t' => out.push_str("\\t"),
            // Remaining control characters all fit in four hex digits.
            c if c.is_control() => out.push_str(&format!("\\u{:04X}", c as u32)),
            _ => out.push(c),
        }
    }
    out
}
fn render_paragraph(
para: &Paragraph,
options: &RenderOptions,
heading_decision: Option<HeadingDecision>,
resource_map: &ResourceMap,
) -> String {
let mut output = String::new();
let merged_para = para.with_merged_runs();
let effective_heading: Option<HeadingLevel> = if let Some(decision) = heading_decision {
match decision {
HeadingDecision::Explicit(level) | HeadingDecision::Inferred(level) => Some(level),
HeadingDecision::Demoted | HeadingDecision::None => None,
}
} else {
if merged_para.heading.is_heading() {
let plain_text = merged_para.plain_text();
let trimmed_text = plain_text.trim();
let looks_like_list_item = trimmed_text
.chars()
.next()
.is_some_and(|c| LIST_MARKERS.contains(&c));
let text_too_long = trimmed_text.chars().count() > MAX_HEADING_TEXT_LENGTH;
if !looks_like_list_item && !text_too_long {
let level = merged_para.heading.level().min(options.max_heading_level);
Some(HeadingLevel::from_number(level))
} else {
None
}
} else {
None
}
};
if let Some(level) = effective_heading {
let capped_level = level.level().min(options.max_heading_level);
if capped_level > 0 {
output.push_str(&"#".repeat(capped_level as usize));
output.push(' ');
}
}
if let Some(ref list_info) = merged_para.list_info {
let indent = " ".repeat(list_info.level as usize);
output.push_str(&indent);
match list_info.list_type {
crate::model::ListType::Bullet => {
output.push(options.list_marker);
output.push(' ');
}
crate::model::ListType::Numbered => {
let num = list_info.number.unwrap_or(1);
output.push_str(&format!("{}. ", num));
}
crate::model::ListType::None => {}
}
}
let suppress_heading_emphasis = effective_heading.is_some()
&& options.strip_redundant_emphasis_in_headings
&& all_runs_uniformly_bold(&merged_para);
let run_ctx = RunContext {
in_table_cell: false,
suppress_emphasis: suppress_heading_emphasis,
};
for (i, run) in merged_para.runs.iter().enumerate() {
let run_text = render_run(run, options, run_ctx);
if i > 0 && !run_text.is_empty() && !output.is_empty() {
let last_char = output.chars().last();
let first_char = run_text.chars().next();
if let (Some(last), Some(first)) = (last_char, first_char) {
let needs_space =
!last.is_whitespace() && !first.is_whitespace() && !is_no_space_before(first);
if needs_space {
output.push(' ');
}
}
}
output.push_str(&run_text);
}
for image in ¶.images {
if !output.is_empty() {
output.push('\n');
}
let alt = image.alt_text.as_deref().unwrap_or("image");
let path = resolve_image_path(&image.resource_id, resource_map, &options.image_path_prefix);
output.push_str(&format!("", alt, path));
}
output
}
/// Returns `true` when the block right after `idx` is a paragraph that
/// carries list information, i.e. the list continues on the next block.
fn next_block_continues_list(blocks: &[Block], idx: usize) -> bool {
    match blocks.get(idx + 1) {
        Some(Block::Paragraph(following)) => following.list_info.is_some(),
        _ => false,
    }
}
fn all_runs_uniformly_bold(para: &Paragraph) -> bool {
let mut saw_text = false;
for run in ¶.runs {
if run.text.trim().is_empty() {
continue;
}
saw_text = true;
if !run.style.bold {
return false;
}
}
saw_text
}
/// Returns `true` for closing punctuation that should attach directly to the
/// preceding text, so no separating space is inserted in front of it.
fn is_no_space_before(c: char) -> bool {
    const ATTACHED: [char; 12] = [
        '.', ',', ':', ';', '!', '?', ')', ']', '}', '"', '\'', '…',
    ];
    ATTACHED.contains(&c)
}
/// Per-call rendering flags handed to `render_run` by value.
#[derive(Debug, Clone, Copy, Default)]
struct RunContext {
/// Forwarded to `escape_markdown`, which backslash-escapes `|` only when
/// this is `true`.
in_table_cell: bool,
/// When `true`, `render_run` emits no bold/italic markers for the run even
/// if its style asks for them.
suppress_emphasis: bool,
}
/// Wraps raw text in a Markdown code span.
///
/// Backslash escapes are not honoured inside code spans, so the content is
/// left unescaped and the fence is made one backtick longer than the longest
/// backtick run inside it. When the content starts or ends with a backtick it
/// is padded with one space on each side. With `escape_pipes`, `|` is written
/// as `\|` so the span can sit inside a table cell.
fn code_span_markdown(raw: &str, escape_pipes: bool) -> String {
    let mut longest = 0usize;
    let mut current = 0usize;
    for c in raw.chars() {
        if c == '`' {
            current += 1;
            longest = longest.max(current);
        } else {
            current = 0;
        }
    }
    let fence = "`".repeat(longest + 1);
    let body = if escape_pipes {
        raw.replace('|', "\\|")
    } else {
        raw.to_string()
    };
    let pad = if body.starts_with('`') || body.ends_with('`') {
        " "
    } else {
        ""
    };
    format!("{fence}{pad}{body}{pad}{fence}")
}

/// Renders a single text run to inline Markdown.
///
/// * Runs removed by the revision policy (deleted under `AcceptAll`, inserted
///   under `RejectAll`) and runs with empty text contribute only their break
///   marker, if any.
/// * A page break (when `emit_page_breaks`) becomes a `---` rule; otherwise a
///   line break (when `preserve_line_breaks`) becomes a hard line break, i.e.
///   two trailing spaces followed by a newline.
/// * Code runs are emitted as code spans from the raw text; other runs are
///   escaped with `escape_markdown` when `escape_special_chars` is set.
/// * Wrappers are applied inside-out in this order: code, `<sup>`, `<sub>`,
///   `<u>`, strikethrough, bold/italic, hyperlink, revision markup.
fn render_run(run: &TextRun, options: &RenderOptions, ctx: RunContext) -> String {
    let break_marker = if run.page_break && options.emit_page_breaks {
        "\n\n---\n\n"
    } else if run.line_break && options.preserve_line_breaks {
        // Two trailing spaces are required for a Markdown hard line break.
        "  \n"
    } else {
        ""
    };

    let dropped_by_revision = matches!(
        (&run.revision, &options.revision_handling),
        (RevisionType::Deleted, RevisionHandling::AcceptAll)
            | (RevisionType::Inserted, RevisionHandling::RejectAll)
    );
    if dropped_by_revision || run.text.is_empty() {
        return break_marker.to_string();
    }

    let mut text = if run.style.code {
        code_span_markdown(
            &run.text,
            options.escape_special_chars && ctx.in_table_cell,
        )
    } else if options.escape_special_chars {
        escape_markdown(&run.text, ctx.in_table_cell)
    } else {
        run.text.clone()
    };

    if run.style.superscript {
        text = format!("<sup>{}</sup>", text);
    }
    if run.style.subscript {
        text = format!("<sub>{}</sub>", text);
    }
    if run.style.underline {
        text = format!("<u>{}</u>", text);
    }
    if run.style.strikethrough {
        text = format!("~~{}~~", text);
    }

    let effective_bold = run.style.bold && !ctx.suppress_emphasis;
    let effective_italic = run.style.italic && !ctx.suppress_emphasis;
    text = match (effective_bold, effective_italic) {
        (true, true) => format!("***{}***", text),
        (true, false) => format!("**{}**", text),
        (false, true) => format!("*{}*", text),
        (false, false) => text,
    };

    if let Some(ref url) = run.hyperlink {
        text = format!("[{}]({})", text, url);
    }

    match (&run.revision, &options.revision_handling) {
        (RevisionType::Deleted, RevisionHandling::ShowMarkup) => {
            text = format!("~~{}~~", text);
        }
        (RevisionType::Inserted, RevisionHandling::ShowMarkup) => {
            text = format!("<ins>{}</ins>", text);
        }
        _ => {}
    }

    text.push_str(break_marker);
    text
}
/// Backslash-escapes Markdown metacharacters in plain text.
///
/// * `\` and `` ` `` are always escaped.
/// * `|` is escaped only when `in_table_cell` is `true`.
/// * `*` and `_` are left alone when they sit at the start of the text or
///   right after whitespace / an opening delimiter, at the end of the text or
///   right before whitespace / a closing delimiter, or (underscore only)
///   between two alphanumeric characters. Otherwise they are escaped.
fn escape_markdown(s: &str, in_table_cell: bool) -> String {
    let follows_opener = |p: char| {
        matches!(p, '(' | '[' | '{' | ':' | '-' | '/' | '\\') || p.is_whitespace()
    };
    let precedes_closer = |n: char| {
        matches!(n, ')' | ']' | '}' | ':' | '-' | '/' | '\\') || n.is_whitespace()
    };

    let mut escaped = String::with_capacity(s.len());
    let mut prev: Option<char> = None;
    let mut rest = s.chars().peekable();

    while let Some(c) = rest.next() {
        let next = rest.peek().copied();
        let needs_backslash = match c {
            '\\' | '`' => true,
            '|' => in_table_cell,
            '*' | '_' => {
                let after_opener = prev.map_or(true, follows_opener);
                let before_closer = next.map_or(true, precedes_closer);
                let intra_word_underscore = c == '_'
                    && prev.is_some_and(|p| p.is_alphanumeric())
                    && next.is_some_and(|n| n.is_alphanumeric());
                !(after_opener || before_closer || intra_word_underscore)
            }
            _ => false,
        };
        if needs_backslash {
            escaped.push('\\');
        }
        escaped.push(c);
        prev = Some(c);
    }

    escaped
}
fn render_cell_content(
cell: &crate::model::Cell,
options: &RenderOptions,
resource_map: &ResourceMap,
is_header_cell: bool,
) -> String {
let mut parts = Vec::new();
for para in &cell.content {
let merged_para = para.with_merged_runs();
let mut para_text = String::new();
let suppress_emphasis = is_header_cell
&& options.strip_redundant_emphasis_in_headings
&& all_runs_uniformly_bold(&merged_para);
let ctx = RunContext {
in_table_cell: true,
suppress_emphasis,
};
for (i, run) in merged_para.runs.iter().enumerate() {
let run_text = render_run(run, options, ctx);
if i > 0 && !run_text.is_empty() && !para_text.is_empty() {
let last_char = para_text.chars().last();
let first_char = run_text.chars().next();
if let (Some(last), Some(first)) = (last_char, first_char) {
let needs_space = !last.is_whitespace()
&& !first.is_whitespace()
&& !is_no_space_before(first);
if needs_space {
para_text.push(' ');
}
}
}
para_text.push_str(&run_text);
}
if !para_text.is_empty() {
parts.push(para_text);
}
for image in ¶.images {
let alt = image.alt_text.as_deref().unwrap_or("image");
let path =
resolve_image_path(&image.resource_id, resource_map, &options.image_path_prefix);
parts.push(format!("", alt, path));
}
}
let text = parts.join("<br>");
let text = if options.preserve_line_breaks {
text.replace(" \n", "<br>")
} else {
text
};
text.replace('\n', " ")
}
/// Picks the alignment to use for a cell's column.
///
/// A cell alignment other than `Left` wins outright. A `Left` cell defers to
/// its first paragraph: centred or right-aligned text maps to the matching
/// cell alignment, anything else (or no paragraph at all) stays `Left`.
fn effective_cell_alignment(cell: &crate::model::Cell) -> CellAlignment {
    match (cell.alignment, cell.content.first()) {
        (CellAlignment::Left, Some(first)) => match first.alignment {
            crate::model::TextAlignment::Center => CellAlignment::Center,
            crate::model::TextAlignment::Right => CellAlignment::Right,
            _ => CellAlignment::Left,
        },
        (CellAlignment::Left, None) => CellAlignment::Left,
        (explicit, _) => explicit,
    }
}
/// Computes one alignment per column, exactly `col_count` entries long.
///
/// Alignments come from the first non-header row, or from the first row when
/// every row is a header. Each cell contributes its alignment once per
/// spanned column; missing columns default to `Left` and extras are dropped.
fn get_column_alignments(table: &Table, col_count: usize) -> Vec<CellAlignment> {
    let source_row = table
        .rows
        .iter()
        .find(|row| !row.is_header)
        .or_else(|| table.rows.first());

    source_row
        .into_iter()
        .flat_map(|row| row.cells.iter())
        .flat_map(|cell| {
            let alignment = effective_cell_alignment(cell);
            (0..cell.col_span).map(move |_| alignment)
        })
        .chain(std::iter::repeat(CellAlignment::Left))
        .take(col_count)
        .collect()
}
/// Renders a 1x1 table whose visible text is entirely bold as a bold
/// blockquote ("callout"). Returns `None` for any other table shape or when
/// the cell has no visible text.
///
/// Each non-blank output line ends up bold exactly once. If
/// `options.strip_redundant_emphasis_in_headings` is set, the cell renderer
/// drops the runs' own bold markers and this function adds `**...**` around
/// each line. If it is not set, the runs keep their markers and the line is
/// emitted as-is, so the text is never wrapped in bold twice.
fn render_callout_blockquote(
    table: &Table,
    options: &RenderOptions,
    resource_map: &ResourceMap,
) -> Option<String> {
    let [row] = table.rows.as_slice() else {
        return None;
    };
    let [cell] = row.cells.as_slice() else {
        return None;
    };

    // Only runs with visible text decide whether the cell counts as "all bold".
    let mut visible_runs = cell
        .content
        .iter()
        .flat_map(|p| p.runs.iter())
        .filter(|r| !r.text.trim().is_empty())
        .peekable();
    if visible_runs.peek().is_none() || !visible_runs.all(|r| r.style.bold) {
        return None;
    }

    let inner = render_cell_content(cell, options, resource_map, true).replace("<br>", "\n");
    let runs_keep_own_bold = !options.strip_redundant_emphasis_in_headings;

    let mut out = String::new();
    for line in inner.lines() {
        let line = line.trim();
        if line.is_empty() {
            out.push_str(">\n");
        } else if runs_keep_own_bold {
            out.push_str("> ");
            out.push_str(line);
            out.push('\n');
        } else {
            out.push_str("> **");
            out.push_str(line);
            out.push_str("**\n");
        }
    }
    Some(out)
}
/// Renders a table as a Markdown pipe table.
///
/// Special cases handled before the pipe-table path:
/// * an empty table, or one reporting zero columns, renders as `""`;
/// * with `options.callout_blockquote`, a table accepted by
///   `render_callout_blockquote` is returned as that blockquote;
/// * a table with merged cells is rendered as HTML when the fallback option
///   is `TableFallback::Html`.
///
/// In the pipe table, a short first row is padded on the left (the first
/// placeholder cell holds `#`), later short rows are padded on the right, and
/// the alignment separator follows the first row. Nested tables are rendered
/// after the outer table, each preceded by a blank line.
fn render_table(table: &Table, options: &RenderOptions, resource_map: &ResourceMap) -> String {
    if table.is_empty() {
        return String::new();
    }
    if options.callout_blockquote {
        if let Some(quote) = render_callout_blockquote(table, options, resource_map) {
            return quote;
        }
    }
    let use_html =
        table.has_merged_cells() && matches!(options.table_fallback, super::TableFallback::Html);
    if use_html {
        return render_table_html(table);
    }

    let col_count = table.column_count();
    if col_count == 0 {
        return String::new();
    }

    let mut markdown = String::new();
    let mut deferred: Vec<&Table> = Vec::new();

    for (row_idx, row) in table.rows.iter().enumerate() {
        let is_first = row_idx == 0;
        let shortfall = col_count.saturating_sub(row.cells.len());

        markdown.push('|');
        if is_first {
            for slot in 0..shortfall {
                let placeholder = if slot == 0 { "#" } else { "" };
                markdown.push_str(&format!(" {} |", placeholder));
            }
        }

        let header_like = is_first || row.is_header;
        for cell in row.cells.iter() {
            let rendered = render_cell_content(cell, options, resource_map, header_like);
            markdown.push_str(&format!(" {} |", rendered));
            for nested in cell.nested_tables.iter() {
                deferred.push(nested);
            }
        }

        if !is_first {
            for _ in 0..shortfall {
                markdown.push_str(" |");
            }
        }
        markdown.push('\n');

        if is_first {
            markdown.push('|');
            for alignment in get_column_alignments(table, col_count) {
                markdown.push_str(match alignment {
                    CellAlignment::Center => " :---: |",
                    CellAlignment::Right => " ---: |",
                    CellAlignment::Left => " --- |",
                });
            }
            markdown.push('\n');
        }
    }

    for nested in deferred {
        markdown.push('\n');
        markdown.push_str(&render_table(nested, options, resource_map));
    }
    markdown
}
/// Renders a table as an HTML `<table>` element.
///
/// Cells use `<th>` when the cell or its row is flagged as a header and
/// `<td>` otherwise. `colspan` / `rowspan` attributes are written only for
/// spans greater than one. Cell text is the cell's plain text passed through
/// `escape_html`.
fn render_table_html(table: &Table) -> String {
    let mut html = String::from("<table>\n");
    for row in table.rows.iter() {
        html.push_str(" <tr>\n");
        for cell in row.cells.iter() {
            let tag = match cell.is_header || row.is_header {
                true => "th",
                false => "td",
            };
            let colspan = if cell.col_span > 1 {
                format!(" colspan=\"{}\"", cell.col_span)
            } else {
                String::new()
            };
            let rowspan = if cell.row_span > 1 {
                format!(" rowspan=\"{}\"", cell.row_span)
            } else {
                String::new()
            };
            let text = escape_html(&cell.plain_text());
            html.push_str(&format!(" <{tag}{colspan}{rowspan}>{text}</{tag}>\n"));
        }
        html.push_str(" </tr>\n");
    }
    html.push_str("</table>");
    html
}
/// Escapes the characters that are significant in HTML text content:
/// `&` becomes `&amp;`, `<` becomes `&lt;` and `>` becomes `&gt;`.
///
/// The input is scanned once, so an ampersand that is already part of an
/// entity in the input is escaped like any other ampersand.
fn escape_html(text: &str) -> String {
    let mut escaped = String::with_capacity(text.len());
    for c in text.chars() {
        match c {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            _ => escaped.push(c),
        }
    }
    escaped
}
#[cfg(test)]
mod tests {
use super::*;
use crate::detect::FormatType;
use crate::model::{Cell, HeadingLevel, RevisionType, Row, Section, TextStyle};
use crate::render::options::SectionMarkerStyle;
fn two_section_doc(format: FormatType, names: [&str; 2]) -> Document {
let mut doc = Document::new();
doc.format = format;
let mut s0 = Section::new(0);
s0.name = Some(names[0].to_string());
let mut s1 = Section::new(1);
s1.name = Some(names[1].to_string());
doc.sections.push(s0);
doc.sections.push(s1);
doc
}
#[test]
fn test_pptx_section_markers_comment() {
let doc = two_section_doc(FormatType::Pptx, ["Introduction", "Conclusion"]);
let opts = RenderOptions::new().with_section_markers(SectionMarkerStyle::Comment);
let md = to_markdown(&doc, &opts).unwrap();
assert!(
md.contains("<!-- slide 1: Introduction -->"),
"slide 1 marker missing\n{}",
md
);
assert!(
md.contains("<!-- slide 2: Conclusion -->"),
"slide 2 marker missing\n{}",
md
);
}
#[test]
fn test_pptx_section_markers_default_off() {
let doc = two_section_doc(FormatType::Pptx, ["Introduction", "Conclusion"]);
let opts = RenderOptions::new();
let md = to_markdown(&doc, &opts).unwrap();
assert!(
!md.contains("<!-- slide"),
"markers must be absent by default\n{}",
md
);
}
#[test]
fn test_xlsx_section_markers_comment() {
let doc = two_section_doc(FormatType::Xlsx, ["Revenue", "Costs"]);
let opts = RenderOptions::new().with_section_markers(SectionMarkerStyle::Comment);
let md = to_markdown(&doc, &opts).unwrap();
assert!(
md.contains("<!-- sheet 1: Revenue -->"),
"sheet 1 marker missing\n{}",
md
);
assert!(
md.contains("<!-- sheet 2: Costs -->"),
"sheet 2 marker missing\n{}",
md
);
}
#[test]
fn test_docx_section_markers_never_emitted() {
let doc = two_section_doc(FormatType::Docx, ["Chapter 1", "Chapter 2"]);
let opts = RenderOptions::new().with_section_markers(SectionMarkerStyle::Comment);
let md = to_markdown(&doc, &opts).unwrap();
assert!(!md.contains("<!-- "), "DOCX must not emit markers\n{}", md);
}
#[test]
fn test_pptx_nameless_section_marker() {
let mut doc = Document::new();
doc.format = FormatType::Pptx;
let mut s = Section::new(0);
s.name = None;
doc.sections.push(s);
let opts = RenderOptions::new().with_section_markers(SectionMarkerStyle::Comment);
let md = to_markdown(&doc, &opts).unwrap();
assert!(
md.contains("<!-- slide 1 -->"),
"nameless slide must use number-only marker\n{}",
md
);
}
fn empty_resource_map() -> ResourceMap {
HashMap::new()
}
#[test]
fn test_basic_paragraph() {
let para = Paragraph::with_text("Hello, World!");
let options = RenderOptions::default();
let md = render_paragraph(¶, &options, None, &empty_resource_map());
assert_eq!(md, "Hello, World!");
}
#[test]
fn test_heading() {
let para = Paragraph::heading(HeadingLevel::H2, "Title");
let options = RenderOptions::default();
let md = render_paragraph(¶, &options, None, &empty_resource_map());
assert_eq!(md, "## Title");
}
#[test]
fn test_formatted_text() {
let mut para = Paragraph::new();
para.runs.push(TextRun::styled("bold", TextStyle::bold()));
para.runs.push(TextRun::plain(" and "));
para.runs
.push(TextRun::styled("italic", TextStyle::italic()));
let options = RenderOptions::default();
let md = render_paragraph(¶, &options, None, &empty_resource_map());
assert!(md.contains("**bold**"));
assert!(md.contains("*italic*"));
}
#[test]
fn test_hyperlink() {
let mut para = Paragraph::new();
para.runs
.push(TextRun::link("click here", "https://example.com"));
let options = RenderOptions::default();
let md = render_paragraph(¶, &options, None, &empty_resource_map());
assert!(md.contains("[click here](https://example.com)"));
}
#[test]
fn test_simple_table() {
let mut table = Table::new();
let mut header = Row::header(vec![Cell::header("A"), Cell::header("B")]);
header.is_header = true;
table.add_row(header);
table.add_row(Row {
cells: vec![Cell::with_text("1"), Cell::with_text("2")],
is_header: false,
height: None,
});
let options = RenderOptions::default();
let md = render_table(&table, &options, &empty_resource_map());
assert!(md.contains("| A | B |"));
assert!(md.contains("| --- | --- |"));
assert!(md.contains("| 1 | 2 |"));
}
#[test]
fn test_document_to_markdown() {
let mut doc = Document::new();
let mut section = Section::new(0);
section.add_paragraph(Paragraph::heading(HeadingLevel::H1, "Test Document"));
section.add_paragraph(Paragraph::with_text("This is a test."));
doc.add_section(section);
let options = RenderOptions::default();
let md = to_markdown(&doc, &options).unwrap();
assert!(md.contains("# Test Document"));
assert!(md.contains("This is a test."));
}
#[test]
fn test_escape_pipe_only_in_table_cells() {
assert_eq!(
escape_markdown("v1.0 | 2026-04-27", false),
"v1.0 | 2026-04-27"
);
assert_eq!(escape_markdown("a | b", true), "a \\| b");
}
#[test]
fn test_escape_intra_word_underscore_not_escaped() {
assert_eq!(
escape_markdown("YESUNG_OMS_backup_2026", false),
"YESUNG_OMS_backup_2026"
);
assert_eq!(escape_markdown("in_house", false), "in_house");
assert_eq!(escape_markdown("a _foo_ b", false), "a _foo_ b");
}
#[test]
fn test_escape_backslash_and_backtick_always() {
assert_eq!(escape_markdown("a`b\\c", false), "a\\`b\\\\c");
}
#[test]
fn test_escape_star_intra_word_still_escaped() {
assert_eq!(escape_markdown("foo*bar*baz", false), "foo\\*bar\\*baz");
}
#[test]
fn test_heading_strips_uniform_bold_artifact() {
let mut para = Paragraph::heading(HeadingLevel::H1, "");
para.runs.push(TextRun::styled("Title", TextStyle::bold()));
let options = RenderOptions::default();
let md = render_paragraph(¶, &options, None, &empty_resource_map());
assert_eq!(md, "# Title", "heading bold should be stripped, got {md:?}");
}
#[test]
fn test_heading_preserves_partial_bold_intent() {
let mut para = Paragraph::heading(HeadingLevel::H2, "");
para.runs.push(TextRun::plain("Section 2: "));
para.runs
.push(TextRun::styled("Required", TextStyle::bold()));
let options = RenderOptions::default();
let md = render_paragraph(¶, &options, None, &empty_resource_map());
assert!(
md.contains("**Required**"),
"partial bold must be preserved, got {md:?}"
);
assert!(md.starts_with("## "));
}
#[test]
fn test_cell_alignment_falls_back_to_paragraph_alignment() {
use crate::model::TextAlignment;
let mut table = Table::new();
table.add_row(Row::header(vec![
Cell::header("L"),
Cell::header("C"),
Cell::header("R"),
]));
let mut data_row = Row {
cells: vec![
Cell::with_text("left-text"),
Cell::with_text("center-text"),
Cell::with_text("right-text"),
],
is_header: false,
height: None,
};
data_row.cells[1].content[0].alignment = TextAlignment::Center;
data_row.cells[2].content[0].alignment = TextAlignment::Right;
table.add_row(data_row);
let md = render_table(&table, &RenderOptions::default(), &empty_resource_map());
assert!(md.contains("| --- | :---: | ---: |"), "got {md:?}");
}
#[test]
fn test_callout_blockquote_when_enabled() {
let mut table = Table::new();
let mut para = Paragraph::new();
para.runs
.push(TextRun::styled("Important note", TextStyle::bold()));
let cell = Cell {
content: vec![para],
..Cell::with_text("")
};
table.add_row(Row {
cells: vec![cell],
is_header: false,
height: None,
});
let options = RenderOptions::default().with_callout_blockquote(true);
let md = render_table(&table, &options, &empty_resource_map());
assert!(md.starts_with("> **Important note**"), "got {md:?}");
assert!(!md.contains("|"), "should not render as table: {md:?}");
}
#[test]
fn test_callout_blockquote_off_by_default() {
let mut table = Table::new();
let mut para = Paragraph::new();
para.runs.push(TextRun::styled("X", TextStyle::bold()));
let cell = Cell {
content: vec![para],
..Cell::with_text("")
};
table.add_row(Row {
cells: vec![cell],
is_header: false,
height: None,
});
let md = render_table(&table, &RenderOptions::default(), &empty_resource_map());
assert!(md.contains("|"), "default should keep table form: {md:?}");
}
#[test]
fn test_page_break_default_off() {
let mut doc = Document::new();
let mut section = Section::new(0);
section.add_paragraph(Paragraph::with_text("Before"));
section.content.push(Block::PageBreak);
section.add_paragraph(Paragraph::with_text("After"));
doc.add_section(section);
let md = to_markdown(&doc, &RenderOptions::default()).unwrap();
assert!(
!md.contains("---"),
"page break should not emit ---: {md:?}"
);
let md_lossless = to_markdown(&doc, &RenderOptions::lossless()).unwrap();
assert!(
md_lossless.contains("---"),
"lossless preset should emit ---"
);
}
#[test]
fn test_headers_footers_default_off() {
let mut doc = Document::new();
let mut section = Section::new(0);
section.header = Some(vec![Paragraph::with_text("Page header text")]);
section.footer = Some(vec![Paragraph::with_text("Page footer text")]);
section.add_paragraph(Paragraph::with_text("Body."));
doc.add_section(section);
let md = to_markdown(&doc, &RenderOptions::default()).unwrap();
assert!(!md.contains("Page header text"), "got {md:?}");
assert!(!md.contains("Page footer text"), "got {md:?}");
}
#[test]
fn test_consecutive_list_items_are_tight() {
use crate::model::{ListInfo, ListType};
let mut doc = Document::new();
let mut section = Section::new(0);
let mk_item = |text: &str| -> Paragraph {
let mut p = Paragraph::with_text(text);
p.list_info = Some(ListInfo {
list_type: ListType::Bullet,
level: 0,
number: None,
});
p
};
section.add_paragraph(mk_item("Alpha"));
section.add_paragraph(mk_item("Bravo"));
section.add_paragraph(mk_item("Charlie"));
section.add_paragraph(Paragraph::with_text("After list."));
doc.add_section(section);
let options = RenderOptions::default();
let md = to_markdown(&doc, &options).unwrap();
assert!(
md.contains("- Alpha\n- Bravo\n- Charlie"),
"expected tight list, got {md:?}"
);
assert!(md.contains("- Charlie\n\nAfter list."), "got {md:?}");
}
#[test]
fn test_blank_lines_collapsed_without_cleanup() {
let mut doc = Document::new();
let mut section = Section::new(0);
section.add_paragraph(Paragraph::with_text("Before break."));
section.content.push(Block::PageBreak);
section.add_paragraph(Paragraph::with_text("After break."));
doc.add_section(section);
let options = RenderOptions::lossless();
assert!(options.cleanup.is_none());
let md = to_markdown(&doc, &options).unwrap();
assert!(
!md.contains("\n\n\n"),
"output must not contain 3+ consecutive newlines: {:?}",
md
);
assert!(md.contains("Before break."));
assert!(md.contains("After break."));
assert!(md.contains("---"));
}
#[test]
fn test_frontmatter() {
let mut doc = Document::new();
doc.metadata.title = Some("Test Title".to_string());
doc.metadata.author = Some("Test Author".to_string());
let options = RenderOptions::new().with_frontmatter(true);
let md = to_markdown(&doc, &options).unwrap();
assert!(md.starts_with("---\n"));
assert!(md.contains("title: \"Test Title\""));
assert!(md.contains("author: \"Test Author\""));
}
#[test]
fn test_korean_bullet_marker_not_heading() {
let para = Paragraph::heading(HeadingLevel::H2, "ㅇ항목 내용입니다");
let options = RenderOptions::default();
let md = render_paragraph(¶, &options, None, &empty_resource_map());
assert!(
!md.contains("##"),
"Korean bullet marker should not be heading: {}",
md
);
assert!(
md.contains("ㅇ항목"),
"Content should still be present: {}",
md
);
}
/// A heading-styled paragraph longer than 80 characters must be rendered
/// without `###` heading markers while its text is kept.
#[test]
fn test_long_text_not_heading() {
    let long_text = "이것은 매우 긴 문장입니다. 제목으로 사용하기에는 너무 길어서 본문으로 처리되어야 합니다. 일반적인 제목은 짧고 간결해야 하며, 본문과 구분되어야 합니다.";
    // Sanity-check the fixture itself. Length is counted in chars, not bytes,
    // because the text is multi-byte Korean.
    assert!(
        long_text.chars().count() > 80,
        "Test text should be longer than 80 chars"
    );
    let para = Paragraph::heading(HeadingLevel::H3, long_text);
    let options = RenderOptions::default();
    // Pass the paragraph by reference; the previous `¶` token was a mangled
    // `&para` and is not valid Rust.
    let md = render_paragraph(&para, &options, None, &empty_resource_map());
    assert!(
        !md.contains("###"),
        "Long text should not have heading markers: {}",
        md
    );
    assert!(
        md.contains("이것은 매우"),
        "Content should still be present: {}",
        md
    );
}
/// With default options, an H6 heading is expected to be emitted with four
/// hash marks rather than six.
#[test]
fn test_max_heading_level_capped() {
    let para = Paragraph::heading(HeadingLevel::H6, "Deep Heading");
    let options = RenderOptions::default();
    // Pass the paragraph by reference; the previous `¶` token was a mangled
    // `&para` and is not valid Rust.
    let md = render_paragraph(&para, &options, None, &empty_resource_map());
    assert!(
        md.contains("#### Deep Heading"),
        "Heading level 6 should be capped to 4: {}",
        md
    );
    // "####" is a prefix of "######", so the uncapped form is excluded explicitly.
    assert!(
        !md.contains("######"),
        "Should not have 6 hash marks: {}",
        md
    );
}
/// A heading-styled paragraph whose text starts with the arrow `→` (one of
/// the entries in `LIST_MARKERS`) must be rendered without `##` heading markers.
#[test]
fn test_arrow_marker_not_heading() {
    let para = Paragraph::heading(HeadingLevel::H2, "→ 다음 단계로 이동");
    let options = RenderOptions::default();
    // Pass the paragraph by reference; the previous `¶` token was a mangled
    // `&para` and is not valid Rust.
    let md = render_paragraph(&para, &options, None, &empty_resource_map());
    assert!(
        !md.contains("##"),
        "Arrow marker should not be heading: {}",
        md
    );
}
/// A bold run inside a body cell must keep its `**…**` markers when the table
/// is rendered.
#[test]
fn test_table_cell_with_bold_text() {
    // Body cell content: one paragraph holding a single bold run.
    let mut bold_para = Paragraph::new();
    let bold_run = TextRun::styled("ClusterPlex v5.0", TextStyle::bold());
    bold_para.runs.push(bold_run);

    let body_row = Row {
        cells: vec![Cell {
            content: vec![bold_para],
            nested_tables: Vec::new(),
            col_span: 1,
            row_span: 1,
            alignment: CellAlignment::Left,
            vertical_alignment: crate::model::VerticalAlignment::Top,
            is_header: false,
            background: None,
        }],
        is_header: false,
        height: None,
    };

    let mut table = Table::new();
    table.add_row(Row::header(vec![Cell::header("Header")]));
    table.add_row(body_row);

    let rendered = render_table(&table, &RenderOptions::default(), &empty_resource_map());
    assert!(
        rendered.contains("**ClusterPlex v5.0**"),
        "Expected bold formatting, got: {}",
        rendered
    );
}
/// An italic run inside a body cell must keep its `*…*` markers when the
/// table is rendered.
#[test]
fn test_table_cell_with_italic_text() {
    // Body cell content: one paragraph holding a single italic run.
    let mut italic_para = Paragraph::new();
    let italic_run = TextRun::styled("emphasis", TextStyle::italic());
    italic_para.runs.push(italic_run);

    let body_row = Row {
        cells: vec![Cell {
            content: vec![italic_para],
            nested_tables: Vec::new(),
            col_span: 1,
            row_span: 1,
            alignment: CellAlignment::Left,
            vertical_alignment: crate::model::VerticalAlignment::Top,
            is_header: false,
            background: None,
        }],
        is_header: false,
        height: None,
    };

    let mut table = Table::new();
    table.add_row(Row::header(vec![Cell::header("Header")]));
    table.add_row(body_row);

    let rendered = render_table(&table, &RenderOptions::default(), &empty_resource_map());
    assert!(
        rendered.contains("*emphasis*"),
        "Expected italic formatting, got: {}",
        rendered
    );
}
/// Two paragraphs in one body cell must be separated by `<br>` in the
/// rendered table, with the text of both preserved.
#[test]
fn test_table_cell_with_multiple_paragraphs() {
    // A single body cell carrying two numbered steps as separate paragraphs.
    let steps = vec![
        Paragraph::with_text("1. Active 서버 어댑터 Disable"),
        Paragraph::with_text("2. Standby 서버 어댑터 Enable"),
    ];
    let body_row = Row {
        cells: vec![Cell {
            content: steps,
            nested_tables: Vec::new(),
            col_span: 1,
            row_span: 1,
            alignment: CellAlignment::Left,
            vertical_alignment: crate::model::VerticalAlignment::Top,
            is_header: false,
            background: None,
        }],
        is_header: false,
        height: None,
    };

    let mut table = Table::new();
    table.add_row(Row::header(vec![Cell::header("Steps")]));
    table.add_row(body_row);

    let rendered = render_table(&table, &RenderOptions::default(), &empty_resource_map());
    assert!(
        rendered.contains("<br>"),
        "Expected <br> separator between paragraphs, got: {}",
        rendered
    );
    assert!(
        rendered.contains("1. Active"),
        "Expected first paragraph content, got: {}",
        rendered
    );
    assert!(
        rendered.contains("2. Standby"),
        "Expected second paragraph content, got: {}",
        rendered
    );
}
/// A row mixing a bold cell and a plain cell must render the bold markers for
/// the first and the untouched text for the second.
#[test]
fn test_table_cell_with_mixed_formatting() {
    // Both body cells share every attribute except their content.
    let body_cell = |para: Paragraph| Cell {
        content: vec![para],
        nested_tables: Vec::new(),
        col_span: 1,
        row_span: 1,
        alignment: CellAlignment::Left,
        vertical_alignment: crate::model::VerticalAlignment::Top,
        is_header: false,
        background: None,
    };

    let mut table = Table::new();
    table.add_row(Row::header(vec![
        Cell::header("OS"),
        Cell::header("리소스 타입"),
    ]));

    let mut bold_para = Paragraph::new();
    bold_para.runs.push(TextRun::styled("OS", TextStyle::bold()));
    let mut plain_para = Paragraph::new();
    plain_para.runs.push(TextRun::plain("Linux"));

    table.add_row(Row {
        cells: vec![body_cell(bold_para), body_cell(plain_para)],
        is_header: false,
        height: None,
    });

    let rendered = render_table(&table, &RenderOptions::default(), &empty_resource_map());
    assert!(
        rendered.contains("**OS**"),
        "Expected bold OS, got: {}",
        rendered
    );
    assert!(
        rendered.contains("Linux"),
        "Expected Linux text, got: {}",
        rendered
    );
}
/// A run flagged with `line_break` must only produce a Markdown line break
/// when the options are built with `with_preserve_breaks(true)`.
#[test]
fn test_line_break_rendering() {
    let mut para = Paragraph::new();
    para.runs.push(TextRun {
        text: "First line".to_string(),
        style: TextStyle::default(),
        hyperlink: None,
        line_break: true,
        page_break: false,
        revision: RevisionType::None,
    });
    para.runs.push(TextRun::plain("Second line"));

    // NOTE(review): CommonMark hard breaks are two trailing spaces before the
    // newline; confirm the whitespace in the literals below matches what the
    // renderer actually emits.

    // Default options: the break flag must not surface as a line break.
    let options = RenderOptions::default();
    // Pass the paragraph by reference; the previous `¶` token was a mangled
    // `&para` and is not valid Rust.
    let md = render_paragraph(&para, &options, None, &empty_resource_map());
    assert!(
        !md.contains(" \n"),
        "Should not contain line break when preserve_line_breaks is false: {}",
        md
    );

    // Break preservation enabled: the break must follow the first run.
    let options_with_breaks = RenderOptions::new().with_preserve_breaks(true);
    let md_with_breaks =
        render_paragraph(&para, &options_with_breaks, None, &empty_resource_map());
    assert!(
        md_with_breaks.contains("First line \n"),
        "Should contain Markdown line break: {}",
        md_with_breaks
    );
    assert!(
        md_with_breaks.contains("Second line"),
        "Should contain second line: {}",
        md_with_breaks
    );
}
/// Inside a table cell, a run flagged with `line_break` must be rendered as
/// `<br>` when break preservation is enabled.
#[test]
fn test_table_cell_line_break_rendering_with_preserve_breaks() {
    // Paragraph with an in-paragraph break between its two runs.
    let broken_para = Paragraph {
        runs: vec![
            TextRun {
                text: "First line".to_string(),
                line_break: true,
                ..Default::default()
            },
            TextRun::plain("Second line"),
        ],
        ..Default::default()
    };
    let notes_cell = Cell {
        content: vec![broken_para],
        nested_tables: Vec::new(),
        col_span: 1,
        row_span: 1,
        alignment: CellAlignment::Left,
        vertical_alignment: crate::model::VerticalAlignment::Top,
        is_header: false,
        background: None,
    };

    let mut table = Table::new();
    table.add_row(Row::header(vec![Cell::header("Notes")]));
    table.add_row(Row {
        cells: vec![notes_cell],
        is_header: false,
        height: None,
    });

    let preserve_breaks = RenderOptions::new().with_preserve_breaks(true);
    let md = render_table(&table, &preserve_breaks, &empty_resource_map());
    assert!(
        md.contains("First line<br>Second line"),
        "Expected preserved line break in table cell, got: {md}"
    );
}
/// When a table is rendered with the HTML fallback, `<`, `>` and `&` in cell
/// text must appear as HTML entities rather than raw characters.
#[test]
fn test_html_table_fallback_escapes_special_chars() {
    let mut table = Table::new();
    table.add_row(Row::header(vec![Cell::header("Header")]));
    table.add_row(Row {
        cells: vec![Cell {
            content: vec![Paragraph::with_text("<unsafe> & value")],
            nested_tables: Vec::new(),
            // NOTE(review): the column span of 2 presumably is what forces the
            // fallback path for this table; confirm against `render_table`.
            col_span: 2,
            row_span: 1,
            alignment: CellAlignment::Left,
            vertical_alignment: crate::model::VerticalAlignment::Top,
            is_header: false,
            background: None,
        }],
        is_header: false,
        height: None,
    });
    let options = RenderOptions::new().with_table_fallback(crate::render::TableFallback::Html);
    let md = render_table(&table, &options, &empty_resource_map());
    // Look for the escaped form: checking for the raw input string would pass
    // precisely when no escaping happened, the opposite of what is under test.
    assert!(
        md.contains("&lt;unsafe&gt; &amp; value"),
        "Expected HTML-escaped table cell content, got: {md}"
    );
}
/// Left, center and right aligned body cells must yield the `---`, `:---:`
/// and `---:` markers respectively in the rendered table.
#[test]
fn test_table_cell_alignment_rendering() {
    // The three body cells differ only in their text and horizontal alignment.
    let body_cell = |text: &'static str, alignment: CellAlignment| Cell {
        content: vec![Paragraph::with_text(text)],
        nested_tables: Vec::new(),
        col_span: 1,
        row_span: 1,
        alignment,
        vertical_alignment: crate::model::VerticalAlignment::Top,
        is_header: false,
        background: None,
    };

    let mut table = Table::new();
    table.add_row(Row::header(vec![
        Cell::header("Left"),
        Cell::header("Center"),
        Cell::header("Right"),
    ]));
    table.add_row(Row {
        cells: vec![
            body_cell("L", CellAlignment::Left),
            body_cell("C", CellAlignment::Center),
            body_cell("R", CellAlignment::Right),
        ],
        is_header: false,
        height: None,
    });

    let rendered = render_table(&table, &RenderOptions::default(), &empty_resource_map());
    assert!(
        rendered.contains("| --- |"),
        "Expected left alignment marker, got: {}",
        rendered
    );
    assert!(
        rendered.contains("| :---: |"),
        "Expected center alignment marker, got: {}",
        rendered
    );
    assert!(
        rendered.contains("| ---: |"),
        "Expected right alignment marker, got: {}",
        rendered
    );
}
/// With lossless options, a section header and footer are rendered as
/// `> *Label: text*` lines, with the header before the body and the footer
/// after it.
#[test]
fn test_render_header_footer() {
    let mut body = Section::new(0);
    body.header = Some(vec![Paragraph::with_text("My Header")]);
    body.footer = Some(vec![Paragraph::with_text("Page 1 of 10")]);
    body.add_paragraph(Paragraph::with_text("Body content"));

    let mut document = Document::new();
    document.add_section(body);

    let rendered = to_markdown(&document, &RenderOptions::lossless()).unwrap();
    assert!(
        rendered.contains("> *Header: My Header*"),
        "Expected header in output, got: {}",
        rendered
    );
    assert!(
        rendered.contains("> *Footer: Page 1 of 10*"),
        "Expected footer in output, got: {}",
        rendered
    );

    // Ordering check: header, then body, then footer, by byte offset.
    let header_at = rendered.find("> *Header:").unwrap();
    let body_at = rendered.find("Body content").unwrap();
    let footer_at = rendered.find("> *Footer:").unwrap();
    assert!(header_at < body_at, "Header should appear before body");
    assert!(body_at < footer_at, "Footer should appear after body");
}
/// A header made of several paragraphs is rendered on a single line with the
/// paragraph texts joined by " | ".
#[test]
fn test_render_header_footer_multiple_paragraphs() {
    let header_lines = vec![
        Paragraph::with_text("Company"),
        Paragraph::with_text("Department"),
    ];

    let mut body = Section::new(0);
    body.header = Some(header_lines);
    body.add_paragraph(Paragraph::with_text("Content"));

    let mut document = Document::new();
    document.add_section(body);

    let rendered = to_markdown(&document, &RenderOptions::lossless()).unwrap();
    assert!(
        rendered.contains("> *Header: Company | Department*"),
        "Multiple header paragraphs should be joined with ' | ', got: {}",
        rendered
    );
}
/// A section without header or footer must not produce any "Header:" or
/// "Footer:" text in the output.
#[test]
fn test_render_no_header_footer() {
    let mut body = Section::new(0);
    body.add_paragraph(Paragraph::with_text("Body only"));

    let mut document = Document::new();
    document.add_section(body);

    let rendered = to_markdown(&document, &RenderOptions::default()).unwrap();
    assert!(!rendered.contains("Header:"), "No header should be rendered");
    assert!(!rendered.contains("Footer:"), "No footer should be rendered");
}
}