use super::{ConversionContext, RunConverter};
use crate::render::{
escape_html_attr, escape_markdown_link_destination, escape_markdown_link_text,
};
use crate::Result;
use rs_docx::document::{Hyperlink, Paragraph, ParagraphContent};
pub struct ParagraphConverter;
#[derive(Debug, Clone, PartialEq, Default)]
struct FormattedSegment {
text: String,
is_bold: bool,
is_italic: bool,
has_underline: bool,
has_strike: bool,
is_insertion: bool,
is_deletion: bool,
is_superscript: bool,
is_subscript: bool,
is_code: bool,
anchor: Option<String>,
}
fn is_monospace_font(fonts: &rs_docx::formatting::Fonts) -> bool {
let check = |name: &Option<String>| -> bool {
name.as_ref()
.map(|n| {
let lower = n.to_lowercase();
lower.contains("courier")
|| lower.contains("consolas")
|| lower.contains("mono")
|| lower.contains("source code")
|| lower.contains("fira code")
|| lower.contains("menlo")
|| lower.contains("dejavu sans mono")
|| lower.contains("liberation mono")
|| lower.contains("andale mono")
|| lower.contains("lucida console")
})
.unwrap_or(false)
};
check(&fonts.ascii) || check(&fonts.h_ansi)
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum FieldPhase {
Instruction,
Result,
}
impl ParagraphConverter {
fn filter_run_by_field_state<'a>(
run: &rs_docx::document::Run<'a>,
field_stack: &mut Vec<FieldPhase>,
) -> rs_docx::document::Run<'a> {
let mut filtered = run.clone();
filtered.content.clear();
for content in &run.content {
match content {
rs_docx::document::RunContent::FieldChar(fc) => {
if let Some(char_type) = &fc.ty {
match char_type {
rs_docx::document::CharType::Begin => {
field_stack.push(FieldPhase::Instruction)
}
rs_docx::document::CharType::Separate => {
if let Some(last) = field_stack.last_mut() {
*last = FieldPhase::Result;
}
}
rs_docx::document::CharType::End => {
let _ = field_stack.pop();
}
}
}
}
rs_docx::document::RunContent::InstrText(_)
| rs_docx::document::RunContent::DelInstrText(_) => {}
_ => {
if field_stack.last() != Some(&FieldPhase::Instruction) {
filtered.content.push(content.clone());
}
}
}
}
filtered
}
pub fn convert<'a>(
para: &Paragraph<'a>,
context: &mut ConversionContext<'a>,
) -> Result<String> {
let segments = Self::collect_segments(para, context)?;
let merged = Self::merge_segments(segments);
let mut leading_anchors = Vec::new();
let mut content_segments = Vec::new();
let mut looking_for_anchors = true;
for seg in merged {
if looking_for_anchors && seg.text.is_empty() && seg.anchor.is_some() {
if let Some(anchor) = &seg.anchor {
leading_anchors.push(format!("<a id=\"{}\"></a>", escape_html_attr(anchor)));
}
} else {
looking_for_anchors = false;
content_segments.push(seg);
}
}
let is_heading_para = para.property.as_ref()
.and_then(|p| p.style_id.as_ref())
.and_then(|s| crate::localization::parse_heading_style(&s.value))
.is_some();
if is_heading_para {
for seg in &mut content_segments {
seg.is_bold = false;
}
}
let text = Self::segments_to_markdown(&content_segments, context);
let anchor_tags = leading_anchors.join("");
let is_effectively_empty = if context.preserve_whitespace() {
text.is_empty()
} else {
text.trim().is_empty()
};
if is_effectively_empty {
return Ok(anchor_tags);
}
let formatted_text = Self::apply_paragraph_formatting(para, text, context)?;
if !anchor_tags.is_empty() {
Ok(format!("{}\n{}", anchor_tags, formatted_text))
} else {
Ok(formatted_text)
}
}
fn collect_segments<'a>(
para: &Paragraph<'a>,
context: &mut ConversionContext<'a>,
) -> Result<Vec<FormattedSegment>> {
let mut segments = Vec::new();
let mut field_stack = Vec::new();
let para_style_id = para
.property
.as_ref()
.and_then(|p| p.style_id.as_ref())
.map(|s| s.value.as_ref());
for content in ¶.content {
match content {
ParagraphContent::Run(run) => {
let filtered_run = Self::filter_run_by_field_state(run, &mut field_stack);
if filtered_run.content.is_empty() {
continue;
}
let text = Self::extract_text(&filtered_run, context);
if !text.is_empty() {
let segs =
Self::run_to_segment(&filtered_run, &text, context, para_style_id);
segments.extend(segs);
}
}
ParagraphContent::Link(hyperlink) => {
let link_md = Self::convert_hyperlink(hyperlink, context, para_style_id)?;
if !link_md.is_empty() {
segments.push(FormattedSegment {
text: link_md,
..Default::default()
});
}
}
ParagraphContent::BookmarkStart(bookmark) => {
if let Some(name) = &bookmark.name {
segments.push(FormattedSegment {
anchor: Some(name.to_string()),
..Default::default()
});
}
}
ParagraphContent::BookmarkEnd(_) => {}
ParagraphContent::CommentRangeStart(_) => {}
ParagraphContent::CommentRangeEnd(_) => {}
ParagraphContent::SDT(sdt) => {
if let Some(sdt_content) = &sdt.content {
for bc in &sdt_content.content {
if let rs_docx::document::BodyContent::Paragraph(inner_para) = bc {
let inner_segs = Self::collect_segments(inner_para, context)?;
segments.extend(inner_segs);
}
}
}
}
ParagraphContent::Insertion(ins) => {
for run in &ins.runs {
let text = Self::extract_text(run, context);
if !text.is_empty() {
let mut segs = Self::run_to_segment(run, &text, context, para_style_id);
for seg in &mut segs {
seg.is_insertion = true;
}
segments.extend(segs);
}
}
}
ParagraphContent::Deletion(del) => {
let text = Self::extract_deleted_text(del);
if !text.is_empty() {
segments.push(FormattedSegment {
text,
is_deletion: true,
..Default::default()
});
}
}
}
}
Ok(segments)
}
fn extract_deleted_text(del: &rs_docx::document::Deletion) -> String {
let mut text = String::new();
for run in &del.runs {
for content in &run.content {
if let rs_docx::document::RunContent::DelText(del_text) = content {
text.push_str(&del_text.text);
}
}
}
text
}
fn extract_text<'a>(
run: &rs_docx::document::Run<'a>,
context: &mut ConversionContext<'a>,
) -> String {
let mut text = String::new();
for content in &run.content {
match content {
rs_docx::document::RunContent::Text(t) => {
text.push_str(&t.text);
}
rs_docx::document::RunContent::Tab(_) => {
text.push('\t');
}
rs_docx::document::RunContent::Break(br) => match br.ty {
Some(rs_docx::document::BreakType::Page) => text.push_str("\n\n---\n\n"),
_ => text.push('\n'),
},
rs_docx::document::RunContent::CarriageReturn(_) => {
text.push('\n');
}
rs_docx::document::RunContent::NoBreakHyphen(_) => {
text.push('\u{2011}');
}
rs_docx::document::RunContent::SoftHyphen(_) => {
text.push('\u{00AD}');
}
rs_docx::document::RunContent::Sym(sym) => {
if let Some(char_code) = &sym.char {
if let Ok(code) = u32::from_str_radix(char_code, 16) {
if let Some(c) = char::from_u32(code) {
text.push(c);
}
}
}
}
rs_docx::document::RunContent::PTab(_) => {
text.push('\t');
}
rs_docx::document::RunContent::LastRenderedPageBreak(_) => {
text.push_str("\n\n---\n\n");
}
rs_docx::document::RunContent::PgNum(_) => {
text.push_str("{PAGE}");
}
rs_docx::document::RunContent::Drawing(drawing) => {
if let Ok(Some(img_md)) = context.extract_image_from_drawing(drawing) {
text.push_str(&img_md);
}
}
rs_docx::document::RunContent::Pict(pict) => {
if let Ok(Some(img_md)) = context.extract_image_from_pict(pict) {
text.push_str(&img_md);
}
}
rs_docx::document::RunContent::InstrText(_) => {}
rs_docx::document::RunContent::DelInstrText(_) => {}
rs_docx::document::RunContent::CommentReference(cref) => {
if let Some(id) = &cref.id {
let marker = context.register_comment_reference(id.as_ref());
text.push_str(&marker);
}
}
rs_docx::document::RunContent::FootnoteReference(fnref) => {
if let Some(ref id_str) = fnref.id {
if let Ok(id_num) = id_str.parse::<isize>() {
let marker = context.register_footnote_reference(id_num);
text.push_str(&marker);
}
}
}
rs_docx::document::RunContent::EndnoteReference(enref) => {
if let Some(ref id_str) = enref.id {
if let Ok(id_num) = id_str.parse::<isize>() {
let marker = context.register_endnote_reference(id_num);
text.push_str(&marker);
}
}
}
rs_docx::document::RunContent::AnnotationRef(_)
| rs_docx::document::RunContent::FootnoteRef(_)
| rs_docx::document::RunContent::EndnoteRef(_)
| rs_docx::document::RunContent::Separator(_)
| rs_docx::document::RunContent::ContinuationSeparator(_) => {}
_ => {}
}
}
text
}
fn run_to_segment<'a>(
run: &rs_docx::document::Run<'a>,
text: &str,
context: &mut ConversionContext<'a>,
para_style_id: Option<&str>,
) -> Vec<FormattedSegment> {
let mut run_style_id = None;
if let Some(props) = &run.property {
if let Some(style) = &props.style_id {
run_style_id = Some(style.value.as_ref());
}
}
let props =
context.resolve_run_property(run.property.as_ref(), run_style_id, para_style_id);
let is_bold = props
.bold
.as_ref()
.map(|b| b.value.unwrap_or(true))
.unwrap_or(false);
let is_italic = props
.italics
.as_ref()
.map(|i| i.value.unwrap_or(true))
.unwrap_or(false);
let has_underline = props.underline.is_some();
let has_strike = props
.strike
.as_ref()
.map(|s| s.value.unwrap_or(true))
.unwrap_or(false);
let is_superscript = props
.vertical_align
.as_ref()
.and_then(|v| v.value.as_ref())
.map(|v| matches!(v, rs_docx::formatting::VertAlignType::Superscript))
.unwrap_or(false);
let is_subscript = props
.vertical_align
.as_ref()
.and_then(|v| v.value.as_ref())
.map(|v| matches!(v, rs_docx::formatting::VertAlignType::Subscript))
.unwrap_or(false);
let is_code = !context.is_in_table_cell()
&& props
.fonts
.as_ref()
.map(is_monospace_font)
.unwrap_or(false);
let delimiter = "\n\n---\n\n";
let parts: Vec<&str> = text.split(delimiter).collect();
let mut segments = Vec::new();
for (i, part) in parts.iter().enumerate() {
if i > 0 {
segments.push(FormattedSegment {
text: delimiter.to_string(),
is_bold: false,
is_italic: false,
has_underline: false,
has_strike: false,
is_insertion: false,
is_deletion: false,
is_superscript: false,
is_subscript: false,
is_code: false,
anchor: None,
});
}
if !part.is_empty() {
segments.push(FormattedSegment {
text: part.to_string(),
is_bold,
is_italic,
has_underline,
has_strike,
is_insertion: false,
is_deletion: false,
is_superscript,
is_subscript,
is_code,
anchor: None,
});
}
}
segments
}
fn merge_segments(segments: Vec<FormattedSegment>) -> Vec<FormattedSegment> {
let mut merged: Vec<FormattedSegment> = Vec::new();
for seg in segments {
if let Some(last) = merged.last_mut() {
if last.is_bold == seg.is_bold
&& last.is_italic == seg.is_italic
&& last.has_underline == seg.has_underline
&& last.has_strike == seg.has_strike
&& last.is_insertion == seg.is_insertion
&& last.is_deletion == seg.is_deletion
&& last.is_superscript == seg.is_superscript
&& last.is_subscript == seg.is_subscript
&& last.is_code == seg.is_code
&& last.anchor == seg.anchor
{
last.text.push_str(&seg.text);
continue;
}
}
merged.push(seg);
}
merged
}
fn apply_format_safely(text: &str, open: &str, close: &str) -> String {
if text.trim().is_empty() {
return text.to_string();
}
let leading_ws: String = text
.chars()
.take_while(|c| c.is_whitespace() && *c != '\n')
.collect();
let trailing_ws: String = text
.chars()
.rev()
.take_while(|c| c.is_whitespace() && *c != '\n')
.collect::<String>()
.chars()
.rev()
.collect();
let content_start = leading_ws.len();
let content_end = text.len() - trailing_ws.len();
let content = &text[content_start..content_end];
if content.contains('\n') {
let formatted: Vec<String> = content
.split('\n')
.map(|line| {
let line_trimmed = line.trim();
if line_trimmed.is_empty() {
line.to_string()
} else {
let line_leading: String =
line.chars().take_while(|c| c.is_whitespace()).collect();
let line_trailing: String = line
.chars()
.rev()
.take_while(|c| c.is_whitespace())
.collect::<String>()
.chars()
.rev()
.collect();
format!(
"{}{}{}{}{}",
line_leading, open, line_trimmed, close, line_trailing
)
}
})
.collect();
return format!("{}{}{}", leading_ws, formatted.join("\n"), trailing_ws);
}
format!(
"{}{}{}{}{}",
leading_ws,
open,
content.trim(),
close,
trailing_ws
)
}
fn segments_to_markdown(
segments: &[FormattedSegment],
context: &ConversionContext<'_>,
) -> String {
let mut result = String::new();
for seg in segments {
if let Some(anchor) = &seg.anchor {
result.push_str(&format!(
"<a id=\"{}\"></a>",
escape_html_attr(anchor)
));
}
let mut text = seg.text.clone();
if seg.is_code {
result.push_str(&format!("`{}`", text));
continue;
}
if seg.is_superscript {
text = format!("<sup>{}</sup>", text);
}
if seg.is_subscript {
text = format!("<sub>{}</sub>", text);
}
if seg.is_deletion {
text = Self::apply_format_safely(&text, "~~", "~~");
}
if seg.is_insertion {
text = format!("<ins>{}</ins>", text);
}
if seg.has_underline && context.html_underline_enabled() && !seg.is_insertion {
text = format!("<u>{}</u>", text);
}
if seg.has_strike && !seg.is_deletion {
if context.html_strikethrough_enabled() {
text = format!("<s>{}</s>", text);
} else {
text = Self::apply_format_safely(&text, "~~", "~~");
}
}
if seg.is_bold && seg.is_italic {
text = format!("<strong><em>{}</em></strong>", text);
} else if seg.is_bold {
text = format!("<strong>{}</strong>", text);
} else if seg.is_italic {
text = format!("<em>{}</em>", text);
}
result.push_str(&text);
}
result
}
fn apply_paragraph_formatting<'a>(
para: &Paragraph<'a>,
text: String,
context: &mut ConversionContext<'a>,
) -> Result<String> {
let para_style_id = para
.property
.as_ref()
.and_then(|p| p.style_id.as_ref())
.map(|s| s.value.as_ref());
let effective_props =
context.resolve_paragraph_property(para.property.as_ref(), para_style_id);
let mut prefix = String::new();
let mut is_heading = false;
if let Some(style) = &effective_props.style_id {
if let Some(heading_level) = crate::localization::parse_heading_style(&style.value) {
if text.trim().is_empty() {
return Ok(String::new());
}
prefix.push_str(&"#".repeat(heading_level));
prefix.push(' ');
is_heading = true;
} else if crate::localization::is_blockquote_style(&style.value) {
let text_for_output = if context.preserve_whitespace() {
text.as_str()
} else {
text.trim()
};
return Ok(format!("> {}", text_for_output));
}
}
if let Some(num_pr) = &effective_props.numbering {
if let (Some(num_id), Some(ilvl)) = (&num_pr.id, &num_pr.level) {
let num_id_val = num_id.value as i32;
let ilvl_val = ilvl.value as i32;
let marker = context.next_list_marker(num_id_val, ilvl_val);
if is_heading {
prefix.push_str(&marker);
if !marker.is_empty() {
prefix.push(' ');
}
} else {
let indent = context.list_indent_level(num_id_val, ilvl_val);
let indent_str = " ".repeat(indent);
prefix.push_str(&indent_str);
prefix.push_str(&marker);
prefix.push(' ');
}
}
}
let text_for_output = if context.preserve_whitespace() {
text.as_str()
} else {
text.trim()
};
let final_text = format!("{}{}", prefix, text_for_output);
if !is_heading {
if let Some(jc) = &effective_props.justification {
match &jc.value {
rs_docx::formatting::JustificationVal::Center => {
return Ok(format!(
"<div style=\"text-align: center;\">{}</div>",
final_text
));
}
rs_docx::formatting::JustificationVal::Right => {
return Ok(format!(
"<div style=\"text-align: right;\">{}</div>",
final_text
));
}
_ => {}
}
}
}
Ok(final_text)
}
fn convert_hyperlink<'a>(
hyperlink: &Hyperlink<'a>,
context: &mut ConversionContext<'a>,
para_style_id: Option<&str>,
) -> Result<String> {
let mut link_text = String::new();
let mut field_stack = Vec::new();
for run in &hyperlink.content {
let filtered_run = Self::filter_run_by_field_state(run, &mut field_stack);
if filtered_run.content.is_empty() {
continue;
}
let text = RunConverter::convert(&filtered_run, context, para_style_id)?;
link_text.push_str(&text);
}
let url = if let Some(anchor) = &hyperlink.anchor {
format!("#{}", escape_markdown_link_destination(anchor))
} else if let Some(id) = &hyperlink.id {
context
.relationship_target(id.as_ref())
.map(str::to_owned)
.unwrap_or_else(|| "#".to_string())
} else {
"#".to_string()
};
if link_text.is_empty() {
Ok(url)
} else {
Ok(format!(
"[{}]({})",
escape_markdown_link_text(&link_text),
escape_markdown_link_destination(&url)
))
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use rs_docx::document::{Hyperlink, ParagraphContent, Run, RunContent, Text};
use std::borrow::Cow;
use std::collections::HashMap;
#[test]
fn test_toc_anchor_link() {
let mut para = Paragraph::default();
let mut hyperlink = Hyperlink {
anchor: Some(Cow::Borrowed("_Toc123456789")),
..Default::default()
};
let mut run = Run::default();
run.content.push(RunContent::Text(Text {
text: "Introduction".into(),
..Default::default()
}));
hyperlink.content.push(run);
para.content.push(ParagraphContent::Link(hyperlink));
let docx = rs_docx::Docx::default();
let rels = HashMap::new();
let mut numbering_resolver = super::super::NumberingResolver::new(&docx);
let mut image_extractor = super::super::ImageExtractor::new_skip();
let options = crate::ConvertOptions::default();
let style_resolver = super::super::StyleResolver::new(&docx.styles);
let mut context = super::ConversionContext::new(
&rels,
&mut numbering_resolver,
&mut image_extractor,
&options,
None,
None,
None,
&style_resolver,
);
let md = ParagraphConverter::convert(¶, &mut context).expect("Conversion failed");
assert_eq!(md, "[Introduction](#_Toc123456789)");
}
#[test]
fn test_toc_anchor_target() {
use rs_docx::document::BookmarkStart;
let mut para = Paragraph::default();
let bookmark = BookmarkStart {
name: Some(Cow::Borrowed("_Toc123456789")),
..Default::default()
};
let mut run = Run::default();
run.content.push(RunContent::Text(Text {
text: "Chapter 1".into(),
..Default::default()
}));
para.content.push(ParagraphContent::BookmarkStart(bookmark));
para.content.push(ParagraphContent::Run(run));
let docx = rs_docx::Docx::default();
let rels = HashMap::new();
let mut numbering_resolver = super::super::NumberingResolver::new(&docx);
let mut image_extractor = super::super::ImageExtractor::new_skip();
let options = crate::ConvertOptions::default();
let style_resolver = super::super::StyleResolver::new(&docx.styles);
let mut context = super::ConversionContext::new(
&rels,
&mut numbering_resolver,
&mut image_extractor,
&options,
None,
None,
None,
&style_resolver,
);
let md = ParagraphConverter::convert(¶, &mut context).expect("Conversion failed");
assert_eq!(md, "<a id=\"_Toc123456789\"></a>\nChapter 1");
}
#[test]
fn test_anchor_placement_header() {
use rs_docx::document::BookmarkStart;
let mut para = Paragraph::default();
let props = rs_docx::formatting::ParagraphProperty {
style_id: Some(rs_docx::formatting::ParagraphStyleId {
value: "Heading1".into(),
}),
..Default::default()
};
para.property = Some(props);
let bookmark = BookmarkStart {
name: Some(Cow::Borrowed("header_anchor")),
..Default::default()
};
let mut run = Run::default();
run.content.push(RunContent::Text(Text {
text: "Header Title".into(),
..Default::default()
}));
para.content.push(ParagraphContent::BookmarkStart(bookmark));
para.content.push(ParagraphContent::Run(run));
let docx = rs_docx::Docx::default();
let rels = HashMap::new();
let mut numbering_resolver = super::super::NumberingResolver::new(&docx);
let mut image_extractor = super::super::ImageExtractor::new_skip();
let options = crate::ConvertOptions::default();
let style_resolver = super::super::StyleResolver::new(&docx.styles);
let mut context = super::ConversionContext::new(
&rels,
&mut numbering_resolver,
&mut image_extractor,
&options,
None,
None,
None,
&style_resolver,
);
let md = ParagraphConverter::convert(¶, &mut context).expect("Conversion failed");
assert_eq!(md, "<a id=\"header_anchor\"></a>\n# Header Title");
}
#[test]
fn test_adjacent_anchors() {
use rs_docx::document::BookmarkStart;
let mut para = Paragraph::default();
let b1 = BookmarkStart {
name: Some(Cow::Borrowed("anchor1")),
..Default::default()
};
let b2 = BookmarkStart {
name: Some(Cow::Borrowed("anchor2")),
..Default::default()
};
let mut run = Run::default();
run.content.push(RunContent::Text(Text {
text: "Content".into(),
..Default::default()
}));
para.content.push(ParagraphContent::BookmarkStart(b1));
para.content.push(ParagraphContent::BookmarkStart(b2));
para.content.push(ParagraphContent::Run(run));
let docx = rs_docx::Docx::default();
let rels = HashMap::new();
let mut numbering_resolver = super::super::NumberingResolver::new(&docx);
let mut image_extractor = super::super::ImageExtractor::new_skip();
let options = crate::ConvertOptions::default();
let style_resolver = super::super::StyleResolver::new(&docx.styles);
let mut context = super::ConversionContext::new(
&rels,
&mut numbering_resolver,
&mut image_extractor,
&options,
None,
None,
None,
&style_resolver,
);
let md = ParagraphConverter::convert(¶, &mut context).expect("Conversion failed");
assert_eq!(md, "<a id=\"anchor1\"></a><a id=\"anchor2\"></a>\nContent");
}
#[test]
fn test_preserve_whitespace_option() {
let mut para = Paragraph::default();
let mut run = Run::default();
run.content.push(RunContent::Text(Text {
text: " Keep Surrounding Spaces ".into(),
..Default::default()
}));
para.content.push(ParagraphContent::Run(run));
let docx = rs_docx::Docx::default();
let rels = HashMap::new();
let mut numbering_resolver = super::super::NumberingResolver::new(&docx);
let mut image_extractor = super::super::ImageExtractor::new_skip();
let options = crate::ConvertOptions {
preserve_whitespace: true,
..Default::default()
};
let style_resolver = super::super::StyleResolver::new(&docx.styles);
let mut context = super::ConversionContext::new(
&rels,
&mut numbering_resolver,
&mut image_extractor,
&options,
None,
None,
None,
&style_resolver,
);
let md = ParagraphConverter::convert(¶, &mut context).expect("Conversion failed");
assert_eq!(md, " Keep Surrounding Spaces ");
}
#[test]
fn test_deep_list_indentation_not_clamped() {
use rs_docx::document::{
AbstractNum, AbstractNumId, Level, LevelStart, LevelText, Num, NumFmt, Numbering,
};
let mut para = Paragraph::default();
let props = rs_docx::formatting::ParagraphProperty {
numbering: Some(rs_docx::formatting::NumberingProperty::from((
2isize, 3isize,
))),
..Default::default()
};
para.property = Some(props);
let mut run = Run::default();
run.content.push(RunContent::Text(Text {
text: "Deep Item".into(),
..Default::default()
}));
para.content.push(ParagraphContent::Run(run));
let abstract_num = AbstractNum {
abstract_num_id: Some(1),
levels: vec![Level {
i_level: Some(3),
start: Some(LevelStart { value: Some(1) }),
number_format: Some(NumFmt {
value: Cow::Borrowed("decimal"),
}),
level_text: Some(LevelText {
value: Some(Cow::Borrowed("%4.")),
}),
..Default::default()
}],
..Default::default()
};
let num = Num {
num_id: Some(2),
abstract_num_id: Some(AbstractNumId { value: Some(1) }),
..Default::default()
};
let docx = rs_docx::Docx {
numbering: Some(Numbering {
abstract_numberings: vec![abstract_num],
numberings: vec![num],
}),
..Default::default()
};
let rels = HashMap::new();
let mut numbering_resolver = super::super::NumberingResolver::new(&docx);
let mut image_extractor = super::super::ImageExtractor::new_skip();
let options = crate::ConvertOptions::default();
let style_resolver = super::super::StyleResolver::new(&docx.styles);
let mut context = super::ConversionContext::new(
&rels,
&mut numbering_resolver,
&mut image_extractor,
&options,
None,
None,
None,
&style_resolver,
);
let md = ParagraphConverter::convert(¶, &mut context).expect("Conversion failed");
assert_eq!(md, " 1. Deep Item");
}
#[test]
fn test_duplicate_footnote_references_reuse_index() {
use rs_docx::document::{BodyContent, FootNote, FootNotes, FootnoteReference};
let mut note_para = Paragraph::default();
let mut note_run = Run::default();
note_run.content.push(RunContent::Text(Text {
text: "Same footnote text".into(),
..Default::default()
}));
note_para.content.push(ParagraphContent::Run(note_run));
let docx = rs_docx::Docx {
footnotes: Some(FootNotes {
content: vec![FootNote {
id: Some(5),
content: vec![BodyContent::Paragraph(note_para)],
..Default::default()
}],
}),
..Default::default()
};
let mut para = Paragraph::default();
let mut run1 = Run::default();
run1.content
.push(RunContent::FootnoteReference(FootnoteReference {
id: Some(Cow::Borrowed("5")),
..Default::default()
}));
para.content.push(ParagraphContent::Run(run1));
let mut run2 = Run::default();
run2.content
.push(RunContent::FootnoteReference(FootnoteReference {
id: Some(Cow::Borrowed("5")),
..Default::default()
}));
para.content.push(ParagraphContent::Run(run2));
let rels = HashMap::new();
let mut numbering_resolver = super::super::NumberingResolver::new(&docx);
let mut image_extractor = super::super::ImageExtractor::new_skip();
let options = crate::ConvertOptions::default();
let style_resolver = super::super::StyleResolver::new(&docx.styles);
let mut context = super::ConversionContext::new(
&rels,
&mut numbering_resolver,
&mut image_extractor,
&options,
None,
docx.footnotes.as_ref(),
None,
&style_resolver,
);
let md = ParagraphConverter::convert(¶, &mut context).expect("Conversion failed");
assert_eq!(md, "[^1][^1]");
assert_eq!(context.footnote_count(), 1);
}
#[test]
fn test_duplicate_comment_references_reuse_definition() {
use rs_docx::document::{Comment, CommentReference, Comments};
let mut comment_para = Paragraph::default();
let mut comment_run = Run::default();
comment_run.content.push(RunContent::Text(Text {
text: "Shared comment".into(),
..Default::default()
}));
comment_para
.content
.push(ParagraphContent::Run(comment_run));
let docx = rs_docx::Docx {
comments: Some(Comments {
comments: vec![Comment {
id: Some(9),
author: Cow::Borrowed("tester"),
content: comment_para,
}],
}),
..Default::default()
};
let mut para = Paragraph::default();
let mut run1 = Run::default();
run1.content
.push(RunContent::CommentReference(CommentReference {
id: Some(Cow::Borrowed("9")),
}));
para.content.push(ParagraphContent::Run(run1));
let mut run2 = Run::default();
run2.content
.push(RunContent::CommentReference(CommentReference {
id: Some(Cow::Borrowed("9")),
}));
para.content.push(ParagraphContent::Run(run2));
let rels = HashMap::new();
let mut numbering_resolver = super::super::NumberingResolver::new(&docx);
let mut image_extractor = super::super::ImageExtractor::new_skip();
let options = crate::ConvertOptions::default();
let style_resolver = super::super::StyleResolver::new(&docx.styles);
let mut context = super::ConversionContext::new(
&rels,
&mut numbering_resolver,
&mut image_extractor,
&options,
docx.comments.as_ref(),
None,
None,
&style_resolver,
);
let md = ParagraphConverter::convert(¶, &mut context).expect("Conversion failed");
assert_eq!(md, "[^c9][^c9]");
assert_eq!(context.comment_count(), 1);
assert_eq!(context.comment_at(0), Some(("9", "Shared comment")));
}
#[test]
fn test_field_code_within_single_run_preserves_visible_text() {
use hard_xml::XmlRead;
let mut para = Paragraph::default();
let run = Run::from_str(
r#"<w:r>
<w:t>prefix </w:t>
<w:fldChar w:fldCharType="begin"/>
<w:instrText>PAGEREF _Ref</w:instrText>
<w:t>hidden </w:t>
<w:fldChar w:fldCharType="separate"/>
<w:t>Visible</w:t>
<w:fldChar w:fldCharType="end"/>
<w:t> suffix</w:t>
</w:r>"#,
)
.expect("Failed to parse run XML");
para.content.push(ParagraphContent::Run(run));
let docx = rs_docx::Docx::default();
let rels = HashMap::new();
let mut numbering_resolver = super::super::NumberingResolver::new(&docx);
let mut image_extractor = super::super::ImageExtractor::new_skip();
let options = crate::ConvertOptions::default();
let style_resolver = super::super::StyleResolver::new(&docx.styles);
let mut context = super::ConversionContext::new(
&rels,
&mut numbering_resolver,
&mut image_extractor,
&options,
None,
None,
None,
&style_resolver,
);
let md = ParagraphConverter::convert(¶, &mut context).expect("Conversion failed");
assert_eq!(md, "prefix Visible suffix");
}
#[test]
fn test_extended_run_content_is_preserved() {
use hard_xml::XmlRead;
let mut para = Paragraph::default();
let run = Run::from_str(
r#"<w:r>
<w:t>A</w:t>
<w:noBreakHyphen/>
<w:t>B</w:t>
<w:softHyphen/>
<w:t>C</w:t>
<w:sym w:char="2013"/>
<w:ptab/>
<w:lastRenderedPageBreak/>
<w:pgNum/>
<w:t>D</w:t>
</w:r>"#,
)
.expect("Failed to parse run XML");
para.content.push(ParagraphContent::Run(run));
let docx = rs_docx::Docx::default();
let rels = HashMap::new();
let mut numbering_resolver = super::super::NumberingResolver::new(&docx);
let mut image_extractor = super::super::ImageExtractor::new_skip();
let options = crate::ConvertOptions::default();
let style_resolver = super::super::StyleResolver::new(&docx.styles);
let mut context = super::ConversionContext::new(
&rels,
&mut numbering_resolver,
&mut image_extractor,
&options,
None,
None,
None,
&style_resolver,
);
let md = ParagraphConverter::convert(¶, &mut context).expect("Conversion failed");
assert_eq!(md, "A\u{2011}B\u{00AD}C\u{2013}\t\n\n---\n\n{PAGE}D");
}
#[test]
fn test_inline_anchor_escapes_html_special_chars() {
use rs_docx::document::BookmarkStart;
let mut para = Paragraph::default();
let mut run = Run::default();
run.content.push(RunContent::Text(Text {
text: "Before".into(),
..Default::default()
}));
para.content.push(ParagraphContent::Run(run));
let bookmark = BookmarkStart {
name: Some(Cow::Borrowed("x\"onmouseover=\"alert(1)")),
..Default::default()
};
para.content.push(ParagraphContent::BookmarkStart(bookmark));
let mut run2 = Run::default();
run2.content.push(RunContent::Text(Text {
text: "After".into(),
..Default::default()
}));
para.content.push(ParagraphContent::Run(run2));
let docx = rs_docx::Docx::default();
let rels = HashMap::new();
let mut numbering_resolver = super::super::NumberingResolver::new(&docx);
let mut image_extractor = super::super::ImageExtractor::new_skip();
let options = crate::ConvertOptions::default();
let style_resolver = super::super::StyleResolver::new(&docx.styles);
let mut context = super::ConversionContext::new(
&rels,
&mut numbering_resolver,
&mut image_extractor,
&options,
None,
None,
None,
&style_resolver,
);
let md = ParagraphConverter::convert(¶, &mut context).expect("Conversion failed");
assert!(
md.contains("""),
"Inline anchor must escape HTML special chars, got: {}",
md
);
assert!(
!md.contains("x\"onmouseover"),
"Raw unescaped quote found in anchor — XSS vulnerability"
);
}
#[test]
fn test_hyperlink_italic_matches_paragraph_italic() {
use rs_docx::formatting::{CharacterProperty, Italics};
let mut para = Paragraph::default();
let mut italic_run = Run {
property: Some(CharacterProperty {
italics: Some(Italics { value: Some(true) }),
..Default::default()
}),
..Default::default()
};
italic_run.content.push(RunContent::Text(Text {
text: "regular".into(),
..Default::default()
}));
para.content.push(ParagraphContent::Run(italic_run));
let mut link_run = Run {
property: Some(CharacterProperty {
italics: Some(Italics { value: Some(true) }),
..Default::default()
}),
..Default::default()
};
link_run.content.push(RunContent::Text(Text {
text: "linked".into(),
..Default::default()
}));
let hyperlink = Hyperlink {
anchor: Some(Cow::Borrowed("target")),
content: vec![link_run],
..Default::default()
};
para.content.push(ParagraphContent::Link(hyperlink));
let docx = rs_docx::Docx::default();
let rels = HashMap::new();
let mut numbering_resolver = super::super::NumberingResolver::new(&docx);
let mut image_extractor = super::super::ImageExtractor::new_skip();
let options = crate::ConvertOptions::default();
let style_resolver = super::super::StyleResolver::new(&docx.styles);
let mut context = super::ConversionContext::new(
&rels,
&mut numbering_resolver,
&mut image_extractor,
&options,
None,
None,
None,
&style_resolver,
);
let md = ParagraphConverter::convert(¶, &mut context).expect("Conversion failed");
assert!(
md.contains("<em>regular</em>"),
"Regular italic should use <em>, got: {}",
md
);
assert!(
md.contains("<em>linked</em>"),
"Hyperlink italic should use <em>, got: {}",
md
);
}
#[test]
fn test_bold_text() {
use rs_docx::formatting::{Bold, CharacterProperty};
let mut para = Paragraph::default();
let mut run = Run {
property: Some(CharacterProperty {
bold: Some(Bold { value: Some(true) }),
..Default::default()
}),
..Default::default()
};
run.content.push(RunContent::Text(Text {
text: "hello".into(),
..Default::default()
}));
para.content.push(ParagraphContent::Run(run));
let docx = rs_docx::Docx::default();
let rels = HashMap::new();
let mut numbering_resolver = super::super::NumberingResolver::new(&docx);
let mut image_extractor = super::super::ImageExtractor::new_skip();
let options = crate::ConvertOptions::default();
let style_resolver = super::super::StyleResolver::new(&docx.styles);
let mut context = super::ConversionContext::new(
&rels, &mut numbering_resolver, &mut image_extractor,
&options, None, None, None, &style_resolver,
);
let md = ParagraphConverter::convert(¶, &mut context).expect("Conversion failed");
assert!(md.contains("<strong>hello</strong>"), "Expected bold, got: {}", md);
}
#[test]
fn test_italic_text() {
use rs_docx::formatting::{CharacterProperty, Italics};
let mut para = Paragraph::default();
let mut run = Run {
property: Some(CharacterProperty {
italics: Some(Italics { value: Some(true) }),
..Default::default()
}),
..Default::default()
};
run.content.push(RunContent::Text(Text {
text: "hello".into(),
..Default::default()
}));
para.content.push(ParagraphContent::Run(run));
let docx = rs_docx::Docx::default();
let rels = HashMap::new();
let mut numbering_resolver = super::super::NumberingResolver::new(&docx);
let mut image_extractor = super::super::ImageExtractor::new_skip();
let options = crate::ConvertOptions::default();
let style_resolver = super::super::StyleResolver::new(&docx.styles);
let mut context = super::ConversionContext::new(
&rels, &mut numbering_resolver, &mut image_extractor,
&options, None, None, None, &style_resolver,
);
let md = ParagraphConverter::convert(¶, &mut context).expect("Conversion failed");
assert!(md.contains("<em>hello</em>"), "Expected italic, got: {}", md);
}
#[test]
fn test_bold_italic_text() {
use rs_docx::formatting::{Bold, CharacterProperty, Italics};
let mut para = Paragraph::default();
let mut run = Run {
property: Some(CharacterProperty {
bold: Some(Bold { value: Some(true) }),
italics: Some(Italics { value: Some(true) }),
..Default::default()
}),
..Default::default()
};
run.content.push(RunContent::Text(Text {
text: "hello".into(),
..Default::default()
}));
para.content.push(ParagraphContent::Run(run));
let docx = rs_docx::Docx::default();
let rels = HashMap::new();
let mut numbering_resolver = super::super::NumberingResolver::new(&docx);
let mut image_extractor = super::super::ImageExtractor::new_skip();
let options = crate::ConvertOptions::default();
let style_resolver = super::super::StyleResolver::new(&docx.styles);
let mut context = super::ConversionContext::new(
&rels, &mut numbering_resolver, &mut image_extractor,
&options, None, None, None, &style_resolver,
);
let md = ParagraphConverter::convert(¶, &mut context).expect("Conversion failed");
assert!(
md.contains("<strong><em>hello</em></strong>"),
"Expected bold+italic, got: {}",
md
);
}
#[test]
fn test_underline_text() {
use rs_docx::formatting::{CharacterProperty, Underline};
let mut para = Paragraph::default();
let mut run = Run {
property: Some(CharacterProperty {
underline: Some(Underline::default()),
..Default::default()
}),
..Default::default()
};
run.content.push(RunContent::Text(Text {
text: "hello".into(),
..Default::default()
}));
para.content.push(ParagraphContent::Run(run));
let docx = rs_docx::Docx::default();
let rels = HashMap::new();
let mut numbering_resolver = super::super::NumberingResolver::new(&docx);
let mut image_extractor = super::super::ImageExtractor::new_skip();
let options = crate::ConvertOptions::default();
let style_resolver = super::super::StyleResolver::new(&docx.styles);
let mut context = super::ConversionContext::new(
&rels, &mut numbering_resolver, &mut image_extractor,
&options, None, None, None, &style_resolver,
);
let md = ParagraphConverter::convert(¶, &mut context).expect("Conversion failed");
assert!(md.contains("<u>hello</u>"), "Expected underline, got: {}", md);
}
#[test]
fn test_strikethrough_text_markdown() {
use rs_docx::formatting::{CharacterProperty, Strike};
let mut para = Paragraph::default();
let mut run = Run {
property: Some(CharacterProperty {
strike: Some(Strike { value: Some(true) }),
..Default::default()
}),
..Default::default()
};
run.content.push(RunContent::Text(Text {
text: "hello".into(),
..Default::default()
}));
para.content.push(ParagraphContent::Run(run));
let docx = rs_docx::Docx::default();
let rels = HashMap::new();
let mut numbering_resolver = super::super::NumberingResolver::new(&docx);
let mut image_extractor = super::super::ImageExtractor::new_skip();
let options = crate::ConvertOptions::default();
let style_resolver = super::super::StyleResolver::new(&docx.styles);
let mut context = super::ConversionContext::new(
&rels, &mut numbering_resolver, &mut image_extractor,
&options, None, None, None, &style_resolver,
);
let md = ParagraphConverter::convert(¶, &mut context).expect("Conversion failed");
assert!(md.contains("~~hello~~"), "Expected markdown strike, got: {}", md);
}
#[test]
fn test_strikethrough_text_html() {
use rs_docx::formatting::{CharacterProperty, Strike};
let mut para = Paragraph::default();
let mut run = Run {
property: Some(CharacterProperty {
strike: Some(Strike { value: Some(true) }),
..Default::default()
}),
..Default::default()
};
run.content.push(RunContent::Text(Text {
text: "hello".into(),
..Default::default()
}));
para.content.push(ParagraphContent::Run(run));
let docx = rs_docx::Docx::default();
let rels = HashMap::new();
let mut numbering_resolver = super::super::NumberingResolver::new(&docx);
let mut image_extractor = super::super::ImageExtractor::new_skip();
let options = crate::ConvertOptions {
html_strikethrough: true,
..Default::default()
};
let style_resolver = super::super::StyleResolver::new(&docx.styles);
let mut context = super::ConversionContext::new(
&rels, &mut numbering_resolver, &mut image_extractor,
&options, None, None, None, &style_resolver,
);
let md = ParagraphConverter::convert(¶, &mut context).expect("Conversion failed");
assert!(md.contains("<s>hello</s>"), "Expected HTML strike, got: {}", md);
}
#[test]
fn test_bold_and_underline_combined() {
use rs_docx::formatting::{Bold, CharacterProperty, Underline};
let mut para = Paragraph::default();
let mut run = Run {
property: Some(CharacterProperty {
bold: Some(Bold { value: Some(true) }),
underline: Some(Underline::default()),
..Default::default()
}),
..Default::default()
};
run.content.push(RunContent::Text(Text {
text: "hello".into(),
..Default::default()
}));
para.content.push(ParagraphContent::Run(run));
let docx = rs_docx::Docx::default();
let rels = HashMap::new();
let mut numbering_resolver = super::super::NumberingResolver::new(&docx);
let mut image_extractor = super::super::ImageExtractor::new_skip();
let options = crate::ConvertOptions::default();
let style_resolver = super::super::StyleResolver::new(&docx.styles);
let mut context = super::ConversionContext::new(
&rels, &mut numbering_resolver, &mut image_extractor,
&options, None, None, None, &style_resolver,
);
let md = ParagraphConverter::convert(¶, &mut context).expect("Conversion failed");
assert!(md.contains("<strong>"), "Expected bold tag, got: {}", md);
assert!(md.contains("<u>hello</u>"), "Expected underline, got: {}", md);
}
#[test]
fn test_plain_text_no_formatting() {
let mut para = Paragraph::default();
let mut run = Run::default();
run.content.push(RunContent::Text(Text {
text: "hello".into(),
..Default::default()
}));
para.content.push(ParagraphContent::Run(run));
let docx = rs_docx::Docx::default();
let rels = HashMap::new();
let mut numbering_resolver = super::super::NumberingResolver::new(&docx);
let mut image_extractor = super::super::ImageExtractor::new_skip();
let options = crate::ConvertOptions::default();
let style_resolver = super::super::StyleResolver::new(&docx.styles);
let mut context = super::ConversionContext::new(
&rels, &mut numbering_resolver, &mut image_extractor,
&options, None, None, None, &style_resolver,
);
let md = ParagraphConverter::convert(¶, &mut context).expect("Conversion failed");
assert_eq!(md, "hello");
}
#[test]
fn test_empty_run_produces_empty() {
let mut para = Paragraph::default();
let run = Run::default();
para.content.push(ParagraphContent::Run(run));
let docx = rs_docx::Docx::default();
let rels = HashMap::new();
let mut numbering_resolver = super::super::NumberingResolver::new(&docx);
let mut image_extractor = super::super::ImageExtractor::new_skip();
let options = crate::ConvertOptions::default();
let style_resolver = super::super::StyleResolver::new(&docx.styles);
let mut context = super::ConversionContext::new(
&rels, &mut numbering_resolver, &mut image_extractor,
&options, None, None, None, &style_resolver,
);
let md = ParagraphConverter::convert(¶, &mut context).expect("Conversion failed");
assert_eq!(md, "");
}
#[test]
fn test_whitespace_preserved_outside_markers() {
use rs_docx::formatting::{Bold, CharacterProperty};
let mut para = Paragraph::default();
let mut run = Run {
property: Some(CharacterProperty {
bold: Some(Bold { value: Some(true) }),
..Default::default()
}),
..Default::default()
};
run.content.push(RunContent::Text(Text {
text: " hello ".into(),
..Default::default()
}));
para.content.push(ParagraphContent::Run(run));
let docx = rs_docx::Docx::default();
let rels = HashMap::new();
let mut numbering_resolver = super::super::NumberingResolver::new(&docx);
let mut image_extractor = super::super::ImageExtractor::new_skip();
let options = crate::ConvertOptions::default();
let style_resolver = super::super::StyleResolver::new(&docx.styles);
let mut context = super::ConversionContext::new(
&rels, &mut numbering_resolver, &mut image_extractor,
&options, None, None, None, &style_resolver,
);
let md = ParagraphConverter::convert(¶, &mut context).expect("Conversion failed");
assert!(
md.contains("<strong>") && md.contains("hello") && md.contains("</strong>"),
"Expected bold wrapping around text, got: {}",
md
);
}
#[test]
fn test_format_safely_empty_string() {
let result = ParagraphConverter::apply_format_safely("", "<strong>", "</strong>");
assert_eq!(result, "");
}
#[test]
fn test_format_safely_whitespace_only() {
let result = ParagraphConverter::apply_format_safely(" ", "<em>", "</em>");
assert_eq!(result, " ");
}
#[test]
fn test_format_safely_single_line() {
let result = ParagraphConverter::apply_format_safely("hello", "<strong>", "</strong>");
assert_eq!(result, "<strong>hello</strong>");
}
#[test]
fn test_format_safely_with_outer_spaces() {
let result = ParagraphConverter::apply_format_safely(" hello ", "<strong>", "</strong>");
assert_eq!(result, " <strong>hello</strong> ");
}
#[test]
fn test_format_safely_multiline() {
let result = ParagraphConverter::apply_format_safely("line1\nline2", "~~", "~~");
assert!(
result.contains("~~line1~~"),
"line1 should be wrapped, got: {}",
result
);
assert!(
result.contains("~~line2~~"),
"line2 should be wrapped, got: {}",
result
);
}
#[test]
fn test_heading1() {
let mut para = Paragraph::default();
let props = rs_docx::formatting::ParagraphProperty {
style_id: Some(rs_docx::formatting::ParagraphStyleId {
value: "Heading1".into(),
}),
..Default::default()
};
para.property = Some(props);
let mut run = Run::default();
run.content.push(RunContent::Text(Text {
text: "Title".into(),
..Default::default()
}));
para.content.push(ParagraphContent::Run(run));
make_test_context!(ctx);
let md = ParagraphConverter::convert(¶, &mut ctx).expect("Conversion failed");
assert!(md.starts_with("# "), "Expected '# ' prefix, got: {}", md);
assert!(md.contains("Title"), "Expected 'Title', got: {}", md);
}
#[test]
fn test_heading2() {
let mut para = Paragraph::default();
let props = rs_docx::formatting::ParagraphProperty {
style_id: Some(rs_docx::formatting::ParagraphStyleId {
value: "Heading2".into(),
}),
..Default::default()
};
para.property = Some(props);
let mut run = Run::default();
run.content.push(RunContent::Text(Text {
text: "Section".into(),
..Default::default()
}));
para.content.push(ParagraphContent::Run(run));
make_test_context!(ctx);
let md = ParagraphConverter::convert(¶, &mut ctx).expect("Conversion failed");
assert!(md.contains("## Section"), "Expected '## Section', got: {}", md);
}
#[test]
fn test_heading3() {
let mut para = Paragraph::default();
let props = rs_docx::formatting::ParagraphProperty {
style_id: Some(rs_docx::formatting::ParagraphStyleId {
value: "Heading 3".into(),
}),
..Default::default()
};
para.property = Some(props);
let mut run = Run::default();
run.content.push(RunContent::Text(Text {
text: "Sub".into(),
..Default::default()
}));
para.content.push(ParagraphContent::Run(run));
make_test_context!(ctx);
let md = ParagraphConverter::convert(¶, &mut ctx).expect("Conversion failed");
assert!(md.contains("### Sub"), "Expected '### Sub', got: {}", md);
}
#[test]
fn test_heading6() {
let mut para = Paragraph::default();
let props = rs_docx::formatting::ParagraphProperty {
style_id: Some(rs_docx::formatting::ParagraphStyleId {
value: "Heading6".into(),
}),
..Default::default()
};
para.property = Some(props);
let mut run = Run::default();
run.content.push(RunContent::Text(Text {
text: "Deep".into(),
..Default::default()
}));
para.content.push(ParagraphContent::Run(run));
make_test_context!(ctx);
let md = ParagraphConverter::convert(¶, &mut ctx).expect("Conversion failed");
assert!(md.contains("###### Deep"), "Expected '###### Deep', got: {}", md);
}
#[test]
fn test_title_style() {
let mut para = Paragraph::default();
let props = rs_docx::formatting::ParagraphProperty {
style_id: Some(rs_docx::formatting::ParagraphStyleId {
value: "Title".into(),
}),
..Default::default()
};
para.property = Some(props);
let mut run = Run::default();
run.content.push(RunContent::Text(Text {
text: "Doc".into(),
..Default::default()
}));
para.content.push(ParagraphContent::Run(run));
make_test_context!(ctx);
let md = ParagraphConverter::convert(¶, &mut ctx).expect("Conversion failed");
assert!(md.contains("# Doc"), "Expected '# Doc', got: {}", md);
}
#[test]
fn test_subtitle_style() {
let mut para = Paragraph::default();
let props = rs_docx::formatting::ParagraphProperty {
style_id: Some(rs_docx::formatting::ParagraphStyleId {
value: "Subtitle".into(),
}),
..Default::default()
};
para.property = Some(props);
let mut run = Run::default();
run.content.push(RunContent::Text(Text {
text: "Sub".into(),
..Default::default()
}));
para.content.push(ParagraphContent::Run(run));
make_test_context!(ctx);
let md = ParagraphConverter::convert(¶, &mut ctx).expect("Conversion failed");
assert!(md.contains("## Sub"), "Expected '## Sub', got: {}", md);
}
#[test]
fn test_heading_empty_text_produces_empty() {
let mut para = Paragraph::default();
let props = rs_docx::formatting::ParagraphProperty {
style_id: Some(rs_docx::formatting::ParagraphStyleId {
value: "Heading1".into(),
}),
..Default::default()
};
para.property = Some(props);
para.content.push(ParagraphContent::Run(Run::default()));
make_test_context!(ctx);
let md = ParagraphConverter::convert(¶, &mut ctx).expect("Conversion failed");
assert_eq!(md, "");
}
#[test]
fn test_center_alignment() {
let mut para = Paragraph::default();
let props = rs_docx::formatting::ParagraphProperty {
justification: Some(rs_docx::formatting::Justification {
value: rs_docx::formatting::JustificationVal::Center,
}),
..Default::default()
};
para.property = Some(props);
let mut run = Run::default();
run.content.push(RunContent::Text(Text {
text: "centered".into(),
..Default::default()
}));
para.content.push(ParagraphContent::Run(run));
make_test_context!(ctx);
let md = ParagraphConverter::convert(¶, &mut ctx).expect("Conversion failed");
assert!(md.contains("text-align: center"), "Expected center alignment, got: {}", md);
assert!(md.contains("centered"), "Expected text, got: {}", md);
}
#[test]
fn test_right_alignment() {
let mut para = Paragraph::default();
let props = rs_docx::formatting::ParagraphProperty {
justification: Some(rs_docx::formatting::Justification {
value: rs_docx::formatting::JustificationVal::Right,
}),
..Default::default()
};
para.property = Some(props);
let mut run = Run::default();
run.content.push(RunContent::Text(Text {
text: "right".into(),
..Default::default()
}));
para.content.push(ParagraphContent::Run(run));
make_test_context!(ctx);
let md = ParagraphConverter::convert(¶, &mut ctx).expect("Conversion failed");
assert!(md.contains("text-align: right"), "Expected right alignment, got: {}", md);
assert!(md.contains("right"), "Expected text, got: {}", md);
}
#[test]
fn test_heading_ignores_alignment() {
let mut para = Paragraph::default();
let props = rs_docx::formatting::ParagraphProperty {
style_id: Some(rs_docx::formatting::ParagraphStyleId {
value: "Heading1".into(),
}),
justification: Some(rs_docx::formatting::Justification {
value: rs_docx::formatting::JustificationVal::Center,
}),
..Default::default()
};
para.property = Some(props);
let mut run = Run::default();
run.content.push(RunContent::Text(Text {
text: "Title".into(),
..Default::default()
}));
para.content.push(ParagraphContent::Run(run));
make_test_context!(ctx);
let md = ParagraphConverter::convert(¶, &mut ctx).expect("Conversion failed");
assert!(md.contains("# "), "Expected heading prefix, got: {}", md);
assert!(!md.contains("text-align"), "Heading should not have alignment, got: {}", md);
}
#[test]
fn test_insertion_produces_ins_tag() {
use rs_docx::document::Insertion;
let mut run = Run::default();
run.content.push(RunContent::Text(Text {
text: "added".into(),
..Default::default()
}));
let ins = Insertion {
runs: vec![run],
..Default::default()
};
let mut para = Paragraph::default();
para.content.push(ParagraphContent::Insertion(ins));
make_test_context!(ctx);
let md = ParagraphConverter::convert(¶, &mut ctx).expect("Conversion failed");
assert!(md.contains("<ins>added</ins>"), "Expected <ins> tag, got: {}", md);
}
#[test]
fn test_deletion_produces_strikethrough() {
use rs_docx::document::{DelText, Deletion};
let mut del_run = Run::default();
del_run.content.push(RunContent::DelText(DelText {
text: "removed".into(),
..Default::default()
}));
let del = Deletion {
runs: vec![del_run],
..Default::default()
};
let mut para = Paragraph::default();
para.content.push(ParagraphContent::Deletion(del));
make_test_context!(ctx);
let md = ParagraphConverter::convert(¶, &mut ctx).expect("Conversion failed");
assert!(md.contains("~~removed~~"), "Expected strikethrough, got: {}", md);
}
#[test]
fn test_page_break_in_run() {
use hard_xml::XmlRead;
let mut para = Paragraph::default();
let run = Run::from_str(
r#"<w:r><w:t>before</w:t><w:br w:type="page"/><w:t>after</w:t></w:r>"#,
)
.expect("Failed to parse run XML");
para.content.push(ParagraphContent::Run(run));
make_test_context!(ctx);
let md = ParagraphConverter::convert(¶, &mut ctx).expect("Conversion failed");
assert!(md.contains("---"), "Expected page break '---', got: {}", md);
}
#[test]
fn test_line_break_in_run() {
use hard_xml::XmlRead;
let mut para = Paragraph::default();
let run = Run::from_str(r#"<w:r><w:t>before</w:t><w:br/><w:t>after</w:t></w:r>"#)
.expect("Failed to parse run XML");
para.content.push(ParagraphContent::Run(run));
make_test_context!(ctx);
let md = ParagraphConverter::convert(¶, &mut ctx).expect("Conversion failed");
assert!(md.contains('\n'), "Expected line break, got: {:?}", md);
}
#[test]
fn test_tab_in_run() {
use hard_xml::XmlRead;
let mut para = Paragraph::default();
let run = Run::from_str(r#"<w:r><w:t>col1</w:t><w:tab/><w:t>col2</w:t></w:r>"#)
.expect("Failed to parse run XML");
para.content.push(ParagraphContent::Run(run));
make_test_context!(ctx);
let md = ParagraphConverter::convert(¶, &mut ctx).expect("Conversion failed");
assert!(md.contains('\t'), "Expected tab, got: {:?}", md);
}
#[test]
fn test_hyperlink_external() {
let mut rels = HashMap::new();
rels.insert("rId1".to_string(), "https://example.com".to_string());
let mut run = Run::default();
run.content.push(RunContent::Text(Text {
text: "click".into(),
..Default::default()
}));
let hyperlink = Hyperlink {
id: Some(Cow::Borrowed("rId1")),
content: vec![run],
..Default::default()
};
let mut para = Paragraph::default();
para.content.push(ParagraphContent::Link(hyperlink));
make_test_context_ext!(ctx, rels: rels);
let md = ParagraphConverter::convert(¶, &mut ctx).expect("Conversion failed");
assert!(
md.contains("[click](https://example.com)"),
"Expected external link, got: {}",
md
);
}
#[test]
fn test_hyperlink_anchor() {
let mut run = Run::default();
run.content.push(RunContent::Text(Text {
text: "go".into(),
..Default::default()
}));
let hyperlink = Hyperlink {
anchor: Some(Cow::Borrowed("_Heading")),
content: vec![run],
..Default::default()
};
let mut para = Paragraph::default();
para.content.push(ParagraphContent::Link(hyperlink));
make_test_context!(ctx);
let md = ParagraphConverter::convert(¶, &mut ctx).expect("Conversion failed");
assert!(md.contains("[go](#_Heading)"), "Expected anchor link, got: {}", md);
}
#[test]
fn test_hyperlink_empty_text() {
let hyperlink = Hyperlink {
anchor: Some(Cow::Borrowed("ref")),
..Default::default()
};
let mut para = Paragraph::default();
para.content.push(ParagraphContent::Link(hyperlink));
make_test_context!(ctx);
let md = ParagraphConverter::convert(¶, &mut ctx).expect("Conversion failed");
assert!(md.contains("#ref"), "Expected anchor reference, got: {}", md);
}
#[test]
fn test_empty_paragraph_produces_empty() {
let para = Paragraph::default();
make_test_context!(ctx);
let md = ParagraphConverter::convert(¶, &mut ctx).expect("Conversion failed");
assert_eq!(md, "");
}
#[test]
fn test_sym_element_hex_decode() {
use hard_xml::XmlRead;
let mut para = Paragraph::default();
let run = Run::from_str(r#"<w:r><w:sym w:char="2022"/></w:r>"#)
.expect("Failed to parse run XML");
para.content.push(ParagraphContent::Run(run));
make_test_context!(ctx);
let md = ParagraphConverter::convert(¶, &mut ctx).expect("Conversion failed");
assert!(md.contains('\u{2022}'), "Expected bullet •, got: {:?}", md);
}
#[test]
fn test_non_breaking_hyphen() {
use hard_xml::XmlRead;
let mut para = Paragraph::default();
let run = Run::from_str(r#"<w:r><w:noBreakHyphen/></w:r>"#)
.expect("Failed to parse run XML");
para.content.push(ParagraphContent::Run(run));
make_test_context!(ctx);
let md = ParagraphConverter::convert(¶, &mut ctx).expect("Conversion failed");
assert!(md.contains('\u{2011}'), "Expected non-breaking hyphen, got: {:?}", md);
}
#[test]
fn test_heading_strips_bold_formatting() {
use rs_docx::formatting::{Bold, CharacterProperty, ParagraphProperty, ParagraphStyleId};
let mut para = Paragraph {
property: Some(ParagraphProperty {
style_id: Some(ParagraphStyleId { value: "Heading1".into() }),
..Default::default()
}),
..Default::default()
};
let mut run = Run {
property: Some(CharacterProperty {
bold: Some(Bold { value: Some(true) }),
..Default::default()
}),
..Default::default()
};
run.content.push(RunContent::Text(Text {
text: "My Title".into(),
..Default::default()
}));
para.content.push(ParagraphContent::Run(run));
make_test_context!(context);
let md = ParagraphConverter::convert(¶, &mut context).expect("Conversion failed");
assert!(
!md.contains("<strong>"),
"Heading should not contain <strong> tags, got: {}",
md
);
assert!(md.contains("# My Title"), "Expected '# My Title', got: {}", md);
}
#[test]
fn test_superscript_text() {
use rs_docx::formatting::{CharacterProperty, VertAlign, VertAlignType};
let mut para = Paragraph::default();
let mut run = Run {
property: Some(CharacterProperty {
vertical_align: Some(VertAlign { value: Some(VertAlignType::Superscript) }),
..Default::default()
}),
..Default::default()
};
run.content.push(RunContent::Text(Text { text: "2".into(), ..Default::default() }));
para.content.push(ParagraphContent::Run(run));
make_test_context!(context);
let md = ParagraphConverter::convert(¶, &mut context).expect("Conversion failed");
assert!(md.contains("<sup>2</sup>"), "Expected <sup>2</sup>, got: {}", md);
}
#[test]
fn test_subscript_text() {
use rs_docx::formatting::{CharacterProperty, VertAlign, VertAlignType};
let mut para = Paragraph::default();
let mut run = Run {
property: Some(CharacterProperty {
vertical_align: Some(VertAlign { value: Some(VertAlignType::Subscript) }),
..Default::default()
}),
..Default::default()
};
run.content.push(RunContent::Text(Text { text: "2".into(), ..Default::default() }));
para.content.push(ParagraphContent::Run(run));
make_test_context!(context);
let md = ParagraphConverter::convert(¶, &mut context).expect("Conversion failed");
assert!(md.contains("<sub>2</sub>"), "Expected <sub>2</sub>, got: {}", md);
}
#[test]
fn test_quote_style_produces_blockquote() {
use rs_docx::formatting::{ParagraphProperty, ParagraphStyleId};
let mut para = Paragraph {
property: Some(ParagraphProperty {
style_id: Some(ParagraphStyleId { value: "Quote".into() }),
..Default::default()
}),
..Default::default()
};
let mut run = Run::default();
run.content.push(RunContent::Text(Text { text: "A wise quote".into(), ..Default::default() }));
para.content.push(ParagraphContent::Run(run));
make_test_context!(context);
let md = ParagraphConverter::convert(¶, &mut context).expect("Conversion failed");
assert!(md.starts_with("> "), "Quote style should produce blockquote, got: {}", md);
assert!(md.contains("A wise quote"), "Quote text missing, got: {}", md);
}
#[test]
fn test_intense_quote_style_produces_blockquote() {
use rs_docx::formatting::{ParagraphProperty, ParagraphStyleId};
let mut para = Paragraph {
property: Some(ParagraphProperty {
style_id: Some(ParagraphStyleId { value: "IntenseQuote".into() }),
..Default::default()
}),
..Default::default()
};
let mut run = Run::default();
run.content.push(RunContent::Text(Text { text: "Intense".into(), ..Default::default() }));
para.content.push(ParagraphContent::Run(run));
make_test_context!(context);
let md = ParagraphConverter::convert(¶, &mut context).expect("Conversion failed");
assert!(md.starts_with("> "), "IntenseQuote should produce blockquote, got: {}", md);
}
#[test]
fn test_monospace_font_produces_inline_code() {
use rs_docx::formatting::{CharacterProperty, Fonts};
let mut para = Paragraph::default();
let mut run = Run {
property: Some(CharacterProperty {
fonts: Some(Fonts {
ascii: Some("Courier New".to_string()),
..Default::default()
}),
..Default::default()
}),
..Default::default()
};
run.content.push(RunContent::Text(Text { text: "let x = 1;".into(), ..Default::default() }));
para.content.push(ParagraphContent::Run(run));
make_test_context!(context);
let md = ParagraphConverter::convert(¶, &mut context).expect("Conversion failed");
assert!(md.contains("`let x = 1;`"), "Monospace should produce inline code, got: {}", md);
}
#[test]
fn test_consolas_font_produces_inline_code() {
use rs_docx::formatting::{CharacterProperty, Fonts};
let mut para = Paragraph::default();
let mut run = Run {
property: Some(CharacterProperty {
fonts: Some(Fonts {
ascii: Some("Consolas".to_string()),
..Default::default()
}),
..Default::default()
}),
..Default::default()
};
run.content.push(RunContent::Text(Text { text: "code".into(), ..Default::default() }));
para.content.push(ParagraphContent::Run(run));
make_test_context!(context);
let md = ParagraphConverter::convert(¶, &mut context).expect("Conversion failed");
assert!(md.contains("`code`"), "Consolas should produce inline code, got: {}", md);
}
}