use super::DocxDocument;
use super::document::BlockElement;
use super::hyperlink::HyperlinkTarget;
use super::image::DrawingInfo;
use super::numbering::NumberingDefinitions;
use super::paragraph::{BreakType, ParagraphContent, Run, RunContent};
use super::styles::StyleSheet;
use super::table::Table;
impl DocxDocument {
    /// Renders the document body as plain text.
    ///
    /// Every paragraph and table row ends with a newline while rendering;
    /// all newlines at the very end of the result are then removed.
    pub fn plain_text(&self) -> String {
        let mut text = String::new();
        plain_text_blocks(&self.body.elements, &mut text);
        // Drop the run of trailing newlines in one step.
        let kept = text.trim_end_matches('\n').len();
        text.truncate(kept);
        text
    }

    /// Renders the document body as Markdown.
    ///
    /// The style sheet and numbering definitions, when present, are made
    /// available to the block renderer through a [`MarkdownCtx`]. All
    /// newlines at the very end of the result are removed.
    pub fn to_markdown(&self) -> String {
        let ctx = MarkdownCtx {
            styles: self.styles.as_ref(),
            numbering: self.numbering.as_ref(),
        };
        let mut markdown = String::new();
        markdown_blocks(&self.body.elements, &ctx, &mut markdown, 0);
        // Drop the run of trailing newlines in one step.
        let kept = markdown.trim_end_matches('\n').len();
        markdown.truncate(kept);
        markdown
    }
}
/// Appends the plain-text rendering of `elements` to `out`.
///
/// A paragraph contributes the text of its runs (hyperlinks contribute the
/// text of their inner runs) followed by a single `'\n'`. Tables are
/// delegated to [`plain_text_table`].
fn plain_text_blocks(elements: &[BlockElement], out: &mut String) {
    for block in elements {
        match block {
            BlockElement::Table(tbl) => plain_text_table(tbl, out),
            BlockElement::Paragraph(para) => {
                for piece in &para.content {
                    match piece {
                        ParagraphContent::Hyperlink(link) => {
                            // Only the visible text of a link is kept; the target is dropped.
                            link.runs.iter().for_each(|r| plain_text_run(r, out));
                        }
                        ParagraphContent::Run(r) => plain_text_run(r, out),
                    }
                }
                // Paragraph terminator.
                out.push('\n');
            }
        }
    }
}
/// Appends the plain-text rendering of a single run to `out`.
///
/// Text is copied verbatim, every kind of break becomes `'\n'`, a tab
/// becomes `'\t'`, and drawings produce no output.
fn plain_text_run(run: &Run, out: &mut String) {
    for item in &run.content {
        match item {
            RunContent::Drawing(_) => {}
            RunContent::Tab => out.push('\t'),
            // Line, page and column breaks are all rendered the same way in plain text.
            RunContent::Break(BreakType::Line | BreakType::Page | BreakType::Column) => {
                out.push('\n');
            }
            RunContent::Text(s) => out.push_str(s),
        }
    }
}
/// Appends a plain-text rendering of `table` to `out`.
///
/// Each row becomes one line; cells within a row are separated by `'\t'`.
/// A cell's own trailing newlines are removed and its remaining newlines
/// are replaced by spaces so that a row always occupies a single line.
fn plain_text_table(table: &Table, out: &mut String) {
    for row in &table.rows {
        let mut is_first_cell = true;
        for cell in &row.cells {
            if !is_first_cell {
                out.push('\t');
            }
            is_first_cell = false;

            // Render the cell into a scratch buffer so it can be flattened.
            let mut scratch = String::new();
            plain_text_blocks(&cell.content, &mut scratch);
            let flattened = scratch.trim_end_matches('\n').replace('\n', " ");
            out.push_str(&flattened);
        }
        out.push('\n');
    }
}
/// Borrowed, read-only lookups passed to the Markdown renderers.
///
/// Both fields are optional; `markdown_blocks` simply skips the
/// corresponding lookup when a field is `None`.
struct MarkdownCtx<'a> {
/// Style sheet consulted by `markdown_blocks` to resolve an outline level
/// from a paragraph's style id.
styles: Option<&'a StyleSheet>,
/// Numbering definitions consulted by `markdown_blocks` to resolve the
/// list level referenced by a paragraph.
numbering: Option<&'a NumberingDefinitions>,
}
fn markdown_blocks(elements: &[BlockElement], ctx: &MarkdownCtx, out: &mut String, _depth: usize) {
for elem in elements {
match elem {
BlockElement::Paragraph(p) => {
let heading_level = p
.properties
.as_ref()
.and_then(|pp| {
pp.outline_level.or_else(|| {
pp.style_id
.as_ref()
.and_then(|sid| ctx.styles?.resolve_outline_level(sid))
})
})
.map(|lvl| (lvl as usize) + 1);
let list_prefix = p.properties.as_ref().and_then(|pp| {
let nr = pp.numbering_ref.as_ref()?;
let numbering = ctx.numbering?;
let level = numbering.resolve_level(nr.num_id, nr.ilvl)?;
let indent = " ".repeat(nr.ilvl as usize);
use super::numbering::NumberFormat;
let marker = match &level.format {
NumberFormat::Bullet => "- ".to_string(),
NumberFormat::Decimal => format!("{}. ", level.start),
NumberFormat::LowerLetter => format!("{}. ", level.start),
NumberFormat::UpperLetter => format!("{}. ", level.start),
NumberFormat::LowerRoman => format!("{}. ", level.start),
NumberFormat::UpperRoman => format!("{}. ", level.start),
NumberFormat::None => String::new(),
NumberFormat::Other(_) => "- ".to_string(),
};
Some(format!("{indent}{marker}"))
});
if let Some(level) = heading_level {
let hashes = "#".repeat(level.min(9));
out.push_str(&hashes);
out.push(' ');
} else if let Some(ref prefix) = list_prefix {
out.push_str(prefix);
}
for content in &p.content {
match content {
ParagraphContent::Run(run) => markdown_run(run, out),
ParagraphContent::Hyperlink(hl) => {
let text = runs_to_plain_text(&hl.runs);
match &hl.target {
HyperlinkTarget::External(url) => {
out.push('[');
out.push_str(&text);
out.push_str("](");
out.push_str(url);
out.push(')');
},
HyperlinkTarget::Internal(anchor) => {
out.push('[');
out.push_str(&text);
out.push_str("](#");
out.push_str(anchor);
out.push(')');
},
}
},
}
}
out.push('\n');
if heading_level.is_some() {
out.push('\n');
}
},
BlockElement::Table(table) => {
markdown_table(table, ctx, out);
},
}
}
}
/// Appends the Markdown rendering of a single run to `out`.
///
/// The run's content is first collected into a buffer:
/// * text is copied verbatim and tabs become `'\t'`;
/// * a line break becomes a Markdown hard line break (two trailing spaces
///   followed by a newline; a single space would be parsed as a soft break
///   and the line break would disappear when rendered);
/// * page and column breaks become a `---` rule surrounded by blank lines;
/// * drawings are rendered by [`markdown_drawing`].
///
/// If the buffer is empty nothing is written. Otherwise the buffer is
/// wrapped in `~~` when the run is struck through and in `*`, `**` or `***`
/// for italic, bold, or both.
///
/// A run counts as struck through when either `strike` or `dstrike` is
/// explicitly `true`, so an explicit `strike = false` does not hide an
/// enabled double strike-through.
fn markdown_run(run: &Run, out: &mut String) {
    let props = run.properties.as_ref();
    let bold = props.and_then(|rp| rp.bold).unwrap_or(false);
    let italic = props.and_then(|rp| rp.italic).unwrap_or(false);
    let strike = props
        .map(|rp| rp.strike.unwrap_or(false) || rp.dstrike.unwrap_or(false))
        .unwrap_or(false);

    let mut text = String::new();
    for content in &run.content {
        match content {
            RunContent::Text(t) => text.push_str(t),
            // Hard line break: at least two spaces are required before the newline.
            RunContent::Break(BreakType::Line) => text.push_str("  \n"),
            RunContent::Break(BreakType::Page | BreakType::Column) => {
                text.push_str("\n\n---\n\n");
            }
            RunContent::Tab => text.push('\t'),
            RunContent::Drawing(drawing) => markdown_drawing(drawing, &mut text),
        }
    }
    if text.is_empty() {
        // Avoid emitting bare markers such as `****` for an empty run.
        return;
    }

    let emphasis = match (bold, italic) {
        (true, true) => "***",
        (true, false) => "**",
        (false, true) => "*",
        (false, false) => "",
    };
    let strike_marker = if strike { "~~" } else { "" };

    // Markers are closed in the reverse order they were opened.
    out.push_str(strike_marker);
    out.push_str(emphasis);
    out.push_str(&text);
    out.push_str(emphasis);
    out.push_str(strike_marker);
}
fn markdown_drawing(drawing: &DrawingInfo, out: &mut String) {
out.push_str(";
out.push_str(&drawing.relationship_id);
out.push(')');
}
/// Appends a pipe-style Markdown table for `table` to `out`.
///
/// * A table without rows produces no output.
/// * Each cell is rendered as plain text, trimmed, and flattened onto one
///   line (inner newlines become spaces). Literal `|` characters are
///   escaped as `\|` so that cell content cannot be mistaken for a column
///   separator.
/// * Rows with fewer cells than the widest row are padded with empty cells.
/// * The first row is used as the header and is followed by a `---`
///   delimiter row; the remaining rows follow, then one blank line.
///
/// `_ctx` is currently unused by this function.
fn markdown_table(table: &Table, _ctx: &MarkdownCtx, out: &mut String) {
    /// Writes one table line: `| a | b |` followed by a newline.
    fn push_row(out: &mut String, cells: &[String]) {
        out.push('|');
        for cell in cells {
            out.push(' ');
            out.push_str(cell);
            out.push_str(" |");
        }
        out.push('\n');
    }

    if table.rows.is_empty() {
        return;
    }

    let mut row_texts: Vec<Vec<String>> = table
        .rows
        .iter()
        .map(|row| {
            row.cells
                .iter()
                .map(|cell| {
                    let mut raw = String::new();
                    plain_text_blocks(&cell.content, &mut raw);
                    // An unescaped pipe would split this cell into several columns.
                    raw.trim().replace('\n', " ").replace('|', "\\|")
                })
                .collect()
        })
        .collect();

    // Pad ragged rows so every line has the same number of columns.
    let max_cols = row_texts.iter().map(Vec::len).max().unwrap_or(0);
    for row in &mut row_texts {
        row.resize(max_cols, String::new());
    }

    let mut rows = row_texts.iter();
    if let Some(header) = rows.next() {
        push_row(out, header);

        // Delimiter row separating the header from the body.
        out.push('|');
        for _ in 0..max_cols {
            out.push_str(" --- |");
        }
        out.push('\n');

        for row in rows {
            push_row(out, row);
        }
    }
    out.push('\n');
}
/// Concatenates the plain-text rendering of `runs` into a new `String`.
///
/// Each run is rendered with [`plain_text_run`], in order.
fn runs_to_plain_text(runs: &[Run]) -> String {
    runs.iter().fold(String::new(), |mut acc, run| {
        plain_text_run(run, &mut acc);
        acc
    })
}