use crate::error::{Error, Result};
use office_oxide::pptx::write::{PptxWriter, Run};
/// Conversion factor from typographic points to English Metric Units (EMU),
/// the integer unit in which slide offsets and extents are passed to the PPTX
/// writer: 914 400 EMU per inch / 72 pt per inch = 12 700 EMU per point.
const EMU_PER_PT: f32 = 12_700.0;
/// Maps the origin of a PDF-space box onto the slide, returning `(x, y_top)` in points.
///
/// The vertical axis is flipped (`page_h_pt - y_pdf - height`) so the result is
/// measured from the top of the page, and both coordinates are clamped into
/// `[0, page_w_pt]` / `[0, page_h_pt]`.
fn slide_origin_pt(
    x_pdf: f32,
    y_pdf: f32,
    height: f32,
    page_w_pt: f32,
    page_h_pt: f32,
) -> (f32, f32) {
    // `max` then `min` (rather than `clamp`) so degenerate or NaN page sizes never panic.
    let x_pt = x_pdf.max(0.0).min(page_w_pt);
    let y_top_pt = (page_h_pt - y_pdf - height).max(0.0).min(page_h_pt);
    (x_pt, y_top_pt)
}

/// Computes the slide frame `(x, y, cx, cy)` in EMU for a picture whose PDF-space
/// box is `(x_pdf, y_pdf, w, h)`.
///
/// The origin is flipped and clamped by [`slide_origin_pt`] using the unclamped
/// height; the extent is then forced to at least one point in each direction.
fn image_frame_emu(
    x_pdf: f32,
    y_pdf: f32,
    w: f32,
    h: f32,
    page_w_pt: f32,
    page_h_pt: f32,
) -> (i64, i64, u64, u64) {
    let (x_pt, y_top_pt) = slide_origin_pt(x_pdf, y_pdf, h, page_w_pt, page_h_pt);
    (
        (x_pt * EMU_PER_PT) as i64,
        (y_top_pt * EMU_PER_PT) as i64,
        (w.max(1.0) * EMU_PER_PT) as u64,
        (h.max(1.0) * EMU_PER_PT) as u64,
    )
}

/// Picks a font name for a span whose raw PDF font name has no entry in the
/// page's font lookup.
///
/// Everything up to and including the first `+` is dropped from `raw_font`
/// (presumably a font-subset tag such as `ABCDEF+`). If what remains contains
/// at least one alphabetic character it is used as-is; otherwise the fallback
/// is `"Courier New"` for monospace spans and `"Times New Roman"` for the rest.
fn fallback_font_name(raw_font: &str, is_monospace: bool) -> String {
    let stripped = raw_font
        .split_once('+')
        .map_or(raw_font, |(_, rest)| rest);
    if stripped.chars().any(char::is_alphabetic) {
        stripped.to_string()
    } else if is_monospace {
        "Courier New".to_string()
    } else {
        "Times New Roman".to_string()
    }
}

/// Converts `doc` into an in-memory PPTX file, one slide per PDF page, keeping
/// the page layout.
///
/// For every page:
/// * text spans are extracted, spans overlapping rotated characters or lying
///   inside detected music regions are dropped, hyphenated spans are merged,
///   and the remaining spans are grouped into lines; each line becomes one
///   rich text box placed at the line's position;
/// * every extracted image that has a bounding box and converts to non-empty
///   PNG bytes is placed at its bounding box;
/// * with the `rendering` feature enabled, rasterized form/inline regions and
///   (if any music regions were found) rasterized music regions are added as
///   pictures too.
///
/// The presentation size is taken from the media box of page 0. Per-page
/// media boxes are only used to flip and clamp coordinates on that page.
///
/// # Errors
///
/// * Propagates the error of `page_count` and `get_page_media_box`.
/// * Returns [`Error::InvalidOperation`] when the document has zero pages or
///   when the PPTX writer fails.
///
/// Font lookups, embedded fonts, spans, characters and images are best
/// effort: a failure in any of those extractions is ignored and the slide is
/// built from whatever is available.
pub fn to_pptx_bytes_layout(doc: &crate::document::PdfDocument) -> Result<Vec<u8>> {
    let n_pages = doc.page_count()?;
    if n_pages == 0 {
        return Err(Error::InvalidOperation("PDF has zero pages".into()));
    }

    // Best effort: without lookups every span goes through `fallback_font_name`.
    let lookups = doc.page_font_face_lookups().unwrap_or_default();

    let mut writer = PptxWriter::new();
    if let Ok(fonts) = doc.extract_embedded_fonts() {
        for (name, data) in fonts {
            writer.embed_font(name, data);
        }
    }

    // The deck has a single slide size; it is derived from the first page.
    let (x1_0, y1_0, x2_0, y2_0) = doc.get_page_media_box(0)?;
    let page0_w_pt = (x2_0 - x1_0).abs();
    let page0_h_pt = (y2_0 - y1_0).abs();
    writer.set_presentation_size(
        (page0_w_pt * EMU_PER_PT) as u64,
        (page0_h_pt * EMU_PER_PT) as u64,
    );

    for page_idx in 0..n_pages {
        let (x1, y1, x2, y2) = doc.get_page_media_box(page_idx)?;
        let page_w_pt = (x2 - x1).abs();
        let page_h_pt = (y2 - y1).abs();

        let mut spans = doc.extract_spans(page_idx).unwrap_or_default();

        // Drop spans that overlap rotated glyphs. The page counts as
        // horizontally dominant when it has no chars at all or when at least
        // 75% of them are rotated by less than 5 degrees.
        if let Ok(chars) = doc.extract_chars(page_idx) {
            let chars_horizontal_dominant = chars.is_empty() || {
                let horiz = chars
                    .iter()
                    .filter(|c| c.rotation_degrees.abs() < 5.0)
                    .count();
                horiz * 4 >= chars.len() * 3
            };
            spans.retain(|s| {
                !crate::converters::pdf_to_ir::span_overlaps_rotated_chars(
                    s,
                    &chars,
                    chars_horizontal_dominant,
                )
            });
        }

        // Text inside music regions is removed here; with the `rendering`
        // feature those regions are added back below as rasterized pictures.
        let music_regions =
            crate::converters::music_region_finder::find_music_regions(doc, page_idx);
        if !music_regions.is_empty() {
            spans.retain(|s| {
                !music_regions.iter().any(|r| {
                    crate::converters::music_region_finder::rect_contains_bbox(r, &s.bbox)
                })
            });
        }

        merge_hyphenated_spans(&mut spans);

        // Borrow the page's font map instead of cloning it; a page without an
        // entry simply resolves every font through the fallback.
        let font_lookup = lookups.get(page_idx);

        let slide = writer.add_slide();
        let lines = crate::converters::layout_lines::group_spans_into_lines(spans);

        for line in &lines {
            let (x_pt, y_top_pt) = slide_origin_pt(
                line.x_pt,
                line.y_pt,
                line.height_pt,
                page_w_pt,
                page_h_pt,
            );
            // The box is made wider than the measured line (at least +50% or
            // +16pt, and never under 8pt) — presumably slack so the text does
            // not wrap when rendered with different font metrics.
            let w_pt = (line.width_pt * 1.5).max(line.width_pt + 16.0).max(8.0);
            // Height is at least 1.4x the largest font size on the line.
            let tallest_font = line
                .spans
                .iter()
                .map(|s| s.font_size)
                .fold(0.0_f32, f32::max);
            let h_pt = line.height_pt.max(tallest_font * 1.4);

            let x_emu = (x_pt * EMU_PER_PT) as i64;
            let y_emu = (y_top_pt * EMU_PER_PT) as i64;
            let cx_emu = (w_pt * EMU_PER_PT) as i64;
            let cy_emu = (h_pt * EMU_PER_PT) as i64;

            let mut runs: Vec<Run> = Vec::with_capacity(line.spans.len());
            // Right edge of the last span that produced a run; `None` until
            // the first run of the line has been emitted.
            let mut prev_right_pt: Option<f32> = None;

            for span in line.spans.iter() {
                let text = span.text.trim_matches('\u{0000}');
                if text.is_empty() {
                    continue;
                }

                // Insert an explicit space when the horizontal gap to the
                // previous run exceeds a quarter of the font size and neither
                // side already carries whitespace at the join.
                if let Some(prev_right) = prev_right_pt {
                    let gap = span.bbox.x - prev_right;
                    let prev_ends_with_space = runs
                        .last()
                        .and_then(|r| r.text.chars().next_back())
                        .map_or(false, char::is_whitespace);
                    let starts_with_space = text.starts_with(char::is_whitespace);
                    if gap > span.font_size * 0.25 && !prev_ends_with_space && !starts_with_space
                    {
                        runs.push(Run::new(" "));
                    }
                }

                let raw_font = span.font_name.as_str();
                let resolved: String = font_lookup
                    .and_then(|fonts| fonts.get(raw_font))
                    .cloned()
                    .unwrap_or_else(|| fallback_font_name(raw_font, span.is_monospace));

                let mut run = Run::new(text)
                    .font(resolved)
                    .font_size(f64::from(span.font_size));
                if span.font_weight.is_bold() {
                    run = run.bold();
                }
                if span.is_italic {
                    run = run.italic();
                }
                // Pure black gets no explicit colour; anything else is
                // written as an `RRGGBB` hex string.
                let is_black =
                    span.color.r == 0.0 && span.color.g == 0.0 && span.color.b == 0.0;
                if !is_black {
                    let hex = format!(
                        "{:02X}{:02X}{:02X}",
                        (span.color.r * 255.0).round().clamp(0.0, 255.0) as u8,
                        (span.color.g * 255.0).round().clamp(0.0, 255.0) as u8,
                        (span.color.b * 255.0).round().clamp(0.0, 255.0) as u8,
                    );
                    run = run.color(hex);
                }

                runs.push(run);
                prev_right_pt = Some(span.bbox.x + span.bbox.width);
            }

            if !runs.is_empty() {
                slide.add_rich_text_box(&runs, x_emu, y_emu, cx_emu, cy_emu);
            }
        }

        // PDF-space rectangles of the images placed on this slide, passed to
        // the form/inline rasterizer below.
        let mut existing_rects_pdf: Vec<(f32, f32, f32, f32)> = Vec::new();
        if let Ok(imgs) = doc.extract_images(page_idx) {
            for img in imgs {
                let bbox = match img.bbox() {
                    Some(b) => *b,
                    None => continue,
                };
                let png = match img.to_png_bytes() {
                    Ok(b) if !b.is_empty() => b,
                    _ => continue,
                };
                existing_rects_pdf.push((bbox.x, bbox.y, bbox.width, bbox.height));
                let (x_emu, y_emu, cx_emu, cy_emu) = image_frame_emu(
                    bbox.x,
                    bbox.y,
                    bbox.width,
                    bbox.height,
                    page_w_pt,
                    page_h_pt,
                );
                slide.add_image(
                    png,
                    office_oxide::ir::ImageFormat::Png,
                    x_emu,
                    y_emu,
                    cx_emu,
                    cy_emu,
                );
            }
        }

        #[cfg(feature = "rendering")]
        {
            let form_regions =
                crate::converters::form_xobject_finder::rasterize_form_and_inline_regions(
                    doc,
                    page_idx,
                    page_h_pt,
                    &existing_rects_pdf,
                );
            for ((x_pdf, y_pdf, w, h), png) in form_regions {
                let (x_emu, y_emu, cx_emu, cy_emu) =
                    image_frame_emu(x_pdf, y_pdf, w, h, page_w_pt, page_h_pt);
                slide.add_image(
                    png,
                    office_oxide::ir::ImageFormat::Png,
                    x_emu,
                    y_emu,
                    cx_emu,
                    cy_emu,
                );
            }

            if !music_regions.is_empty() {
                let rasterized_music =
                    crate::converters::music_region_finder::rasterize_music_regions(
                        doc, page_idx, page_h_pt,
                    );
                for ((x_pdf, y_pdf, w, h), png) in rasterized_music {
                    let (x_emu, y_emu, cx_emu, cy_emu) =
                        image_frame_emu(x_pdf, y_pdf, w, h, page_w_pt, page_h_pt);
                    slide.add_image(
                        png,
                        office_oxide::ir::ImageFormat::Png,
                        x_emu,
                        y_emu,
                        cx_emu,
                        cy_emu,
                    );
                }
            }
        }
    }

    let mut buf = std::io::Cursor::new(Vec::new());
    writer
        .write_to(&mut buf)
        .map_err(|e| Error::InvalidOperation(format!("PPTX layout export: {e}")))?;
    Ok(buf.into_inner())
}
/// Joins words that were split by a trailing hyphen across consecutive spans.
///
/// A span is folded into the span before it when all of the following hold:
/// * the earlier span's text ends with `'-'`,
/// * both spans have the same font size (difference below `0.01`),
/// * the later span's text starts with an ASCII lowercase letter.
///
/// On a merge the trailing hyphen is removed, the later text is appended, the
/// later span's `bbox.width` is added to the earlier span's width, and the
/// later span is dropped from `spans`. Every other field of the earlier span
/// is left untouched. Merges chain: the already-merged span is compared
/// against the next one, so `"a-"`, `"b-"`, `"c"` collapses to `"abc"`.
///
/// The pass is a single in-place sweep (`Vec::dedup_by`), so it runs in linear
/// time in the number of spans rather than shifting the tail on every merge.
fn merge_hyphenated_spans(spans: &mut Vec<crate::layout::text_block::TextSpan>) {
    // `dedup_by` hands the closure (current, previously retained); returning
    // `true` drops `current`, so we fold it into `prev` first.
    spans.dedup_by(|next, prev| {
        let continues_word = prev.text.ends_with('-')
            && (prev.font_size - next.font_size).abs() < 0.01
            && next
                .text
                .chars()
                .next()
                .map_or(false, |c| c.is_ascii_lowercase());
        if continues_word {
            // Remove the trailing '-' (guaranteed present by `ends_with`).
            prev.text.pop();
            prev.text.push_str(&next.text);
            prev.bbox.width += next.bbox.width;
        }
        continues_word
    });
}