use crate::error::{Error, Result};
use crate::objects::{PdfArray, PdfDict, PdfName, PdfObject, PdfStream};
use super::LoadedDocument;
/// A single page taken from a loaded PDF and repackaged as a Form XObject
/// stream, together with the dimensions derived from its media box.
#[derive(Debug, Clone)]
pub struct EmbeddedPage {
/// Label for the page; `extract_page` sets it to `Page{index}` using the
/// page index it was called with.
pub name: String,
/// Horizontal extent of the page, computed from the media box coordinates.
pub width: f64,
/// Vertical extent of the page, computed from the media box coordinates.
pub height: f64,
/// Form XObject stream holding the page's decoded content and the
/// `Type`/`Subtype`/`FormType`/`BBox`/`Matrix` (and optional `Resources`)
/// dictionary entries written by `extract_page`.
pub(crate) xobject: PdfStream,
// No reader of this field is visible in this part of the file, hence the
// lint allowance; the same dictionary is also copied into the XObject's
// `Resources` entry when present.
#[allow(dead_code)]
/// Resource dictionary found for the page, if any.
pub(crate) resources: Option<PdfDict>,
}
impl EmbeddedPage {
    /// Returns `width / height`.
    ///
    /// A page with zero height has no meaningful ratio; `1.0` is returned in
    /// that case so callers never see a division by zero.
    pub fn aspect_ratio(&self) -> f64 {
        if self.height != 0.0 {
            self.width / self.height
        } else {
            1.0
        }
    }

    /// Scales the page uniformly so that it fits inside a
    /// `max_width` x `max_height` box and returns the resulting
    /// `(width, height)`.
    ///
    /// The scale factor is the smaller of `max_width / width` and
    /// `max_height / height`, so the aspect ratio is preserved and the result
    /// may be larger than the original when the box is larger than the page.
    ///
    /// When no finite scale factor exists (for example a page whose width and
    /// height are both zero) the result is `(0.0, 0.0)` instead of `NaN`
    /// values.
    pub fn fit_dimensions(&self, max_width: f64, max_height: f64) -> (f64, f64) {
        let scale_x = max_width / self.width;
        let scale_y = max_height / self.height;
        // `f64::min` ignores a single NaN operand, so one degenerate axis
        // still leaves a usable factor from the other axis.
        let scale = scale_x.min(scale_y);

        if !scale.is_finite() {
            // Both axes are degenerate: multiplying a zero dimension by an
            // infinite factor would produce NaN.
            return (0.0, 0.0);
        }

        (self.width * scale, self.height * scale)
    }
}
impl LoadedDocument {
/// Extracts the page at `page_index` and wraps it in a Form XObject.
///
/// The page's media box is normalized first (a PDF rectangle may list any
/// two diagonally opposite corners), so the reported `width` and `height`
/// are never negative and the `BBox` is always written as
/// `[left bottom right top]`. The XObject's `Matrix` translates the box
/// origin to `(0, 0)`. The page's resource dictionary, when one is found,
/// is copied into the XObject and also stored on the returned value.
///
/// # Errors
///
/// Propagates any error from looking up the page, reading its media box,
/// decoding its content streams, or resolving its resources.
pub fn extract_page(&mut self, page_index: usize) -> Result<EmbeddedPage> {
    let page = self.page(page_index)?;

    // Normalize the rectangle: the stored corner order is not guaranteed.
    let [x0, y0, x1, y1] = self.get_page_media_box(&page)?;
    let (left, right) = (x0.min(x1), x0.max(x1));
    let (bottom, top) = (y0.min(y1), y0.max(y1));
    let width = right - left;
    let height = top - bottom;

    let content_data = self.get_page_content(&page)?;
    let resources = self.get_page_resources(&page)?;

    let mut xobject_dict = PdfDict::new();
    xobject_dict.set("Type", PdfObject::Name(PdfName::new("XObject")));
    xobject_dict.set("Subtype", PdfObject::Name(PdfName::new("Form")));
    xobject_dict.set("FormType", PdfObject::Integer(1));

    let bbox = PdfArray::from(vec![
        PdfObject::Real(left),
        PdfObject::Real(bottom),
        PdfObject::Real(right),
        PdfObject::Real(top),
    ]);
    xobject_dict.set("BBox", PdfObject::Array(bbox));

    // Pure translation that moves the lower-left corner of the box to the
    // origin of the form's coordinate space.
    let matrix = PdfArray::from(vec![
        PdfObject::Integer(1),
        PdfObject::Integer(0),
        PdfObject::Integer(0),
        PdfObject::Integer(1),
        PdfObject::Real(-left),
        PdfObject::Real(-bottom),
    ]);
    xobject_dict.set("Matrix", PdfObject::Array(matrix));

    if let Some(res) = &resources {
        xobject_dict.set("Resources", PdfObject::Dict(res.clone()));
    }

    let xobject = PdfStream::new(xobject_dict, content_data);

    Ok(EmbeddedPage {
        name: format!("Page{}", page_index),
        width,
        height,
        xobject,
        resources,
    })
}
/// Finds the `MediaBox` that applies to `page`.
///
/// `MediaBox` is an inheritable page attribute, so when the page itself
/// does not carry one the `Parent` chain is followed upwards until an
/// ancestor provides it. If no node on the chain has a `MediaBox`, the
/// fallback `[0, 0, 612, 792]` is returned.
///
/// # Errors
///
/// Returns an error if a `Parent` reference cannot be resolved or if the
/// `MediaBox` that is found is not a valid rectangle array.
fn get_page_media_box(&mut self, page: &PdfDict) -> Result<[f64; 4]> {
    // Upper bound on the number of ancestors visited, so that a malformed
    // file whose `Parent` links form a cycle cannot loop forever.
    const MAX_ANCESTORS: usize = 64;

    if let Some(arr) = page.get_array("MediaBox") {
        return self.parse_rect_array(arr);
    }

    let mut next_parent = page.get_ref("Parent");
    for _ in 0..MAX_ANCESTORS {
        let parent_ref = match next_parent {
            Some(parent_ref) => parent_ref,
            None => break,
        };
        let node = self.resolve_dict(parent_ref)?;
        if let Some(arr) = node.get_array("MediaBox") {
            return self.parse_rect_array(arr);
        }
        next_parent = node.get_ref("Parent");
    }

    // Nothing on the ancestor chain defines a media box.
    Ok([0.0, 0.0, 612.0, 792.0])
}
/// Converts the first four entries of `arr` into `[f64; 4]`.
///
/// Integer entries are widened to `f64`, real entries are used as-is, and
/// any other kind of entry is read as `0.0`.
///
/// # Errors
///
/// Returns `Error::InvalidStructure` when `arr` has fewer than four
/// entries.
fn parse_rect_array(&self, arr: &PdfArray) -> Result<[f64; 4]> {
    if arr.len() < 4 {
        return Err(Error::InvalidStructure(
            "Invalid rectangle array".to_string(),
        ));
    }

    let mut rect = [0.0_f64; 4];
    for (index, slot) in rect.iter_mut().enumerate() {
        *slot = match arr.get(index) {
            Some(PdfObject::Integer(n)) => *n as f64,
            Some(PdfObject::Real(n)) => *n,
            // Non-numeric entries fall back to zero rather than failing.
            _ => 0.0,
        };
    }
    Ok(rect)
}
/// Returns the decoded content bytes of `page`.
///
/// The `Contents` entry may be a reference to one stream, an array of
/// stream references, or a direct stream. Array parts are decoded in order
/// and joined with a single `\n` between non-empty accumulated output and
/// the next part; array entries that are not references are skipped. A
/// page without `Contents` yields an empty buffer.
///
/// # Errors
///
/// Propagates stream resolution and decoding errors, and returns
/// `Error::InvalidStructure` when `Contents` has any other object type.
fn get_page_content(&mut self, page: &PdfDict) -> Result<Vec<u8>> {
    let contents = match page.get("Contents") {
        Some(entry) => entry,
        None => return Ok(Vec::new()),
    };

    match contents {
        PdfObject::Reference(stream_ref) => {
            let single = self.resolve_stream(*stream_ref)?;
            single.decode()
        }
        PdfObject::Array(parts) => {
            let mut merged = Vec::new();
            let part_refs = (0..parts.len()).filter_map(|idx| parts.get_reference(idx));
            for part_ref in part_refs {
                let part = self.resolve_stream(part_ref)?;
                let bytes = part.decode()?;
                // Keep operators from adjacent streams from running together.
                if !merged.is_empty() {
                    merged.push(b'\n');
                }
                merged.extend_from_slice(&bytes);
            }
            Ok(merged)
        }
        PdfObject::Stream(inline) => inline.decode(),
        _ => Err(Error::InvalidStructure(
            "Invalid Contents entry in page".to_string(),
        )),
    }
}
/// Finds the resource dictionary that applies to `page`.
///
/// `Resources` is an inheritable page attribute: when the page has no
/// entry of its own, the `Parent` chain is followed upwards and the first
/// ancestor that carries a `Resources` entry supplies it. The entry may be
/// a direct dictionary or a reference to one. An entry of any other type,
/// on the page or on an ancestor, ends the search with `None`, as does
/// reaching the top of the chain without finding an entry.
///
/// # Errors
///
/// Returns an error if a `Parent` or `Resources` reference cannot be
/// resolved to a dictionary.
fn get_page_resources(&mut self, page: &PdfDict) -> Result<Option<PdfDict>> {
    // Upper bound on the number of ancestors visited, so that a malformed
    // file whose `Parent` links form a cycle cannot loop forever.
    const MAX_ANCESTORS: usize = 64;

    match page.get("Resources") {
        Some(PdfObject::Reference(res_ref)) => {
            return Ok(Some(self.resolve_dict(*res_ref)?));
        }
        Some(PdfObject::Dict(dict)) => return Ok(Some(dict.clone())),
        Some(_) => return Ok(None),
        None => {}
    }

    let mut next_parent = page.get_ref("Parent");
    for _ in 0..MAX_ANCESTORS {
        let parent_ref = match next_parent {
            Some(parent_ref) => parent_ref,
            None => break,
        };
        let node = self.resolve_dict(parent_ref)?;
        match node.get("Resources") {
            Some(PdfObject::Reference(res_ref)) => {
                return Ok(Some(self.resolve_dict(*res_ref)?));
            }
            Some(PdfObject::Dict(dict)) => return Ok(Some(dict.clone())),
            Some(_) => return Ok(None),
            // Not defined at this level; keep climbing.
            None => {}
        }
        next_parent = node.get_ref("Parent");
    }

    Ok(None)
}
/// Extracts every page of the document, in page order.
///
/// # Errors
///
/// Returns the first error produced by `page_count` or by `extract_page`;
/// pages after a failing one are not processed.
pub fn extract_all_pages(&mut self) -> Result<Vec<EmbeddedPage>> {
    let total = self.page_count()?;
    (0..total)
        .map(|page_index| self.extract_page(page_index))
        .collect()
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::api::Document;

    /// A rendered one-page document can be reloaded and its page extracted
    /// with positive dimensions and non-empty XObject data.
    #[test]
    fn test_extract_page() {
        let mut source = Document::new();
        source.text_at("Hello, World!", [72.0, 700.0]);
        let rendered = source.render().unwrap();

        let mut reloaded = LoadedDocument::load(rendered).unwrap();
        let page = reloaded.extract_page(0).unwrap();

        assert!(page.width > 0.0);
        assert!(page.height > 0.0);
        assert!(!page.xobject.data().is_empty());
    }

    /// Extracting all pages of a two-page document yields two entries.
    #[test]
    fn test_extract_all_pages() {
        let mut source = Document::new();
        source.text_at("Page 1", [72.0, 700.0]);
        source.start_new_page();
        source.text_at("Page 2", [72.0, 700.0]);
        let rendered = source.render().unwrap();

        let mut reloaded = LoadedDocument::load(rendered).unwrap();
        let extracted = reloaded.extract_all_pages().unwrap();

        assert_eq!(extracted.len(), 2);
    }
}