use crate::bbox::BoundingBox;
use crate::error::{Error, Result};
use lopdf::{Document, Object};
pub fn apply_cropbox(
doc: &mut Document,
page_num: usize,
bbox: &BoundingBox,
clip_content: bool,
) -> Result<()> {
let page_id = doc
.page_iter()
.nth(page_num)
.ok_or_else(|| Error::InvalidPage(format!("page {} not found", page_num)))?;
let page_dict = doc
.get_object_mut(page_id)
.map_err(|e| Error::PdfParse(format!("failed to get page {}: {}", page_num, e)))?
.as_dict_mut()
.map_err(|e| Error::PdfParse(format!("page {} is not a dictionary: {}", page_num, e)))?;
let cropbox = Object::Array(vec![
Object::Real(bbox.left as f32),
Object::Real(bbox.bottom as f32),
Object::Real(bbox.right as f32),
Object::Real(bbox.top as f32),
]);
page_dict.set("CropBox", cropbox);
if clip_content {
filter_page_content(doc, page_id, bbox)?;
}
Ok(())
}
fn filter_page_content(doc: &mut Document, page_id: (u32, u16), bbox: &BoundingBox) -> Result<()> {
use crate::content_filter::filter_content_stream;
#[cfg(target_arch = "wasm32")]
{
use wasm_bindgen::JsValue;
web_sys::console::log_1(&JsValue::from_str("[DEBUG] Filtering page content..."));
}
let (contents_ref, resources) = {
let page = doc
.get_object(page_id)
.map_err(|e| Error::PdfParse(format!("failed to get page: {}", e)))?
.as_dict()
.map_err(|e| Error::PdfParse(format!("page is not a dictionary: {}", e)))?;
let resources = page
.get(b"Resources")
.ok()
.and_then(|obj| obj.as_dict().ok()).cloned();
let contents_ref = match page.get(b"Contents") {
Ok(obj) => obj.clone(),
Err(_) => {
return Ok(());
}
};
(contents_ref, resources)
};
let mut all_form_xobjects = vec![];
match contents_ref {
Object::Reference(ref_id) => {
let stream = doc
.get_object(ref_id)
.map_err(|e| Error::PdfParse(format!("failed to get stream: {}", e)))?
.as_stream()
.map_err(|e| Error::PdfParse(format!("object is not a stream: {}", e)))?;
let (filtered_content, form_xobjects) =
filter_content_stream(doc, stream, resources.as_ref(), bbox)?;
all_form_xobjects.extend(form_xobjects);
let stream_mut = doc
.get_object_mut(ref_id)
.map_err(|e| Error::PdfParse(format!("failed to get stream mut: {}", e)))?
.as_stream_mut()
.map_err(|e| Error::PdfParse(format!("object is not a stream: {}", e)))?;
stream_mut.set_plain_content(filtered_content);
}
Object::Array(ref streams) => {
#[cfg(target_arch = "wasm32")]
{
use wasm_bindgen::JsValue;
web_sys::console::log_1(&JsValue::from_str(&format!(
"[DEBUG] Page has {} content streams",
streams.len()
)));
}
#[cfg(debug_assertions)]
eprintln!("[DEBUG] Page has {} content streams (array)", streams.len());
for (_idx, stream_ref) in streams.iter().enumerate() {
if let Object::Reference(ref_id) = stream_ref {
#[cfg(target_arch = "wasm32")]
{
use wasm_bindgen::JsValue;
web_sys::console::log_1(&JsValue::from_str(&format!(
"[DEBUG] Filtering content stream {} of {}",
_idx + 1,
streams.len()
)));
}
let stream = doc
.get_object(*ref_id)
.map_err(|e| Error::PdfParse(format!("failed to get stream: {}", e)))?
.as_stream()
.map_err(|e| Error::PdfParse(format!("object is not a stream: {}", e)))?;
let (filtered_content, form_xobjects) =
filter_content_stream(doc, stream, resources.as_ref(), bbox)?;
all_form_xobjects.extend(form_xobjects);
let stream_mut = doc
.get_object_mut(*ref_id)
.map_err(|e| Error::PdfParse(format!("failed to get stream mut: {}", e)))?
.as_stream_mut()
.map_err(|e| Error::PdfParse(format!("object is not a stream: {}", e)))?;
stream_mut.set_plain_content(filtered_content);
}
}
}
_ => {
return Err(Error::PdfParse(
"Contents is not a reference or array".to_string(),
));
}
}
#[cfg(target_arch = "wasm32")]
{
use wasm_bindgen::JsValue;
web_sys::console::log_1(&JsValue::from_str(&format!(
"[DEBUG] Skipping Form XObject filtering ({} found) - coordinate transformation not yet implemented",
all_form_xobjects.len()
)));
}
#[cfg(target_arch = "wasm32")]
{
use wasm_bindgen::JsValue;
web_sys::console::log_1(&JsValue::from_str("[DEBUG] Content filtering complete"));
}
Ok(())
}
pub fn get_page_dimensions(doc: &Document, page_num: usize) -> Result<(f64, f64)> {
let page_id = doc
.page_iter()
.nth(page_num)
.ok_or_else(|| Error::InvalidPage(format!("page {} not found", page_num)))?;
let page = doc
.get_object(page_id)
.map_err(|e| Error::PdfParse(format!("failed to get page {}: {}", page_num, e)))?
.as_dict()
.map_err(|e| Error::PdfParse(format!("page {} is not a dictionary: {}", page_num, e)))?;
let media_box = page
.get(b"MediaBox")
.map_err(|e| Error::PdfParse(format!("MediaBox not found: {}", e)))?
.as_array()
.map_err(|e| Error::PdfParse(format!("MediaBox is not an array: {}", e)))?;
if media_box.len() != 4 {
return Err(Error::PdfParse(format!(
"MediaBox has wrong length: {}",
media_box.len()
)));
}
let left = media_box[0]
.as_f32()
.map(|f| f as f64)
.or_else(|_| media_box[0].as_i64().map(|i| i as f64))
.map_err(|e| Error::PdfParse(format!("invalid MediaBox left: {}", e)))?;
let bottom = media_box[1]
.as_f32()
.map(|f| f as f64)
.or_else(|_| media_box[1].as_i64().map(|i| i as f64))
.map_err(|e| Error::PdfParse(format!("invalid MediaBox bottom: {}", e)))?;
let right = media_box[2]
.as_f32()
.map(|f| f as f64)
.or_else(|_| media_box[2].as_i64().map(|i| i as f64))
.map_err(|e| Error::PdfParse(format!("invalid MediaBox right: {}", e)))?;
let top = media_box[3]
.as_f32()
.map(|f| f as f64)
.or_else(|_| media_box[3].as_i64().map(|i| i as f64))
.map_err(|e| Error::PdfParse(format!("invalid MediaBox top: {}", e)))?;
let width = right - left;
let height = top - bottom;
Ok((width, height))
}
/// Returns the number of pages yielded by `doc.page_iter()`.
///
/// Counting the iterator directly avoids building the page-number map that
/// `get_pages()` returns just to read its length, and uses the same page
/// sequence that zero-based page indices in this file are resolved against.
pub fn get_page_count(doc: &Document) -> usize {
    doc.page_iter().count()
}