use crate::geometry::{Matrix, Rect};
use crate::pages::PageBoxes;
use crate::pages::boxes::object_to_f64;
use lopdf::content::{Content, Operation};
use lopdf::{Document, Object};
use std::collections::HashSet;
/// Stateless namespace type: the content-stream filtering entry points are
/// associated functions on this unit struct.
pub struct ContentFilter;
impl ContentFilter {
    /// Filters every page of `doc` against the rectangle returned by
    /// `PageBoxes::trim_or_media` for that page, then calls
    /// `Document::prune_objects` on the document.
    ///
    /// # Errors
    /// Returns the first error produced while reading a page's boxes or while
    /// filtering a page. Pages handled before the failure stay modified.
    pub fn remove_outside_trim(doc: &mut Document) -> crate::Result<()> {
        let pages = doc.get_pages();
        for &page_id in pages.values() {
            let trim = PageBoxes::read(doc, page_id);
            Self::filter_page(doc, page_id, trim?.trim_or_media())?;
        }
        doc.prune_objects();
        Ok(())
    }

    /// Rewrites the content of one page so that operations judged to lie
    /// outside `trim` are removed, then prunes resource entries the surviving
    /// operations no longer name.
    ///
    /// The filtered content is written into the page's first content stream.
    /// When the page has several streams, the others are emptied and the
    /// page's `Contents` entry is pointed at the first stream only.
    ///
    /// A page without any content stream is left untouched.
    ///
    /// # Errors
    /// Fails when the page content cannot be decoded or re-encoded, or when
    /// resource pruning reports an error.
    pub fn filter_page(
        doc: &mut Document,
        page_id: lopdf::ObjectId,
        trim: &Rect,
    ) -> crate::Result<()> {
        let content = doc.get_and_decode_page_content(page_id)?;
        let stream_ids = doc.get_page_contents(page_id);

        // Indexing `stream_ids[0]` would panic on a page with no content
        // stream; such a page has nothing to filter.
        let Some((&stream_id, extra_ids)) = stream_ids.split_first() else {
            return Ok(());
        };

        let new_content = Content {
            operations: filter_operations(&content.operations, Some(*trim)),
        };
        let bytes = new_content.encode()?;

        if let Ok(Object::Stream(stream)) = doc.get_object_mut(stream_id) {
            stream.set_plain_content(bytes);
        }

        if !extra_ids.is_empty() {
            // Everything now lives in the first stream, so the remaining
            // streams are blanked and dropped from the page's `Contents`.
            for &extra_id in extra_ids {
                if let Ok(Object::Stream(extra)) = doc.get_object_mut(extra_id) {
                    extra.set_plain_content(Vec::new());
                }
            }
            if let Ok(page_obj) = doc.get_object_mut(page_id)
                && let Ok(dict) = page_obj.as_dict_mut()
            {
                dict.set("Contents", Object::Reference(stream_id));
            }
        }

        let referenced = collect_referenced_resources(&new_content.operations);
        prune_page_resources(doc, page_id, &referenced)?;
        Ok(())
    }
}
/// Reports whether the axis-aligned bounding box of `points`, after each
/// point is mapped through `ctm`, is outside `trim` according to
/// `Rect::is_outside`.
///
/// An empty point list is never considered outside.
fn subpath_bbox_is_outside(points: &[(f64, f64)], ctm: &Matrix, trim: &Rect) -> bool {
    if points.is_empty() {
        return false;
    }
    let start = (
        f64::INFINITY,
        f64::INFINITY,
        f64::NEG_INFINITY,
        f64::NEG_INFINITY,
    );
    let (left, bottom, right, top) =
        points
            .iter()
            .fold(start, |(left, bottom, right, top), &(x, y)| {
                let (px, py) = ctm.transform_point(x, y);
                (left.min(px), bottom.min(py), right.max(px), top.max(py))
            });
    Rect::from_corners(left, bottom, right, top).is_outside(trim)
}
pub fn operands_to_matrix(operands: &[lopdf::Object]) -> Matrix {
let value: Vec<f64> = operands.iter().map(object_to_f64).collect();
Matrix::from_values(value[0], value[1], value[2], value[3], value[4], value[5])
}
pub fn operands_to_rect(operands: &[lopdf::Object]) -> Rect {
let value: Vec<f64> = operands.iter().map(object_to_f64).collect();
Rect::from_corners(value[0], value[1], value[0] + value[2], value[1] + value[3])
}
/// Reports whether the rectangle described by the operands of an `re`
/// operator, once transformed by `ctm`, is outside `trim` according to
/// `Rect::is_outside`.
fn re_is_outside(operands: &[lopdf::Object], ctm: &Matrix, trim: &Rect) -> bool {
    ctm.transform_rect(&operands_to_rect(operands))
        .is_outside(trim)
}
/// Filters a decoded content stream against `trim`.
///
/// Operations are grouped into `q` … `Q` blocks. Each block is passed to
/// `filter_block` when its `Q` is reached, using the CTM in effect outside
/// the block, and the result is appended to the enclosing block (or to the
/// top-level output). Finally the whole output goes through
/// `remove_outside_re_f_pairs` with an identity base CTM.
///
/// Handling of unbalanced input:
/// * operations after a `q` that is never closed are kept, in their original
///   order, instead of being lost together with the unfinished block;
/// * a `Q` without a matching `q` is dropped and leaves the tracked CTM
///   untouched, so later blocks are still judged against the right base.
fn filter_operations(operations: &[Operation], trim: Option<Rect>) -> Vec<Operation> {
    let mut output: Vec<Operation> = Vec::new();
    // Invariant: one CTM for the top level plus one per open block.
    let mut ctm_stack: Vec<Matrix> = vec![Matrix::identity()];
    let mut block_stack: Vec<Vec<Operation>> = Vec::new();

    for operation in operations {
        match operation.operator.as_str() {
            "q" => {
                let inherited = ctm_stack.last().copied().unwrap_or(Matrix::identity());
                ctm_stack.push(inherited);
                block_stack.push(vec![operation.clone()]);
            }
            "Q" => {
                // Only unwind the CTM when a block is really being closed.
                if let Some(mut block) = block_stack.pop() {
                    ctm_stack.pop();
                    block.push(operation.clone());
                    let filtered_block = filter_block(block, trim.as_ref(), &ctm_stack);
                    match block_stack.last_mut() {
                        Some(parent) => parent.extend(filtered_block),
                        None => output.extend(filtered_block),
                    }
                }
            }
            operator => {
                if operator == "cm" {
                    let m = operands_to_matrix(&operation.operands);
                    if let Some(top) = ctm_stack.last_mut() {
                        *top = m.concat(top);
                    } else {
                        ctm_stack.push(m);
                    }
                }
                match block_stack.last_mut() {
                    Some(block) => block.push(operation.clone()),
                    None => output.push(operation.clone()),
                }
            }
        }
    }

    // Blocks still open at the end of the stream: the outermost block comes
    // first in `block_stack`, so appending in order restores stream order.
    for unclosed in block_stack {
        output.extend(unclosed);
    }

    remove_outside_re_f_pairs(output, &Matrix::identity(), trim.as_ref())
}
/// Filters one complete `q` … `Q` block.
///
/// The base CTM is the top of `ctm_stack` (identity when the stack is
/// empty). The block is discarded entirely when `block_is_outside_image`
/// flags it; otherwise it is passed through `remove_outside_re_f_pairs`.
fn filter_block(
    block: Vec<Operation>,
    trim: Option<&Rect>,
    ctm_stack: &[Matrix],
) -> Vec<Operation> {
    let base_ctm = match ctm_stack.last() {
        Some(top) => *top,
        None => Matrix::identity(),
    };
    if block_is_outside_image(&block, &base_ctm, trim) {
        Vec::new()
    } else {
        remove_outside_re_f_pairs(block, &base_ctm, trim)
    }
}
/// Reports whether `block` contains a `Do` whose placement lies outside
/// `trim`.
///
/// The CTM is tracked through `q`, `Q` and `cm`, starting from `base_ctm`.
/// A `Do` is only examined when a `cm` was seen at its own `q` nesting level
/// and the absolute determinant of the CTM exceeds 2.0; the unit square is
/// then mapped through the CTM and tested with `Rect::is_outside`.
/// NOTE(review): the 2.0 threshold presumably skips very small placements —
/// confirm the intent.
///
/// Always returns `false` when `trim` is `None`.
fn block_is_outside_image(block: &[Operation], base_ctm: &Matrix, trim: Option<&Rect>) -> bool {
    let mut matrices: Vec<Matrix> = vec![*base_ctm];
    // Parallel to the `q` nesting: has a `cm` been seen at this level?
    let mut cm_seen: Vec<bool> = vec![false];

    for op in block {
        match op.operator.as_str() {
            "q" => {
                let inherited = match matrices.last() {
                    Some(top) => *top,
                    None => Matrix::identity(),
                };
                matrices.push(inherited);
                cm_seen.push(false);
            }
            "Q" => {
                matrices.pop();
                cm_seen.pop();
            }
            "cm" => {
                let m = operands_to_matrix(&op.operands);
                match matrices.last_mut() {
                    Some(top) => *top = m.concat(top),
                    None => matrices.push(m),
                }
                if let Some(seen) = cm_seen.last_mut() {
                    *seen = true;
                }
            }
            "Do" => {
                let placed_here = matches!(cm_seen.last(), Some(true));
                if !placed_here {
                    continue;
                }
                let Some(trim) = trim else {
                    continue;
                };
                let ctm = match matrices.last() {
                    Some(top) => *top,
                    None => Matrix::identity(),
                };
                let det = (ctm.a * ctm.d - ctm.b * ctm.c).abs();
                if det > 2.0
                    && ctm
                        .transform_rect(&Rect::new(0.0, 0.0, 1.0, 1.0))
                        .is_outside(trim)
                {
                    return true;
                }
            }
            _ => {}
        }
    }
    false
}
fn remove_outside_re_f_pairs(
block: Vec<Operation>,
base_ctm: &Matrix,
trim: Option<&Rect>,
) -> Vec<Operation> {
let mut result: Vec<Operation> = Vec::new();
let mut ctm_stack: Vec<Matrix> = vec![*base_ctm];
let mut i = 0;
let mut in_path = false;
#[allow(clippy::type_complexity)]
let mut subpaths: Vec<(Vec<Operation>, Vec<(f64, f64)>)> = Vec::new();
let mut current_operation: Vec<Operation> = Vec::new();
let mut current_points: Vec<(f64, f64)> = Vec::new();
let mut has_clip = false;
while i < block.len() {
let operation = &block[i];
if in_path {
match operation.operator.as_str() {
"m" => {
subpaths.push((
std::mem::take(&mut current_operation),
std::mem::take(&mut current_points),
));
let x = object_to_f64(&operation.operands[0]);
let y = object_to_f64(&operation.operands[1]);
current_operation = vec![operation.clone()];
current_points = vec![(x, y)];
i += 1;
}
"l" => {
current_points.push((
object_to_f64(&operation.operands[0]),
object_to_f64(&operation.operands[1]),
));
current_operation.push(operation.clone());
i += 1;
}
"c" => {
for chunk in operation.operands.chunks(2) {
current_points.push((object_to_f64(&chunk[0]), object_to_f64(&chunk[1])));
}
current_operation.push(operation.clone());
i += 1;
}
"v" | "y" => {
for chunk in operation.operands.chunks(2) {
current_points.push((object_to_f64(&chunk[0]), object_to_f64(&chunk[1])));
}
current_operation.push(operation.clone());
i += 1;
}
"h" => {
current_operation.push(operation.clone());
i += 1;
}
"re" => {
subpaths.push((
std::mem::take(&mut current_operation),
std::mem::take(&mut current_points),
));
let x = object_to_f64(&operation.operands[0]);
let y = object_to_f64(&operation.operands[1]);
let w = object_to_f64(&operation.operands[2]);
let h_val = object_to_f64(&operation.operands[3]);
current_operation = vec![operation.clone()];
current_points = vec![(x, y), (x + w, y), (x + w, y + h_val), (x, y + h_val)];
i += 1;
}
"W" | "W*" => {
has_clip = true;
current_operation.push(operation.clone());
i += 1;
}
"S" | "s" | "f" | "f*" | "F" | "B" | "B*" | "b" | "b*" | "n" => {
subpaths.push((
std::mem::take(&mut current_operation),
std::mem::take(&mut current_points),
));
in_path = false;
let paint = operation.operator.as_str();
let ctm = ctm_stack.last().copied().unwrap_or(Matrix::identity());
if has_clip || paint == "n" {
for (ops, _) in subpaths.drain(..) {
result.extend(ops);
}
result.push(operation.clone());
} else if paint == "S" || paint == "s" {
let mut kept: Vec<Operation> = Vec::new();
for (sub_ops, sub_pts) in subpaths.drain(..) {
let outside =
trim.is_some_and(|t| subpath_bbox_is_outside(&sub_pts, &ctm, t));
if !outside {
kept.extend(sub_ops);
}
}
if !kept.is_empty() {
result.extend(kept);
result.push(Operation {
operator: paint.to_string(),
operands: vec![],
});
}
} else {
let all_outside = trim.is_some_and(|t| {
!subpaths.is_empty()
&& subpaths
.iter()
.all(|(_, pts)| subpath_bbox_is_outside(pts, &ctm, t))
});
if !all_outside {
for (ops, _) in subpaths.drain(..) {
result.extend(ops);
}
result.push(operation.clone());
}
subpaths.clear();
}
has_clip = false;
i += 1;
}
_ => {
subpaths.push((
std::mem::take(&mut current_operation),
std::mem::take(&mut current_points),
));
for (ops, _) in subpaths.drain(..) {
result.extend(ops);
}
in_path = false;
has_clip = false;
result.push(operation.clone());
i += 1;
}
}
continue;
}
match operation.operator.as_str() {
"q" => {
let last = ctm_stack.last().copied().unwrap_or(Matrix::identity());
ctm_stack.push(last);
result.push(operation.clone());
i += 1;
}
"Q" => {
if !ctm_stack.is_empty() {
ctm_stack.pop();
}
result.push(operation.clone());
i += 1;
}
"cm" => {
let m = operands_to_matrix(&operation.operands);
if let Some(top) = ctm_stack.last_mut() {
*top = m.concat(top);
} else {
ctm_stack.push(m);
};
result.push(operation.clone());
i += 1;
}
"re" => {
let next_operation = block.get(i + 1).map(|o| o.operator.as_str());
if next_operation == Some("f") || next_operation == Some("f*") {
if let Some(trim) = trim {
let local_ctm = ctm_stack.last().copied().unwrap_or(Matrix::identity());
if re_is_outside(&operation.operands, &local_ctm, trim) {
i += 2;
continue;
}
}
result.push(operation.clone());
i += 1;
} else {
in_path = true;
subpaths.clear();
has_clip = false;
let x = object_to_f64(&operation.operands[0]);
let y = object_to_f64(&operation.operands[1]);
let w = object_to_f64(&operation.operands[2]);
let h_val = object_to_f64(&operation.operands[3]);
current_operation = vec![operation.clone()];
current_points = vec![(x, y), (x + w, y), (x + w, y + h_val), (x, y + h_val)];
i += 1;
}
}
"m" => {
in_path = true;
subpaths.clear();
has_clip = false;
let x = object_to_f64(&operation.operands[0]);
let y = object_to_f64(&operation.operands[1]);
current_operation = vec![operation.clone()];
current_points = vec![(x, y)];
i += 1;
}
_ => {
result.push(operation.clone());
i += 1;
}
}
}
result
}
/// Collects the resource names used by `operations`.
///
/// For each `gs`, `Do`, `cs`, `CS`, `scn`, `SCN`, `sh` and `Tf` operator the
/// first operand is recorded when it is a name. Names of all kinds share one
/// set.
fn collect_referenced_resources(operations: &[Operation]) -> HashSet<Vec<u8>> {
    operations
        .iter()
        .filter(|operation| {
            matches!(
                operation.operator.as_str(),
                "gs" | "Do" | "cs" | "CS" | "scn" | "SCN" | "sh" | "Tf"
            )
        })
        .filter_map(|operation| match operation.operands.first() {
            Some(lopdf::Object::Name(name)) => Some(name.clone()),
            _ => None,
        })
        .collect()
}
/// Removes entries not listed in `referenced` from the `ExtGState`, `Font`,
/// `XObject` and `ColorSpace` sub-dictionaries of the page's `Resources`.
///
/// `Resources` and each sub-dictionary may be stored inline or behind a
/// reference; both forms are handled. A page without its own `Resources`
/// entry (for example when the entry is inherited from an ancestor node) or
/// with a `Resources` value of another kind is left untouched.
///
/// NOTE(review): referenced dictionaries are edited in place. If the same
/// dictionary object is shared by several pages, names needed only by the
/// other pages would be removed as well — confirm callers never share them.
///
/// # Errors
/// Propagates lopdf lookup failures, e.g. a dangling reference.
fn prune_page_resources(
    document: &mut lopdf::Document,
    page_id: lopdf::ObjectId,
    referenced: &HashSet<Vec<u8>>,
) -> lopdf::Result<()> {
    const PRUNED_CATEGORIES: [&[u8]; 4] = [b"ExtGState", b"Font", b"XObject", b"ColorSpace"];

    let page = document.get_dictionary(page_id)?;
    // No page-level entry means there is nothing to prune here; that must not
    // be reported as an error.
    let Ok(resources_obj) = page.get(b"Resources") else {
        return Ok(());
    };
    // `Some(id)` for an indirect resources dictionary, `None` for an inline one.
    let resources_id = match resources_obj {
        Object::Reference(id) => Some(*id),
        Object::Dictionary(_) => None,
        _ => return Ok(()),
    };

    // First pass (read-only): find out where each category dictionary lives.
    let mut indirect_subs: Vec<lopdf::ObjectId> = Vec::new();
    let mut inline_keys: Vec<&[u8]> = Vec::new();
    {
        let resources = match resources_id {
            Some(id) => document.get_dictionary(id)?,
            None => document
                .get_dictionary(page_id)?
                .get(b"Resources")?
                .as_dict()?,
        };
        for key in PRUNED_CATEGORIES {
            match resources.get(key) {
                Ok(Object::Reference(sub_id)) => indirect_subs.push(*sub_id),
                Ok(Object::Dictionary(_)) => inline_keys.push(key),
                _ => {}
            }
        }
    }

    // Second pass: category dictionaries stored as separate objects.
    for sub_id in indirect_subs {
        if let Ok(sub_dict) = document.get_object_mut(sub_id)?.as_dict_mut() {
            let unused: Vec<Vec<u8>> = sub_dict
                .iter()
                .filter(|(name, _)| !referenced.contains(*name))
                .map(|(name, _)| name.clone())
                .collect();
            for name in unused {
                sub_dict.remove(&name);
            }
        }
    }

    // Third pass: category dictionaries stored inline in the resources.
    let resources_dict = match resources_id {
        Some(id) => document.get_object_mut(id)?.as_dict_mut()?,
        None => document
            .get_object_mut(page_id)?
            .as_dict_mut()?
            .get_mut(b"Resources")?
            .as_dict_mut()?,
    };
    for key in inline_keys {
        if let Ok(Object::Dictionary(sub_dict)) = resources_dict.get_mut(key) {
            let unused: Vec<Vec<u8>> = sub_dict
                .iter()
                .filter(|(name, _)| !referenced.contains(*name))
                .map(|(name, _)| name.clone())
                .collect();
            for name in unused {
                sub_dict.remove(&name);
            }
        }
    }
    Ok(())
}
#[cfg(test)]
mod tests {
    //! Unit tests for the operand helpers, the geometric predicates and the
    //! content filters, plus two fixture-based tests that are skipped when
    //! the fixture PDF is not present.

    use super::*;
    use crate::pages::boxes::object_to_f64;
    use lopdf::Object;
    use lopdf::content::Operation;

    /// Builds a `Vec<Object>` of `Object::Real` operands from numeric literals.
    macro_rules! reals {
        ($($value:expr),* $(,)?) => {
            vec![$(Object::Real($value)),*]
        };
    }

    /// Shorthand for constructing an [`Operation`].
    fn op(operator: &str, operands: Vec<Object>) -> Operation {
        Operation {
            operator: operator.to_string(),
            operands,
        }
    }

    /// Trim rectangle shared by most tests.
    fn page_trim() -> Rect {
        Rect::from_corners(30.0, 30.0, 642.0, 822.0)
    }

    /// `q  <rect> re  f  Q`
    fn fill_block(rect: Vec<Object>) -> Vec<Operation> {
        vec![
            op("q", vec![]),
            op("re", rect),
            op("f", vec![]),
            op("Q", vec![]),
        ]
    }

    /// `q  <matrix> cm  /Im1 Do  Q`
    fn image_block(matrix: Vec<Object>) -> Vec<Operation> {
        vec![
            op("q", vec![]),
            op("cm", matrix),
            op("Do", vec![Object::Name(b"Im1".to_vec())]),
            op("Q", vec![]),
        ]
    }

    // ---- object_to_f64 ----------------------------------------------------

    #[test]
    fn object_to_f64_integer() {
        assert!((object_to_f64(&Object::Integer(42)) - 42.0).abs() < 1e-10);
    }

    #[test]
    fn object_to_f64_real() {
        // 2.75 is exactly representable, and unlike 3.14 it does not trip
        // clippy's deny-by-default `approx_constant` lint.
        assert!((object_to_f64(&Object::Real(2.75)) - 2.75).abs() < 1e-6);
    }

    #[test]
    fn object_to_f64_negative() {
        assert!((object_to_f64(&Object::Integer(-7)) - (-7.0)).abs() < 1e-10);
        assert!((object_to_f64(&Object::Real(-2.5)) - (-2.5)).abs() < 0.01);
    }

    // ---- operands_to_rect -------------------------------------------------

    #[test]
    fn operands_to_rect_basic() {
        let ops = reals![10.0, 20.0, 100.0, 50.0];
        let r = operands_to_rect(&ops);
        assert!((r.x - 10.0).abs() < 1e-10);
        assert!((r.y - 20.0).abs() < 1e-10);
        assert!((r.width - 100.0).abs() < 1e-10);
        assert!((r.height - 50.0).abs() < 1e-10);
    }

    #[test]
    fn operands_to_rect_integers() {
        let ops = vec![
            Object::Integer(0),
            Object::Integer(0),
            Object::Integer(612),
            Object::Integer(792),
        ];
        let r = operands_to_rect(&ops);
        assert!((r.x - 0.0).abs() < 1e-10);
        assert!((r.y - 0.0).abs() < 1e-10);
        assert!((r.width - 612.0).abs() < 1e-10);
        assert!((r.height - 792.0).abs() < 1e-10);
    }

    #[test]
    fn operands_to_rect_negative_dims() {
        let ops = reals![100.0, 200.0, -50.0, -30.0];
        let r = operands_to_rect(&ops);
        assert!((r.x - 50.0).abs() < 1e-10);
        assert!((r.y - 170.0).abs() < 1e-10);
        assert!((r.width - 50.0).abs() < 1e-10);
        assert!((r.height - 30.0).abs() < 1e-10);
    }

    // ---- operands_to_matrix -----------------------------------------------

    #[test]
    fn operands_to_matrix_identity() {
        let ops = reals![1.0, 0.0, 0.0, 1.0, 0.0, 0.0];
        let m = operands_to_matrix(&ops);
        let (x, y) = m.transform_point(5.0, 7.0);
        assert!((x - 5.0).abs() < 1e-10);
        assert!((y - 7.0).abs() < 1e-10);
    }

    #[test]
    fn operands_to_matrix_translation() {
        let ops = reals![1.0, 0.0, 0.0, 1.0, 100.0, 200.0];
        let m = operands_to_matrix(&ops);
        let (x, y) = m.transform_point(0.0, 0.0);
        assert!((x - 100.0).abs() < 1e-10);
        assert!((y - 200.0).abs() < 1e-10);
    }

    #[test]
    fn operands_to_matrix_known_ctm() {
        let ops = reals![1.02883, 0.0, 0.0, -1.03942, 336.0, 426.0];
        let m = operands_to_matrix(&ops);
        assert!((m.a - 1.02883).abs() < 1e-5);
        assert!((m.d - (-1.03942)).abs() < 1e-5);
        assert!((m.e - 336.0).abs() < 1e-5);
        assert!((m.f - 426.0).abs() < 1e-5);
    }

    // ---- re_is_outside ----------------------------------------------------

    #[test]
    fn re_is_outside_identity_ctm_inside() {
        let ops = reals![100.0, 100.0, 50.0, 50.0];
        assert!(!re_is_outside(&ops, &Matrix::identity(), &page_trim()));
    }

    #[test]
    fn re_is_outside_identity_ctm_outside() {
        let ops = reals![650.0, 100.0, 50.0, 50.0];
        assert!(re_is_outside(&ops, &Matrix::identity(), &page_trim()));
    }

    #[test]
    fn re_is_outside_straddling() {
        let ops = reals![635.0, 100.0, 20.0, 10.0];
        assert!(!re_is_outside(&ops, &Matrix::identity(), &page_trim()));
    }

    #[test]
    fn re_is_outside_with_ctm_transform() {
        let ctm = Matrix::from_values(1.0, 0.0, 0.0, 1.0, 700.0, 0.0);
        let ops = reals![0.0, 100.0, 50.0, 50.0];
        assert!(re_is_outside(&ops, &ctm, &page_trim()));
    }

    // ---- subpath_bbox_is_outside ------------------------------------------

    #[test]
    fn subpath_empty_is_not_outside() {
        let trim = Rect::from_corners(0.0, 0.0, 100.0, 100.0);
        assert!(!subpath_bbox_is_outside(&[], &Matrix::identity(), &trim));
    }

    #[test]
    fn subpath_inside() {
        let trim = Rect::from_corners(0.0, 0.0, 100.0, 100.0);
        let pts = [(10.0, 10.0), (50.0, 50.0), (30.0, 70.0)];
        assert!(!subpath_bbox_is_outside(&pts, &Matrix::identity(), &trim));
    }

    #[test]
    fn subpath_outside() {
        let trim = Rect::from_corners(0.0, 0.0, 100.0, 100.0);
        let pts = [(200.0, 200.0), (300.0, 300.0)];
        assert!(subpath_bbox_is_outside(&pts, &Matrix::identity(), &trim));
    }

    #[test]
    fn subpath_with_ctm_moves_inside_outside() {
        let ctm = Matrix::from_values(1.0, 0.0, 0.0, 1.0, 200.0, 200.0);
        let trim = Rect::from_corners(0.0, 0.0, 100.0, 100.0);
        let pts = [(10.0, 10.0)];
        assert!(subpath_bbox_is_outside(&pts, &ctm, &trim));
    }

    // ---- filter_operations ------------------------------------------------

    #[test]
    fn filter_operations_no_trim_passes_all() {
        let ops = fill_block(reals![0.0, 0.0, 10.0, 10.0]);
        let result = filter_operations(&ops, None);
        assert_eq!(result.len(), ops.len());
    }

    #[test]
    fn filter_operations_removes_outside_re_f() {
        let ops = fill_block(reals![700.0, 100.0, 50.0, 50.0]);
        let result = filter_operations(&ops, Some(page_trim()));
        let non_q: Vec<_> = result
            .iter()
            .filter(|o| o.operator != "q" && o.operator != "Q")
            .collect();
        assert!(
            non_q.is_empty(),
            "outside re+f should be removed, got {:?}",
            non_q.iter().map(|o| &o.operator).collect::<Vec<_>>()
        );
    }

    #[test]
    fn filter_operations_keeps_inside_re_f() {
        let ops = vec![
            op("re", reals![100.0, 100.0, 50.0, 50.0]),
            op("f", vec![]),
        ];
        let result = filter_operations(&ops, Some(page_trim()));
        assert_eq!(result.len(), 2, "inside re+f should be kept");
    }

    #[test]
    fn filter_operations_block_all_outside_dropped() {
        let ops = image_block(reals![100.0, 0.0, 0.0, 100.0, 1000.0, 1000.0]);
        let result = filter_operations(&ops, Some(page_trim()));
        assert!(
            result.is_empty() || !result.iter().any(|o| o.operator == "Do"),
            "outside image block should be dropped"
        );
    }

    #[test]
    // The name spells out the PDF operators `q` and `Q`, which differ only in case.
    #[allow(non_snake_case)]
    fn filter_operations_preserves_q_Q_balance() {
        let mut ops = fill_block(reals![100.0, 100.0, 50.0, 50.0]);
        ops.extend(fill_block(reals![700.0, 100.0, 50.0, 50.0]));
        let result = filter_operations(&ops, Some(page_trim()));
        let q_count = result.iter().filter(|o| o.operator == "q").count();
        let big_q_count = result.iter().filter(|o| o.operator == "Q").count();
        assert_eq!(q_count, big_q_count, "q/Q must be balanced");
    }

    // ---- block_is_outside_image -------------------------------------------

    #[test]
    fn block_no_do_is_not_outside() {
        let trim = page_trim();
        let block = fill_block(reals![100.0, 100.0, 50.0, 50.0]);
        assert!(!block_is_outside_image(
            &block,
            &Matrix::identity(),
            Some(&trim)
        ));
    }

    #[test]
    fn block_image_outside_trim_detected() {
        let trim = page_trim();
        let block = image_block(reals![500.0, 0.0, 0.0, 500.0, 2000.0, 2000.0]);
        assert!(block_is_outside_image(
            &block,
            &Matrix::identity(),
            Some(&trim)
        ));
    }

    #[test]
    fn block_image_inside_trim_not_detected() {
        let trim = page_trim();
        let block = image_block(reals![100.0, 0.0, 0.0, 100.0, 100.0, 100.0]);
        assert!(!block_is_outside_image(
            &block,
            &Matrix::identity(),
            Some(&trim)
        ));
    }

    #[test]
    fn block_no_trim_always_false() {
        let block = image_block(reals![500.0, 0.0, 0.0, 500.0, 2000.0, 2000.0]);
        assert!(!block_is_outside_image(&block, &Matrix::identity(), None));
    }

    // ---- collect_referenced_resources -------------------------------------

    #[test]
    fn collects_do_names() {
        let ops = vec![
            op("Do", vec![Object::Name(b"Im1".to_vec())]),
            op("Do", vec![Object::Name(b"Im2".to_vec())]),
        ];
        let refs = collect_referenced_resources(&ops);
        assert!(refs.contains(&b"Im1".to_vec()));
        assert!(refs.contains(&b"Im2".to_vec()));
        assert_eq!(refs.len(), 2);
    }

    #[test]
    fn collects_gs_and_tf_names() {
        let ops = vec![
            op("gs", vec![Object::Name(b"GS0".to_vec())]),
            op(
                "Tf",
                vec![Object::Name(b"F1".to_vec()), Object::Integer(12)],
            ),
        ];
        let refs = collect_referenced_resources(&ops);
        assert!(refs.contains(&b"GS0".to_vec()));
        assert!(refs.contains(&b"F1".to_vec()));
    }

    #[test]
    fn collects_colorspace_names() {
        let ops = vec![
            op("cs", vec![Object::Name(b"CS0".to_vec())]),
            op("CS", vec![Object::Name(b"CS1".to_vec())]),
            op("scn", vec![Object::Name(b"P0".to_vec())]),
            op("sh", vec![Object::Name(b"Sh0".to_vec())]),
        ];
        let refs = collect_referenced_resources(&ops);
        assert!(refs.contains(&b"CS0".to_vec()));
        assert!(refs.contains(&b"CS1".to_vec()));
        assert!(refs.contains(&b"P0".to_vec()));
        assert!(refs.contains(&b"Sh0".to_vec()));
    }

    #[test]
    fn ignores_non_resource_ops() {
        let ops = fill_block(reals![0.0, 0.0, 10.0, 10.0]);
        let refs = collect_referenced_resources(&ops);
        assert!(refs.is_empty());
    }

    // ---- remove_outside_re_f_pairs ----------------------------------------

    #[test]
    fn removes_outside_re_f_pair() {
        let trim = page_trim();
        let block = fill_block(reals![700.0, 100.0, 50.0, 50.0]);
        let result = remove_outside_re_f_pairs(block, &Matrix::identity(), Some(&trim));
        let has_re = result.iter().any(|o| o.operator == "re");
        assert!(!has_re, "outside re should be removed");
    }

    #[test]
    fn keeps_inside_re_f_pair() {
        let trim = page_trim();
        let block = fill_block(reals![100.0, 100.0, 50.0, 50.0]);
        let result = remove_outside_re_f_pairs(block, &Matrix::identity(), Some(&trim));
        let has_re = result.iter().any(|o| o.operator == "re");
        assert!(has_re, "inside re should be kept");
    }

    #[test]
    fn preserves_clipping_paths() {
        let trim = page_trim();
        let block = vec![
            op("q", vec![]),
            op("re", reals![700.0, 100.0, 50.0, 50.0]),
            op("W", vec![]),
            op("n", vec![]),
            op("Q", vec![]),
        ];
        let result = remove_outside_re_f_pairs(block, &Matrix::identity(), Some(&trim));
        let has_w = result.iter().any(|o| o.operator == "W");
        assert!(has_w, "clipping path must be preserved even if outside");
    }

    #[test]
    fn no_trim_keeps_everything() {
        let block = fill_block(reals![700.0, 100.0, 50.0, 50.0]);
        let result = remove_outside_re_f_pairs(block.clone(), &Matrix::identity(), None);
        assert_eq!(result.len(), block.len());
    }

    // ---- fixture-based tests ----------------------------------------------

    /// Loads the fixture PDF and the id of its first page, or `None` when the
    /// file is missing or cannot be loaded.
    fn fixture() -> Option<(lopdf::Document, lopdf::ObjectId)> {
        let path = std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
            .join("tests/fixtures/pdf_test_data_print_v2.pdf");
        if !path.exists() {
            return None;
        }
        let file = std::fs::File::open(&path).ok()?;
        let doc = lopdf::Document::load_from(file).ok()?;
        let page_id = doc.get_pages()[&1];
        Some((doc, page_id))
    }

    #[test]
    fn filter_page_reduces_operations() {
        let Some((mut doc, page_id)) = fixture() else {
            return;
        };
        let before = doc
            .get_and_decode_page_content(page_id)
            .unwrap()
            .operations
            .len();
        ContentFilter::filter_page(&mut doc, page_id, &page_trim()).unwrap();
        let after = doc
            .get_and_decode_page_content(page_id)
            .unwrap()
            .operations
            .len();
        assert!(after < before, "before={before}, after={after}");
    }

    #[test]
    fn filter_page_keeps_at_least_one_do() {
        let Some((mut doc, page_id)) = fixture() else {
            return;
        };
        ContentFilter::filter_page(&mut doc, page_id, &page_trim()).unwrap();
        let content = doc.get_and_decode_page_content(page_id).unwrap();
        let do_count = content
            .operations
            .iter()
            .filter(|o| o.operator == "Do")
            .count();
        assert!(do_count >= 1, "at least one Do should survive");
    }
}