// Source: oxidize_pdf/operations/overlay.rs

1//! PDF overlay/watermark functionality
2//!
3//! Implements overlay operations for superimposing pages from one PDF onto another.
4//! Common use cases: watermarks ("DRAFT", "CONFIDENTIAL"), logos, stamps.
5//!
6//! # Technical approach
7//!
8//! Each overlay page is converted to a Form XObject (ISO 32000-1 §8.10) and
9//! injected into the target page's content stream with appropriate CTM
10//! (Coordinate Transformation Matrix) for positioning and scaling.
11
12use super::{OperationError, OperationResult, PageRange};
13use crate::geometry::{Point, Rectangle};
14use crate::graphics::{ExtGState, FormXObject};
15use crate::parser::{PdfDocument, PdfReader};
16use crate::{Document, Page};
17use std::collections::{HashMap, HashSet};
18use std::io::{Read, Seek};
19use std::path::Path;
20
/// Position for overlay placement on the target page.
///
/// The named variants anchor the scaled overlay to the centre or to one of
/// the four corners of the base page. [`OverlayPosition::Custom`] supplies
/// the translation directly.
///
/// The default is [`OverlayPosition::Center`].
#[derive(Debug, Clone, Default, PartialEq)]
pub enum OverlayPosition {
    /// Centered on the page
    #[default]
    Center,
    /// Top-left corner
    TopLeft,
    /// Top-right corner
    TopRight,
    /// Bottom-left corner
    BottomLeft,
    /// Bottom-right corner
    BottomRight,
    /// Custom position `(x, y)` in points from bottom-left.
    ///
    /// The two values are used verbatim as the horizontal and vertical
    /// translation of the overlay; they are not adjusted for the overlay's
    /// scaled size.
    Custom(f64, f64),
}
43
44/// Options for overlay operations.
45#[derive(Debug, Clone)]
46pub struct OverlayOptions {
47    /// Which pages to apply the overlay to (default: all)
48    pub pages: PageRange,
49    /// Position of the overlay on the target page
50    pub position: OverlayPosition,
51    /// Opacity of the overlay (0.0 = transparent, 1.0 = opaque)
52    pub opacity: f64,
53    /// Scale factor for the overlay (1.0 = original size)
54    pub scale: f64,
55    /// If true, cycle through overlay pages when base has more pages than overlay
56    pub repeat: bool,
57}
58
59impl Default for OverlayOptions {
60    fn default() -> Self {
61        Self {
62            pages: PageRange::All,
63            position: OverlayPosition::Center,
64            opacity: 1.0,
65            scale: 1.0,
66            repeat: false,
67        }
68    }
69}
70
71impl OverlayOptions {
72    /// Validates the options, returning an error if invalid.
73    pub fn validate(&self) -> OperationResult<()> {
74        if self.scale <= 0.0 {
75            return Err(OperationError::ProcessingError(
76                "Overlay scale must be greater than 0".to_string(),
77            ));
78        }
79        Ok(())
80    }
81
82    /// Returns the opacity clamped to [0.0, 1.0].
83    fn clamped_opacity(&self) -> f64 {
84        self.opacity.clamp(0.0, 1.0)
85    }
86}
87
88/// Computes the CTM (Coordinate Transformation Matrix) for positioning the overlay.
89///
90/// Returns `[sx, 0, 0, sy, tx, ty]` where:
91/// - `sx`, `sy` = scale factors
92/// - `tx`, `ty` = translation offsets
93pub(crate) fn compute_ctm(
94    base_w: f64,
95    base_h: f64,
96    overlay_w: f64,
97    overlay_h: f64,
98    scale: f64,
99    position: &OverlayPosition,
100) -> [f64; 6] {
101    let scaled_w = overlay_w * scale;
102    let scaled_h = overlay_h * scale;
103
104    let (tx, ty) = match position {
105        OverlayPosition::Center => ((base_w - scaled_w) / 2.0, (base_h - scaled_h) / 2.0),
106        OverlayPosition::TopLeft => (0.0, base_h - scaled_h),
107        OverlayPosition::TopRight => (base_w - scaled_w, base_h - scaled_h),
108        OverlayPosition::BottomLeft => (0.0, 0.0),
109        OverlayPosition::BottomRight => (base_w - scaled_w, 0.0),
110        OverlayPosition::Custom(x, y) => (*x, *y),
111    };
112
113    [scale, 0.0, 0.0, scale, tx, ty]
114}
115
116/// Converts a parser `PdfDictionary` directly to a writer `objects::Dictionary`.
117///
118/// Used to pass overlay page resources into the Form XObject's resource dictionary.
119/// References are resolved against `doc` (the source/overlay document) so that
120/// the resulting writer objects contain inline data rather than dangling IDs
121/// from the source PDF. See issue #156.
122fn convert_parser_dict_to_objects_dict<R: Read + Seek>(
123    parser_dict: &crate::parser::objects::PdfDictionary,
124    doc: &PdfDocument<R>,
125) -> crate::objects::Dictionary {
126    let mut result = crate::objects::Dictionary::new();
127    for (key, value) in &parser_dict.0 {
128        let converted = convert_parser_obj_to_objects_obj(value, doc);
129        result.set(key.as_str(), converted);
130    }
131    result
132}
133
134/// Converts a single parser `PdfObject` to a writer `objects::Object`.
135///
136/// `PdfObject::Reference` values are resolved against `doc` (the source document)
137/// and recursively converted, so the returned writer object tree contains only
138/// inline data — no references to foreign object IDs. This prevents dangling
139/// references when the writer assigns new IDs in the destination PDF (issue #156).
140fn convert_parser_obj_to_objects_obj<R: Read + Seek>(
141    obj: &crate::parser::objects::PdfObject,
142    doc: &PdfDocument<R>,
143) -> crate::objects::Object {
144    use crate::objects::Object as WObj;
145    use crate::parser::objects::PdfObject as PObj;
146
147    match obj {
148        PObj::Null => WObj::Null,
149        PObj::Boolean(b) => WObj::Boolean(*b),
150        PObj::Integer(i) => WObj::Integer(*i),
151        PObj::Real(r) => WObj::Real(*r),
152        PObj::String(s) => WObj::String(String::from_utf8_lossy(s.as_bytes()).to_string()),
153        PObj::Name(n) => WObj::Name(n.as_str().to_string()),
154        PObj::Array(arr) => {
155            let items: Vec<WObj> = arr
156                .0
157                .iter()
158                .map(|item| convert_parser_obj_to_objects_obj(item, doc))
159                .collect();
160            WObj::Array(items)
161        }
162        PObj::Dictionary(dict) => WObj::Dictionary(convert_parser_dict_to_objects_dict(dict, doc)),
163        PObj::Stream(stream) => {
164            let dict = convert_parser_dict_to_objects_dict(&stream.dict, doc);
165            WObj::Stream(dict, stream.data.clone())
166        }
167        PObj::Reference(num, gen) => {
168            // Resolve the reference against the SOURCE document so we get the
169            // actual object data instead of a raw ID that belongs to the overlay
170            // PDF. The writer will later externalize any inline streams with
171            // fresh IDs valid in the destination PDF.
172            match doc.get_object(*num, *gen as u16) {
173                Ok(resolved) => convert_parser_obj_to_objects_obj(&resolved, doc),
174                Err(_) => {
175                    tracing::warn!(
176                        "Could not resolve reference {} {} R from overlay; replacing with Null",
177                        num,
178                        gen
179                    );
180                    WObj::Null
181                }
182            }
183        }
184    }
185}
186
187/// Applies overlay pages onto a base document.
188pub struct PdfOverlay<R: Read + Seek> {
189    base_doc: PdfDocument<R>,
190    overlay_doc: PdfDocument<R>,
191}
192
193impl<R: Read + Seek> PdfOverlay<R> {
194    /// Creates a new overlay applicator.
195    pub fn new(base_doc: PdfDocument<R>, overlay_doc: PdfDocument<R>) -> Self {
196        Self {
197            base_doc,
198            overlay_doc,
199        }
200    }
201
202    /// Applies the overlay and returns the resulting document.
203    pub fn apply(&self, options: &OverlayOptions) -> OperationResult<Document> {
204        options.validate()?;
205
206        let base_count =
207            self.base_doc
208                .page_count()
209                .map_err(|e| OperationError::ParseError(e.to_string()))? as usize;
210
211        if base_count == 0 {
212            return Err(OperationError::NoPagesToProcess);
213        }
214
215        let overlay_count =
216            self.overlay_doc
217                .page_count()
218                .map_err(|e| OperationError::ParseError(e.to_string()))? as usize;
219
220        if overlay_count == 0 {
221            return Err(OperationError::ProcessingError(
222                "Overlay PDF has no pages".to_string(),
223            ));
224        }
225
226        let target_indices = options.pages.get_indices(base_count)?;
227        let clamped_opacity = options.clamped_opacity();
228
229        let mut output_doc = Document::new();
230
231        for page_idx in 0..base_count {
232            let parsed_base = self
233                .base_doc
234                .get_page(page_idx as u32)
235                .map_err(|e| OperationError::ParseError(e.to_string()))?;
236
237            let mut page = Page::from_parsed_with_content(&parsed_base, &self.base_doc)
238                .map_err(OperationError::PdfError)?;
239
240            if target_indices.contains(&page_idx) {
241                // Determine which overlay page to use
242                let target_pos = target_indices
243                    .iter()
244                    .position(|&i| i == page_idx)
245                    .unwrap_or(0);
246
247                let overlay_page_idx = if options.repeat || overlay_count == 1 {
248                    target_pos % overlay_count
249                } else if target_pos < overlay_count {
250                    target_pos
251                } else {
252                    // No overlay page available for this target, skip overlay
253                    output_doc.add_page(page);
254                    continue;
255                };
256
257                self.apply_overlay_to_page(
258                    &mut page,
259                    overlay_page_idx,
260                    &parsed_base,
261                    clamped_opacity,
262                    options.scale,
263                    &options.position,
264                )?;
265            }
266
267            output_doc.add_page(page);
268        }
269
270        Ok(output_doc)
271    }
272
273    /// Applies a single overlay page onto a base page.
274    fn apply_overlay_to_page(
275        &self,
276        page: &mut Page,
277        overlay_page_idx: usize,
278        parsed_base: &crate::parser::page_tree::ParsedPage,
279        opacity: f64,
280        scale: f64,
281        position: &OverlayPosition,
282    ) -> OperationResult<()> {
283        let parsed_overlay = self
284            .overlay_doc
285            .get_page(overlay_page_idx as u32)
286            .map_err(|e| OperationError::ParseError(e.to_string()))?;
287
288        // Extract overlay content streams
289        let overlay_streams = self
290            .overlay_doc
291            .get_page_content_streams(&parsed_overlay)
292            .map_err(|e| OperationError::ParseError(e.to_string()))?;
293
294        let mut overlay_content = Vec::new();
295        for stream in &overlay_streams {
296            overlay_content.extend_from_slice(stream);
297            overlay_content.push(b'\n');
298        }
299
300        // Build Form XObject from overlay content
301        let ov_w = parsed_overlay.width();
302        let ov_h = parsed_overlay.height();
303        let bbox = Rectangle::new(Point::new(0.0, 0.0), Point::new(ov_w, ov_h));
304
305        let mut form = FormXObject::new(bbox).with_content(overlay_content);
306
307        // Preserve overlay page resources in the Form XObject so fonts, images, etc. are available
308        if let Some(resources) = parsed_overlay.get_resources() {
309            let writer_dict = convert_parser_dict_to_objects_dict(resources, &self.overlay_doc);
310            form = form.with_resources(writer_dict);
311        }
312
313        let xobj_name = format!("Overlay{}", overlay_page_idx);
314        // Overlay-generated names are under our control (`Overlay{n}`)
315        // and always valid per ISO 32000-1 §7.3.5, so `?` is defensive
316        // here rather than a practical failure mode.
317        page.add_form_xobject(&xobj_name, form)?;
318
319        // Calculate CTM for positioning and scaling
320        let base_w = parsed_base.width();
321        let base_h = parsed_base.height();
322        let ctm = compute_ctm(base_w, base_h, ov_w, ov_h, scale, position);
323
324        // Build overlay operators: q [gs] cm Do Q
325        let mut ops = String::new();
326        ops.push_str("q\n");
327
328        // Apply opacity via ExtGState if opacity is less than 1.0
329        if (opacity - 1.0).abs() > f64::EPSILON {
330            let mut state = ExtGState::new();
331            state.alpha_fill = Some(opacity);
332            state.alpha_stroke = Some(opacity);
333
334            let registered_name = page
335                .graphics()
336                .extgstate_manager_mut()
337                .add_state(state)
338                .map_err(|e| OperationError::ProcessingError(format!("ExtGState error: {e}")))?;
339
340            ops.push_str(&format!("/{} gs\n", registered_name));
341        }
342
343        // Apply CTM for positioning and scaling
344        ops.push_str(&format!(
345            "{} {} {} {} {} {} cm\n",
346            ctm[0], ctm[1], ctm[2], ctm[3], ctm[4], ctm[5]
347        ));
348
349        // Invoke the Form XObject
350        ops.push_str(&format!("/{} Do\n", xobj_name));
351        ops.push_str("Q\n");
352
353        // Append overlay operators to page content (renders on top of
354        // existing content).
355        //
356        // The overlay path composes a `cm` matrix + `/<xobj> Do` — it
357        // does NOT emit `Tj` operators directly. The XObject invoked
358        // carries its own font references and character data (those
359        // live in the source PDF's resources, independent of this
360        // Document's `custom_fonts` registry). Consequently there are
361        // no fonts OF THE TARGET DOCUMENT referenced inside `ops`, and
362        // the issue-#204 font-usage map is correctly empty here. If a
363        // future overlay variant starts embedding inline `Tj` against
364        // target-document fonts, it must populate this map.
365        let font_usage: HashMap<String, HashSet<char>> = HashMap::new();
366        page.append_raw_content(ops.as_bytes(), &font_usage);
367
368        Ok(())
369    }
370}
371
372/// High-level function to apply a PDF overlay/watermark.
373///
374/// Reads the base PDF and overlay PDF from disk, applies the overlay
375/// according to the given options, and writes the result to the output path.
376///
377/// # Arguments
378///
379/// * `base_path` - Path to the base PDF document
380/// * `overlay_path` - Path to the overlay/watermark PDF
381/// * `output_path` - Path for the output PDF
382/// * `options` - Overlay configuration (position, opacity, scale, etc.)
383///
384/// # Example
385///
386/// ```rust,no_run
387/// use oxidize_pdf::operations::{overlay_pdf, OverlayOptions, OverlayPosition};
388///
389/// // Apply a centered watermark at 30% opacity
390/// overlay_pdf(
391///     "document.pdf",
392///     "watermark.pdf",
393///     "output.pdf",
394///     OverlayOptions {
395///         opacity: 0.3,
396///         position: OverlayPosition::Center,
397///         ..Default::default()
398///     },
399/// ).unwrap();
400/// ```
401pub fn overlay_pdf<P, Q, R>(
402    base_path: P,
403    overlay_path: Q,
404    output_path: R,
405    options: OverlayOptions,
406) -> OperationResult<()>
407where
408    P: AsRef<Path>,
409    Q: AsRef<Path>,
410    R: AsRef<Path>,
411{
412    let base_reader = PdfReader::open(base_path.as_ref())
413        .map_err(|e| OperationError::ParseError(format!("Failed to open base PDF: {e}")))?;
414    let base_doc = PdfDocument::new(base_reader);
415
416    let overlay_reader = PdfReader::open(overlay_path.as_ref())
417        .map_err(|e| OperationError::ParseError(format!("Failed to open overlay PDF: {e}")))?;
418    let overlay_doc = PdfDocument::new(overlay_reader);
419
420    let overlay_applicator = PdfOverlay::new(base_doc, overlay_doc);
421    let mut doc = overlay_applicator.apply(&options)?;
422    doc.save(output_path)?;
423    Ok(())
424}
425
#[cfg(test)]
mod tests {
    use super::*;

    /// Maximum absolute difference accepted when comparing matrix entries.
    const TOLERANCE: f64 = 0.001;

    /// Base page size used throughout the CTM tests (points).
    const PAGE_W: f64 = 595.0;
    const PAGE_H: f64 = 842.0;

    /// Asserts that `actual` is within `TOLERANCE` of `expected`.
    #[track_caller]
    fn assert_close(actual: f64, expected: f64) {
        assert!((actual - expected).abs() < TOLERANCE);
    }

    /// Translation `(tx, ty)` for an unscaled 200x200 overlay on the test page.
    fn small_overlay_translation(position: OverlayPosition) -> (f64, f64) {
        let ctm = compute_ctm(PAGE_W, PAGE_H, 200.0, 200.0, 1.0, &position);
        (ctm[4], ctm[5])
    }

    /// Default options with only the scale overridden.
    fn options_with_scale(scale: f64) -> OverlayOptions {
        OverlayOptions {
            scale,
            ..Default::default()
        }
    }

    /// Default options with only the opacity overridden.
    fn options_with_opacity(opacity: f64) -> OverlayOptions {
        OverlayOptions {
            opacity,
            ..Default::default()
        }
    }

    #[test]
    fn test_overlay_options_default() {
        let OverlayOptions {
            pages,
            position,
            opacity,
            scale,
            repeat,
        } = OverlayOptions::default();

        assert_eq!(opacity, 1.0);
        assert_eq!(scale, 1.0);
        assert!(!repeat);
        assert!(matches!(position, OverlayPosition::Center));
        assert!(matches!(pages, PageRange::All));
    }

    #[test]
    fn test_overlay_options_validate_ok() {
        assert!(OverlayOptions::default().validate().is_ok());
    }

    #[test]
    fn test_overlay_options_validate_zero_scale() {
        assert!(options_with_scale(0.0).validate().is_err());
    }

    #[test]
    fn test_overlay_options_validate_negative_scale() {
        assert!(options_with_scale(-1.0).validate().is_err());
    }

    #[test]
    fn test_overlay_options_validate_high_opacity_ok() {
        let opts = options_with_opacity(2.5);
        // opacity > 1.0 is clamped, not rejected
        assert!(opts.validate().is_ok());
        assert_eq!(opts.clamped_opacity(), 1.0);
    }

    #[test]
    fn test_overlay_options_clamped_opacity() {
        // (requested opacity, opacity after clamping)
        let cases = [(-0.5, 0.0), (0.5, 0.5), (3.0, 1.0)];
        for (requested, expected) in cases {
            assert_eq!(options_with_opacity(requested).clamped_opacity(), expected);
        }
    }

    #[test]
    fn test_compute_ctm_center_same_size() {
        let ctm = compute_ctm(PAGE_W, PAGE_H, 595.0, 842.0, 1.0, &OverlayPosition::Center);
        assert_eq!(ctm[0], 1.0);
        assert_eq!(ctm[3], 1.0);
        assert_close(ctm[4], 0.0);
        assert_close(ctm[5], 0.0);
    }

    #[test]
    fn test_compute_ctm_center_different_sizes() {
        let (tx, ty) = small_overlay_translation(OverlayPosition::Center);
        assert_close(tx, 197.5);
        assert_close(ty, 321.0);
    }

    #[test]
    fn test_compute_ctm_with_scale() {
        let ctm = compute_ctm(PAGE_W, PAGE_H, 595.0, 842.0, 0.5, &OverlayPosition::Center);
        assert_close(ctm[0], 0.5);
        assert_close(ctm[3], 0.5);
        // Centered: tx = (595 - 595*0.5) / 2 = 148.75
        assert_close(ctm[4], 148.75);
        assert_close(ctm[5], 210.5);
    }

    #[test]
    fn test_compute_ctm_bottom_left() {
        let (tx, ty) = small_overlay_translation(OverlayPosition::BottomLeft);
        assert_close(tx, 0.0);
        assert_close(ty, 0.0);
    }

    #[test]
    fn test_compute_ctm_bottom_right() {
        let (tx, ty) = small_overlay_translation(OverlayPosition::BottomRight);
        assert_close(tx, 395.0);
        assert_close(ty, 0.0);
    }

    #[test]
    fn test_compute_ctm_top_left() {
        let (tx, ty) = small_overlay_translation(OverlayPosition::TopLeft);
        assert_close(tx, 0.0);
        assert_close(ty, 642.0);
    }

    #[test]
    fn test_compute_ctm_top_right() {
        let (tx, ty) = small_overlay_translation(OverlayPosition::TopRight);
        assert_close(tx, 395.0);
        assert_close(ty, 642.0);
    }

    #[test]
    fn test_compute_ctm_custom_position() {
        let (tx, ty) = small_overlay_translation(OverlayPosition::Custom(100.0, 150.0));
        assert_close(tx, 100.0);
        assert_close(ty, 150.0);
    }

    #[test]
    fn test_overlay_position_default() {
        assert_eq!(OverlayPosition::default(), OverlayPosition::Center);
    }

    #[test]
    fn test_overlay_position_equality() {
        assert_eq!(OverlayPosition::Center, OverlayPosition::Center);
        assert_eq!(
            OverlayPosition::Custom(1.0, 2.0),
            OverlayPosition::Custom(1.0, 2.0)
        );
        assert_ne!(OverlayPosition::Center, OverlayPosition::TopLeft);
    }

    /// Issue #156: unresolvable references must degrade to Null, not panic.
    #[test]
    fn test_unresolvable_reference_degrades_to_null() {
        use crate::objects::Object as WObj;
        use crate::parser::objects::{PdfDictionary, PdfName, PdfObject as PObj};

        // A dictionary with one dangling reference and one ordinary value.
        let mut dict = PdfDictionary::new();
        let entries = [
            ("SMask", PObj::Reference(99999, 0)),
            ("Width", PObj::Integer(100)),
        ];
        for (key, value) in entries {
            dict.0.insert(PdfName::new(key.to_string()), value);
        }

        // A minimal one-page PDF, built in memory, serves as the document the
        // reference is resolved against; it has no object 99999.
        let mut doc_builder = crate::Document::new();
        doc_builder.add_page(crate::Page::a4());
        let pdf_bytes = doc_builder.to_bytes().unwrap();

        let reader = crate::parser::PdfReader::new(std::io::Cursor::new(pdf_bytes)).unwrap();
        let pdf_doc = crate::parser::PdfDocument::new(reader);

        let result = convert_parser_dict_to_objects_dict(&dict, &pdf_doc);

        // The unresolvable reference (99999 0 R) should become Null.
        let smask_val = result.get("SMask");
        assert!(
            matches!(smask_val, Some(WObj::Null)),
            "Unresolvable reference should become Null, got: {:?}",
            smask_val
        );

        // Other values should convert normally.
        let width_val = result.get("Width");
        assert!(
            matches!(width_val, Some(WObj::Integer(100))),
            "Normal integer should convert, got: {:?}",
            width_val
        );
    }
}