Skip to main content

pdf_annot/
flatten.rs

1#![cfg(feature = "write")]
2//! Annotation flattening implementation.
3//!
4//! Flattens non-widget, non-link annotations containing appearance streams (/AP /N)
5//! directly into the page content stream.
6
7use lopdf::{dictionary, Object, ObjectId, Stream};
8use std::collections::HashSet;
9
10/// Pick a `/XObject` resource name of the form `FlatAnnotN` that is not already
11/// present in `xobjects`. `next` is advanced past the chosen index so repeated
12/// calls stay monotonic. This makes flattening collision-proof: it never
13/// overwrites a pre-existing XObject, and a second flatten pass on an
14/// already-flattened page cannot clobber the names from the first.
15fn unique_xobject_name(xobjects: &lopdf::Dictionary, next: &mut usize) -> String {
16    loop {
17        let candidate = format!("FlatAnnot{}", *next);
18        *next += 1;
19        if !xobjects.has(candidate.as_bytes()) {
20            return candidate;
21        }
22    }
23}
24
25/// Convert a PDF object to a f64 number.
26fn as_number(obj: &Object) -> Option<f64> {
27    match obj {
28        Object::Integer(i) => Some(*i as f64),
29        Object::Real(f) => Some(*f as f64),
30        _ => None,
31    }
32}
33
34/// Walk parent Pages chain to resolve inherited Resources dictionary.
35fn resolve_inherited_resources_clone(
36    doc: &lopdf::Document,
37    page_id: ObjectId,
38) -> Option<lopdf::Dictionary> {
39    let mut current = page_id;
40    let mut visited = HashSet::new();
41    loop {
42        if !visited.insert(current) {
43            return None;
44        }
45        let dict = doc.get_dictionary(current).ok()?;
46        if let Ok(res_obj) = dict.get(b"Resources") {
47            return match res_obj {
48                Object::Dictionary(d) => Some(d.clone()),
49                Object::Reference(id) => doc.get_dictionary(*id).ok().cloned(),
50                _ => None,
51            };
52        }
53        match dict.get(b"Parent") {
54            Ok(Object::Reference(id)) => current = *id,
55            _ => return None,
56        }
57    }
58}
59
60/// Resolve page /Contents to a flat list of stream ObjectIds.
61fn resolve_content_streams(doc: &lopdf::Document, page_id: ObjectId) -> Vec<ObjectId> {
62    let page_obj = match doc.get_object(page_id) {
63        Ok(obj) => obj,
64        Err(_) => return Vec::new(),
65    };
66    let page_dict = match page_obj {
67        Object::Dictionary(ref d) => d,
68        _ => return Vec::new(),
69    };
70    match page_dict.get(b"Contents").ok() {
71        Some(c) => flatten_content_refs(doc, c),
72        None => Vec::new(),
73    }
74}
75
76fn flatten_content_refs(doc: &lopdf::Document, obj: &Object) -> Vec<ObjectId> {
77    match obj {
78        Object::Reference(id) => {
79            if let Ok(Object::Array(arr)) = doc.get_object(*id) {
80                return arr
81                    .iter()
82                    .flat_map(|o| flatten_content_refs(doc, o))
83                    .collect();
84            }
85            vec![*id]
86        }
87        Object::Array(arr) => arr
88            .iter()
89            .flat_map(|o| flatten_content_refs(doc, o))
90            .collect(),
91        _ => Vec::new(),
92    }
93}
94
95/// Wrap existing page content in q/Q to isolate graphics state.
96fn wrap_existing_content_in_save_restore(doc: &mut lopdf::Document, page_id: ObjectId) {
97    let existing = resolve_content_streams(doc, page_id);
98    if existing.is_empty() {
99        return;
100    }
101
102    let q_stream = Stream::new(dictionary! {}, b"q\n".to_vec());
103    let q_id = doc.add_object(Object::Stream(q_stream));
104
105    let big_q_stream = Stream::new(dictionary! {}, b"\nQ\n".to_vec());
106    let big_q_id = doc.add_object(Object::Stream(big_q_stream));
107
108    // Build flat array: [q, ...existing streams..., Q]
109    let mut new_arr = Vec::with_capacity(existing.len() + 2);
110    new_arr.push(Object::Reference(q_id));
111    for id in existing {
112        new_arr.push(Object::Reference(id));
113    }
114    new_arr.push(Object::Reference(big_q_id));
115    let wrapped = Object::Array(new_arr);
116
117    if let Ok(Object::Dictionary(ref mut d)) = doc.get_object_mut(page_id) {
118        d.set("Contents", wrapped);
119    }
120}
121
122/// Append a content stream reference to a page's Contents.
123fn append_content_to_page(doc: &mut lopdf::Document, page_id: ObjectId, content_id: ObjectId) {
124    let existing = resolve_content_streams(doc, page_id);
125    let new_contents = if existing.is_empty() {
126        Object::Reference(content_id)
127    } else {
128        let mut arr: Vec<Object> = existing.into_iter().map(Object::Reference).collect();
129        arr.push(Object::Reference(content_id));
130        Object::Array(arr)
131    };
132
133    if let Ok(Object::Dictionary(ref mut d)) = doc.get_object_mut(page_id) {
134        d.set("Contents", new_contents);
135    }
136}
137
138/// Compute the coordinate transformation mapping appearance BBox to Rect.
139fn get_matrix_and_bbox(n_stream_dict: &lopdf::Dictionary, rect: &[f64; 4]) -> Option<[f64; 6]> {
140    let bbox = n_stream_dict.get(b"BBox").ok()?;
141    let bbox_arr = match bbox {
142        Object::Array(ref arr) if arr.len() >= 4 => arr,
143        _ => return None,
144    };
145    let bx0 = as_number(&bbox_arr[0])?;
146    let by0 = as_number(&bbox_arr[1])?;
147    let bx1 = as_number(&bbox_arr[2])?;
148    let by1 = as_number(&bbox_arr[3])?;
149
150    let bw = bx1 - bx0;
151    let bh = by1 - by0;
152    if bw == 0.0 || bh == 0.0 {
153        return None;
154    }
155
156    let rx0 = rect[0];
157    let ry0 = rect[1];
158    let rx1 = rect[2];
159    let ry1 = rect[3];
160    let rw = rx1 - rx0;
161    let rh = ry1 - ry0;
162
163    // Optional Matrix in appearance stream
164    let matrix_arr = n_stream_dict.get(b"Matrix").ok().and_then(|m| match m {
165        Object::Array(ref arr) if arr.len() >= 6 => Some(arr),
166        _ => None,
167    });
168
169    if let Some(m) = matrix_arr {
170        let ma = as_number(&m[0]).unwrap_or(1.0);
171        let mb = as_number(&m[1]).unwrap_or(0.0);
172        let mc = as_number(&m[2]).unwrap_or(0.0);
173        let md = as_number(&m[3]).unwrap_or(1.0);
174        let me = as_number(&m[4]).unwrap_or(0.0);
175        let mf = as_number(&m[5]).unwrap_or(0.0);
176
177        let corners = [(bx0, by0), (bx1, by0), (bx0, by1), (bx1, by1)];
178        let mut t_corners = [(0.0, 0.0); 4];
179        for (i, &(x, y)) in corners.iter().enumerate() {
180            t_corners[i] = (ma * x + mc * y + me, mb * x + md * y + mf);
181        }
182        let t_bx0 = t_corners.iter().map(|p| p.0).fold(f64::INFINITY, f64::min);
183        let t_by0 = t_corners.iter().map(|p| p.1).fold(f64::INFINITY, f64::min);
184        let t_bx1 = t_corners
185            .iter()
186            .map(|p| p.0)
187            .fold(f64::NEG_INFINITY, f64::max);
188        let t_by1 = t_corners
189            .iter()
190            .map(|p| p.1)
191            .fold(f64::NEG_INFINITY, f64::max);
192
193        let t_bw = t_bx1 - t_bx0;
194        let t_bh = t_by1 - t_by0;
195        if t_bw == 0.0 || t_bh == 0.0 {
196            return None;
197        }
198
199        let sx = rw / t_bw;
200        let sy = rh / t_bh;
201        let tx = rx0 - t_bx0 * sx;
202        let ty = ry0 - t_by0 * sy;
203
204        Some([
205            ma * sx,
206            mb * sy,
207            mc * sx,
208            md * sy,
209            me * sx + tx,
210            mf * sy + ty,
211        ])
212    } else {
213        let sx = rw / bw;
214        let sy = rh / bh;
215        let tx = rx0 - bx0 * sx;
216        let ty = ry0 - by0 * sy;
217        Some([sx, 0.0, 0.0, sy, tx, ty])
218    }
219}
220
221/// Flatten all non-widget, non-link annotations on all pages.
222///
223/// Reference integrity: when a markup annotation is flattened, its associated
224/// `/Popup` companion annotation is removed as well, so no `/Parent` reference
225/// is left dangling. Cross-annotation `/IRT` (reply threads) and structure-tree
226/// `OBJR` references that point at a flattened annotation are NOT rewritten;
227/// per ISO 32000-1 §7.3.10 a reference to a removed object resolves to the null
228/// object, which conforming readers tolerate. Rewriting those references is
229/// intentionally out of scope (tracked as a follow-up).
230pub fn flatten_annotations(doc: &mut lopdf::Document) -> Result<(), crate::error::AnnotBuildError> {
231    let page_ids: Vec<ObjectId> = doc.get_pages().values().copied().collect();
232    let mut deleted_objects = HashSet::new();
233    // Popup companions of flattened markup annotations. Removed alongside their
234    // parent so the popup's /Parent back-reference does not dangle.
235    let mut popups_to_remove: HashSet<ObjectId> = HashSet::new();
236
237    for page_id in page_ids {
238        // Retrieve /Annots array
239        let page_dict = match doc.get_object(page_id) {
240            Ok(Object::Dictionary(ref d)) => d.clone(),
241            _ => continue,
242        };
243
244        let annots_arr = match page_dict.get(b"Annots") {
245            Ok(Object::Array(ref arr)) => arr.clone(),
246            Ok(Object::Reference(id)) => match doc.get_object(*id) {
247                Ok(Object::Array(ref arr)) => arr.clone(),
248                _ => continue,
249            },
250            _ => continue,
251        };
252
253        if annots_arr.is_empty() {
254            continue;
255        }
256
257        let mut remaining_annots = Vec::new();
258        let mut draw_ops = Vec::new();
259
260        let mut resources = if let Ok(res_obj) = page_dict.get(b"Resources") {
261            match res_obj {
262                Object::Dictionary(ref d) => d.clone(),
263                Object::Reference(id) => match doc.get_object(*id) {
264                    Ok(Object::Dictionary(ref d)) => d.clone(),
265                    _ => lopdf::Dictionary::new(),
266                },
267                _ => lopdf::Dictionary::new(),
268            }
269        } else {
270            resolve_inherited_resources_clone(doc, page_id).unwrap_or_default()
271        };
272
273        let mut xobjects = match resources.get(b"XObject") {
274            Ok(Object::Dictionary(ref d)) => d.clone(),
275            Ok(Object::Reference(id)) => match doc.get_object(*id) {
276                Ok(Object::Dictionary(ref d)) => d.clone(),
277                _ => lopdf::Dictionary::new(),
278            },
279            _ => lopdf::Dictionary::new(),
280        };
281
282        let mut flat_annot_count = 0;
283
284        for annot_obj in &annots_arr {
285            let annot_id = match annot_obj {
286                Object::Reference(id) => *id,
287                _ => {
288                    remaining_annots.push(annot_obj.clone());
289                    continue;
290                }
291            };
292
293            let annot_dict = match doc.get_object(annot_id) {
294                Ok(Object::Dictionary(ref d)) => d.clone(),
295                _ => {
296                    remaining_annots.push(annot_obj.clone());
297                    continue;
298                }
299            };
300
301            let subtype = match annot_dict.get(b"Subtype") {
302                Ok(Object::Name(ref name)) => name.as_slice(),
303                _ => {
304                    remaining_annots.push(annot_obj.clone());
305                    continue;
306                }
307            };
308
309            // Skip Widget and Link annotations
310            if subtype == b"Widget" || subtype == b"Link" {
311                remaining_annots.push(annot_obj.clone());
312                continue;
313            }
314
315            // Must have appearance dictionary /AP with normal appearance /N
316            let ap = match annot_dict.get(b"AP") {
317                Ok(Object::Dictionary(ref d)) => d.clone(),
318                Ok(Object::Reference(id)) => match doc.get_object(*id) {
319                    Ok(Object::Dictionary(ref d)) => d.clone(),
320                    _ => {
321                        remaining_annots.push(annot_obj.clone());
322                        continue;
323                    }
324                },
325                _ => {
326                    remaining_annots.push(annot_obj.clone());
327                    continue;
328                }
329            };
330
331            let n_obj = match ap.get(b"N") {
332                Ok(obj) => obj.clone(),
333                _ => {
334                    remaining_annots.push(annot_obj.clone());
335                    continue;
336                }
337            };
338
339            let (n_stream_id, n_stream) = match n_obj {
340                Object::Reference(id) => match doc.get_object(id) {
341                    Ok(Object::Stream(ref s)) => (Some(id), s.clone()),
342                    _ => {
343                        remaining_annots.push(annot_obj.clone());
344                        continue;
345                    }
346                },
347                Object::Stream(ref s) => (None, s.clone()),
348                _ => {
349                    remaining_annots.push(annot_obj.clone());
350                    continue;
351                }
352            };
353
354            // Get Rect
355            let rect_obj = match annot_dict.get(b"Rect") {
356                Ok(Object::Array(ref arr)) if arr.len() >= 4 => arr,
357                _ => {
358                    remaining_annots.push(annot_obj.clone());
359                    continue;
360                }
361            };
362            let rx0 = match as_number(&rect_obj[0]) {
363                Some(v) => v,
364                None => {
365                    remaining_annots.push(annot_obj.clone());
366                    continue;
367                }
368            };
369            let ry0 = match as_number(&rect_obj[1]) {
370                Some(v) => v,
371                None => {
372                    remaining_annots.push(annot_obj.clone());
373                    continue;
374                }
375            };
376            let rx1 = match as_number(&rect_obj[2]) {
377                Some(v) => v,
378                None => {
379                    remaining_annots.push(annot_obj.clone());
380                    continue;
381                }
382            };
383            let ry1 = match as_number(&rect_obj[3]) {
384                Some(v) => v,
385                None => {
386                    remaining_annots.push(annot_obj.clone());
387                    continue;
388                }
389            };
390            let rect = [rx0, ry0, rx1, ry1];
391
392            // Compute matrix
393            let matrix = match get_matrix_and_bbox(&n_stream.dict, &rect) {
394                Some(m) => m,
395                None => {
396                    remaining_annots.push(annot_obj.clone());
397                    continue;
398                }
399            };
400
401            // Set Form XObject type/subtype
402            let stream_id = match n_stream_id {
403                Some(id) => {
404                    if let Ok(Object::Stream(ref mut s)) = doc.get_object_mut(id) {
405                        s.dict.set("Type", Object::Name(b"XObject".to_vec()));
406                        s.dict.set("Subtype", Object::Name(b"Form".to_vec()));
407                    }
408                    id
409                }
410                None => {
411                    let mut s = n_stream.clone();
412                    s.dict.set("Type", Object::Name(b"XObject".to_vec()));
413                    s.dict.set("Subtype", Object::Name(b"Form".to_vec()));
414                    doc.add_object(Object::Stream(s))
415                }
416            };
417
418            // Insert into Resources /XObject under a name that does not collide
419            // with any existing entry (including names left by a previous flatten
420            // pass), so flattening never overwrites an unrelated XObject.
421            let ap_name = unique_xobject_name(&xobjects, &mut flat_annot_count);
422            xobjects.set(ap_name.as_bytes().to_vec(), Object::Reference(stream_id));
423
424            // Append Do operator
425            let draw_op = format!(
426                "q\n{} {} {} {} {} {} cm\n/{} Do\nQ\n",
427                matrix[0], matrix[1], matrix[2], matrix[3], matrix[4], matrix[5], ap_name
428            );
429            draw_ops.extend_from_slice(draw_op.as_bytes());
430
431            deleted_objects.insert(annot_id);
432            // Mark the markup annotation's popup companion for removal so its
433            // /Parent back-reference does not dangle after flattening.
434            if let Ok(Object::Reference(popup_id)) = annot_dict.get(b"Popup") {
435                popups_to_remove.insert(*popup_id);
436            }
437        }
438
439        if !draw_ops.is_empty() {
440            // Write /XObject resources back to resources dict
441            resources.set("XObject", Object::Dictionary(xobjects));
442
443            if let Ok(Object::Dictionary(ref mut pd)) = doc.get_object_mut(page_id) {
444                pd.set("Resources", Object::Dictionary(resources));
445            }
446
447            // Wrap existing page content in q/Q
448            wrap_existing_content_in_save_restore(doc, page_id);
449
450            // Create new content stream and append
451            let new_content_stream = Stream::new(dictionary! {}, draw_ops);
452            let new_content_id = doc.add_object(Object::Stream(new_content_stream));
453            append_content_to_page(doc, page_id, new_content_id);
454
455            // Drop popup companions of flattened annots so the array does not
456            // keep an interactive popup whose /Parent now points at nothing.
457            remaining_annots.retain(|o| match o {
458                Object::Reference(id) => !popups_to_remove.contains(id),
459                _ => true,
460            });
461
462            // Write modified /Annots array or remove it if empty
463            if let Ok(Object::Dictionary(ref mut pd)) = doc.get_object_mut(page_id) {
464                if remaining_annots.is_empty() {
465                    pd.remove(b"Annots");
466                } else {
467                    pd.set("Annots", Object::Array(remaining_annots));
468                }
469            }
470        }
471    }
472
473    // Purge the deleted annotation dictionaries and their popup companions.
474    // `delete_object` also strips every remaining reference to each id across the
475    // whole document — page `/Annots`, `/IRT` reply links, structure-tree `OBJR`
476    // `/Obj` entries, and `/Popup` back-references — so no dangling reference to a
477    // flattened annotation survives. (Cost is O(deleted × objects); flattening is
478    // not a hot path.)
479    for id in deleted_objects.into_iter().chain(popups_to_remove) {
480        doc.delete_object(id);
481    }
482
483    Ok(())
484}
485
#[cfg(test)]
mod tests {
    use super::unique_xobject_name;
    use lopdf::{Dictionary, Object};

    /// Names already present in the dictionary are skipped, and the counter
    /// keeps moving forward across calls.
    #[test]
    fn unique_xobject_name_skips_existing_entries() {
        let mut taken = Dictionary::new();
        for name in &["FlatAnnot0", "FlatAnnot1"] {
            taken.set(*name, Object::Null);
        }

        // Indices 0 and 1 are occupied, so the first free name uses index 2.
        let mut counter = 0usize;
        let first = unique_xobject_name(&taken, &mut counter);
        assert_eq!(first, "FlatAnnot2");

        // Once that name is registered too, the following call moves past it.
        taken.set("FlatAnnot2", Object::Null);
        let second = unique_xobject_name(&taken, &mut counter);
        assert_eq!(second, "FlatAnnot3");
    }

    /// With nothing to collide with, numbering starts at zero and increases
    /// by one per call.
    #[test]
    fn unique_xobject_name_empty_dict_starts_at_zero() {
        let empty = Dictionary::new();
        let mut counter = 0usize;
        let first = unique_xobject_name(&empty, &mut counter);
        let second = unique_xobject_name(&empty, &mut counter);
        assert_eq!(first, "FlatAnnot0");
        assert_eq!(second, "FlatAnnot1");
    }
}