// Source: pdfium_render/pdf/document/bookmarks.rs

1//! Defines the [PdfBookmarks] struct, exposing functionality related to the
2//! bookmarks contained within a single `PdfDocument`.
3
4use crate::bindgen::{FPDF_BOOKMARK, FPDF_DOCUMENT};
5use crate::error::{PdfiumError, PdfiumInternalError};
6use crate::pdf::document::bookmark::PdfBookmark;
7use crate::pdfium::PdfiumLibraryBindingsAccessor;
8use std::collections::HashSet;
9use std::marker::PhantomData;
10
/// The bookmarks contained within a single `PdfDocument`.
///
/// Bookmarks in PDF files form a tree structure, branching out from a top-level root bookmark.
/// The [PdfBookmarks::root()] function returns the root bookmark in the containing `PdfDocument`,
/// if any; use the root's [PdfBookmark::first_child()] and [PdfBookmark::next_sibling()] functions
/// to traverse the bookmark tree.
///
/// To search the tree for a bookmark with a specific title, use the [PdfBookmarks::find_first_by_title()]
/// and [PdfBookmarks::find_all_by_title()] functions. To traverse the tree depth-first, visiting
/// every bookmark in the tree, create an iterator using the [PdfBookmarks::iter()] function.
pub struct PdfBookmarks<'a> {
    // Pdfium handle of the document whose bookmarks this collection exposes.
    document_handle: FPDF_DOCUMENT,
    // Zero-sized marker that carries the `'a` lifetime; no reference is actually stored.
    lifetime: PhantomData<&'a FPDF_DOCUMENT>,
}
25
26impl<'a> PdfBookmarks<'a> {
27    #[inline]
28    pub(crate) fn from_pdfium(document_handle: FPDF_DOCUMENT) -> Self {
29        Self {
30            document_handle,
31            lifetime: PhantomData,
32        }
33    }
34
35    /// Returns the internal `FPDF_DOCUMENT` handle of the `PdfDocument` containing
36    /// this [PdfBookmarks] collection.
37    #[inline]
38    pub(crate) fn document_handle(&self) -> FPDF_DOCUMENT {
39        self.document_handle
40    }
41
42    /// Returns the root [PdfBookmark] in the containing `PdfDocument`, if any.
43    pub fn root(&self) -> Option<PdfBookmark<'_>> {
44        let bookmark_handle = unsafe {
45            self.bindings()
46                .FPDFBookmark_GetFirstChild(self.document_handle, std::ptr::null_mut())
47        };
48
49        if bookmark_handle.is_null() {
50            None
51        } else {
52            Some(PdfBookmark::from_pdfium(
53                bookmark_handle,
54                None,
55                self.document_handle,
56            ))
57        }
58    }
59
60    /// Returns the first [PdfBookmark] in the containing `PdfDocument` that has a title matching
61    /// the given string.
62    ///
63    /// Note that bookmarks are not required to have unique titles, so in theory any number of
64    /// bookmarks could match a given title. This function only ever returns the first. To return
65    /// all matches, use [PdfBookmarks::find_all_by_title()].
66    pub fn find_first_by_title(&self, title: &str) -> Result<PdfBookmark<'_>, PdfiumError> {
67        let handle = unsafe {
68            self.bindings()
69                .FPDFBookmark_Find_str(self.document_handle, title)
70        };
71
72        if handle.is_null() {
73            Err(PdfiumError::PdfiumLibraryInternalError(
74                PdfiumInternalError::Unknown,
75            ))
76        } else {
77            Ok(PdfBookmark::from_pdfium(handle, None, self.document_handle))
78        }
79    }
80
81    /// Returns all [PdfBookmark] objects in the containing `PdfDocument` that have a title
82    /// matching the given string.
83    ///
84    /// Note that bookmarks are not required to have unique titles, so in theory any number of
85    /// bookmarks could match a given title. This function returns all matches by performing
86    /// a complete breadth-first traversal of the entire bookmark tree. To return just the first
87    /// match, use [PdfBookmarks::find_first_by_title()].
88    pub fn find_all_by_title(&self, title: &str) -> Vec<PdfBookmark<'_>> {
89        self.iter()
90            .filter(|bookmark| match bookmark.title() {
91                Some(bookmark_title) => bookmark_title == title,
92                None => false,
93            })
94            .collect()
95    }
96
97    /// Returns a depth-first prefix-order iterator over all the [PdfBookmark]
98    /// objects in the containing `PdfDocument`, starting from the top-level
99    /// root bookmark.
100    #[inline]
101    pub fn iter(&self) -> PdfBookmarksIterator<'_> {
102        PdfBookmarksIterator::new(self.root(), true, None, self.document_handle())
103    }
104}
105
// Empty impl: opts `PdfBookmarks` into `PdfiumLibraryBindingsAccessor` using only the
// trait's own defaults. NOTE(review): this is presumably what supplies the `bindings()`
// accessor called by the methods of `PdfBookmarks` - confirm against the trait definition.
impl<'a> PdfiumLibraryBindingsAccessor<'a> for PdfBookmarks<'a> {}

// Only compiled when the `thread_safe` crate feature is enabled.
// NOTE(review): this is an unchecked assertion by the author that a `PdfBookmarks` value
// may be moved to another thread - confirm that the `thread_safe` feature provides the
// synchronization around Pdfium calls that makes it sound.
#[cfg(feature = "thread_safe")]
unsafe impl<'a> Send for PdfBookmarks<'a> {}

// Only compiled when the `thread_safe` crate feature is enabled.
// NOTE(review): same caveat as the `Send` impl, here for sharing `&PdfBookmarks`
// between threads - confirm before relying on it.
#[cfg(feature = "thread_safe")]
unsafe impl<'a> Sync for PdfBookmarks<'a> {}
113
/// An iterator over all the [PdfBookmark] objects in a [PdfBookmarks] collection.
///
/// Bookmarks are visited depth-first: a bookmark's descendants (when included) are
/// yielded before its next sibling. Each bookmark handle is yielded at most once.
pub struct PdfBookmarksIterator<'a> {
    /// Handle of the document that owns the bookmarks being traversed.
    document_handle: FPDF_DOCUMENT,

    /// If true, recurse into descendants.
    include_descendants: bool,

    /// A stack of pairs of (Bookmark Node, Node's Parent). The parent may be NULL
    /// if it is a root node, or if the parent is unknown.
    pending_stack: Vec<(FPDF_BOOKMARK, FPDF_BOOKMARK)>,

    /// The set of nodes already visited. This ensures we terminate if the document's
    /// bookmark graph is cyclic.
    visited: HashSet<FPDF_BOOKMARK>,

    /// This bookmark will not be returned by the iterator (but its siblings and
    /// descendants will be explored). May be NULL.
    skip_sibling: FPDF_BOOKMARK,

    /// Zero-sized marker that carries the `'a` lifetime; no reference is actually stored.
    lifetime: PhantomData<&'a FPDF_BOOKMARK>,
}
135
136impl<'a> PdfBookmarksIterator<'a> {
137    pub(crate) fn new(
138        start_node: Option<PdfBookmark<'a>>,
139        include_descendants: bool,
140        skip_sibling: Option<PdfBookmark<'a>>,
141        document_handle: FPDF_DOCUMENT,
142    ) -> Self {
143        let mut result = PdfBookmarksIterator {
144            document_handle,
145            include_descendants,
146            pending_stack: Vec::with_capacity(20),
147            visited: HashSet::new(),
148            skip_sibling: std::ptr::null_mut(),
149            lifetime: PhantomData,
150        };
151
152        // If we have a skip-sibling, record its handle.
153        if let Some(skip_sibling) = skip_sibling {
154            result.skip_sibling = skip_sibling.bookmark_handle();
155        }
156
157        // Push the start node onto the stack to initiate graph traversal.
158        if let Some(start_node) = start_node {
159            result.pending_stack.push((
160                start_node.bookmark_handle(),
161                start_node
162                    .parent()
163                    .map(|parent| parent.bookmark_handle())
164                    .unwrap_or(std::ptr::null_mut()),
165            ));
166        }
167
168        result
169    }
170}
171
172impl<'a> Iterator for PdfBookmarksIterator<'a> {
173    type Item = PdfBookmark<'a>;
174
175    fn next(&mut self) -> Option<Self::Item> {
176        // A straightforward tail-recursive function to walk the bookmarks might
177        // look about like this:
178        //
179        // pub fn walk(node: Option<PdfBookmark<'a>>) {
180        //     if let Some(node) = node) {
181        //         visit(&node);
182        //         walk(node.first_child());
183        //         walk(node.next_sibling());
184        //     }
185        // }
186        //
187        // This iterator implements that algorithm with the following additional
188        // complexities:
189        //
190        // - Iterators, of course, can't take advantage of recursion. So the
191        //   call stack which is implicit in the recursive version becomes an
192        //   explicit stack retained in PdfIterator::pending_stack.
193        // - For efficiency, the iterator internally operates with FPDF_BOOKMARK
194        //   handles, and only constructs PdfBookmark objects right before
195        //   they're returned.
196        // - PdfIterator::visited keeps a HashSet of visited nodes, to ensure
197        //   termination even if the PDF's bookmark graph is cyclic.
198        // - PdfIterator::skip_sibling keeps a FPDF_BOOKMARK that will not be
199        //   returned by the iterator (but, importantly, its siblings will
200        //   still be explored).
201
202        while let Some((node, parent)) = self.pending_stack.pop() {
203            if node.is_null() || self.visited.contains(&node) {
204                continue;
205            }
206            self.visited.insert(node);
207
208            // Add our next sibling to the stack first, so we'll come back to it
209            // after having addressed our descendants. It's okay if it's NULL,
210            // we'll handle that when it comes off the stack.
211            self.pending_stack.push((
212                unsafe {
213                    self.bindings()
214                        .FPDFBookmark_GetNextSibling(self.document_handle, node)
215                },
216                parent,
217            ));
218
219            // Add our first descendant to the stack if we should include them.
220            // Again, it's okay if it's NULL.
221            if self.include_descendants {
222                self.pending_stack.push((
223                    unsafe {
224                        self.bindings()
225                            .FPDFBookmark_GetFirstChild(self.document_handle, node)
226                    },
227                    node,
228                ));
229            }
230
231            // If the present node isn't the one we're meant to skip, return it.
232            if node != self.skip_sibling {
233                let parent = if parent.is_null() { None } else { Some(parent) };
234                return Some(PdfBookmark::from_pdfium(node, parent, self.document_handle));
235            }
236        }
237
238        // If we get here then the stack is empty and we're done.
239        None
240    }
241}
242
// Empty impl: opts `PdfBookmarksIterator` into `PdfiumLibraryBindingsAccessor` using only
// the trait's own defaults. NOTE(review): this is presumably what supplies the `bindings()`
// accessor called from `next()` - confirm against the trait definition.
impl<'a> PdfiumLibraryBindingsAccessor<'a> for PdfBookmarksIterator<'a> {}

// Only compiled when the `thread_safe` crate feature is enabled.
// NOTE(review): the iterator stores `FPDF_BOOKMARK` pointers, which the compiler does not
// treat as `Send` on its own; this impl is an unchecked assertion by the author - confirm
// that the `thread_safe` feature provides the synchronization that makes it sound.
#[cfg(feature = "thread_safe")]
unsafe impl<'a> Send for PdfBookmarksIterator<'a> {}

// Only compiled when the `thread_safe` crate feature is enabled.
// NOTE(review): same caveat as the `Send` impl, here for sharing `&PdfBookmarksIterator`
// between threads - confirm before relying on it.
#[cfg(feature = "thread_safe")]
unsafe impl<'a> Sync for PdfBookmarksIterator<'a> {}