pdfium_render/pdf/document/
bookmarks.rs

1//! Defines the [PdfBookmarks] struct, exposing functionality related to the
2//! bookmarks contained within a single `PdfDocument`.
3
4use crate::bindgen::{FPDF_BOOKMARK, FPDF_DOCUMENT};
5use crate::bindings::PdfiumLibraryBindings;
6use crate::error::{PdfiumError, PdfiumInternalError};
7use crate::pdf::document::bookmark::PdfBookmark;
8use std::collections::HashSet;
9use std::ptr::null_mut;
10
11/// The bookmarks contained within a single `PdfDocument`.
12///
13/// Bookmarks in PDF files form a tree structure, branching out from a top-level root bookmark.
14/// The [PdfBookmarks::root()] returns the root bookmark in the containing `PdfDocument`, if any;
15/// use the root's [PdfBookmark::first_child()] and [PdfBookmark::next_sibling()] functions to
16/// traverse the bookmark tree.
17///
18/// To search the tree for a bookmark with a specific title, use the [PdfBookmarks::find_first_by_title()]
19/// and [PdfBookmarks::find_all_by_title()] functions. To traverse the tree breadth-first, visiting
20/// every bookmark in the tree, create an iterator using the [PdfBookmarks::iter()] function.
21pub struct PdfBookmarks<'a> {
22    document_handle: FPDF_DOCUMENT,
23    bindings: &'a dyn PdfiumLibraryBindings,
24}
25
26impl<'a> PdfBookmarks<'a> {
27    #[inline]
28    pub(crate) fn from_pdfium(
29        document_handle: FPDF_DOCUMENT,
30        bindings: &'a dyn PdfiumLibraryBindings,
31    ) -> Self {
32        Self {
33            document_handle,
34            bindings,
35        }
36    }
37
38    /// Returns the internal `FPDF_DOCUMENT` handle of the `PdfDocument` containing
39    /// this [PdfBookmarks] collection.
40    #[inline]
41    pub(crate) fn document_handle(&self) -> FPDF_DOCUMENT {
42        self.document_handle
43    }
44
45    /// Returns the [PdfiumLibraryBindings] used by this [PdfBookmarks] collection.
46    #[inline]
47    pub fn bindings(&self) -> &'a dyn PdfiumLibraryBindings {
48        self.bindings
49    }
50
51    /// Returns the root [PdfBookmark] in the containing `PdfDocument`, if any.
52    pub fn root(&self) -> Option<PdfBookmark> {
53        let bookmark_handle = self
54            .bindings
55            .FPDFBookmark_GetFirstChild(self.document_handle, null_mut());
56
57        if bookmark_handle.is_null() {
58            None
59        } else {
60            Some(PdfBookmark::from_pdfium(
61                bookmark_handle,
62                None,
63                self.document_handle,
64                self.bindings,
65            ))
66        }
67    }
68
69    /// Returns the first [PdfBookmark] in the containing `PdfDocument` that has a title matching
70    /// the given string.
71    ///
72    /// Note that bookmarks are not required to have unique titles, so in theory any number of
73    /// bookmarks could match a given title. This function only ever returns the first. To return
74    /// all matches, use [PdfBookmarks::find_all_by_title()].
75    pub fn find_first_by_title(&self, title: &str) -> Result<PdfBookmark, PdfiumError> {
76        let handle = self
77            .bindings
78            .FPDFBookmark_Find_str(self.document_handle, title);
79
80        if handle.is_null() {
81            Err(PdfiumError::PdfiumLibraryInternalError(
82                PdfiumInternalError::Unknown,
83            ))
84        } else {
85            Ok(PdfBookmark::from_pdfium(
86                handle,
87                None,
88                self.document_handle,
89                self.bindings,
90            ))
91        }
92    }
93
94    /// Returns all [PdfBookmark] objects in the containing `PdfDocument` that have a title
95    /// matching the given string.
96    ///
97    /// Note that bookmarks are not required to have unique titles, so in theory any number of
98    /// bookmarks could match a given title. This function returns all matches by performing
99    /// a complete breadth-first traversal of the entire bookmark tree. To return just the first
100    /// match, use [PdfBookmarks::find_first_by_title()].
101    pub fn find_all_by_title(&self, title: &str) -> Vec<PdfBookmark> {
102        self.iter()
103            .filter(|bookmark| match bookmark.title() {
104                Some(bookmark_title) => bookmark_title == title,
105                None => false,
106            })
107            .collect()
108    }
109
110    /// Returns a depth-first prefix-order iterator over all the [PdfBookmark]
111    /// objects in the containing `PdfDocument`, starting from the top-level
112    /// root bookmark.
113    #[inline]
114    pub fn iter(&self) -> PdfBookmarksIterator {
115        PdfBookmarksIterator::new(
116            self.root(),
117            true,
118            None,
119            self.document_handle(),
120            self.bindings(),
121        )
122    }
123}
124
125/// An iterator over all the [PdfBookmark] objects in a [PdfBookmarks] collection.
126pub struct PdfBookmarksIterator<'a> {
127    // If true, recurse into descendants.
128    include_descendants: bool,
129    // Stack of pairs of (Bookmark Node, Node's Parent). The parent may be NULL
130    // if its a root node or the parent is unknown.
131    pending_stack: Vec<(FPDF_BOOKMARK, FPDF_BOOKMARK)>,
132    // Set of nodes already visitied. This ensures we terminate if the PDF's
133    // bookmark graph is cyclic.
134    visited: HashSet<FPDF_BOOKMARK>,
135    // This bookmark will not be returned by the iterator (but its siblings and
136    // descendants will be explored). May be NULL.
137    skip_sibling: FPDF_BOOKMARK,
138    document_handle: FPDF_DOCUMENT,
139    bindings: &'a dyn PdfiumLibraryBindings,
140}
141
142impl<'a> PdfBookmarksIterator<'a> {
143    pub(crate) fn new(
144        start_node: Option<PdfBookmark<'a>>,
145        include_descendants: bool,
146        skip_sibling: Option<PdfBookmark<'a>>,
147        document_handle: FPDF_DOCUMENT,
148        bindings: &'a dyn PdfiumLibraryBindings,
149    ) -> Self {
150        let mut result = PdfBookmarksIterator {
151            document_handle,
152            include_descendants,
153            pending_stack: Vec::with_capacity(20),
154            visited: HashSet::new(),
155            skip_sibling: null_mut(),
156            bindings,
157        };
158
159        // If we have a skip-sibling, record its handle.
160        if let Some(skip_sibling) = skip_sibling {
161            result.skip_sibling = skip_sibling.bookmark_handle();
162        }
163
164        // Push the start node onto the stack to initiate graph traversal.
165        if let Some(start_node) = start_node {
166            result.pending_stack.push((
167                start_node.bookmark_handle(),
168                start_node
169                    .parent()
170                    .map(|parent| parent.bookmark_handle())
171                    .unwrap_or(null_mut()),
172            ));
173        }
174
175        result
176    }
177}
178
179impl<'a> Iterator for PdfBookmarksIterator<'a> {
180    type Item = PdfBookmark<'a>;
181
182    fn next(&mut self) -> Option<Self::Item> {
183        // A straightforward tail-recursive function to walk the bookmarks might
184        // look about like this:
185        //
186        // pub fn walk(node: Option<PdfBookmark<'a>>) {
187        //     if let Some(node) = node) {
188        //         visit(&node);
189        //         walk(node.first_child());
190        //         walk(node.next_sibling());
191        //     }
192        // }
193        //
194        // This iterator implements that algorithm with the following additional
195        // complexities:
196        //
197        // - Iterators, of course, can't take advantage of recursion. So the
198        //   call stack which is implicit in the recursive version becomes an
199        //   explicit stack retained in PdfIterator::pending_stack.
200        // - For efficiency, the iterator internally operates with FPDF_BOOKMARK
201        //   handles, and only constructs PdfBookmark objects right before
202        //   they're returned.
203        // - PdfIterator::visited keeps a HashSet of visited nodes, to ensure
204        //   termination even if the PDF's bookmark graph is cyclic.
205        // - PdfIterator::skip_sibling keeps a FPDF_BOOKMARK that will not be
206        //   returned by the iterator (but, importantly, it's siblings will
207        //   still be explored).
208
209        while let Some((node, parent)) = self.pending_stack.pop() {
210            if node.is_null() || self.visited.contains(&node) {
211                continue;
212            }
213            self.visited.insert(node);
214
215            // Add our next sibling to the stack first, so we'll come back to it
216            // after having addressed our descendants. It's okay if it's NULL,
217            // we'll handle that when it comes off the stack.
218            self.pending_stack.push((
219                self.bindings
220                    .FPDFBookmark_GetNextSibling(self.document_handle, node),
221                parent,
222            ));
223
224            // Add our first descendant to the stack if we should include them.
225            // Again, its okay if it's NULL.
226            if self.include_descendants {
227                self.pending_stack.push((
228                    self.bindings
229                        .FPDFBookmark_GetFirstChild(self.document_handle, node),
230                    node,
231                ));
232            }
233
234            // If the present node isn't the one we're meant to skip, return it.
235            if node != self.skip_sibling {
236                let parent = if parent.is_null() { None } else { Some(parent) };
237                return Some(PdfBookmark::from_pdfium(
238                    node,
239                    parent,
240                    self.document_handle,
241                    self.bindings,
242                ));
243            }
244        }
245
246        // If we got here, then the stack is empty and we're done.
247        None
248    }
249}