pdfium_render/pdf/document/bookmarks.rs
//! Defines the [PdfBookmarks] struct, exposing functionality related to the
//! bookmarks contained within a single `PdfDocument`.
3
4use crate::bindgen::{FPDF_BOOKMARK, FPDF_DOCUMENT};
5use crate::bindings::PdfiumLibraryBindings;
6use crate::error::{PdfiumError, PdfiumInternalError};
7use crate::pdf::document::bookmark::PdfBookmark;
8use std::collections::HashSet;
9use std::ptr::null_mut;
10
11/// The bookmarks contained within a single `PdfDocument`.
12///
13/// Bookmarks in PDF files form a tree structure, branching out from a top-level root bookmark.
14/// The [PdfBookmarks::root()] returns the root bookmark in the containing `PdfDocument`, if any;
15/// use the root's [PdfBookmark::first_child()] and [PdfBookmark::next_sibling()] functions to
16/// traverse the bookmark tree.
17///
18/// To search the tree for a bookmark with a specific title, use the [PdfBookmarks::find_first_by_title()]
19/// and [PdfBookmarks::find_all_by_title()] functions. To traverse the tree breadth-first, visiting
20/// every bookmark in the tree, create an iterator using the [PdfBookmarks::iter()] function.
21pub struct PdfBookmarks<'a> {
22 document_handle: FPDF_DOCUMENT,
23 bindings: &'a dyn PdfiumLibraryBindings,
24}
25
26impl<'a> PdfBookmarks<'a> {
27 #[inline]
28 pub(crate) fn from_pdfium(
29 document_handle: FPDF_DOCUMENT,
30 bindings: &'a dyn PdfiumLibraryBindings,
31 ) -> Self {
32 Self {
33 document_handle,
34 bindings,
35 }
36 }
37
38 /// Returns the internal `FPDF_DOCUMENT` handle of the `PdfDocument` containing
39 /// this [PdfBookmarks] collection.
40 #[inline]
41 pub(crate) fn document_handle(&self) -> FPDF_DOCUMENT {
42 self.document_handle
43 }
44
45 /// Returns the [PdfiumLibraryBindings] used by this [PdfBookmarks] collection.
46 #[inline]
47 pub fn bindings(&self) -> &'a dyn PdfiumLibraryBindings {
48 self.bindings
49 }
50
51 /// Returns the root [PdfBookmark] in the containing `PdfDocument`, if any.
52 pub fn root(&self) -> Option<PdfBookmark> {
53 let bookmark_handle = self
54 .bindings
55 .FPDFBookmark_GetFirstChild(self.document_handle, null_mut());
56
57 if bookmark_handle.is_null() {
58 None
59 } else {
60 Some(PdfBookmark::from_pdfium(
61 bookmark_handle,
62 None,
63 self.document_handle,
64 self.bindings,
65 ))
66 }
67 }
68
69 /// Returns the first [PdfBookmark] in the containing `PdfDocument` that has a title matching
70 /// the given string.
71 ///
72 /// Note that bookmarks are not required to have unique titles, so in theory any number of
73 /// bookmarks could match a given title. This function only ever returns the first. To return
74 /// all matches, use [PdfBookmarks::find_all_by_title()].
75 pub fn find_first_by_title(&self, title: &str) -> Result<PdfBookmark, PdfiumError> {
76 let handle = self
77 .bindings
78 .FPDFBookmark_Find_str(self.document_handle, title);
79
80 if handle.is_null() {
81 Err(PdfiumError::PdfiumLibraryInternalError(
82 PdfiumInternalError::Unknown,
83 ))
84 } else {
85 Ok(PdfBookmark::from_pdfium(
86 handle,
87 None,
88 self.document_handle,
89 self.bindings,
90 ))
91 }
92 }
93
94 /// Returns all [PdfBookmark] objects in the containing `PdfDocument` that have a title
95 /// matching the given string.
96 ///
97 /// Note that bookmarks are not required to have unique titles, so in theory any number of
98 /// bookmarks could match a given title. This function returns all matches by performing
99 /// a complete breadth-first traversal of the entire bookmark tree. To return just the first
100 /// match, use [PdfBookmarks::find_first_by_title()].
101 pub fn find_all_by_title(&self, title: &str) -> Vec<PdfBookmark> {
102 self.iter()
103 .filter(|bookmark| match bookmark.title() {
104 Some(bookmark_title) => bookmark_title == title,
105 None => false,
106 })
107 .collect()
108 }
109
110 /// Returns a depth-first prefix-order iterator over all the [PdfBookmark]
111 /// objects in the containing `PdfDocument`, starting from the top-level
112 /// root bookmark.
113 #[inline]
114 pub fn iter(&self) -> PdfBookmarksIterator {
115 PdfBookmarksIterator::new(
116 self.root(),
117 true,
118 None,
119 self.document_handle(),
120 self.bindings(),
121 )
122 }
123}
124
125/// An iterator over all the [PdfBookmark] objects in a [PdfBookmarks] collection.
126pub struct PdfBookmarksIterator<'a> {
127 // If true, recurse into descendants.
128 include_descendants: bool,
129 // Stack of pairs of (Bookmark Node, Node's Parent). The parent may be NULL
130 // if its a root node or the parent is unknown.
131 pending_stack: Vec<(FPDF_BOOKMARK, FPDF_BOOKMARK)>,
132 // Set of nodes already visitied. This ensures we terminate if the PDF's
133 // bookmark graph is cyclic.
134 visited: HashSet<FPDF_BOOKMARK>,
135 // This bookmark will not be returned by the iterator (but its siblings and
136 // descendants will be explored). May be NULL.
137 skip_sibling: FPDF_BOOKMARK,
138 document_handle: FPDF_DOCUMENT,
139 bindings: &'a dyn PdfiumLibraryBindings,
140}
141
142impl<'a> PdfBookmarksIterator<'a> {
143 pub(crate) fn new(
144 start_node: Option<PdfBookmark<'a>>,
145 include_descendants: bool,
146 skip_sibling: Option<PdfBookmark<'a>>,
147 document_handle: FPDF_DOCUMENT,
148 bindings: &'a dyn PdfiumLibraryBindings,
149 ) -> Self {
150 let mut result = PdfBookmarksIterator {
151 document_handle,
152 include_descendants,
153 pending_stack: Vec::with_capacity(20),
154 visited: HashSet::new(),
155 skip_sibling: null_mut(),
156 bindings,
157 };
158
159 // If we have a skip-sibling, record its handle.
160 if let Some(skip_sibling) = skip_sibling {
161 result.skip_sibling = skip_sibling.bookmark_handle();
162 }
163
164 // Push the start node onto the stack to initiate graph traversal.
165 if let Some(start_node) = start_node {
166 result.pending_stack.push((
167 start_node.bookmark_handle(),
168 start_node
169 .parent()
170 .map(|parent| parent.bookmark_handle())
171 .unwrap_or(null_mut()),
172 ));
173 }
174
175 result
176 }
177}
178
179impl<'a> Iterator for PdfBookmarksIterator<'a> {
180 type Item = PdfBookmark<'a>;
181
182 fn next(&mut self) -> Option<Self::Item> {
183 // A straightforward tail-recursive function to walk the bookmarks might
184 // look about like this:
185 //
186 // pub fn walk(node: Option<PdfBookmark<'a>>) {
187 // if let Some(node) = node) {
188 // visit(&node);
189 // walk(node.first_child());
190 // walk(node.next_sibling());
191 // }
192 // }
193 //
194 // This iterator implements that algorithm with the following additional
195 // complexities:
196 //
197 // - Iterators, of course, can't take advantage of recursion. So the
198 // call stack which is implicit in the recursive version becomes an
199 // explicit stack retained in PdfIterator::pending_stack.
200 // - For efficiency, the iterator internally operates with FPDF_BOOKMARK
201 // handles, and only constructs PdfBookmark objects right before
202 // they're returned.
203 // - PdfIterator::visited keeps a HashSet of visited nodes, to ensure
204 // termination even if the PDF's bookmark graph is cyclic.
205 // - PdfIterator::skip_sibling keeps a FPDF_BOOKMARK that will not be
206 // returned by the iterator (but, importantly, it's siblings will
207 // still be explored).
208
209 while let Some((node, parent)) = self.pending_stack.pop() {
210 if node.is_null() || self.visited.contains(&node) {
211 continue;
212 }
213 self.visited.insert(node);
214
215 // Add our next sibling to the stack first, so we'll come back to it
216 // after having addressed our descendants. It's okay if it's NULL,
217 // we'll handle that when it comes off the stack.
218 self.pending_stack.push((
219 self.bindings
220 .FPDFBookmark_GetNextSibling(self.document_handle, node),
221 parent,
222 ));
223
224 // Add our first descendant to the stack if we should include them.
225 // Again, its okay if it's NULL.
226 if self.include_descendants {
227 self.pending_stack.push((
228 self.bindings
229 .FPDFBookmark_GetFirstChild(self.document_handle, node),
230 node,
231 ));
232 }
233
234 // If the present node isn't the one we're meant to skip, return it.
235 if node != self.skip_sibling {
236 let parent = if parent.is_null() { None } else { Some(parent) };
237 return Some(PdfBookmark::from_pdfium(
238 node,
239 parent,
240 self.document_handle,
241 self.bindings,
242 ));
243 }
244 }
245
246 // If we got here, then the stack is empty and we're done.
247 None
248 }
249}