pdfium_render/pdf/document/form.rs
1//! Defines the [PdfForm] struct, exposing functionality related to a form
2//! embedded in a `PdfDocument`.
3
4use crate::bindgen::{
5 FORMTYPE_ACRO_FORM, FORMTYPE_NONE, FORMTYPE_XFA_FOREGROUND, FORMTYPE_XFA_FULL, FPDF_DOCUMENT,
6 FPDF_FORMFILLINFO, FPDF_FORMHANDLE,
7};
8use crate::bindings::PdfiumLibraryBindings;
9use crate::error::PdfiumError;
10use crate::pdf::document::page::field::PdfFormFieldCommon;
11use crate::pdf::document::page::field::PdfFormFieldType;
12use crate::pdf::document::pages::PdfPages;
13use std::collections::HashMap;
14use std::ops::DerefMut;
15use std::pin::Pin;
16use std::ptr::null_mut;
17
18#[cfg(doc)]
19use crate::pdf::document::PdfDocument;
20
21/// The internal definition type of a [PdfForm] embedded in a [PdfDocument].
22#[derive(Copy, Clone, Debug, PartialEq)]
23pub enum PdfFormType {
24 // The FORMTYPE_COUNT constant simply specifies the number of form types supported
25 // by Pdfium; we do not need to expose it.
26 None = FORMTYPE_NONE as isize,
27 Acrobat = FORMTYPE_ACRO_FORM as isize,
28 XfaFull = FORMTYPE_XFA_FULL as isize,
29 XfaForeground = FORMTYPE_XFA_FOREGROUND as isize,
30}
31
32impl PdfFormType {
33 #[inline]
34 pub(crate) fn from_pdfium(form_type: u32) -> Result<PdfFormType, PdfiumError> {
35 match form_type {
36 FORMTYPE_NONE => Ok(PdfFormType::None),
37 FORMTYPE_ACRO_FORM => Ok(PdfFormType::Acrobat),
38 FORMTYPE_XFA_FULL => Ok(PdfFormType::XfaFull),
39 FORMTYPE_XFA_FOREGROUND => Ok(PdfFormType::XfaForeground),
40 _ => Err(PdfiumError::UnknownFormType),
41 }
42 }
43
44 #[inline]
45 #[allow(dead_code)]
46 // The as_pdfium() function is not currently used, but we expect it to be in future
47 pub(crate) fn as_pdfium(&self) -> u32 {
48 match self {
49 PdfFormType::None => FORMTYPE_NONE,
50 PdfFormType::Acrobat => FORMTYPE_ACRO_FORM,
51 PdfFormType::XfaFull => FORMTYPE_XFA_FULL,
52 PdfFormType::XfaForeground => FORMTYPE_XFA_FOREGROUND,
53 }
54 }
55}
56
57/// The [PdfForm] embedded inside a [PdfDocument].
58///
59/// Form fields in Pdfium are exposed as page annotations of type `PdfPageAnnotationType::Widget`
60/// or `PdfPageAnnotationType::XfaWidget`, depending on the type of form embedded inside the
61/// document. To retrieve the user-specified form field values, iterate over each annotation
62/// on each page in the document, filtering out annotations that do not contain a valid form field:
63///
64/// ```
65/// for page in document.pages.iter() {
66/// for annotation in page.annotations.iter() {
67/// if let Some(field) = annotation.as_form_field() {
68/// // We can now unwrap the specific type of form field
69/// // and access its properties, including any user-specified value.
70/// }
71/// }
72/// }
73/// ```
74///
75/// Alternatively, use the [PdfForm::field_values()] function to eagerly retrieve the values of all
76/// fields in the document as a map of (field name, field value) pairs.
77pub struct PdfForm<'a> {
78 form_handle: FPDF_FORMHANDLE,
79 document_handle: FPDF_DOCUMENT,
80
81 #[allow(dead_code)]
82 // The form_fill_info field is not currently used, but we expect it to be in future
83 form_fill_info: Pin<Box<FPDF_FORMFILLINFO>>,
84 bindings: &'a dyn PdfiumLibraryBindings,
85}
86
87impl<'a> PdfForm<'a> {
88 /// Attempts to bind to an embedded form, if any, inside the document with the given
89 /// document handle.
90 #[inline]
91 pub(crate) fn from_pdfium(
92 document_handle: FPDF_DOCUMENT,
93 bindings: &'a dyn PdfiumLibraryBindings,
94 ) -> Option<Self> {
95 // Pdfium does not load form field data or widgets (and therefore will not
96 // render them) until a call has been made to the
97 // FPDFDOC_InitFormFillEnvironment() function. This function takes a large
98 // struct, FPDF_FORMFILLINFO, which Pdfium uses to store a variety of form
99 // configuration information - mostly callback functions that should be called
100 // when the user interacts with a form field widget. Since pdfium-render has
101 // no concept of interactivity, we can leave all these set to None.
102
103 // We allocate the FPDF_FORMFILLINFO struct on the heap and pin its pointer location
104 // so Rust will not move it around. Pdfium retains the pointer location
105 // when we call FPDFDOC_InitFormFillEnvironment() and expects the pointer
106 // location to still be valid when we later call FPDFDOC_ExitFormFillEnvironment()
107 // during drop(); if we don't pin the struct's location it may move, and the
108 // call to FPDFDOC_ExitFormFillEnvironment() will segfault.
109
110 let mut form_fill_info = Box::pin(FPDF_FORMFILLINFO {
111 version: 2,
112 Release: None,
113 FFI_Invalidate: None,
114 FFI_OutputSelectedRect: None,
115 FFI_SetCursor: None,
116 FFI_SetTimer: None,
117 FFI_KillTimer: None,
118 FFI_GetLocalTime: None,
119 FFI_OnChange: None,
120 FFI_GetPage: None,
121 FFI_GetCurrentPage: None,
122 FFI_GetRotation: None,
123 FFI_ExecuteNamedAction: None,
124 FFI_SetTextFieldFocus: None,
125 FFI_DoURIAction: None,
126 FFI_DoGoToAction: None,
127 m_pJsPlatform: null_mut(),
128 xfa_disabled: 0,
129 FFI_DisplayCaret: None,
130 FFI_GetCurrentPageIndex: None,
131 FFI_SetCurrentPage: None,
132 FFI_GotoURL: None,
133 FFI_GetPageViewRect: None,
134 FFI_PageEvent: None,
135 FFI_PopupMenu: None,
136 FFI_OpenFile: None,
137 FFI_EmailTo: None,
138 FFI_UploadTo: None,
139 FFI_GetPlatform: None,
140 FFI_GetLanguage: None,
141 FFI_DownloadFromURL: None,
142 FFI_PostRequestURL: None,
143 FFI_PutRequestURL: None,
144 FFI_OnFocusChange: None,
145 FFI_DoURIActionWithKeyboardModifier: None,
146 });
147
148 let form_handle =
149 bindings.FPDFDOC_InitFormFillEnvironment(document_handle, form_fill_info.deref_mut());
150
151 if !form_handle.is_null() {
152 // There is a form embedded in this document, and we retrieved a valid handle to it.
153
154 let form = PdfForm {
155 form_handle,
156 document_handle,
157 form_fill_info,
158 bindings,
159 };
160
161 if form.form_type() != PdfFormType::None {
162 // The form is valid.
163
164 Some(form)
165 } else {
166 // The form is valid, but empty. No point returning it.
167
168 None
169 }
170 } else {
171 // There is no form embedded in this document.
172
173 None
174 }
175 }
176
177 /// Returns the internal `FPDF_FORMHANDLE` handle for this [PdfForm].
178 #[inline]
179 pub(crate) fn handle(&self) -> FPDF_FORMHANDLE {
180 self.form_handle
181 }
182
183 /// Returns the [PdfiumLibraryBindings] used by this [PdfForm].
184 #[inline]
185 pub fn bindings(&self) -> &'a dyn PdfiumLibraryBindings {
186 self.bindings
187 }
188
189 /// Returns the [PdfFormType] of this [PdfForm].
190 #[inline]
191 pub fn form_type(&self) -> PdfFormType {
192 PdfFormType::from_pdfium(self.bindings.FPDF_GetFormType(self.document_handle) as u32)
193 .unwrap()
194 }
195
196 /// Captures a string representation of the value of every form field on every page of
197 /// the given [PdfPages] collection, returning a map of (field name, field value) pairs.
198 ///
199 /// This function assumes that all form fields in the document have unique field names
200 /// except for radio button and checkbox control groups.
201 pub fn field_values(&self, pages: &'a PdfPages<'a>) -> HashMap<String, Option<String>> {
202 let mut result = HashMap::new();
203
204 let field_value_true = Some("true".to_string());
205
206 let field_value_false = Some("false".to_string());
207
208 for page in pages.iter() {
209 for annotation in page.annotations().iter() {
210 if let Some(field) = annotation.as_form_field() {
211 let field_type = field.field_type();
212
213 let field_value = match field_type {
214 PdfFormFieldType::Checkbox => {
215 if field
216 .as_checkbox_field()
217 .unwrap()
218 .is_checked()
219 .unwrap_or(false)
220 {
221 field_value_true.clone()
222 } else {
223 field_value_false.clone()
224 }
225 }
226 PdfFormFieldType::ComboBox => field.as_combo_box_field().unwrap().value(),
227 PdfFormFieldType::ListBox => field.as_list_box_field().unwrap().value(),
228 PdfFormFieldType::RadioButton => {
229 let field = field.as_radio_button_field().unwrap();
230
231 if field.is_checked().unwrap_or(false) {
232 field.group_value()
233 } else {
234 field_value_false.clone()
235 }
236 }
237 PdfFormFieldType::Text => field.as_text_field().unwrap().value(),
238 PdfFormFieldType::PushButton
239 | PdfFormFieldType::Signature
240 | PdfFormFieldType::Unknown => None,
241 };
242
243 // A group of checkbox or radio button controls all share the same name, so
244 // as we iterate over the controls, the value of the group will be updated.
245 // Only the value of the last control in the group will be captured.
246 // This isn't the behaviour we want; we prefer to capture the value of
247 // a checked control in preference to an unchecked control.
248
249 let field_name = field.name().unwrap_or_default();
250
251 if (field_type == PdfFormFieldType::Checkbox
252 || field_type == PdfFormFieldType::RadioButton)
253 && result.contains_key(&field_name)
254 {
255 // Only overwrite an existing entry for this control group if
256 // this field is set.
257
258 if field_value != field_value_false {
259 result.insert(field_name, field_value);
260 }
261 } else {
262 // For all other control types, we assume that field names are unique.
263
264 result.insert(field_name, field_value);
265 }
266 }
267 }
268 }
269
270 result
271 }
272}
273
274impl<'a> Drop for PdfForm<'a> {
275 /// Closes this [PdfForm], releasing held memory.
276 #[inline]
277 fn drop(&mut self) {
278 self.bindings
279 .FPDFDOC_ExitFormFillEnvironment(self.form_handle);
280 }
281}