pdfium_render/pdf/document/form.rs
1//! Defines the [PdfForm] struct, exposing functionality related to a form
2//! embedded in a `PdfDocument`.
3
4use crate::bindgen::{
5 FORMTYPE_ACRO_FORM, FORMTYPE_NONE, FORMTYPE_XFA_FOREGROUND, FORMTYPE_XFA_FULL, FPDF_DOCUMENT,
6 FPDF_FORMFILLINFO, FPDF_FORMHANDLE,
7};
8use crate::error::PdfiumError;
9use crate::pdf::document::page::field::PdfFormFieldCommon;
10use crate::pdf::document::page::field::PdfFormFieldType;
11use crate::pdf::document::pages::PdfPages;
12use crate::pdfium::PdfiumLibraryBindingsAccessor;
13use std::collections::HashMap;
14use std::marker::PhantomData;
15use std::ops::DerefMut;
16use std::pin::Pin;
17
18#[cfg(doc)]
19use crate::pdf::document::PdfDocument;
20
21/// The internal definition type of a [PdfForm] embedded in a [PdfDocument].
22#[derive(Copy, Clone, Debug, PartialEq)]
23pub enum PdfFormType {
24 // The FORMTYPE_COUNT constant simply specifies the number of form types supported
25 // by Pdfium; we do not need to expose it.
26 None = FORMTYPE_NONE as isize,
27 Acrobat = FORMTYPE_ACRO_FORM as isize,
28 XfaFull = FORMTYPE_XFA_FULL as isize,
29 XfaForeground = FORMTYPE_XFA_FOREGROUND as isize,
30}
31
32impl PdfFormType {
33 #[inline]
34 pub(crate) fn from_pdfium(form_type: u32) -> Result<PdfFormType, PdfiumError> {
35 match form_type {
36 FORMTYPE_NONE => Ok(PdfFormType::None),
37 FORMTYPE_ACRO_FORM => Ok(PdfFormType::Acrobat),
38 FORMTYPE_XFA_FULL => Ok(PdfFormType::XfaFull),
39 FORMTYPE_XFA_FOREGROUND => Ok(PdfFormType::XfaForeground),
40 _ => Err(PdfiumError::UnknownFormType),
41 }
42 }
43
44 #[inline]
45 #[allow(dead_code)]
46 // The as_pdfium() function is not currently used, but we expect it to be in future
47 pub(crate) fn as_pdfium(&self) -> u32 {
48 match self {
49 PdfFormType::None => FORMTYPE_NONE,
50 PdfFormType::Acrobat => FORMTYPE_ACRO_FORM,
51 PdfFormType::XfaFull => FORMTYPE_XFA_FULL,
52 PdfFormType::XfaForeground => FORMTYPE_XFA_FOREGROUND,
53 }
54 }
55}
56
/// The [PdfForm] embedded inside a [PdfDocument].
///
/// Form fields in Pdfium are exposed as page annotations of type `PdfPageAnnotationType::Widget`
/// or `PdfPageAnnotationType::XfaWidget`, depending on the type of form embedded inside the
/// document. To retrieve the user-specified form field values, iterate over each annotation
/// on each page in the document, filtering out annotations that do not contain a valid form field:
///
/// ```
/// for page in document.pages.iter() {
///     for annotation in page.annotations().iter() {
///         if let Some(field) = annotation.as_form_field() {
///             // We can now unwrap the specific type of form field
///             // and access its properties, including any user-specified value.
///         }
///     }
/// }
/// ```
///
/// Alternatively, use the [PdfForm::field_values()] function to eagerly retrieve the values of all
/// fields in the document as a map of (field name, field value) pairs.
pub struct PdfForm<'a> {
    // The handle returned by FPDFDOC_InitFormFillEnvironment(). It is passed to
    // FPDFDOC_ExitFormFillEnvironment() when this PdfForm is dropped.
    form_handle: FPDF_FORMHANDLE,

    // The handle of the document containing the form; used to query the form type.
    document_handle: FPDF_DOCUMENT,

    #[allow(dead_code)]
    // The form_fill_info field is never read from Rust after construction, hence the
    // dead_code allowance. It must still be owned by this struct: a pointer to the pinned
    // allocation is handed to Pdfium by FPDFDOC_InitFormFillEnvironment(), and the
    // allocation has to stay alive, at the same address, for as long as the form handle
    // is in use.
    form_fill_info: Pin<Box<FPDF_FORMFILLINFO>>,

    // Zero-sized marker that ties this PdfForm to the lifetime 'a without storing a reference.
    lifetime: PhantomData<&'a FPDF_FORMHANDLE>,
}
87
88impl<'a> PdfForm<'a> {
89 /// Attempts to bind to an embedded form, if any, inside the document with the given
90 /// document handle.
91 #[inline]
92 pub(crate) fn from_pdfium(document_handle: FPDF_DOCUMENT) -> Option<Self> {
93 // Pdfium does not load form field data or widgets (and therefore will not
94 // render them) until a call has been made to the FPDFDOC_InitFormFillEnvironment()
95 // function. This function takes a large struct, FPDF_FORMFILLINFO, which Pdfium
96 // uses to store a variety of form configuration information, mostly callback functions
97 // that should be invoked when the user interacts with a form field widget.
98 // Since pdfium-render has no concept of interactivity, we can leave all these
99 // set to None.
100
101 // We allocate the FPDF_FORMFILLINFO struct on the heap and pin its pointer location
102 // so Rust will not move it around. Pdfium retains the pointer location
103 // when we call FPDFDOC_InitFormFillEnvironment() and expects the pointer
104 // location to still be valid when we later call FPDFDOC_ExitFormFillEnvironment()
105 // during drop(); if we don't pin the struct's location it may move, and the
106 // call to FPDFDOC_ExitFormFillEnvironment() will segfault.
107
108 let form_fill_info = Box::pin(FPDF_FORMFILLINFO {
109 version: 2,
110 Release: None,
111 FFI_Invalidate: None,
112 FFI_OutputSelectedRect: None,
113 FFI_SetCursor: None,
114 FFI_SetTimer: None,
115 FFI_KillTimer: None,
116 FFI_GetLocalTime: None,
117 FFI_OnChange: None,
118 FFI_GetPage: None,
119 FFI_GetCurrentPage: None,
120 FFI_GetRotation: None,
121 FFI_ExecuteNamedAction: None,
122 FFI_SetTextFieldFocus: None,
123 FFI_DoURIAction: None,
124 FFI_DoGoToAction: None,
125 m_pJsPlatform: std::ptr::null_mut(),
126 xfa_disabled: 0,
127 FFI_DisplayCaret: None,
128 FFI_GetCurrentPageIndex: None,
129 FFI_SetCurrentPage: None,
130 FFI_GotoURL: None,
131 FFI_GetPageViewRect: None,
132 FFI_PageEvent: None,
133 FFI_PopupMenu: None,
134 FFI_OpenFile: None,
135 FFI_EmailTo: None,
136 FFI_UploadTo: None,
137 FFI_GetPlatform: None,
138 FFI_GetLanguage: None,
139 FFI_DownloadFromURL: None,
140 FFI_PostRequestURL: None,
141 FFI_PutRequestURL: None,
142 FFI_OnFocusChange: None,
143 FFI_DoURIActionWithKeyboardModifier: None,
144 });
145
146 let mut form = PdfForm {
147 form_handle: std::ptr::null_mut(),
148 document_handle,
149 form_fill_info,
150 lifetime: PhantomData,
151 };
152
153 let form_handle = unsafe {
154 form.bindings()
155 .FPDFDOC_InitFormFillEnvironment(document_handle, form.form_fill_info.deref_mut())
156 };
157
158 if !form_handle.is_null() {
159 // There is a form embedded in this document, and we retrieved a valid handle to it.
160
161 form.form_handle = form_handle;
162
163 if form.form_type() != PdfFormType::None {
164 // The form is valid.
165
166 Some(form)
167 } else {
168 // The form is valid, but empty. No point returning it.
169
170 None
171 }
172 } else {
173 // There is no form embedded in this document.
174
175 None
176 }
177 }
178
    /// Returns the internal `FPDF_FORMHANDLE` handle for this [PdfForm].
    ///
    /// The handle is returned by value; this [PdfForm] keeps ownership of the underlying
    /// form fill environment and releases it when dropped.
    #[inline]
    pub(crate) fn handle(&self) -> FPDF_FORMHANDLE {
        self.form_handle
    }
184
185 /// Returns the [PdfFormType] of this [PdfForm].
186 #[inline]
187 pub fn form_type(&self) -> PdfFormType {
188 PdfFormType::from_pdfium(
189 unsafe { self.bindings().FPDF_GetFormType(self.document_handle) } as u32,
190 )
191 .unwrap()
192 }
193
194 /// Captures a string representation of the value of every form field on every page of
195 /// the given [PdfPages] collection, returning a map of (field name, field value) pairs.
196 ///
197 /// This function assumes that all form fields in the document have unique field names
198 /// except for radio button and checkbox control groups.
199 pub fn field_values(&self, pages: &'a PdfPages<'a>) -> HashMap<String, Option<String>> {
200 let mut result = HashMap::new();
201
202 let field_value_true = Some("true".to_string());
203
204 let field_value_false = Some("false".to_string());
205
206 for page in pages.iter() {
207 for annotation in page.annotations().iter() {
208 if let Some(field) = annotation.as_form_field() {
209 let field_type = field.field_type();
210
211 let field_value = match field_type {
212 PdfFormFieldType::Checkbox => {
213 if field
214 .as_checkbox_field()
215 .unwrap()
216 .is_checked()
217 .unwrap_or(false)
218 {
219 field_value_true.clone()
220 } else {
221 field_value_false.clone()
222 }
223 }
224 PdfFormFieldType::ComboBox => field.as_combo_box_field().unwrap().value(),
225 PdfFormFieldType::ListBox => field.as_list_box_field().unwrap().value(),
226 PdfFormFieldType::RadioButton => {
227 let field = field.as_radio_button_field().unwrap();
228
229 if field.is_checked().unwrap_or(false) {
230 field.group_value()
231 } else {
232 field_value_false.clone()
233 }
234 }
235 PdfFormFieldType::Text => field.as_text_field().unwrap().value(),
236 PdfFormFieldType::PushButton
237 | PdfFormFieldType::Signature
238 | PdfFormFieldType::Unknown => None,
239 };
240
241 // A group of checkbox or radio button controls all share the same name, so
242 // as we iterate over the controls, the value of the group will be updated.
243 // Only the value of the last control in the group will be captured.
244 // This isn't the behaviour we want; we prefer to capture the value of
245 // a checked control in preference to an unchecked control.
246
247 let field_name = field.name().unwrap_or_default();
248
249 if (field_type == PdfFormFieldType::Checkbox
250 || field_type == PdfFormFieldType::RadioButton)
251 && result.contains_key(&field_name)
252 {
253 // Only overwrite an existing entry for this control group if
254 // this field is set.
255
256 if field_value != field_value_false {
257 result.insert(field_name, field_value);
258 }
259 } else {
260 // For all other control types, we assume that field names are unique.
261
262 result.insert(field_name, field_value);
263 }
264 }
265 }
266 }
267
268 result
269 }
270}
271
272impl<'a> Drop for PdfForm<'a> {
273 /// Closes this [PdfForm], releasing held memory.
274 #[inline]
275 fn drop(&mut self) {
276 unsafe {
277 self.bindings()
278 .FPDFDOC_ExitFormFillEnvironment(self.form_handle);
279 }
280 }
281}
282
// Gives PdfForm the `bindings()` accessor it uses to call into Pdfium. The impl body is
// empty, so everything comes from the trait's provided methods.
impl<'a> PdfiumLibraryBindingsAccessor<'a> for PdfForm<'a> {}
284
// Marks PdfForm as transferable between threads, but only when the `thread_safe` feature
// is enabled. PdfForm stores raw Pdfium handles, so the compiler does not derive this
// automatically.
// NOTE(review): soundness presumably relies on the crate serializing access to Pdfium
// when `thread_safe` is enabled - confirm against the bindings implementation.
#[cfg(feature = "thread_safe")]
unsafe impl<'a> Send for PdfForm<'a> {}
287
// Marks PdfForm as shareable by reference between threads, but only when the
// `thread_safe` feature is enabled. PdfForm stores raw Pdfium handles, so the compiler
// does not derive this automatically.
// NOTE(review): soundness presumably relies on the crate serializing access to Pdfium
// when `thread_safe` is enabled - confirm against the bindings implementation.
#[cfg(feature = "thread_safe")]
unsafe impl<'a> Sync for PdfForm<'a> {}