pdfium_render/pdf/document/
form.rs

1//! Defines the [PdfForm] struct, exposing functionality related to a form
2//! embedded in a `PdfDocument`.
3
4use crate::bindgen::{
5    FORMTYPE_ACRO_FORM, FORMTYPE_NONE, FORMTYPE_XFA_FOREGROUND, FORMTYPE_XFA_FULL, FPDF_DOCUMENT,
6    FPDF_FORMFILLINFO, FPDF_FORMHANDLE,
7};
8use crate::bindings::PdfiumLibraryBindings;
9use crate::error::PdfiumError;
10use crate::pdf::document::page::field::PdfFormFieldCommon;
11use crate::pdf::document::page::field::PdfFormFieldType;
12use crate::pdf::document::pages::PdfPages;
13use std::collections::HashMap;
14use std::ops::DerefMut;
15use std::pin::Pin;
16use std::ptr::null_mut;
17
18#[cfg(doc)]
19use crate::pdf::document::PdfDocument;
20
21/// The internal definition type of a [PdfForm] embedded in a [PdfDocument].
22#[derive(Copy, Clone, Debug, PartialEq)]
23pub enum PdfFormType {
24    // The FORMTYPE_COUNT constant simply specifies the number of form types supported
25    // by Pdfium; we do not need to expose it.
26    None = FORMTYPE_NONE as isize,
27    Acrobat = FORMTYPE_ACRO_FORM as isize,
28    XfaFull = FORMTYPE_XFA_FULL as isize,
29    XfaForeground = FORMTYPE_XFA_FOREGROUND as isize,
30}
31
32impl PdfFormType {
33    #[inline]
34    pub(crate) fn from_pdfium(form_type: u32) -> Result<PdfFormType, PdfiumError> {
35        match form_type {
36            FORMTYPE_NONE => Ok(PdfFormType::None),
37            FORMTYPE_ACRO_FORM => Ok(PdfFormType::Acrobat),
38            FORMTYPE_XFA_FULL => Ok(PdfFormType::XfaFull),
39            FORMTYPE_XFA_FOREGROUND => Ok(PdfFormType::XfaForeground),
40            _ => Err(PdfiumError::UnknownFormType),
41        }
42    }
43
44    #[inline]
45    #[allow(dead_code)]
46    // The as_pdfium() function is not currently used, but we expect it to be in future
47    pub(crate) fn as_pdfium(&self) -> u32 {
48        match self {
49            PdfFormType::None => FORMTYPE_NONE,
50            PdfFormType::Acrobat => FORMTYPE_ACRO_FORM,
51            PdfFormType::XfaFull => FORMTYPE_XFA_FULL,
52            PdfFormType::XfaForeground => FORMTYPE_XFA_FOREGROUND,
53        }
54    }
55}
56
57/// The [PdfForm] embedded inside a [PdfDocument].
58///
59/// Form fields in Pdfium are exposed as page annotations of type `PdfPageAnnotationType::Widget`
60/// or `PdfPageAnnotationType::XfaWidget`, depending on the type of form embedded inside the
61/// document. To retrieve the user-specified form field values, iterate over each annotation
62/// on each page in the document, filtering out annotations that do not contain a valid form field:
63///
64/// ```
65/// for page in document.pages.iter() {
66///     for annotation in page.annotations.iter() {
67///         if let Some(field) = annotation.as_form_field() {
68///             // We can now unwrap the specific type of form field
69///             // and access its properties, including any user-specified value.
70///         }
71///     }
72/// }
73/// ```
74///
75/// Alternatively, use the [PdfForm::field_values()] function to eagerly retrieve the values of all
76/// fields in the document as a map of (field name, field value) pairs.
77pub struct PdfForm<'a> {
78    form_handle: FPDF_FORMHANDLE,
79    document_handle: FPDF_DOCUMENT,
80
81    #[allow(dead_code)]
82    // The form_fill_info field is not currently used, but we expect it to be in future
83    form_fill_info: Pin<Box<FPDF_FORMFILLINFO>>,
84    bindings: &'a dyn PdfiumLibraryBindings,
85}
86
87impl<'a> PdfForm<'a> {
88    /// Attempts to bind to an embedded form, if any, inside the document with the given
89    /// document handle.
90    #[inline]
91    pub(crate) fn from_pdfium(
92        document_handle: FPDF_DOCUMENT,
93        bindings: &'a dyn PdfiumLibraryBindings,
94    ) -> Option<Self> {
95        // Pdfium does not load form field data or widgets (and therefore will not
96        // render them) until a call has been made to the
97        // FPDFDOC_InitFormFillEnvironment() function. This function takes a large
98        // struct, FPDF_FORMFILLINFO, which Pdfium uses to store a variety of form
99        // configuration information - mostly callback functions that should be called
100        // when the user interacts with a form field widget. Since pdfium-render has
101        // no concept of interactivity, we can leave all these set to None.
102
103        // We allocate the FPDF_FORMFILLINFO struct on the heap and pin its pointer location
104        // so Rust will not move it around. Pdfium retains the pointer location
105        // when we call FPDFDOC_InitFormFillEnvironment() and expects the pointer
106        // location to still be valid when we later call FPDFDOC_ExitFormFillEnvironment()
107        // during drop(); if we don't pin the struct's location it may move, and the
108        // call to FPDFDOC_ExitFormFillEnvironment() will segfault.
109
110        let mut form_fill_info = Box::pin(FPDF_FORMFILLINFO {
111            version: 2,
112            Release: None,
113            FFI_Invalidate: None,
114            FFI_OutputSelectedRect: None,
115            FFI_SetCursor: None,
116            FFI_SetTimer: None,
117            FFI_KillTimer: None,
118            FFI_GetLocalTime: None,
119            FFI_OnChange: None,
120            FFI_GetPage: None,
121            FFI_GetCurrentPage: None,
122            FFI_GetRotation: None,
123            FFI_ExecuteNamedAction: None,
124            FFI_SetTextFieldFocus: None,
125            FFI_DoURIAction: None,
126            FFI_DoGoToAction: None,
127            m_pJsPlatform: null_mut(),
128            xfa_disabled: 0,
129            FFI_DisplayCaret: None,
130            FFI_GetCurrentPageIndex: None,
131            FFI_SetCurrentPage: None,
132            FFI_GotoURL: None,
133            FFI_GetPageViewRect: None,
134            FFI_PageEvent: None,
135            FFI_PopupMenu: None,
136            FFI_OpenFile: None,
137            FFI_EmailTo: None,
138            FFI_UploadTo: None,
139            FFI_GetPlatform: None,
140            FFI_GetLanguage: None,
141            FFI_DownloadFromURL: None,
142            FFI_PostRequestURL: None,
143            FFI_PutRequestURL: None,
144            FFI_OnFocusChange: None,
145            FFI_DoURIActionWithKeyboardModifier: None,
146        });
147
148        let form_handle =
149            bindings.FPDFDOC_InitFormFillEnvironment(document_handle, form_fill_info.deref_mut());
150
151        if !form_handle.is_null() {
152            // There is a form embedded in this document, and we retrieved a valid handle to it.
153
154            let form = PdfForm {
155                form_handle,
156                document_handle,
157                form_fill_info,
158                bindings,
159            };
160
161            if form.form_type() != PdfFormType::None {
162                // The form is valid.
163
164                Some(form)
165            } else {
166                // The form is valid, but empty. No point returning it.
167
168                None
169            }
170        } else {
171            // There is no form embedded in this document.
172
173            None
174        }
175    }
176
177    /// Returns the internal `FPDF_FORMHANDLE` handle for this [PdfForm].
178    #[inline]
179    pub(crate) fn handle(&self) -> FPDF_FORMHANDLE {
180        self.form_handle
181    }
182
183    /// Returns the [PdfiumLibraryBindings] used by this [PdfForm].
184    #[inline]
185    pub fn bindings(&self) -> &'a dyn PdfiumLibraryBindings {
186        self.bindings
187    }
188
189    /// Returns the [PdfFormType] of this [PdfForm].
190    #[inline]
191    pub fn form_type(&self) -> PdfFormType {
192        PdfFormType::from_pdfium(self.bindings.FPDF_GetFormType(self.document_handle) as u32)
193            .unwrap()
194    }
195
196    /// Captures a string representation of the value of every form field on every page of
197    /// the given [PdfPages] collection, returning a map of (field name, field value) pairs.
198    ///
199    /// This function assumes that all form fields in the document have unique field names
200    /// except for radio button and checkbox control groups.
201    pub fn field_values(&self, pages: &'a PdfPages<'a>) -> HashMap<String, Option<String>> {
202        let mut result = HashMap::new();
203
204        let field_value_true = Some("true".to_string());
205
206        let field_value_false = Some("false".to_string());
207
208        for page in pages.iter() {
209            for annotation in page.annotations().iter() {
210                if let Some(field) = annotation.as_form_field() {
211                    let field_type = field.field_type();
212
213                    let field_value = match field_type {
214                        PdfFormFieldType::Checkbox => {
215                            if field
216                                .as_checkbox_field()
217                                .unwrap()
218                                .is_checked()
219                                .unwrap_or(false)
220                            {
221                                field_value_true.clone()
222                            } else {
223                                field_value_false.clone()
224                            }
225                        }
226                        PdfFormFieldType::ComboBox => field.as_combo_box_field().unwrap().value(),
227                        PdfFormFieldType::ListBox => field.as_list_box_field().unwrap().value(),
228                        PdfFormFieldType::RadioButton => {
229                            let field = field.as_radio_button_field().unwrap();
230
231                            if field.is_checked().unwrap_or(false) {
232                                field.group_value()
233                            } else {
234                                field_value_false.clone()
235                            }
236                        }
237                        PdfFormFieldType::Text => field.as_text_field().unwrap().value(),
238                        PdfFormFieldType::PushButton
239                        | PdfFormFieldType::Signature
240                        | PdfFormFieldType::Unknown => None,
241                    };
242
243                    // A group of checkbox or radio button controls all share the same name, so
244                    // as we iterate over the controls, the value of the group will be updated.
245                    // Only the value of the last control in the group will be captured.
246                    // This isn't the behaviour we want; we prefer to capture the value of
247                    // a checked control in preference to an unchecked control.
248
249                    let field_name = field.name().unwrap_or_default();
250
251                    if (field_type == PdfFormFieldType::Checkbox
252                        || field_type == PdfFormFieldType::RadioButton)
253                        && result.contains_key(&field_name)
254                    {
255                        // Only overwrite an existing entry for this control group if
256                        // this field is set.
257
258                        if field_value != field_value_false {
259                            result.insert(field_name, field_value);
260                        }
261                    } else {
262                        // For all other control types, we assume that field names are unique.
263
264                        result.insert(field_name, field_value);
265                    }
266                }
267            }
268        }
269
270        result
271    }
272}
273
274impl<'a> Drop for PdfForm<'a> {
275    /// Closes this [PdfForm], releasing held memory.
276    #[inline]
277    fn drop(&mut self) {
278        self.bindings
279            .FPDFDOC_ExitFormFillEnvironment(self.form_handle);
280    }
281}