Skip to main content

pdf_form/
lib.rs

1#[macro_use]
2extern crate bitflags;
3#[macro_use]
4extern crate derive_error;
5
6mod utils;
7
8use std::collections::VecDeque;
9use std::io;
10use std::io::Write;
11use std::path::Path;
12use std::str;
13
14use bitflags::_core::str::from_utf8;
15
16use lopdf::content::{Content, Operation};
17use lopdf::{Document, Object, ObjectId, StringFormat};
18
19use crate::utils::*;
20
21/// A PDF Form that contains fillable fields
22///
23/// Use this struct to load an existing PDF with a fillable form using the `load` method.  It will
24/// analyze the PDF and identify the fields. Then you can get and set the content of the fields by
25/// index.
26pub struct Form {
27    doc: Document,
28    form_ids: Vec<ObjectId>,
29}
30
/// The possible types of fillable form fields in a PDF.
///
/// The type is a plain tag with no payload, so it is cheap to copy and can be compared, hashed
/// and used as a map key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum FieldType {
    /// A push button, which holds no value
    Button,
    /// A group of mutually exclusive radio buttons
    Radio,
    /// A check box that toggles between on and off
    CheckBox,
    /// A scrollable list of choices
    ListBox,
    /// A drop-down list of choices
    ComboBox,
    /// A free text input
    Text,
    /// A field whose type is not recognised
    Unknown,
}
42
43#[derive(Debug, Error)]
44/// Errors that may occur while loading a PDF
45pub enum LoadError {
46    /// An Lopdf Error
47    LopdfError(lopdf::Error),
48    /// The reference `ObjectId` did not point to any values
49    #[error(non_std, no_from)]
50    NoSuchReference(ObjectId),
51    /// An element that was expected to be a reference was not a reference
52    NotAReference,
53}
54
55/// Errors That may occur while setting values in a form
56#[derive(Debug, Error)]
57pub enum ValueError {
58    /// The method used to set the state is incompatible with the type of the field
59    TypeMismatch,
60    /// One or more selected values are not valid choices
61    InvalidSelection,
62    /// Multiple values were selected when only one was allowed
63    TooManySelected,
64    /// Readonly field cannot be edited
65    Readonly,
66}
/// The current state of a form field.
///
/// Each variant mirrors one [`FieldType`] and carries the value currently stored in the field
/// together with the flags that constrain how it may be edited. States can be cloned and
/// compared, which makes them convenient to snapshot and to assert on.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum FieldState {
    /// Push buttons have no state
    Button,
    /// `selected` is the singular option from `options` that is selected
    Radio {
        /// Name of the selected option, or an empty string when nothing is selected
        selected: String,
        /// Every option the radio group offers
        options: Vec<String>,
        /// Whether the field is flagged read-only
        readonly: bool,
        /// Whether the field is flagged as required
        required: bool,
    },
    /// The toggle state of the checkbox
    CheckBox {
        /// `true` when the box is ticked
        is_checked: bool,
        /// Whether the field is flagged read-only
        readonly: bool,
        /// Whether the field is flagged as required
        required: bool,
    },
    /// `selected` is the list of selected options from `options`
    ListBox {
        /// The currently selected options
        selected: Vec<String>,
        /// Every option the list offers
        options: Vec<String>,
        /// Whether more than one option may be selected at once
        multiselect: bool,
        /// Whether the field is flagged read-only
        readonly: bool,
        /// Whether the field is flagged as required
        required: bool,
    },
    /// `selected` is the list of selected options from `options`
    ComboBox {
        /// The currently selected options
        selected: Vec<String>,
        /// Every option the drop-down offers
        options: Vec<String>,
        /// Whether a value outside `options` may be entered
        editable: bool,
        /// Whether the field is flagged read-only
        readonly: bool,
        /// Whether the field is flagged as required
        required: bool,
    },
    /// User Text Input
    Text {
        /// The text currently stored in the field
        text: String,
        /// Whether the field is flagged read-only
        readonly: bool,
        /// Whether the field is flagged as required
        required: bool,
    },
    /// Unknown fields have no state
    Unknown,
}
110
111trait PdfObjectDeref {
112    fn deref<'a>(&self, doc: &'a Document) -> Result<&'a Object, LoadError>;
113}
114
115impl PdfObjectDeref for Object {
116    fn deref<'a>(&self, doc: &'a Document) -> Result<&'a Object, LoadError> {
117        match *self {
118            Object::Reference(oid) => doc.objects.get(&oid).ok_or(LoadError::NoSuchReference(oid)),
119            _ => Err(LoadError::NotAReference),
120        }
121    }
122}
123
124impl Form {
125    /// Takes a reader containing a PDF with a fillable form, analyzes the content, and attempts to
126    /// identify all of the fields the form has.
127    pub fn load_from<R: io::Read>(reader: R) -> Result<Self, LoadError> {
128        let doc = Document::load_from(reader)?;
129        Self::load_doc(doc)
130    }
131
132    /// Takes a path to a PDF with a fillable form, analyzes the file, and attempts to identify all
133    /// of the fields the form has.
134    pub fn load<P: AsRef<Path>>(path: P) -> Result<Self, LoadError> {
135        let doc = Document::load(path)?;
136        Self::load_doc(doc)
137    }
138
139    fn load_doc(mut doc: Document) -> Result<Self, LoadError> {
140        let mut form_ids = Vec::new();
141        let mut queue = VecDeque::new();
142        // Block so borrow of doc ends before doc is moved into the result
143        {
144            doc.decompress();
145
146            let acroform = doc
147                .objects
148                .get_mut(
149                    &doc.trailer
150                        .get(b"Root")?
151                        .deref(&doc)?
152                        .as_dict()?
153                        .get(b"AcroForm")?
154                        .as_reference()?,
155                )
156                .ok_or(LoadError::NotAReference)?
157                .as_dict_mut()?;
158
159            let fields_list = acroform.get(b"Fields")?.as_array()?;
160            queue.append(&mut VecDeque::from(fields_list.clone()));
161
162            // Iterate over the fields
163            while let Some(objref) = queue.pop_front() {
164                let obj = objref.deref(&doc)?;
165                if let Object::Dictionary(ref dict) = *obj {
166                    // If the field has FT, it actually takes input.  Save this
167                    if dict.get(b"FT").is_ok() {
168                        form_ids.push(objref.as_reference().unwrap());
169                    }
170
171                    // If this field has kids, they might have FT, so add them to the queue
172                    if let Ok(&Object::Array(ref kids)) = dict.get(b"Kids") {
173                        queue.append(&mut VecDeque::from(kids.clone()));
174                    }
175                }
176            }
177        }
178        Ok(Form { doc, form_ids })
179    }
180
181    /// Returns the number of fields the form has
182    pub fn len(&self) -> usize {
183        self.form_ids.len()
184    }
185
186    /// Returns true if empty
187    pub fn is_empty(&self) -> bool {
188        self.len() == 0
189    }
190
191    /// Gets the type of field of the given index
192    ///
193    /// # Panics
194    /// This function will panic if the index is greater than the number of fields
195    pub fn get_type(&self, n: usize) -> FieldType {
196        // unwraps should be fine because load should have verified everything exists
197        let field = self
198            .doc
199            .objects
200            .get(&self.form_ids[n])
201            .unwrap()
202            .as_dict()
203            .unwrap();
204
205        let type_str = field.get(b"FT").unwrap().as_name_str().unwrap();
206        if type_str == "Btn" {
207            let flags = ButtonFlags::from_bits_truncate(get_field_flags(field));
208            if flags.intersects(ButtonFlags::RADIO | ButtonFlags::NO_TOGGLE_TO_OFF) {
209                FieldType::Radio
210            } else if flags.intersects(ButtonFlags::PUSHBUTTON) {
211                FieldType::Button
212            } else {
213                FieldType::CheckBox
214            }
215        } else if type_str == "Ch" {
216            let flags = ChoiceFlags::from_bits_truncate(get_field_flags(field));
217            if flags.intersects(ChoiceFlags::COBMO) {
218                FieldType::ComboBox
219            } else {
220                FieldType::ListBox
221            }
222        } else if type_str == "Tx" {
223            FieldType::Text
224        } else {
225            FieldType::Unknown
226        }
227    }
228
229    /// Gets the name of field of the given index
230    ///
231    /// # Panics
232    /// This function will panic if the index is greater than the number of fields
233    pub fn get_name(&self, n: usize) -> Option<String> {
234        // unwraps should be fine because load should have verified everything exists
235        let field = self
236            .doc
237            .objects
238            .get(&self.form_ids[n])
239            .unwrap()
240            .as_dict()
241            .unwrap();
242
243        // The "T" key refers to the name of the field
244        match field.get(b"T") {
245            Ok(Object::String(data, _)) => String::from_utf8(data.clone()).ok(),
246            _ => None,
247        }
248    }
249
250    /// Gets the types of all of the fields in the form
251    pub fn get_all_types(&self) -> Vec<FieldType> {
252        let mut res = Vec::with_capacity(self.len());
253        for i in 0..self.len() {
254            res.push(self.get_type(i))
255        }
256        res
257    }
258
259    /// Gets the names of all of the fields in the form
260    pub fn get_all_names(&self) -> Vec<Option<String>> {
261        let mut res = Vec::with_capacity(self.len());
262        for i in 0..self.len() {
263            res.push(self.get_name(i))
264        }
265        res
266    }
267
268    /// Gets the state of field of the given index
269    ///
270    /// # Panics
271    /// This function will panic if the index is greater than the number of fields
272    pub fn get_state(&self, n: usize) -> FieldState {
273        let field = self
274            .doc
275            .objects
276            .get(&self.form_ids[n])
277            .unwrap()
278            .as_dict()
279            .unwrap();
280        match self.get_type(n) {
281            FieldType::Button => FieldState::Button,
282            FieldType::Radio => FieldState::Radio {
283                selected: match field.get(b"V") {
284                    Ok(name) => name.as_name_str().unwrap().to_owned(),
285                    _ => match field.get(b"AS") {
286                        Ok(name) => name.as_name_str().unwrap().to_owned(),
287                        _ => "".to_owned(),
288                    },
289                },
290                options: self.get_possibilities(self.form_ids[n]),
291                readonly: is_read_only(field),
292                required: is_required(field),
293            },
294            FieldType::CheckBox => FieldState::CheckBox {
295                is_checked: match field.get(b"V") {
296                    Ok(name) => name.as_name_str().unwrap() == "Yes",
297                    _ => match field.get(b"AS") {
298                        Ok(name) => name.as_name_str().unwrap() == "Yes",
299                        _ => false,
300                    },
301                },
302                readonly: is_read_only(field),
303                required: is_required(field),
304            },
305            FieldType::ListBox => FieldState::ListBox {
306                // V field in a list box can be either text for one option, an array for many
307                // options, or null
308                selected: match field.get(b"V") {
309                    Ok(selection) => match *selection {
310                        Object::String(ref s, StringFormat::Literal) => {
311                            vec![str::from_utf8(&s).unwrap().to_owned()]
312                        }
313                        Object::Array(ref chosen) => {
314                            let mut res = Vec::new();
315                            for obj in chosen {
316                                if let Object::String(ref s, StringFormat::Literal) = *obj {
317                                    res.push(str::from_utf8(&s).unwrap().to_owned());
318                                }
319                            }
320                            res
321                        }
322                        _ => Vec::new(),
323                    },
324                    _ => Vec::new(),
325                },
326                // The options is an array of either text elements or arrays where the second
327                // element is what we want
328                options: match field.get(b"Opt") {
329                    Ok(&Object::Array(ref options)) => options
330                        .iter()
331                        .map(|x| match *x {
332                            Object::String(ref s, StringFormat::Literal) => {
333                                str::from_utf8(&s).unwrap().to_owned()
334                            }
335                            Object::Array(ref arr) => {
336                                if let Object::String(ref s, StringFormat::Literal) = &arr[1] {
337                                    str::from_utf8(&s).unwrap().to_owned()
338                                } else {
339                                    String::new()
340                                }
341                            }
342                            _ => String::new(),
343                        })
344                        .filter(|x| !x.is_empty())
345                        .collect(),
346                    _ => Vec::new(),
347                },
348                multiselect: {
349                    let flags = ChoiceFlags::from_bits_truncate(get_field_flags(field));
350                    flags.intersects(ChoiceFlags::MULTISELECT)
351                },
352                readonly: is_read_only(field),
353                required: is_required(field),
354            },
355            FieldType::ComboBox => FieldState::ComboBox {
356                // V field in a list box can be either text for one option, an array for many
357                // options, or null
358                selected: match field.get(b"V") {
359                    Ok(selection) => match *selection {
360                        Object::String(ref s, StringFormat::Literal) => {
361                            vec![str::from_utf8(&s).unwrap().to_owned()]
362                        }
363                        Object::Array(ref chosen) => {
364                            let mut res = Vec::new();
365                            for obj in chosen {
366                                if let Object::String(ref s, StringFormat::Literal) = *obj {
367                                    res.push(str::from_utf8(&s).unwrap().to_owned());
368                                }
369                            }
370                            res
371                        }
372                        _ => Vec::new(),
373                    },
374                    _ => Vec::new(),
375                },
376                // The options is an array of either text elements or arrays where the second
377                // element is what we want
378                options: match field.get(b"Opt") {
379                    Ok(&Object::Array(ref options)) => options
380                        .iter()
381                        .map(|x| match *x {
382                            Object::String(ref s, StringFormat::Literal) => {
383                                str::from_utf8(&s).unwrap().to_owned()
384                            }
385                            Object::Array(ref arr) => {
386                                if let Object::String(ref s, StringFormat::Literal) = &arr[1] {
387                                    str::from_utf8(&s).unwrap().to_owned()
388                                } else {
389                                    String::new()
390                                }
391                            }
392                            _ => String::new(),
393                        })
394                        .filter(|x| !x.is_empty())
395                        .collect(),
396                    _ => Vec::new(),
397                },
398                editable: {
399                    let flags = ChoiceFlags::from_bits_truncate(get_field_flags(field));
400
401                    flags.intersects(ChoiceFlags::EDIT)
402                },
403                readonly: is_read_only(field),
404                required: is_required(field),
405            },
406            FieldType::Text => FieldState::Text {
407                text: match field.get(b"V") {
408                    Ok(&Object::String(ref s, StringFormat::Literal)) => {
409                        str::from_utf8(&s.clone()).unwrap().to_owned()
410                    }
411                    _ => "".to_owned(),
412                },
413                readonly: is_read_only(field),
414                required: is_required(field),
415            },
416            FieldType::Unknown => FieldState::Unknown,
417        }
418    }
419
420    /// If the field at index `n` is a text field, fills in that field with the text `s`.
421    /// If it is not a text field, returns ValueError
422    ///
423    /// # Panics
424    /// Will panic if n is larger than the number of fields
425    pub fn set_text(&mut self, n: usize, s: String) -> Result<(), ValueError> {
426        match self.get_state(n) {
427            FieldState::Text { .. } => {
428                let field = self
429                    .doc
430                    .objects
431                    .get_mut(&self.form_ids[n])
432                    .unwrap()
433                    .as_dict_mut()
434                    .unwrap();
435
436                field.set("V", Object::string_literal(s.into_bytes()));
437
438                // Regenerate text appearance confoming the new text but ignore the result
439                let _ = self.regenerate_text_appearance(n);
440
441                Ok(())
442            }
443            _ => Err(ValueError::TypeMismatch),
444        }
445    }
446
447    /// Regenerates the appearance for the field at index `n` due to an alteration of the
448    /// original TextField value, the AP will be updated accordingly.
449    ///
450    /// # Incomplete
451    /// This function is not exhaustive as not parse the original TextField orientation
452    /// or the text alignment and other kind of enrichments, also doesn't discover for
453    /// the global document DA.
454    ///
455    /// A more sophisticated parser is needed here
456    fn regenerate_text_appearance(&mut self, n: usize) -> Result<(), lopdf::Error> {
457        let field = {
458            self.doc
459                .objects
460                .get(&self.form_ids[n])
461                .unwrap()
462                .as_dict()
463                .unwrap()
464        };
465
466        // The value of the object (should be a string)
467        let value = field.get(b"V")?.to_owned();
468
469        // The default appearance of the object (should be a string)
470        let da = field.get(b"DA")?.to_owned();
471
472        // The default appearance of the object (should be a string)
473        let rect = field
474            .get(b"Rect")?
475            .as_array()?
476            .iter()
477            .map(|object| {
478                object
479                    .as_f64()
480                    .unwrap_or(object.as_i64().unwrap_or(0) as f64) as f32
481            })
482            .collect::<Vec<_>>();
483
484        // Gets the object stream
485        let object_id = field.get(b"AP")?.as_dict()?.get(b"N")?.as_reference()?;
486        let stream = self.doc.get_object_mut(object_id)?.as_stream_mut()?;
487
488        // Decode and get the content, even if is compressed
489        let mut content = {
490            if let Ok(content) = stream.decompressed_content() {
491                Content::decode(&content)?
492            } else {
493                Content::decode(&stream.content)?
494            }
495        };
496
497        // Ignored operators
498        let ignored_operators = vec![
499            "bt", "tc", "tw", "tz", "g", "tm", "tr", "tf", "tj", "et", "q", "bmc", "emc",
500        ];
501
502        // Remove these ignored operators as we have to generate the text and fonts again
503        content.operations.retain(|operation| {
504            !ignored_operators.contains(&operation.operator.to_lowercase().as_str())
505        });
506
507        // Let's construct the text widget
508        content.operations.append(&mut vec![
509            Operation::new("BMC", vec!["Tx".into()]),
510            Operation::new("q", vec![]),
511            Operation::new("BT", vec![]),
512        ]);
513
514        let font = parse_font(match da {
515            Object::String(ref bytes, _) => Some(from_utf8(bytes)?),
516            _ => None,
517        });
518
519        // Define some helping font variables
520        let font_name = (font.0).0;
521        let font_size = (font.0).1;
522        let font_color = font.1;
523
524        // Set the font type and size and color
525        content.operations.append(&mut vec![
526            Operation::new("Tf", vec![font_name.into(), font_size.into()]),
527            Operation::new(
528                font_color.0,
529                match font_color.0 {
530                    "k" => vec![
531                        font_color.1.into(),
532                        font_color.2.into(),
533                        font_color.3.into(),
534                        font_color.4.into(),
535                    ],
536                    "rg" => vec![
537                        font_color.1.into(),
538                        font_color.2.into(),
539                        font_color.3.into(),
540                    ],
541                    _ => vec![font_color.1.into()],
542                },
543            ),
544        ]);
545
546        // Calcolate the text offset
547        let x = 2.0; // Suppose this fixed offset as we should have known the border here
548
549        // Formula picked up from Poppler
550        let dy = rect[1] - rect[3];
551        let y = if dy > 0.0 {
552            0.5 * dy - 0.4 * font_size as f32
553        } else {
554            0.5 * font_size as f32
555        };
556
557        // Set the text bounds, first are fixed at "1 0 0 1" and then the calculated x,y
558        content.operations.append(&mut vec![Operation::new(
559            "Tm",
560            vec![1.into(), 0.into(), 0.into(), 1.into(), x.into(), y.into()],
561        )]);
562
563        // Set the text value and some finalizing operations
564        content.operations.append(&mut vec![
565            Operation::new("Tj", vec![value]),
566            Operation::new("ET", vec![]),
567            Operation::new("Q", vec![]),
568            Operation::new("EMC", vec![]),
569        ]);
570
571        // Set the new content to the original stream and compress it
572        if let Ok(encoded_content) = content.encode() {
573            stream.set_plain_content(encoded_content);
574            let _ = stream.compress();
575        }
576
577        Ok(())
578    }
579
580    /// If the field at index `n` is a checkbox field, toggles the check box based on the value
581    /// `is_checked`.
582    /// If it is not a checkbox field, returns ValueError
583    ///
584    /// # Panics
585    /// Will panic if n is larger than the number of fields
586    pub fn set_check_box(&mut self, n: usize, is_checked: bool) -> Result<(), ValueError> {
587        match self.get_state(n) {
588            FieldState::CheckBox { .. } => {
589                let field = self
590                    .doc
591                    .objects
592                    .get_mut(&self.form_ids[n])
593                    .unwrap()
594                    .as_dict_mut()
595                    .unwrap();
596
597                let on = get_on_value(field);
598                let state = Object::Name(
599                    if is_checked { on.as_str() } else { "Off" }
600                        .to_owned()
601                        .into_bytes(),
602                );
603
604                field.set("V", state.clone());
605                field.set("AS", state);
606
607                Ok(())
608            }
609            _ => Err(ValueError::TypeMismatch),
610        }
611    }
612
613    /// If the field at index `n` is a radio field, toggles the radio button based on the value
614    /// `choice`
615    /// If it is not a radio button field or the choice is not a valid option, returns ValueError
616    ///
617    /// # Panics
618    /// Will panic if n is larger than the number of fields
619    pub fn set_radio(&mut self, n: usize, choice: String) -> Result<(), ValueError> {
620        match self.get_state(n) {
621            FieldState::Radio { options, .. } => {
622                if options.contains(&choice) {
623                    let field = self
624                        .doc
625                        .objects
626                        .get_mut(&self.form_ids[n])
627                        .unwrap()
628                        .as_dict_mut()
629                        .unwrap();
630                    field.set("V", Object::Name(choice.into_bytes()));
631                    Ok(())
632                } else {
633                    Err(ValueError::InvalidSelection)
634                }
635            }
636            _ => Err(ValueError::TypeMismatch),
637        }
638    }
639
640    /// If the field at index `n` is a listbox field, selects the options in `choice`
641    /// If it is not a listbox field or one of the choices is not a valid option, or if too many choices are selected, returns ValueError
642    ///
643    /// # Panics
644    /// Will panic if n is larger than the number of fields
645    pub fn set_list_box(&mut self, n: usize, choices: Vec<String>) -> Result<(), ValueError> {
646        match self.get_state(n) {
647            FieldState::ListBox {
648                options,
649                multiselect,
650                ..
651            } => {
652                if choices.iter().fold(true, |a, h| options.contains(h) && a) {
653                    if !multiselect && choices.len() > 1 {
654                        Err(ValueError::TooManySelected)
655                    } else {
656                        let field = self
657                            .doc
658                            .objects
659                            .get_mut(&self.form_ids[n])
660                            .unwrap()
661                            .as_dict_mut()
662                            .unwrap();
663                        match choices.len() {
664                            0 => field.set("V", Object::Null),
665                            1 => field.set(
666                                "V",
667                                Object::String(
668                                    choices[0].clone().into_bytes(),
669                                    StringFormat::Literal,
670                                ),
671                            ),
672                            _ => field.set(
673                                "V",
674                                Object::Array(
675                                    choices
676                                        .iter()
677                                        .map(|x| {
678                                            Object::String(
679                                                x.clone().into_bytes(),
680                                                StringFormat::Literal,
681                                            )
682                                        })
683                                        .collect(),
684                                ),
685                            ),
686                        };
687                        Ok(())
688                    }
689                } else {
690                    Err(ValueError::InvalidSelection)
691                }
692            }
693            _ => Err(ValueError::TypeMismatch),
694        }
695    }
696
697    /// If the field at index `n` is a combobox field, selects the options in `choice`
698    /// If it is not a combobox field or one of the choices is not a valid option, or if too many choices are selected, returns ValueError
699    ///
700    /// # Panics
701    /// Will panic if n is larger than the number of fields
702    pub fn set_combo_box(&mut self, n: usize, choice: String) -> Result<(), ValueError> {
703        match self.get_state(n) {
704            FieldState::ComboBox {
705                options, editable, ..
706            } => {
707                if options.contains(&choice) || editable {
708                    let field = self
709                        .doc
710                        .objects
711                        .get_mut(&self.form_ids[n])
712                        .unwrap()
713                        .as_dict_mut()
714                        .unwrap();
715                    field.set(
716                        "V",
717                        Object::String(choice.into_bytes(), StringFormat::Literal),
718                    );
719                    Ok(())
720                } else {
721                    Err(ValueError::InvalidSelection)
722                }
723            }
724            _ => Err(ValueError::TypeMismatch),
725        }
726    }
727
728    /// Saves the form to the specified path
729    pub fn save<P: AsRef<Path>>(&mut self, path: P) -> Result<(), io::Error> {
730        self.doc.save(path).map(|_| ())
731    }
732
733    /// Saves the form to the specified path
734    pub fn save_to<W: Write>(&mut self, target: &mut W) -> Result<(), io::Error> {
735        self.doc.save_to(target)
736    }
737
738    fn get_possibilities(&self, oid: ObjectId) -> Vec<String> {
739        let mut res = Vec::new();
740        let kids_obj = self
741            .doc
742            .objects
743            .get(&oid)
744            .unwrap()
745            .as_dict()
746            .unwrap()
747            .get(b"Kids");
748        if let Ok(&Object::Array(ref kids)) = kids_obj {
749            for (i, kid) in kids.iter().enumerate() {
750                let mut found = false;
751                if let Ok(&Object::Dictionary(ref appearance_states)) =
752                    kid.deref(&self.doc).unwrap().as_dict().unwrap().get(b"AP")
753                {
754                    if let Ok(&Object::Dictionary(ref normal_appearance)) =
755                        appearance_states.get(b"N")
756                    {
757                        for (key, _) in normal_appearance {
758                            if key != b"Off" {
759                                res.push(from_utf8(key).unwrap_or("").to_owned());
760                                found = true;
761                                break;
762                            }
763                        }
764                    }
765                }
766
767                if !found {
768                    res.push(i.to_string());
769                }
770            }
771        }
772
773        res
774    }
775}