Skip to main content

parse_acroform

Function parse_acroform 

Source
pub fn parse_acroform(pdf: &Pdf) -> Option<FieldTree>
Expand description

Parse the AcroForm dictionary from a PDF document and build a field tree.

Examples found in repository:
examples/corpus_gate.rs (line 57)
/// Entry point of the `corpus_gate` example.
///
/// Usage: `corpus_gate <input.pdf> [out.pdf]`.
///
/// Reads the input PDF, builds a form model from its AcroForm via
/// `parse_acroform` + `build_form_model`, tallies per-kind field statistics,
/// then attempts to fill the first writable text, checkbox and radio field
/// through `lopdf` and re-save the document. A single JSON object summarising
/// the counts, the fill attempts and the save status is printed to stdout.
///
/// Panics if the input argument is missing or the input file cannot be read;
/// exits with status 1 (after printing an `open_fail` JSON object) if
/// `pdf_syntax::Pdf::new` rejects the bytes.
31fn main() {
32    let mut args = std::env::args().skip(1);
33    let input = args
34        .next()
35        .expect("usage: corpus_gate <input.pdf> [out.pdf]");
    // Optional second argument: path the filled document is written to.
36    let output = args.next();
37
38    let bytes = std::fs::read(&input).expect("read input");
    // The reported document name is the file name of the input's *parent
    // directory*, not of the input file itself; the raw input string is used
    // when no such component exists.
    // NOTE(review): presumably the corpus keeps one directory per document — confirm.
39    let doc_name = std::path::Path::new(&input)
40        .parent()
41        .and_then(|p| p.file_name())
42        .map(|s| s.to_string_lossy().into_owned())
43        .unwrap_or_else(|| input.clone());
44
45    // --- Model side (pdf-syntax parse) ---
    // `bytes` is cloned because the original buffer is reused below by lopdf.
46    let pdf = match pdf_syntax::Pdf::new(std::sync::Arc::new(bytes.clone())) {
47        Ok(p) => p,
48        Err(e) => {
            // Report the open failure as JSON and stop with a non-zero status.
            // NOTE(review): `js` is defined outside this view; it appears to
            // render a string as a JSON string literal — confirm.
49            println!(
50                "{{\"doc\":{},\"open_fail\":{}}}",
51                js(&doc_name),
52                js(&format!("{e:?}"))
53            );
54            std::process::exit(1);
55        }
56    };
    // When `parse_acroform` returns `None`, the model falls back to its
    // `Default` value.
57    let model = parse_acroform(&pdf)
58        .map(|t| build_form_model(&t))
59        .unwrap_or_default();
60
    // Per-kind and per-attribute tallies reported in the final JSON line.
61    let mut n_text = 0;
62    let mut n_multiline = 0;
63    let mut n_comb = 0;
64    let mut n_password = 0;
65    let mut n_checkbox = 0;
66    let mut n_radio = 0;
67    let mut n_combo = 0;
68    let mut n_listbox = 0;
69    let mut n_push = 0;
70    let mut n_sig = 0;
71    let mut n_maxlen = 0;
72    let mut n_readonly = 0;
73    let mut n_required = 0;
74    let mut n_nonascii_onstate = 0;
75    let mut n_widgets = 0;
    // First non-read-only field of each fillable kind; these become the
    // targets of the writeback attempts below.
76    let mut first_text: Option<String> = None;
77    let mut first_checkbox: Option<String> = None;
78    let mut first_radio: Option<(String, String)> = None;
79
80    for f in &model {
81        n_widgets += f.widgets.len();
82        if f.max_len.is_some() {
83            n_maxlen += 1;
84        }
85        if f.read_only {
86            n_readonly += 1;
87        }
88        if f.required {
89            n_required += 1;
90        }
91        match &f.kind {
92            FormFieldKind::Text {
93                multiline,
94                comb,
95                password,
96            } => {
97                n_text += 1;
98                if *multiline {
99                    n_multiline += 1;
100                }
101                if *comb {
102                    n_comb += 1;
103                }
104                if *password {
105                    n_password += 1;
106                }
107                if first_text.is_none() && !f.read_only {
108                    first_text = Some(f.name.clone());
109                }
110            }
111            FormFieldKind::Checkbox { on_state, .. } => {
112                n_checkbox += 1;
                // Counted once per checkbox whose on-state is not pure ASCII.
113                if !on_state.is_ascii() {
114                    n_nonascii_onstate += 1;
115                }
116                if first_checkbox.is_none() && !f.read_only {
117                    first_checkbox = Some(f.name.clone());
118                }
119            }
120            FormFieldKind::RadioGroup { options } => {
121                n_radio += 1;
                // Counted once per group, however many options are non-ASCII.
122                if options.iter().any(|o| !o.is_ascii()) {
123                    n_nonascii_onstate += 1;
124                }
                // A writable group with only empty options is skipped, so a
                // later group can still become the radio fill target.
125                if first_radio.is_none() && !f.read_only {
126                    if let Some(opt) = options.iter().find(|o| !o.is_empty()) {
127                        first_radio = Some((f.name.clone(), opt.clone()));
128                    }
129                }
130            }
131            FormFieldKind::ComboBox { .. } => n_combo += 1,
132            FormFieldKind::ListBox { .. } => n_listbox += 1,
133            FormFieldKind::PushButton => n_push += 1,
134            FormFieldKind::Signature => n_sig += 1,
135        }
136    }
137
138    // --- Writeback side (lopdf) ---
    // Each entry is (kind label, field name, Ok(()) or the error text).
139    let mut fill_results = Vec::new();
    // Stays `false` when lopdf cannot load the document at all.
140    let mut saved_ok = false;
141    if let Ok(mut ldoc) = lopdf::Document::load_mem(&bytes) {
142        if let Some(name) = &first_text {
            // The test value deliberately contains non-ASCII characters.
143            let r = apply_field_value(&mut ldoc, name, WriteValue::Text("Gate Café € test"));
144            fill_results.push((
145                "text",
146                name.clone(),
147                r.map(|_| ()).map_err(|e| e.to_string()),
148            ));
149        }
150        if let Some(name) = &first_checkbox {
151            let r = apply_field_value(&mut ldoc, name, WriteValue::Checkbox(true));
152            fill_results.push((
153                "checkbox",
154                name.clone(),
155                r.map(|_| ()).map_err(|e| e.to_string()),
156            ));
157        }
158        if let Some((name, opt)) = &first_radio {
159            let r = apply_field_value(&mut ldoc, name, WriteValue::Radio(opt));
160            fill_results.push((
161                "radio",
162                name.clone(),
163                r.map(|_| ()).map_err(|e| e.to_string()),
164            ));
165        }
        // With an output path, `saved_ok` requires both the in-memory
        // serialisation and the file write to succeed; without one, only the
        // in-memory serialisation is checked and the buffer is discarded.
166        if let Some(out) = &output {
167            let mut buf = Vec::new();
168            if ldoc.save_to(&mut buf).is_ok() {
169                saved_ok = std::fs::write(out, &buf).is_ok();
170            }
171        } else {
172            let mut buf = Vec::new();
173            saved_ok = ldoc.save_to(&mut buf).is_ok();
174        }
175    }
176
    // Render the fill attempts as comma-separated JSON objects; the result
    // is spliced between `[` and `]` in the final output line.
177    let mut fills = String::new();
178    for (i, (kind, name, result)) in fill_results.iter().enumerate() {
179        if i > 0 {
180            fills.push(',');
181        }
182        match result {
183            Ok(()) => {
                // The `write!` result is deliberately discarded.
184                let _ = write!(
185                    fills,
186                    "{{\"kind\":\"{kind}\",\"field\":{},\"ok\":true}}",
187                    js(name)
188                );
189            }
190            Err(e) => {
191                let _ = write!(
192                    fills,
193                    "{{\"kind\":\"{kind}\",\"field\":{},\"ok\":false,\"error\":{}}}",
194                    js(name),
195                    js(e)
196                );
197            }
198        }
199    }
200
    // Single-line JSON summary: `fields` is the number of model entries, the
    // remaining keys are the tallies gathered above plus the fill/save status.
201    println!(
202        "{{\"doc\":{},\"fields\":{},\"widgets\":{n_widgets},\"text\":{n_text},\"multiline\":{n_multiline},\"comb\":{n_comb},\"password\":{n_password},\"checkbox\":{n_checkbox},\"radio\":{n_radio},\"combo\":{n_combo},\"listbox\":{n_listbox},\"pushbutton\":{n_push},\"signature\":{n_sig},\"maxlen\":{n_maxlen},\"readonly\":{n_readonly},\"required\":{n_required},\"nonascii_onstates\":{n_nonascii_onstate},\"fills\":[{fills}],\"saved\":{saved_ok}}}",
203        js(&doc_name),
204        model.len(),
205    );
206}