pub fn parse_acroform(pdf: &Pdf) -> Option<FieldTree>
Parse the AcroForm dictionary from a PDF document and build a field tree.
Examples found in repository?
examples/corpus_gate.rs (line 57)
31fn main() {
32 let mut args = std::env::args().skip(1);
33 let input = args
34 .next()
35 .expect("usage: corpus_gate <input.pdf> [out.pdf]");
36 let output = args.next();
37
38 let bytes = std::fs::read(&input).expect("read input");
39 let doc_name = std::path::Path::new(&input)
40 .parent()
41 .and_then(|p| p.file_name())
42 .map(|s| s.to_string_lossy().into_owned())
43 .unwrap_or_else(|| input.clone());
44
45 // --- Model side (pdf-syntax parse) ---
46 let pdf = match pdf_syntax::Pdf::new(std::sync::Arc::new(bytes.clone())) {
47 Ok(p) => p,
48 Err(e) => {
49 println!(
50 "{{\"doc\":{},\"open_fail\":{}}}",
51 js(&doc_name),
52 js(&format!("{e:?}"))
53 );
54 std::process::exit(1);
55 }
56 };
57 let model = parse_acroform(&pdf)
58 .map(|t| build_form_model(&t))
59 .unwrap_or_default();
60
61 let mut n_text = 0;
62 let mut n_multiline = 0;
63 let mut n_comb = 0;
64 let mut n_password = 0;
65 let mut n_checkbox = 0;
66 let mut n_radio = 0;
67 let mut n_combo = 0;
68 let mut n_listbox = 0;
69 let mut n_push = 0;
70 let mut n_sig = 0;
71 let mut n_maxlen = 0;
72 let mut n_readonly = 0;
73 let mut n_required = 0;
74 let mut n_nonascii_onstate = 0;
75 let mut n_widgets = 0;
76 let mut first_text: Option<String> = None;
77 let mut first_checkbox: Option<String> = None;
78 let mut first_radio: Option<(String, String)> = None;
79
80 for f in &model {
81 n_widgets += f.widgets.len();
82 if f.max_len.is_some() {
83 n_maxlen += 1;
84 }
85 if f.read_only {
86 n_readonly += 1;
87 }
88 if f.required {
89 n_required += 1;
90 }
91 match &f.kind {
92 FormFieldKind::Text {
93 multiline,
94 comb,
95 password,
96 } => {
97 n_text += 1;
98 if *multiline {
99 n_multiline += 1;
100 }
101 if *comb {
102 n_comb += 1;
103 }
104 if *password {
105 n_password += 1;
106 }
107 if first_text.is_none() && !f.read_only {
108 first_text = Some(f.name.clone());
109 }
110 }
111 FormFieldKind::Checkbox { on_state, .. } => {
112 n_checkbox += 1;
113 if !on_state.is_ascii() {
114 n_nonascii_onstate += 1;
115 }
116 if first_checkbox.is_none() && !f.read_only {
117 first_checkbox = Some(f.name.clone());
118 }
119 }
120 FormFieldKind::RadioGroup { options } => {
121 n_radio += 1;
122 if options.iter().any(|o| !o.is_ascii()) {
123 n_nonascii_onstate += 1;
124 }
125 if first_radio.is_none() && !f.read_only {
126 if let Some(opt) = options.iter().find(|o| !o.is_empty()) {
127 first_radio = Some((f.name.clone(), opt.clone()));
128 }
129 }
130 }
131 FormFieldKind::ComboBox { .. } => n_combo += 1,
132 FormFieldKind::ListBox { .. } => n_listbox += 1,
133 FormFieldKind::PushButton => n_push += 1,
134 FormFieldKind::Signature => n_sig += 1,
135 }
136 }
137
138 // --- Writeback side (lopdf) ---
139 let mut fill_results = Vec::new();
140 let mut saved_ok = false;
141 if let Ok(mut ldoc) = lopdf::Document::load_mem(&bytes) {
142 if let Some(name) = &first_text {
143 let r = apply_field_value(&mut ldoc, name, WriteValue::Text("Gate Café € test"));
144 fill_results.push((
145 "text",
146 name.clone(),
147 r.map(|_| ()).map_err(|e| e.to_string()),
148 ));
149 }
150 if let Some(name) = &first_checkbox {
151 let r = apply_field_value(&mut ldoc, name, WriteValue::Checkbox(true));
152 fill_results.push((
153 "checkbox",
154 name.clone(),
155 r.map(|_| ()).map_err(|e| e.to_string()),
156 ));
157 }
158 if let Some((name, opt)) = &first_radio {
159 let r = apply_field_value(&mut ldoc, name, WriteValue::Radio(opt));
160 fill_results.push((
161 "radio",
162 name.clone(),
163 r.map(|_| ()).map_err(|e| e.to_string()),
164 ));
165 }
166 if let Some(out) = &output {
167 let mut buf = Vec::new();
168 if ldoc.save_to(&mut buf).is_ok() {
169 saved_ok = std::fs::write(out, &buf).is_ok();
170 }
171 } else {
172 let mut buf = Vec::new();
173 saved_ok = ldoc.save_to(&mut buf).is_ok();
174 }
175 }
176
177 let mut fills = String::new();
178 for (i, (kind, name, result)) in fill_results.iter().enumerate() {
179 if i > 0 {
180 fills.push(',');
181 }
182 match result {
183 Ok(()) => {
184 let _ = write!(
185 fills,
186 "{{\"kind\":\"{kind}\",\"field\":{},\"ok\":true}}",
187 js(name)
188 );
189 }
190 Err(e) => {
191 let _ = write!(
192 fills,
193 "{{\"kind\":\"{kind}\",\"field\":{},\"ok\":false,\"error\":{}}}",
194 js(name),
195 js(e)
196 );
197 }
198 }
199 }
200
201 println!(
202 "{{\"doc\":{},\"fields\":{},\"widgets\":{n_widgets},\"text\":{n_text},\"multiline\":{n_multiline},\"comb\":{n_comb},\"password\":{n_password},\"checkbox\":{n_checkbox},\"radio\":{n_radio},\"combo\":{n_combo},\"listbox\":{n_listbox},\"pushbutton\":{n_push},\"signature\":{n_sig},\"maxlen\":{n_maxlen},\"readonly\":{n_readonly},\"required\":{n_required},\"nonascii_onstates\":{n_nonascii_onstate},\"fills\":[{fills}],\"saved\":{saved_ok}}}",
203 js(&doc_name),
204 model.len(),
205 );
206}