1use crate::flags::FieldFlags;
4use crate::tree::*;
5use pdf_syntax::object::dict::keys;
6use pdf_syntax::object::{Array, Dict, Name, Object, ObjectIdentifier, Rect};
7use pdf_syntax::Pdf;
8use std::collections::BTreeSet;
9
/// Maximum `/Kids` nesting depth that `parse_field_recursive` will descend.
///
/// Once a branch reaches this depth it is abandoned, which bounds recursion
/// on malformed or hostile field hierarchies.
const MAX_FIELD_DEPTH: usize = 100;
13
14pub fn parse_acroform(pdf: &Pdf) -> Option<FieldTree> {
16 let xref = pdf.xref();
17 let catalog: Dict<'_> = xref.get(xref.root_id())?;
18 let acroform: Dict<'_> = catalog.get(keys::ACRO_FORM)?;
19 let mut tree = FieldTree::new();
20
21 if let Some(da) = get_string_value(&acroform, keys::DA) {
22 tree.document_da = Some(da);
23 }
24 if let Some(q) = acroform.get::<u32>(keys::Q) {
25 tree.document_quadding = Some(parse_quadding(q));
26 }
27 if let Some(na) = acroform.get::<bool>(keys::NEED_APPEARANCES) {
28 tree.need_appearances = na;
29 }
30 if let Some(sf) = acroform.get::<u32>(keys::SIG_FLAGS) {
31 tree.sig_flags = sf;
32 }
33
34 if let Some(fields_arr) = acroform.get::<Array<'_>>(keys::FIELDS) {
35 let mut visited = BTreeSet::new();
38 for field_dict in fields_arr.iter::<Dict<'_>>() {
39 parse_field_recursive(&field_dict, &mut tree, None, 0, &mut visited);
40 }
41 }
42
43 if let Some(co_arr) = acroform.get::<Array<'_>>(keys::CO) {
44 for co_obj in co_arr.iter::<Object<'_>>() {
45 if let Object::Dict(co_dict) = co_obj {
46 if let Some(obj_id) = co_dict.obj_id() {
47 let target = (obj_id.obj_number, obj_id.gen_number);
48 if let Some(id) = find_by_object_id(&tree, target) {
49 tree.calculation_order.push(id);
50 }
51 }
52 }
53 }
54 }
55
56 assign_page_indices(pdf, &mut tree);
57 Some(tree)
58}
59
60fn parse_field_recursive(
61 dict: &Dict<'_>,
62 tree: &mut FieldTree,
63 parent: Option<FieldId>,
64 depth: usize,
65 visited: &mut BTreeSet<ObjectIdentifier>,
66) {
67 if depth >= MAX_FIELD_DEPTH {
70 return;
71 }
72 if let Some(id) = dict.obj_id() {
73 if !visited.insert(id) {
74 return;
75 }
76 }
77
78 let partial_name = get_string_value(dict, keys::T).unwrap_or_default();
79 let field_type = dict.get::<Name>(keys::FT).and_then(|n| match n.as_ref() {
80 b"Tx" => Some(FieldType::Text),
81 b"Btn" => Some(FieldType::Button),
82 b"Ch" => Some(FieldType::Choice),
83 b"Sig" => Some(FieldType::Signature),
84 _ => None,
85 });
86 let flags = dict
87 .get::<u32>(keys::FF)
88 .map(FieldFlags::from_bits)
89 .unwrap_or_default();
90 let rect = dict
91 .get::<Rect>(keys::RECT)
92 .map(|r| [r.x0 as f32, r.y0 as f32, r.x1 as f32, r.y1 as f32]);
93 let appearance_state = dict
94 .get::<Name>(keys::AS)
95 .map(|n| crate::encoding::decode_name_bytes(n.as_ref()));
96 let object_id = dict.obj_id().map(|oid| (oid.obj_number, oid.gen_number));
97 let on_state = parse_on_state(dict);
98
99 let node = FieldNode {
100 partial_name,
101 alternate_name: get_string_value(dict, keys::TU),
102 mapping_name: get_string_value(dict, keys::TM),
103 field_type,
104 flags,
105 value: parse_field_value(dict, keys::V),
106 default_value: parse_field_value(dict, keys::DV),
107 default_appearance: get_string_value(dict, keys::DA),
108 quadding: dict.get::<u32>(keys::Q).map(parse_quadding),
109 max_len: dict.get::<u32>(keys::MAX_LEN),
110 options: parse_options(dict),
111 top_index: dict.get::<u32>(keys::TI),
112 rect,
113 appearance_state,
114 on_state,
115 page_index: None,
116 parent,
117 children: vec![],
118 object_id,
119 has_actions: dict.contains_key(keys::AA),
120 mk: parse_mk(dict),
121 border_style: parse_border_style(dict),
122 };
123 let id = tree.alloc(node);
124 if let Some(pid) = parent {
125 tree.get_mut(pid).children.push(id);
126 }
127 if let Some(kids_arr) = dict.get::<Array<'_>>(keys::KIDS) {
128 for kid_dict in kids_arr.iter::<Dict<'_>>() {
129 parse_field_recursive(&kid_dict, tree, Some(id), depth + 1, visited);
130 }
131 }
132}
133
134fn parse_on_state(dict: &Dict<'_>) -> Option<String> {
138 let ap: Dict<'_> = dict.get(keys::AP)?;
139 let n: Dict<'_> = ap.get(keys::N)?;
140 let mut found = None;
141 for key in n.keys() {
142 let bytes: &[u8] = key.as_ref();
143 if bytes != b"Off" {
144 found = Some(crate::encoding::decode_name_bytes(bytes));
145 break;
146 }
147 }
148 found
149}
150
151fn parse_field_value(dict: &Dict<'_>, key: &[u8]) -> Option<FieldValue> {
152 let obj: Object<'_> = dict.get(key)?;
153 match obj {
154 Object::String(s) => Some(FieldValue::Text(crate::encoding::decode_pdf_text_bytes(
155 s.as_bytes(),
156 ))),
157 Object::Name(n) => Some(FieldValue::Text(crate::encoding::decode_name_bytes(
158 n.as_ref(),
159 ))),
160 Object::Array(arr) => {
161 let vals: Vec<String> = arr
162 .iter::<Object<'_>>()
163 .filter_map(|o| match o {
164 Object::String(s) => Some(crate::encoding::decode_pdf_text_bytes(s.as_bytes())),
165 Object::Name(n) => Some(crate::encoding::decode_name_bytes(n.as_ref())),
166 _ => None,
167 })
168 .collect();
169 Some(FieldValue::StringArray(vals))
170 }
171 _ => None,
172 }
173}
174
175fn parse_options(dict: &Dict<'_>) -> Vec<ChoiceOption> {
176 let Some(arr) = dict.get::<Array<'_>>(keys::OPT) else {
177 return vec![];
178 };
179 arr.iter::<Object<'_>>()
180 .filter_map(|obj| match obj {
181 Object::String(s) => {
182 let text = crate::encoding::decode_pdf_text_bytes(s.as_bytes());
183 Some(ChoiceOption {
184 export: text.clone(),
185 display: text,
186 })
187 }
188 Object::Array(pair) => {
189 let items: Vec<Object<'_>> = pair.iter::<Object<'_>>().collect();
190 if items.len() >= 2 {
191 Some(ChoiceOption {
192 export: obj_to_string(&items[0]).unwrap_or_default(),
193 display: obj_to_string(&items[1]).unwrap_or_default(),
194 })
195 } else {
196 None
197 }
198 }
199 _ => None,
200 })
201 .collect()
202}
203
204fn parse_mk(dict: &Dict<'_>) -> Option<MkDict> {
205 let mk_dict: Dict<'_> = dict.get(keys::MK)?;
206 Some(MkDict {
207 border_color: parse_color_array(&mk_dict, keys::BC),
208 background_color: parse_color_array(&mk_dict, keys::BG),
209 caption: get_string_value(&mk_dict, keys::CA),
210 rollover_caption: get_string_value(&mk_dict, &b"RC"[..]),
211 alternate_caption: get_string_value(&mk_dict, keys::AC),
212 text_position: mk_dict.get::<u32>(&b"TP"[..]),
213 rotation: mk_dict.get::<u32>(&b"R"[..]),
214 })
215}
216
217fn parse_color_array(dict: &Dict<'_>, key: &[u8]) -> Option<Vec<f32>> {
218 let arr: Array<'_> = dict.get(key)?;
219 let vals: Vec<f32> = arr.iter::<f32>().collect();
220 if vals.is_empty() {
221 None
222 } else {
223 Some(vals)
224 }
225}
226
227fn parse_border_style(dict: &Dict<'_>) -> Option<BorderStyle> {
228 let bs_dict: Dict<'_> = dict.get(keys::BS)?;
229 Some(BorderStyle {
230 width: bs_dict.get::<f32>(&b"W"[..]).unwrap_or(1.0),
231 style: bs_dict
232 .get::<Name>(&b"S"[..])
233 .and_then(|n| n.as_ref().first().copied())
234 .unwrap_or(b'S'),
235 })
236}
237
238fn parse_quadding(q: u32) -> Quadding {
239 match q {
240 1 => Quadding::Center,
241 2 => Quadding::Right,
242 _ => Quadding::Left,
243 }
244}
245
246fn get_string_value(dict: &Dict<'_>, key: &[u8]) -> Option<String> {
247 obj_to_string(&dict.get::<Object<'_>>(key)?)
248}
249
250fn obj_to_string(obj: &Object<'_>) -> Option<String> {
251 match obj {
252 Object::String(s) => Some(crate::encoding::decode_pdf_text_bytes(s.as_bytes())),
253 Object::Name(n) => Some(crate::encoding::decode_name_bytes(n.as_ref())),
254 _ => None,
255 }
256}
257
258fn find_by_object_id(tree: &FieldTree, target: (i32, i32)) -> Option<FieldId> {
259 tree.all_ids()
260 .find(|&id| tree.get(id).object_id == Some(target))
261}
262
263fn assign_page_indices(pdf: &Pdf, tree: &mut FieldTree) {
264 let pages = pdf.pages();
265 for (page_idx, page) in pages.iter().enumerate() {
266 let raw = page.raw();
267 let Some(annots_arr) = raw.get::<Array<'_>>(keys::ANNOTS) else {
268 continue;
269 };
270 for annot_obj in annots_arr.iter::<Object<'_>>() {
271 if let Object::Dict(annot_dict) = annot_obj {
272 if let Some(annot_oid) = annot_dict.obj_id() {
273 let target = (annot_oid.obj_number, annot_oid.gen_number);
274 if let Some(fid) = find_by_object_id(tree, target) {
275 tree.get_mut(fid).page_index = Some(page_idx);
276 }
277 }
278 }
279 }
280 }
281}
282
#[cfg(test)]
mod tests {
    use super::*;

    /// `/Q` values 0, 1 and 2 map to left, center and right.
    #[test]
    fn quadding_values() {
        let expected = [
            (0, Quadding::Left),
            (1, Quadding::Center),
            (2, Quadding::Right),
        ];
        for (raw, quadding) in expected {
            assert_eq!(parse_quadding(raw), quadding);
        }
    }

    /// Two fields that list each other in `/Kids` must be parsed once each,
    /// and parsing must terminate.
    #[test]
    fn cyclic_field_tree_terminates_and_is_bounded() {
        /// Builds a minimal PDF whose `/AcroForm` has field 5 with kid 6 and
        /// field 6 with kid 5.
        fn cyclic_form_pdf() -> Vec<u8> {
            // Objects 1..=6, in order.
            let bodies: [&[u8]; 6] = [
                b"<< /Type /Catalog /Pages 2 0 R /AcroForm 4 0 R >>",
                b"<< /Type /Pages /Kids [3 0 R] /Count 1 >>",
                b"<< /Type /Page /Parent 2 0 R /MediaBox [0 0 200 200] >>",
                b"<< /Fields [5 0 R] >>",
                b"<< /T (A) /Kids [6 0 R] >>",
                b"<< /T (B) /Kids [5 0 R] >>",
            ];

            let mut pdf = b"%PDF-1.7\n".to_vec();
            // Byte offset of each object, needed for the xref table.
            let mut offsets = Vec::with_capacity(bodies.len());
            for (idx, body) in bodies.iter().enumerate() {
                offsets.push(pdf.len());
                let header = format!("{} 0 obj\n", idx + 1);
                pdf.extend_from_slice(header.as_bytes());
                pdf.extend_from_slice(body);
                pdf.extend_from_slice(b"\nendobj\n");
            }

            let xref_start = pdf.len();
            pdf.extend_from_slice(b"xref\n0 7\n0000000000 65535 f \n");
            for offset in &offsets {
                let entry = format!("{offset:010} 00000 n \n");
                pdf.extend_from_slice(entry.as_bytes());
            }

            let trailer = format!(
                "trailer\n<< /Size 7 /Root 1 0 R >>\nstartxref\n{xref_start}\n%%EOF"
            );
            pdf.extend_from_slice(trailer.as_bytes());
            pdf
        }

        let pdf = Pdf::new(cyclic_form_pdf()).expect("load cyclic-form PDF");
        let tree = parse_acroform(&pdf).expect("acroform parses");
        let field_count = tree.len();
        assert!(
            field_count <= 2,
            "cyclic /Kids must not inflate the field tree; got {}",
            field_count
        );
    }
}