1use crate::flags::FieldFlags;
4use crate::tree::*;
5use pdf_syntax::object::dict::keys;
6use pdf_syntax::object::{Array, Dict, Name, Object, Rect};
7use pdf_syntax::Pdf;
8
9pub fn parse_acroform(pdf: &Pdf) -> Option<FieldTree> {
11 let xref = pdf.xref();
12 let catalog: Dict<'_> = xref.get(xref.root_id())?;
13 let acroform: Dict<'_> = catalog.get(keys::ACRO_FORM)?;
14 let mut tree = FieldTree::new();
15
16 if let Some(da) = get_string_value(&acroform, keys::DA) {
17 tree.document_da = Some(da);
18 }
19 if let Some(q) = acroform.get::<u32>(keys::Q) {
20 tree.document_quadding = Some(parse_quadding(q));
21 }
22 if let Some(na) = acroform.get::<bool>(keys::NEED_APPEARANCES) {
23 tree.need_appearances = na;
24 }
25 if let Some(sf) = acroform.get::<u32>(keys::SIG_FLAGS) {
26 tree.sig_flags = sf;
27 }
28
29 if let Some(fields_arr) = acroform.get::<Array<'_>>(keys::FIELDS) {
30 for field_dict in fields_arr.iter::<Dict<'_>>() {
31 parse_field_recursive(&field_dict, &mut tree, None);
32 }
33 }
34
35 if let Some(co_arr) = acroform.get::<Array<'_>>(keys::CO) {
36 for co_obj in co_arr.iter::<Object<'_>>() {
37 if let Object::Dict(co_dict) = co_obj {
38 if let Some(obj_id) = co_dict.obj_id() {
39 let target = (obj_id.obj_number, obj_id.gen_number);
40 if let Some(id) = find_by_object_id(&tree, target) {
41 tree.calculation_order.push(id);
42 }
43 }
44 }
45 }
46 }
47
48 assign_page_indices(pdf, &mut tree);
49 Some(tree)
50}
51
52fn parse_field_recursive(dict: &Dict<'_>, tree: &mut FieldTree, parent: Option<FieldId>) {
53 let partial_name = get_string_value(dict, keys::T).unwrap_or_default();
54 let field_type = dict.get::<Name>(keys::FT).and_then(|n| match n.as_ref() {
55 b"Tx" => Some(FieldType::Text),
56 b"Btn" => Some(FieldType::Button),
57 b"Ch" => Some(FieldType::Choice),
58 b"Sig" => Some(FieldType::Signature),
59 _ => None,
60 });
61 let flags = dict
62 .get::<u32>(keys::FF)
63 .map(FieldFlags::from_bits)
64 .unwrap_or_default();
65 let rect = dict
66 .get::<Rect>(keys::RECT)
67 .map(|r| [r.x0 as f32, r.y0 as f32, r.x1 as f32, r.y1 as f32]);
68 let appearance_state = dict
69 .get::<Name>(keys::AS)
70 .map(|n| String::from_utf8_lossy(n.as_ref()).into_owned());
71 let object_id = dict.obj_id().map(|oid| (oid.obj_number, oid.gen_number));
72
73 let node = FieldNode {
74 partial_name,
75 alternate_name: get_string_value(dict, keys::TU),
76 mapping_name: get_string_value(dict, keys::TM),
77 field_type,
78 flags,
79 value: parse_field_value(dict, keys::V),
80 default_value: parse_field_value(dict, keys::DV),
81 default_appearance: get_string_value(dict, keys::DA),
82 quadding: dict.get::<u32>(keys::Q).map(parse_quadding),
83 max_len: dict.get::<u32>(keys::MAX_LEN),
84 options: parse_options(dict),
85 top_index: dict.get::<u32>(keys::TI),
86 rect,
87 appearance_state,
88 page_index: None,
89 parent,
90 children: vec![],
91 object_id,
92 has_actions: dict.contains_key(keys::AA),
93 mk: parse_mk(dict),
94 border_style: parse_border_style(dict),
95 };
96 let id = tree.alloc(node);
97 if let Some(pid) = parent {
98 tree.get_mut(pid).children.push(id);
99 }
100 if let Some(kids_arr) = dict.get::<Array<'_>>(keys::KIDS) {
101 for kid_dict in kids_arr.iter::<Dict<'_>>() {
102 parse_field_recursive(&kid_dict, tree, Some(id));
103 }
104 }
105}
106
107fn parse_field_value(dict: &Dict<'_>, key: &[u8]) -> Option<FieldValue> {
108 let obj: Object<'_> = dict.get(key)?;
109 match obj {
110 Object::String(s) => Some(FieldValue::Text(
111 String::from_utf8_lossy(s.as_bytes()).into_owned(),
112 )),
113 Object::Name(n) => Some(FieldValue::Text(
114 String::from_utf8_lossy(n.as_ref()).into_owned(),
115 )),
116 Object::Array(arr) => {
117 let vals: Vec<String> = arr
118 .iter::<Object<'_>>()
119 .filter_map(|o| match o {
120 Object::String(s) => Some(String::from_utf8_lossy(s.as_bytes()).into_owned()),
121 Object::Name(n) => Some(String::from_utf8_lossy(n.as_ref()).into_owned()),
122 _ => None,
123 })
124 .collect();
125 Some(FieldValue::StringArray(vals))
126 }
127 _ => None,
128 }
129}
130
131fn parse_options(dict: &Dict<'_>) -> Vec<ChoiceOption> {
132 let Some(arr) = dict.get::<Array<'_>>(keys::OPT) else {
133 return vec![];
134 };
135 arr.iter::<Object<'_>>()
136 .filter_map(|obj| match obj {
137 Object::String(s) => {
138 let text = String::from_utf8_lossy(s.as_bytes()).into_owned();
139 Some(ChoiceOption {
140 export: text.clone(),
141 display: text,
142 })
143 }
144 Object::Array(pair) => {
145 let items: Vec<Object<'_>> = pair.iter::<Object<'_>>().collect();
146 if items.len() >= 2 {
147 Some(ChoiceOption {
148 export: obj_to_string(&items[0]).unwrap_or_default(),
149 display: obj_to_string(&items[1]).unwrap_or_default(),
150 })
151 } else {
152 None
153 }
154 }
155 _ => None,
156 })
157 .collect()
158}
159
160fn parse_mk(dict: &Dict<'_>) -> Option<MkDict> {
161 let mk_dict: Dict<'_> = dict.get(keys::MK)?;
162 Some(MkDict {
163 border_color: parse_color_array(&mk_dict, keys::BC),
164 background_color: parse_color_array(&mk_dict, keys::BG),
165 caption: get_string_value(&mk_dict, keys::CA),
166 rollover_caption: get_string_value(&mk_dict, &b"RC"[..]),
167 alternate_caption: get_string_value(&mk_dict, keys::AC),
168 text_position: mk_dict.get::<u32>(&b"TP"[..]),
169 rotation: mk_dict.get::<u32>(&b"R"[..]),
170 })
171}
172
173fn parse_color_array(dict: &Dict<'_>, key: &[u8]) -> Option<Vec<f32>> {
174 let arr: Array<'_> = dict.get(key)?;
175 let vals: Vec<f32> = arr.iter::<f32>().collect();
176 if vals.is_empty() {
177 None
178 } else {
179 Some(vals)
180 }
181}
182
183fn parse_border_style(dict: &Dict<'_>) -> Option<BorderStyle> {
184 let bs_dict: Dict<'_> = dict.get(keys::BS)?;
185 Some(BorderStyle {
186 width: bs_dict.get::<f32>(&b"W"[..]).unwrap_or(1.0),
187 style: bs_dict
188 .get::<Name>(&b"S"[..])
189 .and_then(|n| n.as_ref().first().copied())
190 .unwrap_or(b'S'),
191 })
192}
193
194fn parse_quadding(q: u32) -> Quadding {
195 match q {
196 1 => Quadding::Center,
197 2 => Quadding::Right,
198 _ => Quadding::Left,
199 }
200}
201
202fn get_string_value(dict: &Dict<'_>, key: &[u8]) -> Option<String> {
203 obj_to_string(&dict.get::<Object<'_>>(key)?)
204}
205
206fn obj_to_string(obj: &Object<'_>) -> Option<String> {
207 match obj {
208 Object::String(s) => Some(String::from_utf8_lossy(s.as_bytes()).into_owned()),
209 Object::Name(n) => Some(String::from_utf8_lossy(n.as_ref()).into_owned()),
210 _ => None,
211 }
212}
213
214fn find_by_object_id(tree: &FieldTree, target: (i32, i32)) -> Option<FieldId> {
215 tree.all_ids()
216 .find(|&id| tree.get(id).object_id == Some(target))
217}
218
219fn assign_page_indices(pdf: &Pdf, tree: &mut FieldTree) {
220 let pages = pdf.pages();
221 for (page_idx, page) in pages.iter().enumerate() {
222 let raw = page.raw();
223 let Some(annots_arr) = raw.get::<Array<'_>>(keys::ANNOTS) else {
224 continue;
225 };
226 for annot_obj in annots_arr.iter::<Object<'_>>() {
227 if let Object::Dict(annot_dict) = annot_obj {
228 if let Some(annot_oid) = annot_dict.obj_id() {
229 let target = (annot_oid.obj_number, annot_oid.gen_number);
230 if let Some(fid) = find_by_object_id(tree, target) {
231 tree.get_mut(fid).page_index = Some(page_idx);
232 }
233 }
234 }
235 }
236 }
237}
238
#[cfg(test)]
mod tests {
    use super::*;

    /// The codes 0, 1 and 2 map to left, centre and right alignment.
    #[test]
    fn quadding_values() {
        let cases = [
            (0, Quadding::Left),
            (1, Quadding::Center),
            (2, Quadding::Right),
        ];
        for (code, expected) in cases {
            assert_eq!(parse_quadding(code), expected);
        }
    }
}