Skip to main content

pdfluent_forms/
parse.rs

1//! AcroForm dictionary parser (B.1).
2
3use crate::flags::FieldFlags;
4use crate::tree::*;
5use pdf_syntax::object::dict::keys;
6use pdf_syntax::object::{Array, Dict, Name, Object, Rect};
7use pdf_syntax::Pdf;
8
9/// Parse the AcroForm dictionary from a PDF document and build a field tree.
10pub fn parse_acroform(pdf: &Pdf) -> Option<FieldTree> {
11    let xref = pdf.xref();
12    let catalog: Dict<'_> = xref.get(xref.root_id())?;
13    let acroform: Dict<'_> = catalog.get(keys::ACRO_FORM)?;
14    let mut tree = FieldTree::new();
15
16    if let Some(da) = get_string_value(&acroform, keys::DA) {
17        tree.document_da = Some(da);
18    }
19    if let Some(q) = acroform.get::<u32>(keys::Q) {
20        tree.document_quadding = Some(parse_quadding(q));
21    }
22    if let Some(na) = acroform.get::<bool>(keys::NEED_APPEARANCES) {
23        tree.need_appearances = na;
24    }
25    if let Some(sf) = acroform.get::<u32>(keys::SIG_FLAGS) {
26        tree.sig_flags = sf;
27    }
28
29    if let Some(fields_arr) = acroform.get::<Array<'_>>(keys::FIELDS) {
30        for field_dict in fields_arr.iter::<Dict<'_>>() {
31            parse_field_recursive(&field_dict, &mut tree, None);
32        }
33    }
34
35    if let Some(co_arr) = acroform.get::<Array<'_>>(keys::CO) {
36        for co_obj in co_arr.iter::<Object<'_>>() {
37            if let Object::Dict(co_dict) = co_obj {
38                if let Some(obj_id) = co_dict.obj_id() {
39                    let target = (obj_id.obj_number, obj_id.gen_number);
40                    if let Some(id) = find_by_object_id(&tree, target) {
41                        tree.calculation_order.push(id);
42                    }
43                }
44            }
45        }
46    }
47
48    assign_page_indices(pdf, &mut tree);
49    Some(tree)
50}
51
52fn parse_field_recursive(dict: &Dict<'_>, tree: &mut FieldTree, parent: Option<FieldId>) {
53    let partial_name = get_string_value(dict, keys::T).unwrap_or_default();
54    let field_type = dict.get::<Name>(keys::FT).and_then(|n| match n.as_ref() {
55        b"Tx" => Some(FieldType::Text),
56        b"Btn" => Some(FieldType::Button),
57        b"Ch" => Some(FieldType::Choice),
58        b"Sig" => Some(FieldType::Signature),
59        _ => None,
60    });
61    let flags = dict
62        .get::<u32>(keys::FF)
63        .map(FieldFlags::from_bits)
64        .unwrap_or_default();
65    let rect = dict
66        .get::<Rect>(keys::RECT)
67        .map(|r| [r.x0 as f32, r.y0 as f32, r.x1 as f32, r.y1 as f32]);
68    let appearance_state = dict
69        .get::<Name>(keys::AS)
70        .map(|n| String::from_utf8_lossy(n.as_ref()).into_owned());
71    let object_id = dict.obj_id().map(|oid| (oid.obj_number, oid.gen_number));
72
73    let node = FieldNode {
74        partial_name,
75        alternate_name: get_string_value(dict, keys::TU),
76        mapping_name: get_string_value(dict, keys::TM),
77        field_type,
78        flags,
79        value: parse_field_value(dict, keys::V),
80        default_value: parse_field_value(dict, keys::DV),
81        default_appearance: get_string_value(dict, keys::DA),
82        quadding: dict.get::<u32>(keys::Q).map(parse_quadding),
83        max_len: dict.get::<u32>(keys::MAX_LEN),
84        options: parse_options(dict),
85        top_index: dict.get::<u32>(keys::TI),
86        rect,
87        appearance_state,
88        page_index: None,
89        parent,
90        children: vec![],
91        object_id,
92        has_actions: dict.contains_key(keys::AA),
93        mk: parse_mk(dict),
94        border_style: parse_border_style(dict),
95    };
96    let id = tree.alloc(node);
97    if let Some(pid) = parent {
98        tree.get_mut(pid).children.push(id);
99    }
100    if let Some(kids_arr) = dict.get::<Array<'_>>(keys::KIDS) {
101        for kid_dict in kids_arr.iter::<Dict<'_>>() {
102            parse_field_recursive(&kid_dict, tree, Some(id));
103        }
104    }
105}
106
107fn parse_field_value(dict: &Dict<'_>, key: &[u8]) -> Option<FieldValue> {
108    let obj: Object<'_> = dict.get(key)?;
109    match obj {
110        Object::String(s) => Some(FieldValue::Text(
111            String::from_utf8_lossy(s.as_bytes()).into_owned(),
112        )),
113        Object::Name(n) => Some(FieldValue::Text(
114            String::from_utf8_lossy(n.as_ref()).into_owned(),
115        )),
116        Object::Array(arr) => {
117            let vals: Vec<String> = arr
118                .iter::<Object<'_>>()
119                .filter_map(|o| match o {
120                    Object::String(s) => Some(String::from_utf8_lossy(s.as_bytes()).into_owned()),
121                    Object::Name(n) => Some(String::from_utf8_lossy(n.as_ref()).into_owned()),
122                    _ => None,
123                })
124                .collect();
125            Some(FieldValue::StringArray(vals))
126        }
127        _ => None,
128    }
129}
130
131fn parse_options(dict: &Dict<'_>) -> Vec<ChoiceOption> {
132    let Some(arr) = dict.get::<Array<'_>>(keys::OPT) else {
133        return vec![];
134    };
135    arr.iter::<Object<'_>>()
136        .filter_map(|obj| match obj {
137            Object::String(s) => {
138                let text = String::from_utf8_lossy(s.as_bytes()).into_owned();
139                Some(ChoiceOption {
140                    export: text.clone(),
141                    display: text,
142                })
143            }
144            Object::Array(pair) => {
145                let items: Vec<Object<'_>> = pair.iter::<Object<'_>>().collect();
146                if items.len() >= 2 {
147                    Some(ChoiceOption {
148                        export: obj_to_string(&items[0]).unwrap_or_default(),
149                        display: obj_to_string(&items[1]).unwrap_or_default(),
150                    })
151                } else {
152                    None
153                }
154            }
155            _ => None,
156        })
157        .collect()
158}
159
160fn parse_mk(dict: &Dict<'_>) -> Option<MkDict> {
161    let mk_dict: Dict<'_> = dict.get(keys::MK)?;
162    Some(MkDict {
163        border_color: parse_color_array(&mk_dict, keys::BC),
164        background_color: parse_color_array(&mk_dict, keys::BG),
165        caption: get_string_value(&mk_dict, keys::CA),
166        rollover_caption: get_string_value(&mk_dict, &b"RC"[..]),
167        alternate_caption: get_string_value(&mk_dict, keys::AC),
168        text_position: mk_dict.get::<u32>(&b"TP"[..]),
169        rotation: mk_dict.get::<u32>(&b"R"[..]),
170    })
171}
172
173fn parse_color_array(dict: &Dict<'_>, key: &[u8]) -> Option<Vec<f32>> {
174    let arr: Array<'_> = dict.get(key)?;
175    let vals: Vec<f32> = arr.iter::<f32>().collect();
176    if vals.is_empty() {
177        None
178    } else {
179        Some(vals)
180    }
181}
182
183fn parse_border_style(dict: &Dict<'_>) -> Option<BorderStyle> {
184    let bs_dict: Dict<'_> = dict.get(keys::BS)?;
185    Some(BorderStyle {
186        width: bs_dict.get::<f32>(&b"W"[..]).unwrap_or(1.0),
187        style: bs_dict
188            .get::<Name>(&b"S"[..])
189            .and_then(|n| n.as_ref().first().copied())
190            .unwrap_or(b'S'),
191    })
192}
193
194fn parse_quadding(q: u32) -> Quadding {
195    match q {
196        1 => Quadding::Center,
197        2 => Quadding::Right,
198        _ => Quadding::Left,
199    }
200}
201
202fn get_string_value(dict: &Dict<'_>, key: &[u8]) -> Option<String> {
203    obj_to_string(&dict.get::<Object<'_>>(key)?)
204}
205
206fn obj_to_string(obj: &Object<'_>) -> Option<String> {
207    match obj {
208        Object::String(s) => Some(String::from_utf8_lossy(s.as_bytes()).into_owned()),
209        Object::Name(n) => Some(String::from_utf8_lossy(n.as_ref()).into_owned()),
210        _ => None,
211    }
212}
213
214fn find_by_object_id(tree: &FieldTree, target: (i32, i32)) -> Option<FieldId> {
215    tree.all_ids()
216        .find(|&id| tree.get(id).object_id == Some(target))
217}
218
219fn assign_page_indices(pdf: &Pdf, tree: &mut FieldTree) {
220    let pages = pdf.pages();
221    for (page_idx, page) in pages.iter().enumerate() {
222        let raw = page.raw();
223        let Some(annots_arr) = raw.get::<Array<'_>>(keys::ANNOTS) else {
224            continue;
225        };
226        for annot_obj in annots_arr.iter::<Object<'_>>() {
227            if let Object::Dict(annot_dict) = annot_obj {
228                if let Some(annot_oid) = annot_dict.obj_id() {
229                    let target = (annot_oid.obj_number, annot_oid.gen_number);
230                    if let Some(fid) = find_by_object_id(tree, target) {
231                        tree.get_mut(fid).page_index = Some(page_idx);
232                    }
233                }
234            }
235        }
236    }
237}
238
#[cfg(test)]
mod tests {
    use super::*;

    /// The three values with a dedicated mapping.
    #[test]
    fn quadding_values() {
        assert_eq!(parse_quadding(0), Quadding::Left);
        assert_eq!(parse_quadding(1), Quadding::Center);
        assert_eq!(parse_quadding(2), Quadding::Right);
    }

    /// Anything outside 0..=2 must take the fallback arm and map to `Left`.
    #[test]
    fn quadding_out_of_range_falls_back_to_left() {
        assert_eq!(parse_quadding(3), Quadding::Left);
        assert_eq!(parse_quadding(u32::MAX), Quadding::Left);
    }
}