1use lopdf::{Document, Object};
7use serde::{Deserialize, Serialize};
8
9#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
11pub enum FormFieldType {
12 Text,
14 Checkbox,
16 RadioButton,
18 Choice,
20 Button,
22 Signature,
24 Unknown,
26}
27
28#[derive(Debug, Clone, Serialize, Deserialize)]
30pub struct FormField {
31 pub name: String,
33 pub field_type: FormFieldType,
35 pub value: Option<String>,
37 pub default_value: Option<String>,
39 pub read_only: bool,
41 pub required: bool,
43 pub page_number: Option<u32>,
45}
46
47pub fn extract_form_fields(doc: &Document) -> Vec<FormField> {
49 let mut fields = Vec::new();
50
51 let catalog = match doc.catalog() {
53 Ok(c) => c,
54 Err(_) => return fields,
55 };
56
57 let acroform = match catalog.get(b"AcroForm") {
58 Ok(obj) => resolve(doc, obj),
59 Err(_) => return fields,
60 };
61
62 let acroform_dict = match acroform.as_dict() {
63 Ok(d) => d,
64 Err(_) => return fields,
65 };
66
67 let fields_array = match acroform_dict.get(b"Fields") {
69 Ok(obj) => resolve(doc, obj),
70 Err(_) => return fields,
71 };
72
73 let fields_arr = match fields_array.as_array() {
74 Ok(a) => a,
75 Err(_) => return fields,
76 };
77
78 for field_ref in fields_arr {
80 let field_obj = resolve(doc, field_ref);
81 if let Ok(dict) = field_obj.as_dict() {
82 extract_field(doc, dict, "", &mut fields);
83 }
84 }
85
86 fields
87}
88
89fn extract_field(
91 doc: &Document,
92 dict: &lopdf::Dictionary,
93 parent_name: &str,
94 fields: &mut Vec<FormField>,
95) {
96 let partial_name = extract_string(dict, b"T").unwrap_or_default();
98 let full_name = if parent_name.is_empty() {
99 partial_name.clone()
100 } else if partial_name.is_empty() {
101 parent_name.to_string()
102 } else {
103 format!("{parent_name}.{partial_name}")
104 };
105
106 if let Ok(kids_obj) = dict.get(b"Kids") {
108 let kids = resolve(doc, kids_obj);
109 if let Ok(kids_arr) = kids.as_array() {
110 for kid_ref in kids_arr {
111 let kid_obj = resolve(doc, kid_ref);
112 if let Ok(kid_dict) = kid_obj.as_dict() {
113 extract_field(doc, kid_dict, &full_name, fields);
114 }
115 }
116 if dict.get(b"Subtype").is_err() {
118 return;
119 }
120 }
121 }
122
123 let field_type = determine_field_type(dict);
125
126 let value = extract_string(dict, b"V");
128 let default_value = extract_string(dict, b"DV");
129
130 let ff = dict
132 .get(b"Ff")
133 .ok()
134 .and_then(|o| {
135 if let Object::Integer(i) = resolve(doc, o) {
136 Some(*i as u32)
137 } else {
138 None
139 }
140 })
141 .unwrap_or(0);
142
143 let read_only = (ff & 1) != 0;
144 let required = (ff & 2) != 0;
145
146 fields.push(FormField {
147 name: full_name,
148 field_type,
149 value,
150 default_value,
151 read_only,
152 required,
153 page_number: None,
154 });
155}
156
157fn determine_field_type(dict: &lopdf::Dictionary) -> FormFieldType {
159 match dict.get(b"FT") {
160 Ok(obj) => {
161 if let Object::Name(name) = obj {
162 match name.as_slice() {
163 b"Tx" => FormFieldType::Text,
164 b"Btn" => {
165 let ff = dict
167 .get(b"Ff")
168 .ok()
169 .and_then(|o| {
170 if let Object::Integer(i) = o {
171 Some(*i as u32)
172 } else {
173 None
174 }
175 })
176 .unwrap_or(0);
177 if (ff & 0x10000) != 0 {
178 FormFieldType::Button } else if (ff & 0x8000) != 0 {
180 FormFieldType::RadioButton
181 } else {
182 FormFieldType::Checkbox
183 }
184 }
185 b"Ch" => FormFieldType::Choice,
186 b"Sig" => FormFieldType::Signature,
187 _ => FormFieldType::Unknown,
188 }
189 } else {
190 FormFieldType::Unknown
191 }
192 }
193 Err(_) => FormFieldType::Unknown,
194 }
195}
196
197fn extract_string(dict: &lopdf::Dictionary, key: &[u8]) -> Option<String> {
199 dict.get(key).ok().and_then(|obj| match obj {
200 Object::String(bytes, _) => Some(String::from_utf8_lossy(bytes).to_string()),
201 Object::Name(bytes) => Some(String::from_utf8_lossy(bytes).to_string()),
202 _ => None,
203 })
204}
205
206fn resolve<'a>(doc: &'a Document, obj: &'a Object) -> &'a Object {
208 match obj {
209 Object::Reference(id) => doc.get_object(*id).unwrap_or(obj),
210 _ => obj,
211 }
212}
213
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a dictionary whose `FT` entry is the given name.
    fn dict_with_type(field_type: &[u8]) -> lopdf::Dictionary {
        let mut dict = lopdf::Dictionary::new();
        dict.set("FT", Object::Name(field_type.to_vec()));
        dict
    }

    // An empty dictionary has no `FT`, so the type cannot be determined.
    #[test]
    fn test_form_field_type_default() {
        let empty = lopdf::Dictionary::new();
        assert_eq!(determine_field_type(&empty), FormFieldType::Unknown);
    }

    #[test]
    fn test_form_field_type_text() {
        let dict = dict_with_type(b"Tx");
        assert_eq!(determine_field_type(&dict), FormFieldType::Text);
    }

    // A button field without flags is classified as a checkbox.
    #[test]
    fn test_form_field_type_checkbox() {
        let dict = dict_with_type(b"Btn");
        assert_eq!(determine_field_type(&dict), FormFieldType::Checkbox);
    }

    // The 0x8000 flag turns a button field into a radio button.
    #[test]
    fn test_form_field_type_radio() {
        let mut dict = dict_with_type(b"Btn");
        dict.set("Ff", Object::Integer(0x8000));
        assert_eq!(determine_field_type(&dict), FormFieldType::RadioButton);
    }

    #[test]
    fn test_extract_string_value() {
        let mut dict = lopdf::Dictionary::new();
        let literal = Object::String(b"Hello".to_vec(), lopdf::StringFormat::Literal);
        dict.set("V", literal);
        assert_eq!(extract_string(&dict, b"V"), Some("Hello".to_string()));
    }

    #[test]
    fn test_extract_string_missing() {
        let empty = lopdf::Dictionary::new();
        assert_eq!(extract_string(&empty, b"V"), None);
    }
}