use lopdf::{Document, Object};
use serde::{Deserialize, Serialize};
/// Kind of interactive form field, as classified by `determine_field_type`
/// from a field dictionary's `FT` (and, for buttons, `Ff`) entries.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum FormFieldType {
/// `FT` is the name `Tx`.
Text,
/// `FT` is `Btn` and neither the `0x10000` nor the `0x8000` flag bit is set.
Checkbox,
/// `FT` is `Btn` with the `0x8000` flag bit set (and `0x10000` clear).
RadioButton,
/// `FT` is the name `Ch`.
Choice,
/// `FT` is `Btn` with the `0x10000` flag bit set.
Button,
/// `FT` is the name `Sig`.
Signature,
/// `FT` is missing, is not a name object, or holds an unrecognised name.
Unknown,
}
/// One form field record produced by `extract_form_fields`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FormField {
/// Field name: the `T` entries from the top-level field down to this one,
/// joined with `.`; empty segments are skipped.
pub name: String,
/// Classification derived from the `FT` / `Ff` entries.
pub field_type: FormFieldType,
/// Text of the `V` entry when it is a string or name object, else `None`.
pub value: Option<String>,
/// Text of the `DV` entry when it is a string or name object, else `None`.
pub default_value: Option<String>,
/// `true` when bit `0x1` of the `Ff` flags is set.
pub read_only: bool,
/// `true` when bit `0x2` of the `Ff` flags is set.
pub required: bool,
/// Page the field appears on. The extraction code in this file always
/// stores `None` here.
pub page_number: Option<u32>,
}
/// Collects every form field reachable from the document's `AcroForm` entry.
///
/// The lookup path is catalog -> `AcroForm` dictionary -> `Fields` array, with
/// indirect references resolved through [`resolve`] at each step. Every array
/// element that resolves to a dictionary is passed to [`extract_field`] with
/// an empty parent name.
///
/// Returns an empty vector when the catalog cannot be read, or when `AcroForm`
/// or `Fields` is missing or not of the expected type.
pub fn extract_form_fields(doc: &Document) -> Vec<FormField> {
    let mut collected = Vec::new();

    // Each step yields `None` on the first missing or mistyped entry, which
    // leaves `collected` empty.
    let roots = doc
        .catalog()
        .ok()
        .and_then(|catalog| catalog.get(b"AcroForm").ok())
        .and_then(|form| resolve(doc, form).as_dict().ok())
        .and_then(|form| form.get(b"Fields").ok())
        .and_then(|list| resolve(doc, list).as_array().ok());

    if let Some(roots) = roots {
        for root in roots {
            // Entries that do not resolve to a dictionary are skipped.
            if let Ok(root_dict) = resolve(doc, root).as_dict() {
                extract_field(doc, root_dict, "", &mut collected);
            }
        }
    }

    collected
}
/// Flattens one field dictionary, and every descendant reachable through its
/// `Kids` array, into `fields`.
///
/// * `doc` - document used to resolve indirect references.
/// * `dict` - field dictionary to start from.
/// * `parent_name` - fully qualified name of the parent field, or `""` for a
///   top-level field.
/// * `fields` - output vector; one [`FormField`] is appended per emitted node.
///
/// A node that has a `Kids` array and no `Subtype` entry is treated as a pure
/// container: its kids are visited but the node itself is not emitted. Every
/// other node is emitted.
///
/// The `FT`, `Ff`, `V` and `DV` entries are inherited from ancestors visited
/// during this call when a node does not define them itself, and indirect
/// references to them are resolved. Kids reached through an object reference
/// that was already visited in this call are skipped, so a cyclic `Kids`
/// graph cannot recurse forever.
///
/// `page_number` is always set to `None`.
fn extract_field(
    doc: &Document,
    dict: &lopdf::Dictionary,
    parent_name: &str,
    fields: &mut Vec<FormField>,
) {
    let mut visited = std::collections::HashSet::new();
    collect_field_tree(
        doc,
        dict,
        parent_name,
        &lopdf::Dictionary::new(),
        &mut visited,
        fields,
    );
}

/// Recursive worker behind [`extract_field`]: `inherited` carries the resolved
/// inheritable attributes of the ancestors, `visited` the kid references
/// already followed.
fn collect_field_tree(
    doc: &Document,
    dict: &lopdf::Dictionary,
    parent_name: &str,
    inherited: &lopdf::Dictionary,
    visited: &mut std::collections::HashSet<lopdf::ObjectId>,
    fields: &mut Vec<FormField>,
) {
    // Field attributes that a kid takes from its parent when it lacks them.
    const INHERITABLE_KEYS: [&[u8]; 4] = [b"FT", b"Ff", b"V", b"DV"];

    let partial_name = extract_string(dict, b"T").unwrap_or_default();
    let full_name = match (parent_name.is_empty(), partial_name.is_empty()) {
        (true, _) => partial_name,
        (false, true) => parent_name.to_string(),
        (false, false) => format!("{parent_name}.{partial_name}"),
    };

    // Overlay this node's own attributes on what the ancestors supplied.
    // Values are stored already resolved so the readers below, which do not
    // follow references, still see them.
    let mut effective = inherited.clone();
    for key in INHERITABLE_KEYS.iter().copied() {
        if let Ok(own) = dict.get(key) {
            let own = resolve(doc, own);
            if matches!(
                own,
                Object::String(..) | Object::Name(_) | Object::Integer(_)
            ) {
                effective.set(key.to_vec(), own.clone());
            } else {
                // The node overrides the ancestor with a value none of the
                // readers below understand; drop the inherited one rather
                // than report a value the node does not have.
                effective.remove(key);
            }
        }
    }

    if let Ok(kids_obj) = dict.get(b"Kids") {
        if let Ok(kids) = resolve(doc, kids_obj).as_array() {
            for kid in kids {
                // Only references can form a cycle; follow each one once.
                if let Object::Reference(id) = kid {
                    if !visited.insert(*id) {
                        continue;
                    }
                }
                if let Ok(kid_dict) = resolve(doc, kid).as_dict() {
                    collect_field_tree(doc, kid_dict, &full_name, &effective, visited, fields);
                }
            }
            // Container node without its own `Subtype`: nothing to emit.
            if dict.get(b"Subtype").is_err() {
                return;
            }
        }
    }

    let field_type = determine_field_type(&effective);
    let value = extract_string(&effective, b"V");
    let default_value = extract_string(&effective, b"DV");
    // Only the low flag bits are inspected, so the truncating cast is fine.
    let flags = match effective.get(b"Ff") {
        Ok(Object::Integer(raw)) => *raw as u32,
        _ => 0,
    };

    fields.push(FormField {
        name: full_name,
        field_type,
        value,
        default_value,
        read_only: (flags & 1) != 0,
        required: (flags & 2) != 0,
        page_number: None,
    });
}
/// Classifies a field dictionary by its `FT` entry.
///
/// `Tx`, `Ch` and `Sig` map to `Text`, `Choice` and `Signature`. For `Btn` the
/// integer `Ff` entry (treated as 0 when absent or not an integer) decides:
/// bit `0x10000` gives `Button`, otherwise bit `0x8000` gives `RadioButton`,
/// otherwise `Checkbox`. A missing `FT`, an `FT` that is not a name object, or
/// any other name yields `Unknown`.
///
/// Entries are read directly from `dict`; indirect references are not
/// followed.
fn determine_field_type(dict: &lopdf::Dictionary) -> FormFieldType {
    const RADIO_BIT: u32 = 0x8000;
    const PUSH_BUTTON_BIT: u32 = 0x10000;

    let kind = match dict.get(b"FT") {
        Ok(Object::Name(kind)) => kind,
        _ => return FormFieldType::Unknown,
    };

    match kind.as_slice() {
        b"Tx" => FormFieldType::Text,
        b"Ch" => FormFieldType::Choice,
        b"Sig" => FormFieldType::Signature,
        b"Btn" => {
            let flags = match dict.get(b"Ff") {
                Ok(Object::Integer(raw)) => *raw as u32,
                _ => 0,
            };
            // The push-button bit is tested first, so it wins when both are set.
            if flags & PUSH_BUTTON_BIT != 0 {
                FormFieldType::Button
            } else if flags & RADIO_BIT != 0 {
                FormFieldType::RadioButton
            } else {
                FormFieldType::Checkbox
            }
        }
        _ => FormFieldType::Unknown,
    }
}
/// Reads `key` from `dict` as text.
///
/// Returns `Some` for string and name objects and `None` when the key is
/// missing or holds any other object type. The entry is read directly;
/// indirect references are not followed.
///
/// String objects are decoded with [`decode_pdf_text`], so UTF-16BE strings
/// (marked by a leading `FE FF`) come out as readable text. Names are
/// converted with lossy UTF-8.
fn extract_string(dict: &lopdf::Dictionary, key: &[u8]) -> Option<String> {
    match dict.get(key).ok()? {
        Object::String(bytes, _) => Some(decode_pdf_text(bytes)),
        Object::Name(bytes) => Some(String::from_utf8_lossy(bytes).into_owned()),
        _ => None,
    }
}

/// Decodes the raw bytes of a PDF string object.
///
/// * Leading `FE FF`: the rest is decoded as UTF-16BE; invalid sequences
///   become U+FFFD and a trailing odd byte is ignored.
/// * Leading `EF BB BF`: the rest is decoded as UTF-8 with the marker removed.
/// * Anything else: lossy UTF-8, which is exact for ASCII content. Bytes that
///   are not valid UTF-8 become U+FFFD.
fn decode_pdf_text(bytes: &[u8]) -> String {
    match bytes {
        [0xFE, 0xFF, rest @ ..] => {
            let units: Vec<u16> = rest
                .chunks_exact(2)
                .map(|pair| u16::from_be_bytes([pair[0], pair[1]]))
                .collect();
            String::from_utf16_lossy(&units)
        }
        [0xEF, 0xBB, 0xBF, rest @ ..] => String::from_utf8_lossy(rest).into_owned(),
        _ => String::from_utf8_lossy(bytes).into_owned(),
    }
}
/// Returns the object that `obj` refers to.
///
/// When `obj` is a reference and `doc.get_object` finds its target, the target
/// is returned. In every other case (not a reference, or the lookup failed)
/// `obj` itself is returned unchanged.
fn resolve<'a>(doc: &'a Document, obj: &'a Object) -> &'a Object {
    if let Object::Reference(id) = obj {
        if let Ok(target) = doc.get_object(*id) {
            return target;
        }
    }
    obj
}
#[cfg(test)]
mod tests {
    //! Unit tests for the dictionary-level helpers: field-type classification
    //! and string extraction.

    use super::*;

    /// Builds a dictionary holding only an `FT` name entry.
    fn dict_with_type(name: &[u8]) -> lopdf::Dictionary {
        let mut dict = lopdf::Dictionary::new();
        dict.set("FT", Object::Name(name.to_vec()));
        dict
    }

    #[test]
    fn test_form_field_type_default() {
        let dict = lopdf::Dictionary::new();
        assert_eq!(determine_field_type(&dict), FormFieldType::Unknown);
    }

    #[test]
    fn test_form_field_type_text() {
        let dict = dict_with_type(b"Tx");
        assert_eq!(determine_field_type(&dict), FormFieldType::Text);
    }

    #[test]
    fn test_form_field_type_checkbox() {
        let dict = dict_with_type(b"Btn");
        assert_eq!(determine_field_type(&dict), FormFieldType::Checkbox);
    }

    #[test]
    fn test_form_field_type_radio() {
        let mut dict = dict_with_type(b"Btn");
        dict.set("Ff", Object::Integer(0x8000));
        assert_eq!(determine_field_type(&dict), FormFieldType::RadioButton);
    }

    #[test]
    fn test_form_field_type_push_button() {
        let mut dict = dict_with_type(b"Btn");
        dict.set("Ff", Object::Integer(0x10000));
        assert_eq!(determine_field_type(&dict), FormFieldType::Button);
    }

    #[test]
    fn test_form_field_type_push_button_wins_over_radio() {
        // Both flag bits set: the push-button bit is checked first.
        let mut dict = dict_with_type(b"Btn");
        dict.set("Ff", Object::Integer(0x18000));
        assert_eq!(determine_field_type(&dict), FormFieldType::Button);
    }

    #[test]
    fn test_form_field_type_choice() {
        let dict = dict_with_type(b"Ch");
        assert_eq!(determine_field_type(&dict), FormFieldType::Choice);
    }

    #[test]
    fn test_form_field_type_signature() {
        let dict = dict_with_type(b"Sig");
        assert_eq!(determine_field_type(&dict), FormFieldType::Signature);
    }

    #[test]
    fn test_form_field_type_unrecognised_name() {
        let dict = dict_with_type(b"Foo");
        assert_eq!(determine_field_type(&dict), FormFieldType::Unknown);
    }

    #[test]
    fn test_form_field_type_non_name() {
        let mut dict = lopdf::Dictionary::new();
        dict.set("FT", Object::Integer(1));
        assert_eq!(determine_field_type(&dict), FormFieldType::Unknown);
    }

    #[test]
    fn test_extract_string_value() {
        let mut dict = lopdf::Dictionary::new();
        dict.set(
            "V",
            Object::String(b"Hello".to_vec(), lopdf::StringFormat::Literal),
        );
        assert_eq!(extract_string(&dict, b"V"), Some("Hello".to_string()));
    }

    #[test]
    fn test_extract_string_name_value() {
        let mut dict = lopdf::Dictionary::new();
        dict.set("V", Object::Name(b"Yes".to_vec()));
        assert_eq!(extract_string(&dict, b"V"), Some("Yes".to_string()));
    }

    #[test]
    fn test_extract_string_non_string_value() {
        let mut dict = lopdf::Dictionary::new();
        dict.set("V", Object::Integer(3));
        assert_eq!(extract_string(&dict, b"V"), None);
    }

    #[test]
    fn test_extract_string_missing() {
        let dict = lopdf::Dictionary::new();
        assert_eq!(extract_string(&dict, b"V"), None);
    }
}