Skip to main content

zpdf_document/
forms.rs

1//! AcroForm interactive-form support (PDF 32000-1 §12.7).
2//!
3//! Two responsibilities:
4//!
5//! 1. **Field model** ([`AcroForm`]) — walks `/Root /AcroForm /Fields`,
6//!    resolving the field tree into terminal [`FormField`]s with
7//!    fully-qualified names and inherited attributes (`/FT` `/V` `/DA` `/Ff`
8//!    `/Q`). Each terminal field records its widget-annotation object ids, so a
9//!    consumer can map a page widget back to the field that owns it.
10//!
11//! 2. **Appearance generation** ([`generate_widget_appearance`]) — for text and
12//!    choice fields whose producer left no appearance stream (or set
13//!    `/NeedAppearances`), synthesizes a form XObject that draws the field
14//!    value, honoring the `/DA` font/size/color, `/Q` justification, and the
15//!    multiline / comb flags. The result feeds the interpreter's annotation
16//!    painter exactly like a real `/AP /N` stream.
17//!
18//! Buttons (checkbox/radio) keep their producer-supplied `/AP` states; only the
19//! `/AS` selection is hardened (see the annotation module). Signatures are
20//! modelled but never generate an appearance.
21
22use std::collections::{HashMap, HashSet};
23
24use zpdf_core::{Matrix, ObjectId, PdfDict, PdfName, PdfObject, Rect};
25use zpdf_parser::PdfFile;
26
/// Maximum depth of the field-tree walk: `walk_field` returns without
/// descending once its `depth` argument exceeds this value. Together with the
/// visited set it bounds malformed or adversarial `/Kids` graphs.
const MAX_FIELD_DEPTH: usize = 50;
/// Maximum number of terminal fields collected: `walk_field` returns
/// immediately once the collected field list has reached this length.
const MAX_FIELDS: usize = 20_000;
31
// Field flags (`/Ff`, PDF Tables 226/228/230). Bit numbering is 1-based in the
// spec, so the shift of each constant is `bit - 1`. The flags are tested with
// a bitwise AND against `FormField::flags`.
/// Common (bit 1): field is read-only.
pub const FF_READONLY: i64 = 1 << 0;
/// Tx (bit 13): the text field holds multiple lines.
pub const FF_MULTILINE: i64 = 1 << 12;
/// Tx (bit 14): the value is a password — never rendered
/// (`generate_widget_appearance` returns `None` for such fields).
pub const FF_PASSWORD: i64 = 1 << 13;
/// Btn (bit 16): radio button (mutually-exclusive set).
pub const FF_RADIO: i64 = 1 << 15;
/// Btn (bit 17): push button (no persistent value).
pub const FF_PUSHBUTTON: i64 = 1 << 16;
/// Ch (bit 18): combo box (vs. list box). Choice fields without this flag are
/// laid out as stacked lines by the appearance generator.
pub const FF_COMBO: i64 = 1 << 17;
/// Tx (bit 25): comb formatting — `/MaxLen` equally-spaced cells. Honored only
/// when Multiline and Password are clear and `/MaxLen` is positive (see
/// `FormField::is_comb`).
pub const FF_COMB: i64 = 1 << 24;
48
/// Classification of a field by its `/FT` entry: the four AcroForm field
/// types, with `Unknown` standing in for a missing or unrecognized type.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FieldKind {
    Text,
    Button,
    Choice,
    Signature,
    Unknown,
}

impl FieldKind {
    /// The `/FT` name for this kind (`"Tx"`, `"Btn"`, `"Ch"`, `"Sig"`), or
    /// `"?"` for [`FieldKind::Unknown`].
    pub fn as_str(self) -> &'static str {
        let tag: &'static str = match self {
            Self::Text => "Tx",
            Self::Button => "Btn",
            Self::Choice => "Ch",
            Self::Signature => "Sig",
            Self::Unknown => "?",
        };
        tag
    }
}
70
/// A resolved field value (`/V`), in one of the three shapes the field model
/// distinguishes.
#[derive(Debug, Clone, PartialEq)]
pub enum FieldValue {
    /// Text fields, combo boxes, single-select list boxes.
    Text(String),
    /// Button on/off state (`/Yes`, `/Off`, …). `FormField::display_value`
    /// treats the name `Off` as "nothing to show".
    Name(String),
    /// Multi-select list box: one entry per selected option.
    List(Vec<String>),
}
81
82/// A terminal interactive-form field.
83#[derive(Debug, Clone)]
84pub struct FormField {
85    /// Fully-qualified name: the `/T` partial names of this field and its
86    /// ancestors joined by `.` (PDF 12.7.3.2).
87    pub name: String,
88    pub kind: FieldKind,
89    /// `/Ff` field flags (inherited).
90    pub flags: i64,
91    /// `/V` value (inherited).
92    pub value: Option<FieldValue>,
93    /// `/DA` default appearance string (inherited, falling back to the
94    /// AcroForm-level `/DA`).
95    pub default_appearance: Option<String>,
96    /// `/Q` quadding: 0 left, 1 centered, 2 right (inherited).
97    pub quadding: i64,
98    /// `/MaxLen` (text fields) — also the comb cell count.
99    pub max_len: Option<i64>,
100    /// `/Opt` `(export, display)` pairs (choice fields). For plain-string
101    /// options the two halves are equal.
102    pub options: Vec<(String, String)>,
103    /// Widget-annotation object ids that present this field on a page. When the
104    /// field dict is itself the widget (the common single-widget case), this is
105    /// the field's own object id.
106    pub widgets: Vec<ObjectId>,
107}
108
109impl FormField {
110    /// The string a renderer should draw for this field, or `None` when there
111    /// is nothing to show (no value, the `Off` button state, or empty text).
112    /// Choice values (which store the `/Opt` *export* value) are mapped to their
113    /// human-visible display label (PDF 12.7.4.4).
114    pub fn display_value(&self) -> Option<String> {
115        let s = match self.value.as_ref()? {
116            FieldValue::Text(s) => self.choice_label(s),
117            FieldValue::Name(n) if n != "Off" => n.clone(),
118            FieldValue::Name(_) => return None,
119            FieldValue::List(v) => v
120                .iter()
121                .map(|s| self.choice_label(s))
122                .collect::<Vec<_>>()
123                .join("\n"),
124        };
125        (!s.is_empty()).then_some(s)
126    }
127
128    /// Map a choice export value to its display label, or return it unchanged
129    /// (for text fields, or exports with no matching option).
130    fn choice_label(&self, value: &str) -> String {
131        if self.kind == FieldKind::Choice {
132            if let Some((_, display)) = self.options.iter().find(|(export, _)| export == value) {
133                return display.clone();
134            }
135        }
136        value.to_string()
137    }
138
139    pub fn is_multiline(&self) -> bool {
140        self.kind == FieldKind::Text && self.flags & FF_MULTILINE != 0
141    }
142
143    pub fn is_password(&self) -> bool {
144        self.kind == FieldKind::Text && self.flags & FF_PASSWORD != 0
145    }
146
147    pub fn is_comb(&self) -> bool {
148        self.kind == FieldKind::Text
149            // Comb (bit 25) is meaningful only when Multiline/Password are clear.
150            && self.flags & (FF_COMB | FF_MULTILINE | FF_PASSWORD) == FF_COMB
151            && self.max_len.unwrap_or(0) > 0
152    }
153}
154
155/// The document's interactive form.
156pub struct AcroForm {
157    /// Terminal fields, in document order.
158    pub fields: Vec<FormField>,
159    /// `/NeedAppearances`: the producer relies on the viewer to (re)generate
160    /// appearance streams.
161    pub need_appearances: bool,
162    /// `/DR /Font`: default font resources referenced by `/DA` font names.
163    pub dr_fonts: Option<PdfDict>,
164    /// Widget object id → index into `fields`.
165    widget_owner: HashMap<ObjectId, usize>,
166}
167
168impl AcroForm {
169    /// Parse the document's `/AcroForm`, or `None` when the document has no
170    /// interactive form.
171    pub fn parse(file: &PdfFile) -> Option<AcroForm> {
172        let root_ref = file.trailer.get_ref("Root").ok()?;
173        let root = file.resolve(root_ref).ok()?;
174        let root = root.as_dict().ok()?;
175        let af = deref(file, root.get("AcroForm")?);
176        let af = af.as_dict().ok()?;
177
178        let need_appearances = matches!(af.get("NeedAppearances"), Some(PdfObject::Bool(true)));
179        let dr_fonts = deref_opt(file, af.get("DR"))
180            .and_then(|dr| dr.as_dict().ok().cloned())
181            .and_then(|dr| match dr.get("Font") {
182                Some(obj) => deref(file, obj).as_dict().ok().cloned(),
183                None => None,
184            });
185
186        let root_inherited = Inherited {
187            ft: None,
188            flags: 0,
189            value: None,
190            da: af.get("DA").and_then(|o| text_string(file, o)),
191            quadding: int_value(file, af.get("Q")).unwrap_or(0),
192        };
193
194        let mut state = WalkState {
195            file,
196            fields: Vec::new(),
197            widget_owner: HashMap::new(),
198            visited: HashSet::new(),
199        };
200        if let Some(arr) = deref_array(file, af.get("Fields")) {
201            for obj in &arr {
202                if let PdfObject::Ref(r) = obj {
203                    walk_field(&mut state, *r, "", &root_inherited, 0);
204                }
205            }
206        }
207
208        Some(AcroForm {
209            fields: state.fields,
210            need_appearances,
211            dr_fonts,
212            widget_owner: state.widget_owner,
213        })
214    }
215
216    /// The terminal field presented by the given widget-annotation id.
217    pub fn field_for_widget(&self, id: ObjectId) -> Option<&FormField> {
218        self.widget_owner.get(&id).and_then(|&i| self.fields.get(i))
219    }
220}
221
/// Attributes inherited down the field tree (PDF 12.7.3.2). Each node's own
/// entry, when present, replaces the value handed down by its parent.
#[derive(Clone)]
struct Inherited {
    /// `/FT` field-type name.
    ft: Option<String>,
    /// `/Ff` field flags.
    flags: i64,
    /// `/V` field value.
    value: Option<FieldValue>,
    /// `/DA` default appearance string (seeded from the AcroForm-level `/DA`).
    da: Option<String>,
    /// `/Q` quadding (seeded from the AcroForm-level `/Q`, defaulting to 0).
    quadding: i64,
}
231
/// Mutable state threaded through the recursive field-tree walk.
struct WalkState<'a> {
    /// The document the field objects are resolved from.
    file: &'a PdfFile,
    /// Terminal fields emitted so far.
    fields: Vec<FormField>,
    /// Widget object id → index into `fields`; the first owner recorded for a
    /// widget is kept.
    widget_owner: HashMap<ObjectId, usize>,
    /// Field object ids already entered, used to break `/Kids` cycles.
    visited: HashSet<ObjectId>,
}
238
239fn walk_field(
240    state: &mut WalkState,
241    id: ObjectId,
242    parent_name: &str,
243    inherited: &Inherited,
244    depth: usize,
245) {
246    if depth > MAX_FIELD_DEPTH || state.fields.len() >= MAX_FIELDS {
247        return;
248    }
249    if !state.visited.insert(id) {
250        return; // cycle
251    }
252    let file = state.file;
253    let obj = match file.resolve(id) {
254        Ok(o) => o,
255        Err(_) => return,
256    };
257    let Ok(dict) = obj.as_dict() else { return };
258
259    // Fully-qualified name: append this node's partial name `/T` (if any),
260    // resolving one level of indirection like the other inherited attributes.
261    let partial = dict.get("T").and_then(|o| text_string(file, o));
262    let name = match &partial {
263        Some(t) if parent_name.is_empty() => t.clone(),
264        Some(t) => format!("{parent_name}.{t}"),
265        None => parent_name.to_string(),
266    };
267
268    // Merge inheritable attributes (this node's own values win).
269    let merged = Inherited {
270        ft: dict
271            .get_name("FT")
272            .ok()
273            .map(String::from)
274            .or_else(|| inherited.ft.clone()),
275        flags: int_value(file, dict.get("Ff")).unwrap_or(inherited.flags),
276        value: field_value(file, dict.get("V")).or_else(|| inherited.value.clone()),
277        da: dict
278            .get("DA")
279            .and_then(|o| text_string(file, o))
280            .or_else(|| inherited.da.clone()),
281        quadding: int_value(file, dict.get("Q")).unwrap_or(inherited.quadding),
282    };
283
284    // Classify the kids: those with a `/T` are child *fields* (recurse); those
285    // without are this terminal field's widget annotations.
286    let kids = deref_array(file, dict.get("Kids")).unwrap_or_default();
287    let mut child_fields = Vec::new();
288    let mut widget_kids = Vec::new();
289    for kid in &kids {
290        if let PdfObject::Ref(r) = kid {
291            let kid_obj = file.resolve(*r).ok();
292            let has_t = kid_obj
293                .as_ref()
294                .and_then(|o| o.as_dict().ok())
295                .map(|d| d.get("T").is_some())
296                .unwrap_or(false);
297            if has_t {
298                child_fields.push(*r);
299            } else {
300                widget_kids.push(*r);
301            }
302        }
303    }
304
305    // Descend into child fields (interior node behavior).
306    let has_child_fields = !child_fields.is_empty();
307    for r in child_fields {
308        walk_field(state, r, &name, &merged, depth + 1);
309    }
310
311    // Emit a terminal field for this node's own widgets:
312    //  - its widget-only kids, or
313    //  - the node dict itself when it has no kids at all (merged field+widget).
314    // A pure interior node (only field kids) owns no widgets and emits nothing;
315    // a *mixed* node (both field and widget kids) still maps its own widgets so
316    // their value can be rendered.
317    let widgets = if !widget_kids.is_empty() {
318        widget_kids
319    } else if has_child_fields {
320        Vec::new()
321    } else {
322        vec![id] // the field dict is itself the widget
323    };
324    if widgets.is_empty() {
325        return;
326    }
327
328    let kind = field_kind(merged.ft.as_deref());
329    let options = if kind == FieldKind::Choice {
330        parse_options(file, dict)
331    } else {
332        Vec::new()
333    };
334    let max_len = int_value(file, dict.get("MaxLen"));
335
336    let index = state.fields.len();
337    for &w in &widgets {
338        state.widget_owner.entry(w).or_insert(index);
339    }
340    state.fields.push(FormField {
341        name,
342        kind,
343        flags: merged.flags,
344        value: merged.value,
345        default_appearance: merged.da,
346        quadding: merged.quadding,
347        max_len,
348        options,
349        widgets,
350    });
351}
352
353fn field_kind(ft: Option<&str>) -> FieldKind {
354    match ft {
355        Some("Tx") => FieldKind::Text,
356        Some("Btn") => FieldKind::Button,
357        Some("Ch") => FieldKind::Choice,
358        Some("Sig") => FieldKind::Signature,
359        _ => FieldKind::Unknown,
360    }
361}
362
363/// `/Opt`: each entry is a display string, or an `[export, display]` pair. The
364/// returned `(export, display)` keeps both; plain strings export == display.
365fn parse_options(file: &PdfFile, dict: &PdfDict) -> Vec<(String, String)> {
366    let as_text = |o: &PdfObject| match o {
367        PdfObject::String(s) => Some(pdf_string_to_unicode(s.as_bytes())),
368        _ => None,
369    };
370    deref_array(file, dict.get("Opt"))
371        .map(|arr| {
372            arr.iter()
373                .map(|o| match deref(file, o) {
374                    PdfObject::String(s) => {
375                        let t = pdf_string_to_unicode(s.as_bytes());
376                        (t.clone(), t)
377                    }
378                    PdfObject::Array(a) => {
379                        let export = a.first().and_then(as_text).unwrap_or_default();
380                        let display = a.get(1).and_then(as_text).unwrap_or_else(|| export.clone());
381                        (export, display)
382                    }
383                    _ => (String::new(), String::new()),
384                })
385                .collect()
386        })
387        .unwrap_or_default()
388}
389
390// ---------------------------------------------------------------------------
391// Appearance generation
392// ---------------------------------------------------------------------------
393
/// A synthesized appearance stream for a widget the producer left without one
/// (or that `/NeedAppearances` asks the viewer to regenerate). Mirrors a form
/// XObject: a `/BBox`, `/Matrix`, `/Resources` and a content byte stream.
#[derive(Debug, Clone)]
pub struct GeneratedAppearance {
    /// `/BBox`; `generate_widget_appearance` sets it to `(0, 0, w, h)` of the
    /// normalized widget rectangle.
    pub bbox: Rect,
    /// `/Matrix`; `generate_widget_appearance` uses the identity matrix.
    pub matrix: Matrix,
    /// `/Resources`, holding the `/Font` entry the content stream refers to.
    pub resources: PdfDict,
    /// The content-stream bytes.
    pub content: Vec<u8>,
}
404
405/// Build a generated appearance for a widget, or `None` when nothing should be
406/// drawn (button/signature fields, empty/absent values, password fields, or a
407/// degenerate rectangle). `dr_fonts` is the AcroForm `/DR /Font` dictionary,
408/// used to resolve the `/DA` font name to a concrete font object.
409pub fn generate_widget_appearance(
410    field: &FormField,
411    rect: Rect,
412    dr_fonts: Option<&PdfDict>,
413) -> Option<GeneratedAppearance> {
414    if !matches!(field.kind, FieldKind::Text | FieldKind::Choice) || field.is_password() {
415        return None;
416    }
417    // Cap pathological value lengths — no real field shows this much, and it
418    // bounds the synthesized content size / measurement work.
419    const MAX_VALUE_CHARS: usize = 50_000;
420    let text: String = field
421        .display_value()?
422        .chars()
423        .take(MAX_VALUE_CHARS)
424        .collect();
425    let rect = rect.normalize();
426    let (w, h) = (rect.width(), rect.height());
427    if w <= 1.0 || h <= 1.0 {
428        return None;
429    }
430
431    let da = field
432        .default_appearance
433        .as_deref()
434        .unwrap_or("/Helv 0 Tf 0 g");
435    let da = parse_da(da);
436    // The font name becomes both a content-stream token and a resource key, so
437    // sanitize it to a safe charset (fall back to the standard Helvetica key).
438    let font_res_name = da
439        .font
440        .as_deref()
441        .filter(|n| is_safe_resource_name(n))
442        .unwrap_or("Helv")
443        .to_string();
444    let base_font = resolve_base_font(dr_fonts, &font_res_name);
445
446    const PAD: f64 = 2.0;
447    let comb = field.is_comb();
448    let mut body: Vec<u8> = Vec::new();
449    push_str(&mut body, "BT\n");
450
451    // List boxes (a non-combo choice) stack their selected lines like a
452    // multiline text field; combo boxes and plain text fields are single-line.
453    let stacked =
454        field.is_multiline() || (field.kind == FieldKind::Choice && field.flags & FF_COMBO == 0);
455
456    if comb {
457        comb_layout(
458            &mut body,
459            &one_line(&text),
460            &da,
461            &base_font,
462            &font_res_name,
463            w,
464            h,
465            field,
466        );
467    } else if stacked {
468        multiline_layout(
469            &mut body,
470            &text,
471            &da,
472            &base_font,
473            &font_res_name,
474            w,
475            h,
476            PAD,
477            field.quadding,
478        );
479    } else {
480        single_line_layout(
481            &mut body,
482            &one_line(&text),
483            &da,
484            &base_font,
485            &font_res_name,
486            w,
487            h,
488            PAD,
489            field.quadding,
490        );
491    }
492    push_str(&mut body, "ET\n");
493
494    // Wrap in a marked-content `/Tx` block, clipped to the field. Text/multiline
495    // use a 2pt inset; comb cells span the full width, so they clip to the BBox.
496    let inset = if comb { 0.0 } else { PAD };
497    let clip_w = (w - 2.0 * inset).max(0.0);
498    let clip_h = (h - 2.0 * inset).max(0.0);
499    let mut content: Vec<u8> = Vec::new();
500    push_str(&mut content, "/Tx BMC\nq\n");
501    push_str(&mut content, &fmt_num(inset));
502    push_str(&mut content, " ");
503    push_str(&mut content, &fmt_num(inset));
504    push_str(&mut content, " ");
505    push_str(&mut content, &fmt_num(clip_w));
506    push_str(&mut content, " ");
507    push_str(&mut content, &fmt_num(clip_h));
508    push_str(&mut content, " re W n\n");
509    content.extend_from_slice(&body);
510    push_str(&mut content, "Q\nEMC\n");
511
512    Some(GeneratedAppearance {
513        bbox: Rect::new(0.0, 0.0, w, h),
514        matrix: Matrix::identity(),
515        resources: build_resources(dr_fonts, &font_res_name),
516        content,
517    })
518}
519
520#[allow(clippy::too_many_arguments)]
521fn single_line_layout(
522    body: &mut Vec<u8>,
523    text: &str,
524    da: &DaInfo,
525    base_font: &str,
526    font_res_name: &str,
527    w: f64,
528    h: f64,
529    pad: f64,
530    quadding: i64,
531) {
532    let usable = (w - 2.0 * pad).max(1.0);
533    let mut size = if da.size > 0.0 {
534        da.size
535    } else {
536        // Auto: fit the field height (capped), then shrink to fit the width.
537        let mut s = (h * 0.7).clamp(4.0, 12.0);
538        let tw = measure(text, base_font, s);
539        if tw > usable {
540            s *= usable / tw;
541        }
542        s.max(2.0)
543    };
544    if size <= 0.0 {
545        size = 12.0;
546    }
547
548    let tw = measure(text, base_font, size);
549    let x = match quadding {
550        1 => (w - tw) / 2.0, // centered
551        2 => w - pad - tw,   // right
552        _ => pad,            // left (default)
553    };
554    let y = vertical_baseline(h, size);
555
556    emit_font(body, da, font_res_name, size);
557    emit_line(body, x, y, text);
558}
559
560#[allow(clippy::too_many_arguments)]
561fn multiline_layout(
562    body: &mut Vec<u8>,
563    text: &str,
564    da: &DaInfo,
565    base_font: &str,
566    font_res_name: &str,
567    w: f64,
568    h: f64,
569    pad: f64,
570    quadding: i64,
571) {
572    let usable = (w - 2.0 * pad).max(1.0);
573    let usable_h = (h - 2.0 * pad).max(1.0);
574
575    // Auto (DA size 0): shrink so the wrapped lines fit the box height, capped
576    // at 12pt; otherwise honor the explicit size.
577    let size = if da.size > 0.0 {
578        da.size
579    } else {
580        let mut s = 12.0_f64;
581        while s > 4.0 {
582            let lines = wrap_lines(text, base_font, s, usable);
583            if lines.len() as f64 * s * 1.15 <= usable_h {
584                break;
585            }
586            s -= 1.0;
587        }
588        s
589    };
590    let leading = size * 1.15;
591    let lines = wrap_lines(text, base_font, size, usable);
592
593    emit_font(body, da, font_res_name, size);
594    // Top line baseline sits one ascent below the top inset.
595    let mut y = h - pad - size * 0.72;
596    for line in &lines {
597        if y < -size {
598            break; // fully below the box
599        }
600        let lw = measure(line, base_font, size);
601        let x = match quadding {
602            1 => (w - lw) / 2.0, // centered
603            2 => w - pad - lw,   // right
604            _ => pad,            // left (default)
605        };
606        emit_line(body, x, y, line);
607        y -= leading;
608    }
609}
610
611#[allow(clippy::too_many_arguments)]
612fn comb_layout(
613    body: &mut Vec<u8>,
614    text: &str,
615    da: &DaInfo,
616    base_font: &str,
617    font_res_name: &str,
618    w: f64,
619    h: f64,
620    field: &FormField,
621) {
622    let n = field.max_len.unwrap_or(1).max(1) as f64;
623    let cell = w / n;
624    let size = if da.size > 0.0 {
625        da.size
626    } else {
627        ((h - 4.0).min(cell)).clamp(2.0, 12.0)
628    };
629    let y = vertical_baseline(h, size);
630
631    emit_font(body, da, font_res_name, size);
632    for (i, ch) in text.chars().take(n as usize).enumerate() {
633        let s = ch.to_string();
634        let cw = measure(&s, base_font, size);
635        let x = cell * i as f64 + (cell - cw) / 2.0;
636        emit_line(body, x, y, &s);
637    }
638}
639
/// Baseline y that vertically centers one line of text of the given font
/// `size` in a box of height `h`, never below 0. Uses nominal Helvetica
/// ascent/descent ratios.
fn vertical_baseline(h: f64, size: f64) -> f64 {
    // The glyph box runs from 0.21·size below the baseline to 0.72·size above
    // it; putting its midpoint at h/2 gives baseline = h/2 - 0.255·size.
    let centered = h / 2.0 - 0.255 * size;
    f64::max(centered, 0.0)
}
647
648/// Emit the font/color setup: the DA color (or black) then `/Font size Tf`.
649fn emit_font(body: &mut Vec<u8>, da: &DaInfo, font_res_name: &str, size: f64) {
650    push_str(body, &format!("{}\n", da.color_ops));
651    push_str(body, &format!("/{font_res_name} {} Tf\n", fmt_num(size)));
652}
653
654/// Emit one absolutely-positioned line: `1 0 0 1 x y Tm (text) Tj`.
655fn emit_line(body: &mut Vec<u8>, x: f64, y: f64, text: &str) {
656    push_str(body, &format!("1 0 0 1 {} {} Tm\n", fmt_num(x), fmt_num(y)));
657    body.push(b'(');
658    escape_text(text, body);
659    push_str(body, ") Tj\n");
660}
661
/// Render a coordinate or size for the content stream with two decimals.
/// Non-finite input (NaN or ±infinity, e.g. from an overflowed measurement)
/// is written as `0`, so the stream never contains `inf` / `NaN` tokens.
fn fmt_num(v: f64) -> String {
    if !v.is_finite() {
        return String::from("0");
    }
    format!("{:.2}", v)
}
672
/// Whether `name` can be emitted verbatim as a content-stream `/Name` token
/// and used as a resource-dictionary key: 1 to 64 bytes long, made only of
/// ASCII letters, ASCII digits, and `_`, `-`, `+`, `.`.
fn is_safe_resource_name(name: &str) -> bool {
    if name.is_empty() || name.len() > 64 {
        return false;
    }
    name.chars().all(|c| {
        matches!(c, 'a'..='z' | 'A'..='Z' | '0'..='9' | '_' | '-' | '+' | '.')
    })
}
682
683/// Greedy word-wrap, also breaking on explicit newlines.
684fn wrap_lines(text: &str, base_font: &str, size: f64, usable: f64) -> Vec<String> {
685    // Anti-runaway ceiling, checked at the top so a newline-heavy value cannot
686    // bypass it through the empty-paragraph fast path.
687    const MAX_LINES: usize = 1000;
688    let mut out = Vec::new();
689    for paragraph in text.split('\n') {
690        if out.len() > MAX_LINES {
691            break;
692        }
693        if paragraph.is_empty() {
694            out.push(String::new());
695            continue;
696        }
697        let mut line = String::new();
698        for word in paragraph.split(' ') {
699            let candidate = if line.is_empty() {
700                word.to_string()
701            } else {
702                format!("{line} {word}")
703            };
704            if measure(&candidate, base_font, size) <= usable || line.is_empty() {
705                line = candidate;
706            } else {
707                out.push(std::mem::take(&mut line));
708                line = word.to_string();
709            }
710        }
711        out.push(line);
712    }
713    out
714}
715
716/// Text width in text-space units at `size`, from the standard-14 metrics of
717/// `base_font` (or a 0.5-em estimate for non-standard faces).
718fn measure(text: &str, base_font: &str, size: f64) -> f64 {
719    let metrics = zpdf_font::standard_fonts::lookup(base_font);
720    let mut total = 0.0;
721    for ch in text.chars() {
722        let w1000 = match metrics {
723            Some(m) => {
724                let code = unicode_to_winansi(ch).unwrap_or(b'?') as usize;
725                m.widths[code] as f64
726            }
727            None => 500.0,
728        };
729        let w1000 = if w1000 == 0.0 { 500.0 } else { w1000 };
730        total += w1000 / 1000.0 * size;
731    }
732    total
733}
734
/// Parsed `/DA` default-appearance pieces we care about, as produced by
/// `parse_da`.
struct DaInfo {
    /// Font resource name from the `Tf` operator, without the leading `/`;
    /// `None` when the DA names no font.
    font: Option<String>,
    /// Font size from the `Tf` operator; `0.0` requests automatic sizing.
    size: f64,
    /// A color-setting fragment (`0 g`, `1 0 0 rg`, …) ready to emit verbatim.
    color_ops: String,
}
742
743/// Extract the font resource name, size, and color operators from a `/DA`
744/// content fragment (e.g. `0 0 1 rg /Helv 12 Tf`).
745fn parse_da(da: &str) -> DaInfo {
746    let mut font = None;
747    let mut size: f64 = 0.0;
748    let mut color = String::new();
749    let mut operands: Vec<&str> = Vec::new();
750
751    for tok in da.split_whitespace() {
752        match tok {
753            "Tf" => {
754                if operands.len() >= 2 {
755                    if let Some(name) = operands[operands.len() - 2].strip_prefix('/') {
756                        font = Some(name.to_string());
757                    }
758                    size = operands[operands.len() - 1].parse().unwrap_or(0.0);
759                }
760                operands.clear();
761            }
762            "g" if !operands.is_empty() => {
763                if let Some(c) = da_color(&operands, 1, "g") {
764                    color = c;
765                }
766                operands.clear();
767            }
768            "rg" if operands.len() >= 3 => {
769                if let Some(c) = da_color(&operands, 3, "rg") {
770                    color = c;
771                }
772                operands.clear();
773            }
774            "k" if operands.len() >= 4 => {
775                if let Some(c) = da_color(&operands, 4, "k") {
776                    color = c;
777                }
778                operands.clear();
779            }
780            other => operands.push(other),
781        }
782    }
783
784    // Clamp the font size to a sane ceiling so an adversarial DA (`/Helv 1e308
785    // Tf`) cannot overflow downstream width math to infinity.
786    const MAX_FONT_SIZE: f64 = 1000.0;
787    DaInfo {
788        font,
789        size: if size.is_finite() && size >= 0.0 {
790            size.min(MAX_FONT_SIZE)
791        } else {
792            0.0
793        },
794        color_ops: if color.is_empty() {
795            "0 g".to_string()
796        } else {
797            color
798        },
799    }
800}
801
/// Build a validated color-setting operator from the last `n` DA operands.
///
/// Every operand must parse as a finite number; each is clamped to `[0, 1]`
/// and written with four decimals, followed by `op` (e.g. `1.0000 0.0000
/// 0.0000 rg`). Returns `None` when any operand is not a finite number — so
/// adversarial tokens never reach the content stream verbatim — or when fewer
/// than `n` operands are available.
fn da_color(operands: &[&str], n: usize, op: &str) -> Option<String> {
    // `checked_sub` turns "not enough operands" into `None` instead of an
    // arithmetic underflow followed by an out-of-range slice panic.
    let start = operands.len().checked_sub(n)?;
    let parts = operands[start..]
        .iter()
        .map(|tok| {
            tok.parse::<f64>()
                .ok()
                .filter(|v| v.is_finite())
                .map(|v| format!("{:.4}", v.clamp(0.0, 1.0)))
        })
        .collect::<Option<Vec<String>>>()?;
    Some(format!("{} {op}", parts.join(" ")))
}
819
820/// Resolve a `/DA` font resource name to a base-font name for metrics: prefer
821/// the `/DR` font's `/BaseFont`, else map the conventional Acrobat resource
822/// name (`Helv`, `Cour`, …), else Helvetica.
823fn resolve_base_font(dr_fonts: Option<&PdfDict>, res_name: &str) -> String {
824    if let Some(dr) = dr_fonts {
825        if let Some(PdfObject::Dict(fd)) = dr.get(res_name) {
826            if let Ok(bf) = fd.get_name("BaseFont") {
827                return strip_subset_prefix(bf).to_string();
828            }
829        }
830    }
831    acrobat_standard_name(res_name).to_string()
832}
833
/// Map a conventional AcroForm `/DR` font resource name to the standard-14
/// base font it denotes; any other name is returned unchanged.
///
/// The abbreviations are case-sensitive: a lowercase second half marks a
/// single style (`HeBo` = Helvetica-Bold, `HeOb` = Helvetica-Oblique) while
/// `BO` / `BI` mark the combined bold-oblique / bold-italic faces
/// (`HeBO` = Helvetica-BoldOblique, `TiBI` = Times-BoldItalic).
fn acrobat_standard_name(res_name: &str) -> &str {
    match res_name {
        "Helv" => "Helvetica",
        "HeBo" => "Helvetica-Bold",
        "HeOb" => "Helvetica-Oblique",
        "HeBO" => "Helvetica-BoldOblique",
        "Cour" => "Courier",
        "CoBo" => "Courier-Bold",
        "CoOb" => "Courier-Oblique",
        "CoBO" => "Courier-BoldOblique",
        "TiRo" => "Times-Roman",
        "TiBo" => "Times-Bold",
        "TiIt" => "Times-Italic",
        "TiBI" => "Times-BoldItalic",
        "Symb" => "Symbol",
        "ZaDb" => "ZapfDingbats",
        other => other,
    }
}
850
/// Drop a font-subset tag from a base-font name, e.g. `ABCDEF+Helvetica` →
/// `Helvetica`. Everything up to and including the last `+` is removed; a
/// name without `+` is returned unchanged.
fn strip_subset_prefix(name: &str) -> &str {
    match name.rfind('+') {
        // `+` is a single byte, so the remainder starts one byte after it.
        Some(plus) => &name[plus + 1..],
        None => name,
    }
}
855
856/// Build the appearance `/Resources`: a `/Font` dict mapping the DA font name to
857/// the `/DR` font object (if any) or a synthesized standard Helvetica.
858fn build_resources(dr_fonts: Option<&PdfDict>, font_res_name: &str) -> PdfDict {
859    let font_entry = dr_fonts
860        .and_then(|dr| dr.get(font_res_name).cloned())
861        .unwrap_or_else(|| PdfObject::Dict(helvetica_font_dict()));
862
863    let mut fonts = PdfDict::new();
864    fonts.insert(PdfName::new(font_res_name), font_entry);
865    let mut res = PdfDict::new();
866    res.insert(PdfName::new("Font"), PdfObject::Dict(fonts));
867    res
868}
869
870fn helvetica_font_dict() -> PdfDict {
871    let mut d = PdfDict::new();
872    d.insert(PdfName::new("Type"), PdfObject::Name(PdfName::new("Font")));
873    d.insert(
874        PdfName::new("Subtype"),
875        PdfObject::Name(PdfName::new("Type1")),
876    );
877    d.insert(
878        PdfName::new("BaseFont"),
879        PdfObject::Name(PdfName::new("Helvetica")),
880    );
881    d.insert(
882        PdfName::new("Encoding"),
883        PdfObject::Name(PdfName::new("WinAnsiEncoding")),
884    );
885    d
886}
887
888/// Escape a string into a PDF literal-string body (`(`/`)`/`\` and CR), encoding
889/// each character as its WinAnsiEncoding byte (the declared appearance-font
890/// encoding); characters with no WinAnsi byte fall back to `?`.
891fn escape_text(s: &str, out: &mut Vec<u8>) {
892    for ch in s.chars() {
893        let b = unicode_to_winansi(ch).unwrap_or(b'?');
894        match b {
895            b'\\' => out.extend_from_slice(b"\\\\"),
896            b'(' => out.extend_from_slice(b"\\("),
897            b')' => out.extend_from_slice(b"\\)"),
898            b'\r' => out.extend_from_slice(b"\\r"),
899            _ => out.push(b),
900        }
901    }
902}
903
/// Translate a Unicode scalar into its WinAnsiEncoding byte.
///
/// Printable ASCII (0x20–0x7E) and the Latin-1 range 0xA0–0xFF encode as
/// themselves. The WinAnsi bytes 0x80–0x9F carry typographic punctuation,
/// currency and a few accented letters whose Unicode code points lie at or
/// above 0x100; those are found by table lookup. Anything else has no WinAnsi
/// representation and yields `None`.
fn unicode_to_winansi(ch: char) -> Option<u8> {
    // Unicode scalar assigned to each WinAnsi byte 0x80..=0x9F, in byte order.
    // A zero marks a byte that WinAnsi leaves unassigned.
    const C1_BLOCK: [u32; 32] = [
        0x20AC, 0, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, // 0x80–0x87
        0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0, 0x017D, 0, // 0x88–0x8F
        0, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, // 0x90–0x97
        0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0, 0x017E, 0x0178, // 0x98–0x9F
    ];

    let cp = u32::from(ch);
    if matches!(cp, 0x20..=0x7E | 0xA0..=0xFF) {
        return Some(cp as u8);
    }

    // The explicit non-zero guard keeps U+0000 from matching an empty slot.
    C1_BLOCK
        .iter()
        .position(|&assigned| assigned != 0 && assigned == cp)
        .map(|slot| 0x80 + slot as u8)
}
942
/// Append the UTF-8 bytes of `s` to the byte buffer `out`.
fn push_str(out: &mut Vec<u8>, s: &str) {
    out.extend(s.bytes());
}
946
/// Flatten text for single-line / comb rendering: every line feed, carriage
/// return and tab is replaced by one space; all other characters are kept.
fn one_line(s: &str) -> String {
    let mut flat = String::with_capacity(s.len());
    for c in s.chars() {
        flat.push(if matches!(c, '\n' | '\r' | '\t') { ' ' } else { c });
    }
    flat
}
959
960// ---------------------------------------------------------------------------
961// Small resolution helpers
962// ---------------------------------------------------------------------------
963
964/// Resolve one level of indirection, returning `Null` on failure.
965fn deref(file: &PdfFile, obj: &PdfObject) -> PdfObject {
966    match obj {
967        PdfObject::Ref(r) => file.resolve(*r).unwrap_or(PdfObject::Null),
968        other => other.clone(),
969    }
970}
971
972fn deref_opt(file: &PdfFile, obj: Option<&PdfObject>) -> Option<PdfObject> {
973    obj.map(|o| deref(file, o))
974}
975
976fn deref_array(file: &PdfFile, obj: Option<&PdfObject>) -> Option<Vec<PdfObject>> {
977    match deref(file, obj?) {
978        PdfObject::Array(a) => Some(a),
979        _ => None,
980    }
981}
982
983/// A string's text, resolving one level of indirection.
984fn text_string(file: &PdfFile, obj: &PdfObject) -> Option<String> {
985    match deref(file, obj) {
986        PdfObject::String(s) => Some(pdf_string_to_unicode(s.as_bytes())),
987        _ => None,
988    }
989}
990
991fn field_value(file: &PdfFile, obj: Option<&PdfObject>) -> Option<FieldValue> {
992    match deref(file, obj?) {
993        PdfObject::String(s) => Some(FieldValue::Text(pdf_string_to_unicode(s.as_bytes()))),
994        PdfObject::Name(n) => Some(FieldValue::Name(n.0)),
995        PdfObject::Array(a) => {
996            let items: Vec<String> = a
997                .iter()
998                .filter_map(|o| match o {
999                    PdfObject::String(s) => Some(pdf_string_to_unicode(s.as_bytes())),
1000                    _ => None,
1001                })
1002                .collect();
1003            (!items.is_empty()).then_some(FieldValue::List(items))
1004        }
1005        _ => None,
1006    }
1007}
1008
1009fn int_value(file: &PdfFile, obj: Option<&PdfObject>) -> Option<i64> {
1010    match deref(file, obj?) {
1011        PdfObject::Integer(n) => Some(n),
1012        PdfObject::Real(r) => Some(r as i64),
1013        _ => None,
1014    }
1015}
1016
/// Decode a PDF text string (ISO 32000 §7.9.2.2) into a Rust `String`.
///
/// The encoding is selected by the leading byte-order mark:
///
/// * `FE FF` — UTF-16BE. Unpaired surrogates become U+FFFD and a trailing odd
///   byte is ignored.
/// * `EF BB BF` — UTF-8 (PDF 2.0). Invalid sequences become U+FFFD.
/// * anything else — PDFDocEncoding, decoded byte by byte.
///
/// PDFDocEncoding agrees with Latin-1 except for the accent block 0x18–0x1F,
/// the punctuation/ligature block 0x80–0x9E and the euro sign at 0xA0. Bytes
/// the encoding leaves undefined (0x7F, 0x9F, 0xAD) and the remaining control
/// codes are passed through as the code point of the same value.
fn pdf_string_to_unicode(bytes: &[u8]) -> String {
    /// Map one PDFDocEncoding byte to its Unicode scalar.
    fn pdfdoc_to_char(byte: u8) -> char {
        // Bytes 0x18..=0x1F: spacing accents.
        const ACCENTS: [char; 8] = [
            '\u{02D8}', '\u{02C7}', '\u{02C6}', '\u{02D9}', // breve caron circumflex dotaccent
            '\u{02DD}', '\u{02DB}', '\u{02DA}', '\u{02DC}', // hungarumlaut ogonek ring tilde
        ];
        // Bytes 0x80..=0x9E: typographic punctuation, ligatures, extra letters.
        const HIGH: [char; 31] = [
            '\u{2022}', '\u{2020}', '\u{2021}', '\u{2026}', // 0x80–0x83
            '\u{2014}', '\u{2013}', '\u{0192}', '\u{2044}', // 0x84–0x87
            '\u{2039}', '\u{203A}', '\u{2212}', '\u{2030}', // 0x88–0x8B
            '\u{201E}', '\u{201C}', '\u{201D}', '\u{2018}', // 0x8C–0x8F
            '\u{2019}', '\u{201A}', '\u{2122}', '\u{FB01}', // 0x90–0x93
            '\u{FB02}', '\u{0141}', '\u{0152}', '\u{0160}', // 0x94–0x97
            '\u{0178}', '\u{017D}', '\u{0131}', '\u{0142}', // 0x98–0x9B
            '\u{0153}', '\u{0161}', '\u{017E}', // 0x9C–0x9E
        ];
        match byte {
            0x18..=0x1F => ACCENTS[usize::from(byte - 0x18)],
            0x80..=0x9E => HIGH[usize::from(byte - 0x80)],
            0xA0 => '\u{20AC}',
            _ => char::from(byte),
        }
    }

    if let Some(utf16) = bytes.strip_prefix(&[0xFE_u8, 0xFF]) {
        let units: Vec<u16> = utf16
            .chunks_exact(2)
            .map(|pair| u16::from_be_bytes([pair[0], pair[1]]))
            .collect();
        String::from_utf16_lossy(&units)
    } else if let Some(utf8) = bytes.strip_prefix(&[0xEF_u8, 0xBB, 0xBF]) {
        String::from_utf8_lossy(utf8).into_owned()
    } else {
        bytes.iter().map(|&b| pdfdoc_to_char(b)).collect()
    }
}
1030
#[cfg(test)]
mod tests {
    use super::*;
    use crate::test_util::build_pdf;
    use crate::PdfDocument;

    /// A field carrying only a name, a kind and a text value. Every other
    /// attribute is zero/empty so each test overrides just what it exercises.
    fn field_with_text(name: &str, kind: FieldKind, text: &str) -> FormField {
        FormField {
            name: name.into(),
            kind,
            flags: 0,
            value: Some(FieldValue::Text(text.into())),
            default_appearance: None,
            quadding: 0,
            max_len: None,
            options: vec![],
            widgets: vec![],
        }
    }

    /// Walks a two-level field tree and checks qualified names, inherited
    /// attributes and widget ownership.
    #[test]
    fn field_tree_names_inheritance_and_widgets() {
        let objects = [
            "<< /Type /Catalog /Pages 2 0 R /AcroForm 4 0 R >>",
            "<< /Type /Pages /Kids [3 0 R] /Count 1 >>",
            "<< /Type /Page /Parent 2 0 R /MediaBox [0 0 200 200] >>",
            "<< /Fields [5 0 R] /DA (/Helv 0 Tf 0 g) /DR << /Font << /Helv 8 0 R >> >> >>",
            // The parent field supplies /FT, which both kids inherit.
            "<< /T (address) /FT /Tx /Kids [6 0 R 7 0 R] >>",
            "<< /T (street) /V (Main St) >>",
            "<< /T (city) /V (Springfield) /Q 1 >>",
            "<< /Type /Font /Subtype /Type1 /BaseFont /Helvetica >>",
        ];
        let doc = PdfDocument::open(build_pdf(&objects)).expect("open");
        let form = doc.acro_form().expect("acroform");

        assert!(!form.need_appearances);
        assert!(form.dr_fonts.is_some());
        assert_eq!(form.fields.len(), 2);

        let street = &form.fields[0];
        assert_eq!(street.name, "address.street");
        // /FT comes from the parent.
        assert_eq!(street.kind, FieldKind::Text);
        assert_eq!(street.value, Some(FieldValue::Text("Main St".into())));
        // /DA comes from the AcroForm dictionary.
        assert_eq!(street.default_appearance.as_deref(), Some("/Helv 0 Tf 0 g"));
        assert_eq!(street.quadding, 0);
        // With no widget kids, the terminal field doubles as its own widget.
        assert_eq!(street.widgets, vec![ObjectId(6, 0)]);
        let owner = form.field_for_widget(ObjectId(6, 0));
        assert_eq!(owner.map(|f| f.name.as_str()), Some("address.street"));

        let city = &form.fields[1];
        assert_eq!(city.name, "address.city");
        // The field's own /Q wins over the default.
        assert_eq!(city.quadding, 1);
    }

    /// A merged field+widget checkbox is modelled as a button with a name
    /// value, and never gets a generated appearance.
    #[test]
    fn single_widget_field_and_button_value() {
        let objects = [
            "<< /Type /Catalog /Pages 2 0 R /AcroForm 4 0 R >>",
            "<< /Type /Pages /Kids [3 0 R] /Count 1 >>",
            "<< /Type /Page /Parent 2 0 R /MediaBox [0 0 200 200] /Annots [5 0 R] >>",
            "<< /Fields [5 0 R] /NeedAppearances true >>",
            // Checkbox whose field dictionary is also the widget annotation.
            "<< /T (agree) /FT /Btn /V /Yes /AS /Yes /Subtype /Widget /Rect [10 10 30 30] >>",
        ];
        let doc = PdfDocument::open(build_pdf(&objects)).expect("open");
        let form = doc.acro_form().expect("acroform");

        assert!(form.need_appearances);
        assert_eq!(form.fields.len(), 1);

        let checkbox = &form.fields[0];
        assert_eq!(checkbox.name, "agree");
        assert_eq!(checkbox.kind, FieldKind::Button);
        assert_eq!(checkbox.value, Some(FieldValue::Name("Yes".into())));

        // Buttons keep their producer-supplied appearance states.
        let generated =
            generate_widget_appearance(checkbox, Rect::new(10.0, 10.0, 30.0, 30.0), None);
        assert!(generated.is_none());
    }

    /// A catalog without /AcroForm yields no form model.
    #[test]
    fn no_acroform_returns_none() {
        let objects = [
            "<< /Type /Catalog /Pages 2 0 R >>",
            "<< /Type /Pages /Kids [3 0 R] /Count 1 >>",
            "<< /Type /Page /Parent 2 0 R /MediaBox [0 0 200 200] >>",
        ];
        let doc = PdfDocument::open(build_pdf(&objects)).expect("open");
        assert!(doc.acro_form().is_none());
    }

    /// `/DA` parsing yields the font name, size and a sanitized color operator.
    #[test]
    fn da_parsing_extracts_font_size_color() {
        // Color operands are validated and written back with fixed precision.
        let rgb = parse_da("0 0 1 rg /Helv 12 Tf");
        assert_eq!(rgb.font.as_deref(), Some("Helv"));
        assert_eq!(rgb.size, 12.0);
        assert_eq!(rgb.color_ops, "0.0000 0.0000 1.0000 rg");

        let gray = parse_da("/Cour 0 Tf 0.2 g");
        assert_eq!(gray.font.as_deref(), Some("Cour"));
        assert_eq!(gray.size, 0.0);
        assert_eq!(gray.color_ops, "0.2000 g");

        // No color operator at all means black.
        let no_color = parse_da("/Helv 10 Tf");
        assert_eq!(no_color.color_ops, "0 g");

        // An absurd size is clamped.
        let huge = parse_da("/Helv 1e308 Tf");
        assert_eq!(huge.size, 1000.0);

        // A non-numeric color operand rejects the color, leaving the default.
        let injected = parse_da("1)Tj/Evil 0 0 rg /Helv 10 Tf");
        assert_eq!(injected.color_ops, "0 g");
    }

    /// Typographic punctuation maps to WinAnsi bytes rather than the `?`
    /// fallback.
    #[test]
    fn winansi_punctuation_round_trips() {
        let expectations = [
            ('\u{2019}', Some(0x92)), // right single quote
            ('\u{2014}', Some(0x97)), // em dash
            ('\u{20AC}', Some(0x80)), // euro
            ('A', Some(0x41)),
            ('\u{00E9}', Some(0xE9)), // é (Latin-1)
            ('\u{4E2D}', None),       // CJK has no WinAnsi byte
        ];
        for (ch, byte) in expectations {
            assert_eq!(unicode_to_winansi(ch), byte);
        }
    }

    /// Infinities and NaN are written as `0`; finite values are formatted.
    #[test]
    fn non_finite_numbers_never_reach_output() {
        assert_eq!(fmt_num(f64::INFINITY), "0");
        assert_eq!(fmt_num(f64::NAN), "0");
        assert_eq!(fmt_num(-1.5), "-1.50");
    }

    /// A `FE FF` byte-order mark selects UTF-16BE decoding.
    #[test]
    fn utf16be_value_is_decoded() {
        // BOM followed by "Hi" as big-endian UTF-16 code units.
        let encoded = [0xFE, 0xFF, 0x00, b'H', 0x00, b'i'];
        assert_eq!(pdf_string_to_unicode(&encoded), "Hi");
    }

    /// String delimiters and the escape character are backslash-escaped.
    #[test]
    fn escape_handles_parens_and_backslash() {
        let mut escaped = Vec::new();
        escape_text("a(b)\\c", &mut escaped);
        assert_eq!(escaped, b"a\\(b\\)\\\\c");
    }

    /// Known abbreviations expand; unknown names pass through.
    #[test]
    fn standard_name_mapping() {
        assert_eq!(acrobat_standard_name("Helv"), "Helvetica");
        assert_eq!(acrobat_standard_name("ZaDb"), "ZapfDingbats");
        assert_eq!(acrobat_standard_name("F1"), "F1");
    }

    /// A choice field shows the option label belonging to its export value.
    #[test]
    fn choice_value_maps_export_to_display_label() {
        let month = FormField {
            options: vec![
                ("01".into(), "January".into()),
                ("02".into(), "February".into()),
            ],
            ..field_with_text("month", FieldKind::Choice, "01")
        };
        // /V stores the export value "01"; the label drawn is "January".
        assert_eq!(month.display_value().as_deref(), Some("January"));

        // With no matching option the raw value is shown instead.
        let unknown = FormField {
            value: Some(FieldValue::Text("99".into())),
            ..month
        };
        assert_eq!(unknown.display_value().as_deref(), Some("99"));
    }

    /// The comb flag is ignored when the multiline flag is also set.
    #[test]
    fn comb_is_suppressed_when_multiline() {
        let field = FormField {
            flags: FF_COMB | FF_MULTILINE,
            max_len: Some(4),
            ..field_with_text("x", FieldKind::Text, "AB")
        };
        // Comb (bit 25) has no meaning on a multiline field.
        assert!(!field.is_comb());
        assert!(field.is_multiline());
    }

    /// Comb layout needs both the flag and a `/MaxLen`.
    #[test]
    fn comb_field_detection() {
        let with_max_len = FormField {
            flags: FF_COMB,
            max_len: Some(4),
            ..field_with_text("x", FieldKind::Text, "AB")
        };
        assert!(with_max_len.is_comb());

        // The flag alone, without MaxLen, does not make a comb field.
        let without_max_len = FormField {
            max_len: None,
            ..with_max_len.clone()
        };
        assert!(!without_max_len.is_comb());
    }

    /// A text field with a value produces a marked-content stream that shows
    /// the value, plus font resources.
    #[test]
    fn generated_appearance_draws_value() {
        let field = FormField {
            default_appearance: Some("/Helv 12 Tf 0 g".into()),
            ..field_with_text("name", FieldKind::Text, "Test")
        };
        let appearance =
            generate_widget_appearance(&field, Rect::new(0.0, 0.0, 200.0, 40.0), None)
                .expect("appearance");
        assert_eq!(appearance.bbox, Rect::new(0.0, 0.0, 200.0, 40.0));

        let stream = String::from_utf8_lossy(&appearance.content);
        for fragment in ["/Tx BMC", "Tf", "(Test) Tj"] {
            assert!(stream.contains(fragment));
        }
        // The resources must define the font named by the DA string.
        assert!(appearance.resources.get("Font").is_some());
    }

    /// Empty text, buttons and password fields all produce no appearance.
    #[test]
    fn empty_and_button_values_generate_nothing() {
        let empty = field_with_text("x", FieldKind::Text, "");
        let button = FormField {
            kind: FieldKind::Button,
            value: Some(FieldValue::Name("Yes".into())),
            ..empty.clone()
        };
        let password = FormField {
            flags: FF_PASSWORD,
            value: Some(FieldValue::Text("secret".into())),
            ..empty.clone()
        };

        for field in [&empty, &button, &password] {
            let generated =
                generate_widget_appearance(field, Rect::new(0.0, 0.0, 100.0, 20.0), None);
            assert!(generated.is_none());
        }
    }
}