1use std::collections::{HashMap, HashSet};
23
24use zpdf_core::{Matrix, ObjectId, PdfDict, PdfName, PdfObject, Rect};
25use zpdf_parser::PdfFile;
26
/// Deepest nesting level `walk_field` will descend to; fields nested deeper
/// than this are ignored.
const MAX_FIELD_DEPTH: usize = 50;
/// Upper bound on the number of terminal fields collected; `walk_field`
/// returns immediately once this many fields have been recorded.
const MAX_FIELDS: usize = 20_000;
31
// Bit masks for the field flags integer (`/Ff`). Each mask is `1 << (p - 1)`
// where `p` is the 1-based bit position used by the PDF specification's
// field-flag tables.

/// Bit position 1 (ReadOnly in the PDF specification).
pub const FF_READONLY: i64 = 1 << 0;
/// Bit position 13 (Multiline); tested by `FormField::is_multiline`.
pub const FF_MULTILINE: i64 = 1 << 12;
/// Bit position 14 (Password); tested by `FormField::is_password`.
pub const FF_PASSWORD: i64 = 1 << 13;
/// Bit position 16 (Radio in the PDF specification).
pub const FF_RADIO: i64 = 1 << 15;
/// Bit position 17 (Pushbutton in the PDF specification).
pub const FF_PUSHBUTTON: i64 = 1 << 16;
/// Bit position 18 (Combo); choice fields without it are laid out as stacked lines.
pub const FF_COMBO: i64 = 1 << 17;
/// Bit position 25 (Comb); tested by `FormField::is_comb`.
pub const FF_COMB: i64 = 1 << 24;
48
/// Category of a form field, derived from its `/FT` name.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FieldKind {
    /// `/Tx`
    Text,
    /// `/Btn`
    Button,
    /// `/Ch`
    Choice,
    /// `/Sig`
    Signature,
    /// Missing or unrecognised `/FT`.
    Unknown,
}

impl FieldKind {
    /// Returns the `/FT` name for this kind, or `"?"` for [`FieldKind::Unknown`].
    pub fn as_str(self) -> &'static str {
        match self {
            Self::Unknown => "?",
            Self::Signature => "Sig",
            Self::Choice => "Ch",
            Self::Button => "Btn",
            Self::Text => "Tx",
        }
    }
}
70
/// Decoded `/V` entry of a field (see `field_value`).
#[derive(Debug, Clone, PartialEq)]
pub enum FieldValue {
    /// Value stored as a PDF string, decoded to Unicode.
    Text(String),
    /// Value stored as a PDF name (e.g. a button state such as `Yes` / `Off`).
    Name(String),
    /// Value stored as an array; only the string elements are kept.
    List(Vec<String>),
}
81
/// One terminal form field collected by `walk_field`, with inheritable
/// attributes already merged from its ancestors.
#[derive(Debug, Clone)]
pub struct FormField {
    /// Fully qualified name: the `/T` partial names from the root down,
    /// joined with `.`.
    pub name: String,
    /// Field category derived from the (possibly inherited) `/FT`.
    pub kind: FieldKind,
    /// Field flags (`/Ff`), own value or inherited; see the `FF_*` masks.
    pub flags: i64,
    /// Current value (`/V`), own value or inherited.
    pub value: Option<FieldValue>,
    /// Default appearance string (`/DA`), own, inherited, or from the AcroForm dictionary.
    pub default_appearance: Option<String>,
    /// Quadding (`/Q`); the layout code treats 1 as centred, 2 as
    /// right-aligned and anything else as left-aligned.
    pub quadding: i64,
    /// `/MaxLen`, read from the field's own dictionary only.
    pub max_len: Option<i64>,
    /// `(export, display)` pairs from `/Opt`; populated for choice fields only.
    pub options: Vec<(String, String)>,
    /// Object ids of the widget annotations that belong to this field.
    pub widgets: Vec<ObjectId>,
}
108
109impl FormField {
110 pub fn display_value(&self) -> Option<String> {
115 let s = match self.value.as_ref()? {
116 FieldValue::Text(s) => self.choice_label(s),
117 FieldValue::Name(n) if n != "Off" => n.clone(),
118 FieldValue::Name(_) => return None,
119 FieldValue::List(v) => v
120 .iter()
121 .map(|s| self.choice_label(s))
122 .collect::<Vec<_>>()
123 .join("\n"),
124 };
125 (!s.is_empty()).then_some(s)
126 }
127
128 fn choice_label(&self, value: &str) -> String {
131 if self.kind == FieldKind::Choice {
132 if let Some((_, display)) = self.options.iter().find(|(export, _)| export == value) {
133 return display.clone();
134 }
135 }
136 value.to_string()
137 }
138
139 pub fn is_multiline(&self) -> bool {
140 self.kind == FieldKind::Text && self.flags & FF_MULTILINE != 0
141 }
142
143 pub fn is_password(&self) -> bool {
144 self.kind == FieldKind::Text && self.flags & FF_PASSWORD != 0
145 }
146
147 pub fn is_comb(&self) -> bool {
148 self.kind == FieldKind::Text
149 && self.flags & (FF_COMB | FF_MULTILINE | FF_PASSWORD) == FF_COMB
151 && self.max_len.unwrap_or(0) > 0
152 }
153}
154
/// Parsed interactive form (`/AcroForm`) of a document.
pub struct AcroForm {
    /// Terminal fields in the order the field tree walk recorded them.
    pub fields: Vec<FormField>,
    /// True when `/NeedAppearances` is the boolean `true`.
    pub need_appearances: bool,
    /// The `/Font` sub-dictionary of the form's `/DR`, if present.
    pub dr_fonts: Option<PdfDict>,
    /// Maps a widget's object id to the index of its owning field in `fields`.
    widget_owner: HashMap<ObjectId, usize>,
}
167
168impl AcroForm {
169 pub fn parse(file: &PdfFile) -> Option<AcroForm> {
172 let root_ref = file.trailer.get_ref("Root").ok()?;
173 let root = file.resolve(root_ref).ok()?;
174 let root = root.as_dict().ok()?;
175 let af = deref(file, root.get("AcroForm")?);
176 let af = af.as_dict().ok()?;
177
178 let need_appearances = matches!(af.get("NeedAppearances"), Some(PdfObject::Bool(true)));
179 let dr_fonts = deref_opt(file, af.get("DR"))
180 .and_then(|dr| dr.as_dict().ok().cloned())
181 .and_then(|dr| match dr.get("Font") {
182 Some(obj) => deref(file, obj).as_dict().ok().cloned(),
183 None => None,
184 });
185
186 let root_inherited = Inherited {
187 ft: None,
188 flags: 0,
189 value: None,
190 da: af.get("DA").and_then(|o| text_string(file, o)),
191 quadding: int_value(file, af.get("Q")).unwrap_or(0),
192 };
193
194 let mut state = WalkState {
195 file,
196 fields: Vec::new(),
197 widget_owner: HashMap::new(),
198 visited: HashSet::new(),
199 };
200 if let Some(arr) = deref_array(file, af.get("Fields")) {
201 for obj in &arr {
202 if let PdfObject::Ref(r) = obj {
203 walk_field(&mut state, *r, "", &root_inherited, 0);
204 }
205 }
206 }
207
208 Some(AcroForm {
209 fields: state.fields,
210 need_appearances,
211 dr_fonts,
212 widget_owner: state.widget_owner,
213 })
214 }
215
216 pub fn field_for_widget(&self, id: ObjectId) -> Option<&FormField> {
218 self.widget_owner.get(&id).and_then(|&i| self.fields.get(i))
219 }
220}
221
/// Attributes passed down the field tree by `walk_field`; each level either
/// overrides an attribute from its own dictionary or keeps the parent's value.
#[derive(Clone)]
struct Inherited {
    /// `/FT` name.
    ft: Option<String>,
    /// `/Ff` flags.
    flags: i64,
    /// `/V` value.
    value: Option<FieldValue>,
    /// `/DA` default appearance string.
    da: Option<String>,
    /// `/Q` quadding.
    quadding: i64,
}
231
/// Mutable accumulator threaded through the recursive `walk_field` calls.
struct WalkState<'a> {
    /// File used to resolve indirect references.
    file: &'a PdfFile,
    /// Terminal fields collected so far.
    fields: Vec<FormField>,
    /// Widget object id -> index into `fields`.
    widget_owner: HashMap<ObjectId, usize>,
    /// Field object ids already entered; prevents walking the same object twice.
    visited: HashSet<ObjectId>,
}
238
/// Recursively walks the field dictionary `id`, recording terminal fields in
/// `state.fields` and their widgets in `state.widget_owner`.
///
/// * `parent_name` - fully qualified name of the parent (empty at the root).
/// * `inherited` - attribute values in effect at the parent level.
/// * `depth` - current nesting level, checked against `MAX_FIELD_DEPTH`.
///
/// Objects that fail to resolve or are not dictionaries are silently skipped.
fn walk_field(
    state: &mut WalkState,
    id: ObjectId,
    parent_name: &str,
    inherited: &Inherited,
    depth: usize,
) {
    // Hard limits on recursion depth and on the total number of fields.
    if depth > MAX_FIELD_DEPTH || state.fields.len() >= MAX_FIELDS {
        return;
    }
    // `insert` returns false when the id was already present: the object has
    // been walked before (cycle or duplicate reference).
    if !state.visited.insert(id) {
        return;
    }
    let file = state.file;
    let obj = match file.resolve(id) {
        Ok(o) => o,
        Err(_) => return,
    };
    let Ok(dict) = obj.as_dict() else { return };

    // Fully qualified name = parent name + "." + this node's /T (when present).
    let partial = dict.get("T").and_then(|o| text_string(file, o));
    let name = match &partial {
        Some(t) if parent_name.is_empty() => t.clone(),
        Some(t) => format!("{parent_name}.{t}"),
        None => parent_name.to_string(),
    };

    // Own entries win; otherwise fall back to what the parent passed down.
    let merged = Inherited {
        ft: dict
            .get_name("FT")
            .ok()
            .map(String::from)
            .or_else(|| inherited.ft.clone()),
        flags: int_value(file, dict.get("Ff")).unwrap_or(inherited.flags),
        value: field_value(file, dict.get("V")).or_else(|| inherited.value.clone()),
        da: dict
            .get("DA")
            .and_then(|o| text_string(file, o))
            .or_else(|| inherited.da.clone()),
        quadding: int_value(file, dict.get("Q")).unwrap_or(inherited.quadding),
    };

    let kids = deref_array(file, dict.get("Kids")).unwrap_or_default();
    // A kid with a /T entry is treated as a child field; any other referenced
    // kid (including one that fails to resolve) is treated as a widget.
    let mut child_fields = Vec::new();
    let mut widget_kids = Vec::new();
    for kid in &kids {
        if let PdfObject::Ref(r) = kid {
            let kid_obj = file.resolve(*r).ok();
            let has_t = kid_obj
                .as_ref()
                .and_then(|o| o.as_dict().ok())
                .map(|d| d.get("T").is_some())
                .unwrap_or(false);
            if has_t {
                child_fields.push(*r);
            } else {
                widget_kids.push(*r);
            }
        }
    }

    let has_child_fields = !child_fields.is_empty();
    // Children are walked first, so they are recorded before this node.
    for r in child_fields {
        walk_field(state, r, &name, &merged, depth + 1);
    }

    // Widget selection:
    //  - explicit widget kids, if any;
    //  - nothing, when the node only has child fields (pure container);
    //  - otherwise the node itself is used as its own single widget.
    let widgets = if !widget_kids.is_empty() {
        widget_kids
    } else if has_child_fields {
        Vec::new()
    } else {
        vec![id]
    };
    if widgets.is_empty() {
        return;
    }

    let kind = field_kind(merged.ft.as_deref());
    // NOTE(review): /Opt and /MaxLen are read from this node's own dictionary
    // only and are not carried through `Inherited` - confirm that is intended.
    let options = if kind == FieldKind::Choice {
        parse_options(file, dict)
    } else {
        Vec::new()
    };
    let max_len = int_value(file, dict.get("MaxLen"));

    let index = state.fields.len();
    for &w in &widgets {
        // First owner wins if a widget is referenced by more than one field.
        state.widget_owner.entry(w).or_insert(index);
    }
    state.fields.push(FormField {
        name,
        kind,
        flags: merged.flags,
        value: merged.value,
        default_appearance: merged.da,
        quadding: merged.quadding,
        max_len,
        options,
        widgets,
    });
}
352
353fn field_kind(ft: Option<&str>) -> FieldKind {
354 match ft {
355 Some("Tx") => FieldKind::Text,
356 Some("Btn") => FieldKind::Button,
357 Some("Ch") => FieldKind::Choice,
358 Some("Sig") => FieldKind::Signature,
359 _ => FieldKind::Unknown,
360 }
361}
362
363fn parse_options(file: &PdfFile, dict: &PdfDict) -> Vec<(String, String)> {
366 let as_text = |o: &PdfObject| match o {
367 PdfObject::String(s) => Some(pdf_string_to_unicode(s.as_bytes())),
368 _ => None,
369 };
370 deref_array(file, dict.get("Opt"))
371 .map(|arr| {
372 arr.iter()
373 .map(|o| match deref(file, o) {
374 PdfObject::String(s) => {
375 let t = pdf_string_to_unicode(s.as_bytes());
376 (t.clone(), t)
377 }
378 PdfObject::Array(a) => {
379 let export = a.first().and_then(as_text).unwrap_or_default();
380 let display = a.get(1).and_then(as_text).unwrap_or_else(|| export.clone());
381 (export, display)
382 }
383 _ => (String::new(), String::new()),
384 })
385 .collect()
386 })
387 .unwrap_or_default()
388}
389
/// Appearance stream produced by `generate_widget_appearance`.
#[derive(Debug, Clone)]
pub struct GeneratedAppearance {
    /// Bounding box of the appearance: `(0, 0, width, height)` of the widget rectangle.
    pub bbox: Rect,
    /// Form matrix; the generator always supplies the identity matrix.
    pub matrix: Matrix,
    /// Resource dictionary holding the single `/Font` entry used by `content`.
    pub resources: PdfDict,
    /// Content stream bytes.
    pub content: Vec<u8>,
}
404
/// Builds an appearance stream that draws the field's current value inside
/// the widget rectangle `rect`.
///
/// * `field` - the owning field; only text and choice fields are handled, and
///   password fields are never rendered.
/// * `rect` - the widget rectangle; only its width and height are used.
/// * `dr_fonts` - the form's default font resources, used to look up the font
///   named in the default appearance string.
///
/// Returns `None` when the field kind is unsupported, there is no non-empty
/// display value, or the rectangle is at most one unit wide or tall.
pub fn generate_widget_appearance(
    field: &FormField,
    rect: Rect,
    dr_fonts: Option<&PdfDict>,
) -> Option<GeneratedAppearance> {
    if !matches!(field.kind, FieldKind::Text | FieldKind::Choice) || field.is_password() {
        return None;
    }
    // Cap the amount of text laid out so a huge value cannot blow up the stream.
    const MAX_VALUE_CHARS: usize = 50_000;
    let text: String = field
        .display_value()?
        .chars()
        .take(MAX_VALUE_CHARS)
        .collect();
    let rect = rect.normalize();
    let (w, h) = (rect.width(), rect.height());
    if w <= 1.0 || h <= 1.0 {
        return None;
    }

    // Fall back to auto-sized black Helvetica when the field has no /DA.
    let da = field
        .default_appearance
        .as_deref()
        .unwrap_or("/Helv 0 Tf 0 g");
    let da = parse_da(da);
    // The font name is written into the content stream, so only names made of
    // a conservative character set are accepted; anything else becomes "Helv".
    let font_res_name = da
        .font
        .as_deref()
        .filter(|n| is_safe_resource_name(n))
        .unwrap_or("Helv")
        .to_string();
    let base_font = resolve_base_font(dr_fonts, &font_res_name);

    const PAD: f64 = 2.0;
    let comb = field.is_comb();
    let mut body: Vec<u8> = Vec::new();
    push_str(&mut body, "BT\n");

    // Multiline text fields and choice fields without the combo flag are laid
    // out as stacked lines.
    let stacked =
        field.is_multiline() || (field.kind == FieldKind::Choice && field.flags & FF_COMBO == 0);

    if comb {
        comb_layout(
            &mut body,
            &one_line(&text),
            &da,
            &base_font,
            &font_res_name,
            w,
            h,
            field,
        );
    } else if stacked {
        multiline_layout(
            &mut body,
            &text,
            &da,
            &base_font,
            &font_res_name,
            w,
            h,
            PAD,
            field.quadding,
        );
    } else {
        single_line_layout(
            &mut body,
            &one_line(&text),
            &da,
            &base_font,
            &font_res_name,
            w,
            h,
            PAD,
            field.quadding,
        );
    }
    push_str(&mut body, "ET\n");

    // Comb fields clip to the full rectangle; other layouts clip to the padded area.
    let inset = if comb { 0.0 } else { PAD };
    let clip_w = (w - 2.0 * inset).max(0.0);
    let clip_h = (h - 2.0 * inset).max(0.0);
    let mut content: Vec<u8> = Vec::new();
    // Marked-content wrapper, saved graphics state and clip rectangle around the text.
    push_str(&mut content, "/Tx BMC\nq\n");
    push_str(&mut content, &fmt_num(inset));
    push_str(&mut content, " ");
    push_str(&mut content, &fmt_num(inset));
    push_str(&mut content, " ");
    push_str(&mut content, &fmt_num(clip_w));
    push_str(&mut content, " ");
    push_str(&mut content, &fmt_num(clip_h));
    push_str(&mut content, " re W n\n");
    content.extend_from_slice(&body);
    push_str(&mut content, "Q\nEMC\n");

    Some(GeneratedAppearance {
        bbox: Rect::new(0.0, 0.0, w, h),
        matrix: Matrix::identity(),
        resources: build_resources(dr_fonts, &font_res_name),
        content,
    })
}
519
520#[allow(clippy::too_many_arguments)]
521fn single_line_layout(
522 body: &mut Vec<u8>,
523 text: &str,
524 da: &DaInfo,
525 base_font: &str,
526 font_res_name: &str,
527 w: f64,
528 h: f64,
529 pad: f64,
530 quadding: i64,
531) {
532 let usable = (w - 2.0 * pad).max(1.0);
533 let mut size = if da.size > 0.0 {
534 da.size
535 } else {
536 let mut s = (h * 0.7).clamp(4.0, 12.0);
538 let tw = measure(text, base_font, s);
539 if tw > usable {
540 s *= usable / tw;
541 }
542 s.max(2.0)
543 };
544 if size <= 0.0 {
545 size = 12.0;
546 }
547
548 let tw = measure(text, base_font, size);
549 let x = match quadding {
550 1 => (w - tw) / 2.0, 2 => w - pad - tw, _ => pad, };
554 let y = vertical_baseline(h, size);
555
556 emit_font(body, da, font_res_name, size);
557 emit_line(body, x, y, text);
558}
559
560#[allow(clippy::too_many_arguments)]
561fn multiline_layout(
562 body: &mut Vec<u8>,
563 text: &str,
564 da: &DaInfo,
565 base_font: &str,
566 font_res_name: &str,
567 w: f64,
568 h: f64,
569 pad: f64,
570 quadding: i64,
571) {
572 let usable = (w - 2.0 * pad).max(1.0);
573 let usable_h = (h - 2.0 * pad).max(1.0);
574
575 let size = if da.size > 0.0 {
578 da.size
579 } else {
580 let mut s = 12.0_f64;
581 while s > 4.0 {
582 let lines = wrap_lines(text, base_font, s, usable);
583 if lines.len() as f64 * s * 1.15 <= usable_h {
584 break;
585 }
586 s -= 1.0;
587 }
588 s
589 };
590 let leading = size * 1.15;
591 let lines = wrap_lines(text, base_font, size, usable);
592
593 emit_font(body, da, font_res_name, size);
594 let mut y = h - pad - size * 0.72;
596 for line in &lines {
597 if y < -size {
598 break; }
600 let lw = measure(line, base_font, size);
601 let x = match quadding {
602 1 => (w - lw) / 2.0, 2 => w - pad - lw, _ => pad, };
606 emit_line(body, x, y, line);
607 y -= leading;
608 }
609}
610
611#[allow(clippy::too_many_arguments)]
612fn comb_layout(
613 body: &mut Vec<u8>,
614 text: &str,
615 da: &DaInfo,
616 base_font: &str,
617 font_res_name: &str,
618 w: f64,
619 h: f64,
620 field: &FormField,
621) {
622 let n = field.max_len.unwrap_or(1).max(1) as f64;
623 let cell = w / n;
624 let size = if da.size > 0.0 {
625 da.size
626 } else {
627 ((h - 4.0).min(cell)).clamp(2.0, 12.0)
628 };
629 let y = vertical_baseline(h, size);
630
631 emit_font(body, da, font_res_name, size);
632 for (i, ch) in text.chars().take(n as usize).enumerate() {
633 let s = ch.to_string();
634 let cw = measure(&s, base_font, size);
635 let x = cell * i as f64 + (cell - cw) / 2.0;
636 emit_line(body, x, y, &s);
637 }
638}
639
/// Baseline y that places text of the given size around the vertical middle
/// of a box of height `h`: `h / 2 - 0.255 * size`, never below zero.
fn vertical_baseline(h: f64, size: f64) -> f64 {
    let centred = h / 2.0 - 0.255 * size;
    f64::max(centred, 0.0)
}
647
648fn emit_font(body: &mut Vec<u8>, da: &DaInfo, font_res_name: &str, size: f64) {
650 push_str(body, &format!("{}\n", da.color_ops));
651 push_str(body, &format!("/{font_res_name} {} Tf\n", fmt_num(size)));
652}
653
654fn emit_line(body: &mut Vec<u8>, x: f64, y: f64, text: &str) {
656 push_str(body, &format!("1 0 0 1 {} {} Tm\n", fmt_num(x), fmt_num(y)));
657 body.push(b'(');
658 escape_text(text, body);
659 push_str(body, ") Tj\n");
660}
661
/// Formats a number for the content stream with two decimal places.
/// Non-finite values (NaN, infinities) are written as `0`.
fn fmt_num(v: f64) -> String {
    if !v.is_finite() {
        return String::from("0");
    }
    format!("{:.2}", v)
}
672
/// Returns true when `name` is 1 to 64 bytes long and consists only of ASCII
/// letters, digits, `_`, `-`, `+` and `.`.
fn is_safe_resource_name(name: &str) -> bool {
    if !(1..=64).contains(&name.len()) {
        return false;
    }
    // Any non-ASCII character contributes bytes >= 0x80, which fail this test.
    name.bytes()
        .all(|b| b.is_ascii_alphanumeric() || matches!(b, b'_' | b'-' | b'+' | b'.'))
}
682
683fn wrap_lines(text: &str, base_font: &str, size: f64, usable: f64) -> Vec<String> {
685 const MAX_LINES: usize = 1000;
688 let mut out = Vec::new();
689 for paragraph in text.split('\n') {
690 if out.len() > MAX_LINES {
691 break;
692 }
693 if paragraph.is_empty() {
694 out.push(String::new());
695 continue;
696 }
697 let mut line = String::new();
698 for word in paragraph.split(' ') {
699 let candidate = if line.is_empty() {
700 word.to_string()
701 } else {
702 format!("{line} {word}")
703 };
704 if measure(&candidate, base_font, size) <= usable || line.is_empty() {
705 line = candidate;
706 } else {
707 out.push(std::mem::take(&mut line));
708 line = word.to_string();
709 }
710 }
711 out.push(line);
712 }
713 out
714}
715
716fn measure(text: &str, base_font: &str, size: f64) -> f64 {
719 let metrics = zpdf_font::standard_fonts::lookup(base_font);
720 let mut total = 0.0;
721 for ch in text.chars() {
722 let w1000 = match metrics {
723 Some(m) => {
724 let code = unicode_to_winansi(ch).unwrap_or(b'?') as usize;
725 m.widths[code] as f64
726 }
727 None => 500.0,
728 };
729 let w1000 = if w1000 == 0.0 { 500.0 } else { w1000 };
730 total += w1000 / 1000.0 * size;
731 }
732 total
733}
734
/// Result of parsing a default appearance (`/DA`) string with `parse_da`.
struct DaInfo {
    /// Font resource name from the last `Tf` operator, without the leading `/`.
    font: Option<String>,
    /// Font size from the last `Tf` operator, limited to `0.0..=1000.0`;
    /// `0.0` means the layout code picks the size.
    size: f64,
    /// Re-serialised fill colour operator (`g`, `rg` or `k`); `"0 g"` when the
    /// string contained no valid colour.
    color_ops: String,
}
742
743fn parse_da(da: &str) -> DaInfo {
746 let mut font = None;
747 let mut size: f64 = 0.0;
748 let mut color = String::new();
749 let mut operands: Vec<&str> = Vec::new();
750
751 for tok in da.split_whitespace() {
752 match tok {
753 "Tf" => {
754 if operands.len() >= 2 {
755 if let Some(name) = operands[operands.len() - 2].strip_prefix('/') {
756 font = Some(name.to_string());
757 }
758 size = operands[operands.len() - 1].parse().unwrap_or(0.0);
759 }
760 operands.clear();
761 }
762 "g" if !operands.is_empty() => {
763 if let Some(c) = da_color(&operands, 1, "g") {
764 color = c;
765 }
766 operands.clear();
767 }
768 "rg" if operands.len() >= 3 => {
769 if let Some(c) = da_color(&operands, 3, "rg") {
770 color = c;
771 }
772 operands.clear();
773 }
774 "k" if operands.len() >= 4 => {
775 if let Some(c) = da_color(&operands, 4, "k") {
776 color = c;
777 }
778 operands.clear();
779 }
780 other => operands.push(other),
781 }
782 }
783
784 const MAX_FONT_SIZE: f64 = 1000.0;
787 DaInfo {
788 font,
789 size: if size.is_finite() && size >= 0.0 {
790 size.min(MAX_FONT_SIZE)
791 } else {
792 0.0
793 },
794 color_ops: if color.is_empty() {
795 "0 g".to_string()
796 } else {
797 color
798 },
799 }
800}
801
/// Re-serialises the last `n` operands as a colour operator string.
///
/// Each component must parse as a finite number; it is clamped to `0.0..=1.0`
/// and written with four decimals, followed by `op`.
///
/// Returns `None` when fewer than `n` operands are available or any component
/// is not a finite number.
fn da_color(operands: &[&str], n: usize, op: &str) -> Option<String> {
    // Checked subtraction: a short operand list yields None instead of an
    // out-of-range slice panic.
    let start = operands.len().checked_sub(n)?;
    let parts = operands[start..]
        .iter()
        .map(|t| {
            t.parse::<f64>()
                .ok()
                .filter(|v| v.is_finite())
                .map(|v| format!("{:.4}", v.clamp(0.0, 1.0)))
        })
        .collect::<Option<Vec<String>>>()?;
    Some(format!("{} {op}", parts.join(" ")))
}
819
820fn resolve_base_font(dr_fonts: Option<&PdfDict>, res_name: &str) -> String {
824 if let Some(dr) = dr_fonts {
825 if let Some(PdfObject::Dict(fd)) = dr.get(res_name) {
826 if let Ok(bf) = fd.get_name("BaseFont") {
827 return strip_subset_prefix(bf).to_string();
828 }
829 }
830 }
831 acrobat_standard_name(res_name).to_string()
832}
833
/// Maps the abbreviated font resource names conventionally used in form
/// default appearances to the corresponding standard-14 base font name.
/// Unrecognised names are returned unchanged.
///
/// The abbreviations are case-sensitive: `HeBo` / `CoBo` are the bold faces,
/// while `HeBO` / `CoBO` are the bold-oblique faces.
fn acrobat_standard_name(res_name: &str) -> &str {
    match res_name {
        "Helv" => "Helvetica",
        "HeBo" => "Helvetica-Bold",
        "HeOb" => "Helvetica-Oblique",
        "HeBO" => "Helvetica-BoldOblique",
        "Cour" => "Courier",
        "CoBo" => "Courier-Bold",
        "CoOb" => "Courier-Oblique",
        "CoBO" => "Courier-BoldOblique",
        "TiRo" => "Times-Roman",
        "TiBo" => "Times-Bold",
        "TiIt" => "Times-Italic",
        "TiBI" => "Times-BoldItalic",
        "Symb" => "Symbol",
        "ZaDb" => "ZapfDingbats",
        other => other,
    }
}
850
/// Returns the part of `name` after its last `+` (e.g. a subset tag such as
/// `ABCDEF+`), or `name` unchanged when it contains no `+`.
fn strip_subset_prefix(name: &str) -> &str {
    match name.rfind('+') {
        // '+' is a single byte, so the slice starts right after it.
        Some(plus) => &name[plus + 1..],
        None => name,
    }
}
855
856fn build_resources(dr_fonts: Option<&PdfDict>, font_res_name: &str) -> PdfDict {
859 let font_entry = dr_fonts
860 .and_then(|dr| dr.get(font_res_name).cloned())
861 .unwrap_or_else(|| PdfObject::Dict(helvetica_font_dict()));
862
863 let mut fonts = PdfDict::new();
864 fonts.insert(PdfName::new(font_res_name), font_entry);
865 let mut res = PdfDict::new();
866 res.insert(PdfName::new("Font"), PdfObject::Dict(fonts));
867 res
868}
869
870fn helvetica_font_dict() -> PdfDict {
871 let mut d = PdfDict::new();
872 d.insert(PdfName::new("Type"), PdfObject::Name(PdfName::new("Font")));
873 d.insert(
874 PdfName::new("Subtype"),
875 PdfObject::Name(PdfName::new("Type1")),
876 );
877 d.insert(
878 PdfName::new("BaseFont"),
879 PdfObject::Name(PdfName::new("Helvetica")),
880 );
881 d.insert(
882 PdfName::new("Encoding"),
883 PdfObject::Name(PdfName::new("WinAnsiEncoding")),
884 );
885 d
886}
887
888fn escape_text(s: &str, out: &mut Vec<u8>) {
892 for ch in s.chars() {
893 let b = unicode_to_winansi(ch).unwrap_or(b'?');
894 match b {
895 b'\\' => out.extend_from_slice(b"\\\\"),
896 b'(' => out.extend_from_slice(b"\\("),
897 b')' => out.extend_from_slice(b"\\)"),
898 b'\r' => out.extend_from_slice(b"\\r"),
899 _ => out.push(b),
900 }
901 }
902}
903
/// Maps a Unicode character to its WinAnsi byte, or `None` when the character
/// has no mapping here.
///
/// U+0020..=U+007E and U+00A0..=U+00FF map to the byte of the same value; the
/// characters occupying 0x80..=0x9F come from a lookup table.
fn unicode_to_winansi(ch: char) -> Option<u8> {
    // (Unicode code point, WinAnsi byte) for the 0x80..=0x9F block.
    const HIGH_BLOCK: [(u32, u8); 27] = [
        (0x20AC, 0x80),
        (0x201A, 0x82),
        (0x0192, 0x83),
        (0x201E, 0x84),
        (0x2026, 0x85),
        (0x2020, 0x86),
        (0x2021, 0x87),
        (0x02C6, 0x88),
        (0x2030, 0x89),
        (0x0160, 0x8A),
        (0x2039, 0x8B),
        (0x0152, 0x8C),
        (0x017D, 0x8E),
        (0x2018, 0x91),
        (0x2019, 0x92),
        (0x201C, 0x93),
        (0x201D, 0x94),
        (0x2022, 0x95),
        (0x2013, 0x96),
        (0x2014, 0x97),
        (0x02DC, 0x98),
        (0x2122, 0x99),
        (0x0161, 0x9A),
        (0x203A, 0x9B),
        (0x0153, 0x9C),
        (0x017E, 0x9E),
        (0x0178, 0x9F),
    ];

    let cp = ch as u32;
    if (0x20..=0x7E).contains(&cp) || (0xA0..=0xFF).contains(&cp) {
        return Some(cp as u8);
    }
    HIGH_BLOCK
        .iter()
        .find(|&&(unicode, _)| unicode == cp)
        .map(|&(_, byte)| byte)
}
942
/// Appends the UTF-8 bytes of `s` to `out`.
fn push_str(out: &mut Vec<u8>, s: &str) {
    out.extend(s.as_bytes());
}
946
/// Returns `s` with every newline, carriage return and tab replaced by a
/// single space, so the text can be laid out on one line.
fn one_line(s: &str) -> String {
    let mut flat = String::with_capacity(s.len());
    for c in s.chars() {
        flat.push(match c {
            '\n' | '\r' | '\t' => ' ',
            other => other,
        });
    }
    flat
}
959
960fn deref(file: &PdfFile, obj: &PdfObject) -> PdfObject {
966 match obj {
967 PdfObject::Ref(r) => file.resolve(*r).unwrap_or(PdfObject::Null),
968 other => other.clone(),
969 }
970}
971
972fn deref_opt(file: &PdfFile, obj: Option<&PdfObject>) -> Option<PdfObject> {
973 obj.map(|o| deref(file, o))
974}
975
976fn deref_array(file: &PdfFile, obj: Option<&PdfObject>) -> Option<Vec<PdfObject>> {
977 match deref(file, obj?) {
978 PdfObject::Array(a) => Some(a),
979 _ => None,
980 }
981}
982
983fn text_string(file: &PdfFile, obj: &PdfObject) -> Option<String> {
985 match deref(file, obj) {
986 PdfObject::String(s) => Some(pdf_string_to_unicode(s.as_bytes())),
987 _ => None,
988 }
989}
990
991fn field_value(file: &PdfFile, obj: Option<&PdfObject>) -> Option<FieldValue> {
992 match deref(file, obj?) {
993 PdfObject::String(s) => Some(FieldValue::Text(pdf_string_to_unicode(s.as_bytes()))),
994 PdfObject::Name(n) => Some(FieldValue::Name(n.0)),
995 PdfObject::Array(a) => {
996 let items: Vec<String> = a
997 .iter()
998 .filter_map(|o| match o {
999 PdfObject::String(s) => Some(pdf_string_to_unicode(s.as_bytes())),
1000 _ => None,
1001 })
1002 .collect();
1003 (!items.is_empty()).then_some(FieldValue::List(items))
1004 }
1005 _ => None,
1006 }
1007}
1008
1009fn int_value(file: &PdfFile, obj: Option<&PdfObject>) -> Option<i64> {
1010 match deref(file, obj?) {
1011 PdfObject::Integer(n) => Some(n),
1012 PdfObject::Real(r) => Some(r as i64),
1013 _ => None,
1014 }
1015}
1016
/// Decodes the bytes of a PDF text string to a Rust `String`.
///
/// * Bytes starting with the UTF-16BE byte order mark `FE FF` are decoded as
///   UTF-16BE (lossily; a trailing odd byte is ignored).
/// * Bytes starting with the UTF-8 byte order mark `EF BB BF` are decoded as
///   UTF-8 (lossily).
/// * Anything else is decoded as PDFDocEncoding, which matches Latin-1 except
///   for 0x18..=0x1F and 0x80..=0xA0. Codes undefined in PDFDocEncoding are
///   passed through as the code point of the same value.
fn pdf_string_to_unicode(bytes: &[u8]) -> String {
    if bytes.starts_with(&[0xFE, 0xFF]) {
        let units: Vec<u16> = bytes[2..]
            .chunks_exact(2)
            .map(|c| u16::from_be_bytes([c[0], c[1]]))
            .collect();
        return String::from_utf16_lossy(&units);
    }
    if bytes.starts_with(&[0xEF, 0xBB, 0xBF]) {
        return String::from_utf8_lossy(&bytes[3..]).into_owned();
    }
    bytes.iter().map(|&b| pdf_doc_encoding_char(b)).collect()
}

/// Maps one PDFDocEncoding byte to its Unicode character.
fn pdf_doc_encoding_char(b: u8) -> char {
    // 0x18..=0x1F: spacing diacritics.
    const LOW: [char; 8] = [
        '\u{02D8}', '\u{02C7}', '\u{02C6}', '\u{02D9}', '\u{02DD}', '\u{02DB}', '\u{02DA}',
        '\u{02DC}',
    ];
    // 0x80..=0xA0: punctuation, ligatures and extra Latin letters. 0x9F is
    // undefined in PDFDocEncoding and keeps its Latin-1 value.
    const HIGH: [char; 33] = [
        '\u{2022}', '\u{2020}', '\u{2021}', '\u{2026}', '\u{2014}', '\u{2013}', '\u{0192}',
        '\u{2044}', '\u{2039}', '\u{203A}', '\u{2212}', '\u{2030}', '\u{201E}', '\u{201C}',
        '\u{201D}', '\u{2018}', '\u{2019}', '\u{201A}', '\u{2122}', '\u{FB01}', '\u{FB02}',
        '\u{0141}', '\u{0152}', '\u{0160}', '\u{0178}', '\u{017D}', '\u{0131}', '\u{0142}',
        '\u{0153}', '\u{0161}', '\u{017E}', '\u{009F}', '\u{20AC}',
    ];
    match b {
        0x18..=0x1F => LOW[usize::from(b - 0x18)],
        0x80..=0xA0 => HIGH[usize::from(b - 0x80)],
        _ => char::from(b),
    }
}
1030
/// Unit tests for field-tree parsing, default-appearance parsing, text
/// encoding helpers and appearance generation.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::test_util::build_pdf;
    use crate::PdfDocument;

    /// A parent field with two named kids: names are dot-joined, /FT and /DA
    /// are inherited, and each terminal kid is its own widget.
    #[test]
    fn field_tree_names_inheritance_and_widgets() {
        let doc = PdfDocument::open(build_pdf(&[
            "<< /Type /Catalog /Pages 2 0 R /AcroForm 4 0 R >>",
            "<< /Type /Pages /Kids [3 0 R] /Count 1 >>",
            "<< /Type /Page /Parent 2 0 R /MediaBox [0 0 200 200] >>",
            "<< /Fields [5 0 R] /DA (/Helv 0 Tf 0 g) /DR << /Font << /Helv 8 0 R >> >> >>",
            "<< /T (address) /FT /Tx /Kids [6 0 R 7 0 R] >>",
            "<< /T (street) /V (Main St) >>",
            "<< /T (city) /V (Springfield) /Q 1 >>",
            "<< /Type /Font /Subtype /Type1 /BaseFont /Helvetica >>",
        ]))
        .expect("open");

        let form = doc.acro_form().expect("acroform");
        assert!(!form.need_appearances);
        assert!(form.dr_fonts.is_some());
        assert_eq!(form.fields.len(), 2);

        let street = &form.fields[0];
        assert_eq!(street.name, "address.street");
        // /FT comes from the parent field.
        assert_eq!(street.kind, FieldKind::Text);
        assert_eq!(street.value, Some(FieldValue::Text("Main St".into())));
        // /DA comes from the AcroForm dictionary.
        assert_eq!(street.default_appearance.as_deref(), Some("/Helv 0 Tf 0 g"));
        assert_eq!(street.quadding, 0);
        // A terminal field without kids acts as its own widget.
        assert_eq!(street.widgets, vec![ObjectId(6, 0)]);
        assert_eq!(
            form.field_for_widget(ObjectId(6, 0))
                .map(|f| f.name.as_str()),
            Some("address.street")
        );

        let city = &form.fields[1];
        assert_eq!(city.name, "address.city");
        // The field's own /Q overrides the default.
        assert_eq!(city.quadding, 1);
    }

    /// A merged field/widget dictionary with a name value; buttons never get
    /// a generated appearance.
    #[test]
    fn single_widget_field_and_button_value() {
        let doc = PdfDocument::open(build_pdf(&[
            "<< /Type /Catalog /Pages 2 0 R /AcroForm 4 0 R >>",
            "<< /Type /Pages /Kids [3 0 R] /Count 1 >>",
            "<< /Type /Page /Parent 2 0 R /MediaBox [0 0 200 200] /Annots [5 0 R] >>",
            "<< /Fields [5 0 R] /NeedAppearances true >>",
            "<< /T (agree) /FT /Btn /V /Yes /AS /Yes /Subtype /Widget /Rect [10 10 30 30] >>",
        ]))
        .expect("open");

        let form = doc.acro_form().expect("acroform");
        assert!(form.need_appearances);
        assert_eq!(form.fields.len(), 1);
        let f = &form.fields[0];
        assert_eq!(f.name, "agree");
        assert_eq!(f.kind, FieldKind::Button);
        assert_eq!(f.value, Some(FieldValue::Name("Yes".into())));
        assert!(generate_widget_appearance(f, Rect::new(10.0, 10.0, 30.0, 30.0), None).is_none());
    }

    /// A catalog without /AcroForm yields no form.
    #[test]
    fn no_acroform_returns_none() {
        let doc = PdfDocument::open(build_pdf(&[
            "<< /Type /Catalog /Pages 2 0 R >>",
            "<< /Type /Pages /Kids [3 0 R] /Count 1 >>",
            "<< /Type /Page /Parent 2 0 R /MediaBox [0 0 200 200] >>",
        ]))
        .expect("open");
        assert!(doc.acro_form().is_none());
    }

    /// Font, size and colour extraction, including hostile input.
    #[test]
    fn da_parsing_extracts_font_size_color() {
        // Colour operator before the font operator.
        let da = parse_da("0 0 1 rg /Helv 12 Tf");
        assert_eq!(da.font.as_deref(), Some("Helv"));
        assert_eq!(da.size, 12.0);
        assert_eq!(da.color_ops, "0.0000 0.0000 1.0000 rg");

        let da = parse_da("/Cour 0 Tf 0.2 g");
        assert_eq!(da.font.as_deref(), Some("Cour"));
        assert_eq!(da.size, 0.0);
        assert_eq!(da.color_ops, "0.2000 g");

        // No colour operator: default black.
        let da = parse_da("/Helv 10 Tf");
        assert_eq!(da.color_ops, "0 g");

        // Oversized font sizes are capped.
        let da = parse_da("/Helv 1e308 Tf");
        assert_eq!(da.size, 1000.0);
        // A non-numeric colour component is rejected, so no raw text leaks through.
        let da = parse_da("1)Tj/Evil 0 0 rg /Helv 10 Tf");
        assert_eq!(da.color_ops, "0 g");
    }

    /// Typographic punctuation maps into the 0x80..=0x9F WinAnsi block.
    #[test]
    fn winansi_punctuation_round_trips() {
        assert_eq!(unicode_to_winansi('\u{2019}'), Some(0x92));
        assert_eq!(unicode_to_winansi('\u{2014}'), Some(0x97));
        assert_eq!(unicode_to_winansi('\u{20AC}'), Some(0x80));
        assert_eq!(unicode_to_winansi('A'), Some(0x41));
        // Latin-1 range maps to the same byte value.
        assert_eq!(unicode_to_winansi('\u{00E9}'), Some(0xE9));
        // Characters outside the table have no mapping.
        assert_eq!(unicode_to_winansi('\u{4E2D}'), None);
    }

    /// NaN and infinities are written as `0`.
    #[test]
    fn non_finite_numbers_never_reach_output() {
        assert_eq!(fmt_num(f64::INFINITY), "0");
        assert_eq!(fmt_num(f64::NAN), "0");
        assert_eq!(fmt_num(-1.5), "-1.50");
    }

    /// Strings starting with the FE FF byte order mark are decoded as UTF-16BE.
    #[test]
    fn utf16be_value_is_decoded() {
        let bytes = [0xFE, 0xFF, 0x00, b'H', 0x00, b'i'];
        assert_eq!(pdf_string_to_unicode(&bytes), "Hi");
    }

    /// Parentheses and backslashes are backslash-escaped.
    #[test]
    fn escape_handles_parens_and_backslash() {
        let mut out = Vec::new();
        escape_text("a(b)\\c", &mut out);
        assert_eq!(out, b"a\\(b\\)\\\\c");
    }

    /// Abbreviated names map to base font names; unknown names pass through.
    #[test]
    fn standard_name_mapping() {
        assert_eq!(acrobat_standard_name("Helv"), "Helvetica");
        assert_eq!(acrobat_standard_name("ZaDb"), "ZapfDingbats");
        assert_eq!(acrobat_standard_name("F1"), "F1");
    }

    /// Choice values display the option label for a matching export value.
    #[test]
    fn choice_value_maps_export_to_display_label() {
        let f = FormField {
            name: "month".into(),
            kind: FieldKind::Choice,
            flags: 0,
            value: Some(FieldValue::Text("01".into())),
            default_appearance: None,
            quadding: 0,
            max_len: None,
            options: vec![
                ("01".into(), "January".into()),
                ("02".into(), "February".into()),
            ],
            widgets: vec![],
        };
        assert_eq!(f.display_value().as_deref(), Some("January"));
        // An export value with no matching option is shown as-is.
        let f2 = FormField {
            value: Some(FieldValue::Text("99".into())),
            ..f
        };
        assert_eq!(f2.display_value().as_deref(), Some("99"));
    }

    /// The comb flag has no effect when the multiline flag is also set.
    #[test]
    fn comb_is_suppressed_when_multiline() {
        let base = FormField {
            name: "x".into(),
            kind: FieldKind::Text,
            flags: FF_COMB | FF_MULTILINE,
            value: Some(FieldValue::Text("AB".into())),
            default_appearance: None,
            quadding: 0,
            max_len: Some(4),
            options: vec![],
            widgets: vec![],
        };
        assert!(!base.is_comb());
        assert!(base.is_multiline());
    }

    /// Comb layout requires both the comb flag and a positive MaxLen.
    #[test]
    fn comb_field_detection() {
        let f = FormField {
            name: "x".into(),
            kind: FieldKind::Text,
            flags: FF_COMB,
            value: Some(FieldValue::Text("AB".into())),
            default_appearance: None,
            quadding: 0,
            max_len: Some(4),
            options: vec![],
            widgets: vec![],
        };
        assert!(f.is_comb());
        // Without MaxLen there is no cell count to lay out against.
        let f2 = FormField {
            max_len: None,
            ..f.clone()
        };
        assert!(!f2.is_comb());
    }

    /// A simple text field produces a marked-content stream showing the value.
    #[test]
    fn generated_appearance_draws_value() {
        let f = FormField {
            name: "name".into(),
            kind: FieldKind::Text,
            flags: 0,
            value: Some(FieldValue::Text("Test".into())),
            default_appearance: Some("/Helv 12 Tf 0 g".into()),
            quadding: 0,
            max_len: None,
            options: vec![],
            widgets: vec![],
        };
        let ap = generate_widget_appearance(&f, Rect::new(0.0, 0.0, 200.0, 40.0), None)
            .expect("appearance");
        assert_eq!(ap.bbox, Rect::new(0.0, 0.0, 200.0, 40.0));
        let s = String::from_utf8_lossy(&ap.content);
        assert!(s.contains("/Tx BMC"));
        assert!(s.contains("Tf"));
        assert!(s.contains("(Test) Tj"));
        assert!(ap.resources.get("Font").is_some());
    }

    /// Empty values, buttons and password fields never get an appearance.
    #[test]
    fn empty_and_button_values_generate_nothing() {
        let base = FormField {
            name: "x".into(),
            kind: FieldKind::Text,
            flags: 0,
            value: Some(FieldValue::Text(String::new())),
            default_appearance: None,
            quadding: 0,
            max_len: None,
            options: vec![],
            widgets: vec![],
        };
        assert!(
            generate_widget_appearance(&base, Rect::new(0.0, 0.0, 100.0, 20.0), None).is_none()
        );

        let button = FormField {
            kind: FieldKind::Button,
            value: Some(FieldValue::Name("Yes".into())),
            ..base.clone()
        };
        assert!(
            generate_widget_appearance(&button, Rect::new(0.0, 0.0, 100.0, 20.0), None).is_none()
        );

        let password = FormField {
            flags: FF_PASSWORD,
            value: Some(FieldValue::Text("secret".into())),
            ..base
        };
        assert!(
            generate_widget_appearance(&password, Rect::new(0.0, 0.0, 100.0, 20.0), None).is_none()
        );
    }
}