Skip to main content

forme/pdf/
certify.rs

1//! # PDF Digital Certification
2//!
3//! Certifies PDF files using X.509 certificates with PKCS#7 detached signatures.
4//! Uses incremental update to append signature objects without modifying the
5//! original PDF content.
6//!
7//! ## Two-Pass Approach
8//!
9//! 1. **Pass 1**: Append signature dictionary (with placeholder `/Contents`),
10//!    signature field widget, and updated catalog via PDF incremental update.
11//! 2. **Pass 2**: Compute SHA-256 hash over the byte ranges excluding the
12//!    placeholder, sign with RSA, build PKCS#7 SignedData, and write the
13//!    DER-encoded signature into the placeholder.
14
15use crate::error::FormeError;
16use crate::model::CertificationConfig;
17
18use der::Encode;
19use pkcs8::{DecodePrivateKey, EncodePublicKey};
20use rsa::pkcs1v15::SigningKey;
21use rsa::RsaPrivateKey;
22use sha2::{Digest, Sha256};
23use signature::{SignatureEncoding, SignerMut};
24use x509_cert::Certificate;
25
/// Length, in hex characters, of the zero-filled `/Contents` placeholder that is
/// reserved for the signature (8192 hex chars = 4096 raw bytes).
///
/// The hex-encoded, DER-encoded PKCS#7 SignedData must fit in this many
/// characters; `certify_pdf` returns an error when it does not. Any unused tail
/// of the placeholder keeps its `'0'` padding.
const SIG_PLACEHOLDER_HEX_LEN: usize = 8192;
29
/// Structural metadata collected from an existing PDF by `scan_pdf_metadata`
/// and consumed when building the incremental update.
struct PdfScanResult {
    /// Number written after the last `startxref` keyword of the input.
    /// It is emitted as `/Prev` in the trailer of the incremental update.
    startxref_offset: usize,
    /// Value of `/Size` in the trailer; used as the first object ID of the
    /// newly appended objects.
    size: usize,
    /// Object number referenced by `/Root` in the trailer (the Catalog).
    root_obj: usize,
    /// Object number of the first `/Type /Page` object found (never a
    /// `/Type /Pages` node); the signature widget's `/P` entry points at it.
    first_page_obj: usize,
}
41
42/// Certify PDF bytes with an X.509 certificate, producing a valid digitally certified PDF.
43///
44/// Works on any valid PDF — either freshly rendered or loaded from disk.
45/// Uses incremental update to preserve the original PDF bytes.
46pub fn certify_pdf(pdf_bytes: &[u8], config: &CertificationConfig) -> Result<Vec<u8>, FormeError> {
47    // Parse certificate and private key
48    let cert = parse_pem_certificate(&config.certificate_pem)?;
49    let private_key = parse_pem_private_key(&config.private_key_pem)?;
50
51    // Verify key matches certificate by comparing public key DER
52    let cert_pub_key_der = cert
53        .tbs_certificate
54        .subject_public_key_info
55        .to_der()
56        .map_err(|e| FormeError::RenderError(format!("Failed to encode cert public key: {e}")))?;
57    let key_pub_der = rsa::RsaPublicKey::from(&private_key)
58        .to_public_key_der()
59        .map_err(|e| FormeError::RenderError(format!("Failed to encode public key: {e}")))?;
60
61    if cert_pub_key_der != key_pub_der.as_bytes() {
62        return Err(FormeError::RenderError(
63            "Certificate and private key do not match".to_string(),
64        ));
65    }
66
67    // Scan existing PDF for metadata
68    let scan = scan_pdf_metadata(pdf_bytes)?;
69
70    // Encode certificate to DER for embedding in PKCS#7
71    let cert_der = cert
72        .to_der()
73        .map_err(|e| FormeError::RenderError(format!("Failed to DER-encode certificate: {e}")))?;
74
75    // Pass 1: Build incremental update with signature placeholder
76    let (mut output, placeholder_offset) =
77        build_incremental_update(pdf_bytes, &scan, config, &cert_der)?;
78
79    // The placeholder starts at `placeholder_offset` and is SIG_PLACEHOLDER_HEX_LEN chars
80    // (enclosed in angle brackets: `<hex...>`).
81    // ByteRange: [0, before_sig_hex, after_sig_hex, total_len]
82    let before_sig_hex = placeholder_offset; // offset of '<'
83    let after_sig_hex = placeholder_offset + 1 + SIG_PLACEHOLDER_HEX_LEN + 1; // after '>'
84    let total_len = output.len();
85
86    // Update the ByteRange value in the output
87    update_byte_range(&mut output, before_sig_hex, after_sig_hex, total_len)?;
88
89    // Pass 2: Concatenate the signed byte ranges and sign
90    let mut signed_data = Vec::with_capacity(before_sig_hex + (total_len - after_sig_hex));
91    signed_data.extend_from_slice(&output[0..before_sig_hex]);
92    signed_data.extend_from_slice(&output[after_sig_hex..total_len]);
93
94    // Sign with RSA PKCS#1 v1.5 (SigningKey hashes internally with SHA-256)
95    let mut signing_key = SigningKey::<Sha256>::new(private_key);
96    let sig_result: rsa::pkcs1v15::Signature = signing_key.sign(&signed_data);
97    let sig_bytes = sig_result.to_bytes();
98
99    // Compute the hash for PKCS#7 SignedData construction
100    let hash = Sha256::digest(&signed_data);
101
102    // Build PKCS#7 SignedData
103    let pkcs7_der = build_pkcs7_signed_data(&cert_der, &sig_bytes, &hash)?;
104
105    // Hex-encode and write into placeholder
106    let hex_sig = hex_encode(&pkcs7_der);
107    if hex_sig.len() > SIG_PLACEHOLDER_HEX_LEN {
108        return Err(FormeError::RenderError(format!(
109            "PKCS#7 signature ({} hex chars) exceeds placeholder size ({})",
110            hex_sig.len(),
111            SIG_PLACEHOLDER_HEX_LEN
112        )));
113    }
114
115    // Write hex signature into placeholder, pad with zeros
116    let sig_start = placeholder_offset + 1; // skip '<'
117    for (i, b) in hex_sig.bytes().enumerate() {
118        output[sig_start + i] = b;
119    }
120    // Remaining bytes are already '0' from the placeholder
121
122    Ok(output)
123}
124
125/// Parse a PEM-encoded X.509 certificate.
126fn parse_pem_certificate(pem: &str) -> Result<Certificate, FormeError> {
127    use der::DecodePem;
128    Certificate::from_pem(pem)
129        .map_err(|e| FormeError::RenderError(format!("Failed to parse PEM certificate: {e}")))
130}
131
132/// Parse a PEM-encoded RSA private key (PKCS#8 or PKCS#1 format).
133///
134/// Tries PKCS#8 first (`-----BEGIN PRIVATE KEY-----`), then falls back to
135/// PKCS#1 (`-----BEGIN RSA PRIVATE KEY-----`). Only RSA keys are supported.
136/// ECDSA, Ed25519, and other key types will produce a clear error message.
137fn parse_pem_private_key(pem: &str) -> Result<RsaPrivateKey, FormeError> {
138    use rsa::pkcs1::DecodeRsaPrivateKey;
139
140    // Try PKCS#8 first (most common format)
141    match RsaPrivateKey::from_pkcs8_pem(pem) {
142        Ok(key) => Ok(key),
143        Err(pkcs8_err) => {
144            // If it looks like a PKCS#1 key, try that format
145            if pem.contains("BEGIN RSA PRIVATE KEY") {
146                return RsaPrivateKey::from_pkcs1_pem(pem).map_err(|e| {
147                    FormeError::RenderError(format!(
148                        "Failed to parse PKCS#1 (RSA) private key: {e}"
149                    ))
150                });
151            }
152
153            let msg = pkcs8_err.to_string();
154            // Detect non-RSA key algorithms (ECDSA, Ed25519, etc.)
155            if msg.contains("algorithm") || msg.contains("OID") {
156                return Err(FormeError::RenderError(
157                    "Only RSA private keys are supported for PDF signing. \
158                     ECDSA, Ed25519, and other key types are not supported."
159                        .to_string(),
160                ));
161            }
162
163            Err(FormeError::RenderError(format!(
164                "Failed to parse PEM private key: {pkcs8_err}"
165            )))
166        }
167    }
168}
169
170/// Scan a PDF for structural metadata needed for incremental update.
171///
172/// Uses byte-level searching for `startxref` and trailer values to avoid
173/// offset corruption from `String::from_utf8_lossy` (which replaces invalid
174/// UTF-8 sequences with multi-byte U+FFFD, shifting string positions relative
175/// to byte positions).
176fn scan_pdf_metadata(pdf: &[u8]) -> Result<PdfScanResult, FormeError> {
177    // Find startxref by scanning raw bytes (last occurrence)
178    let startxref_pos = rfind_bytes(pdf, b"startxref")
179        .ok_or_else(|| FormeError::RenderError("No startxref found in PDF".to_string()))?;
180    let after_startxref = &pdf[startxref_pos + 9..];
181    let startxref_offset: usize = parse_number_from_bytes(after_startxref)
182        .ok_or_else(|| FormeError::RenderError("Cannot parse startxref value".to_string()))?;
183
184    // Find trailer by scanning raw bytes (last occurrence)
185    let trailer_pos = rfind_bytes(pdf, b"trailer")
186        .ok_or_else(|| FormeError::RenderError("No trailer found in PDF".to_string()))?;
187    let trailer_section = &pdf[trailer_pos..startxref_pos];
188
189    // Find /Size in trailer
190    let size = find_value_in_bytes(trailer_section, b"/Size")
191        .ok_or_else(|| FormeError::RenderError("No /Size found in trailer".to_string()))?;
192
193    // Find /Root reference
194    let root_obj = find_ref_in_bytes(trailer_section, b"/Root")
195        .ok_or_else(|| FormeError::RenderError("No /Root found in trailer".to_string()))?;
196
197    // Find first /Type /Page object (not /Pages) — use lossy string since we
198    // only need the object number, not byte offsets
199    let text = String::from_utf8_lossy(pdf);
200    let first_page_obj = find_first_page_obj(&text)
201        .ok_or_else(|| FormeError::RenderError("No /Type /Page found in PDF".to_string()))?;
202
203    Ok(PdfScanResult {
204        startxref_offset,
205        size,
206        root_obj,
207        first_page_obj,
208    })
209}
210
/// Return the start index of the last occurrence of `needle` in `haystack`.
///
/// Yields `None` when the needle is longer than the haystack or never occurs.
/// An empty needle matches at `haystack.len()`.
fn rfind_bytes(haystack: &[u8], needle: &[u8]) -> Option<usize> {
    // Highest index at which the needle could still fit.
    let last_start = haystack.len().checked_sub(needle.len())?;
    (0..=last_start)
        .rev()
        .find(|&start| haystack[start..].starts_with(needle))
}
223
/// Parse the first run of ASCII decimal digits found in `bytes`.
///
/// Every byte before the first digit is skipped, whether it is whitespace or
/// not. Returns `None` when there is no digit at all or when the digit run does
/// not fit in a `usize`.
fn parse_number_from_bytes(bytes: &[u8]) -> Option<usize> {
    let first_digit = bytes.iter().position(u8::is_ascii_digit)?;
    let tail = &bytes[first_digit..];
    let digit_count = tail.iter().take_while(|b| b.is_ascii_digit()).count();
    std::str::from_utf8(&tail[..digit_count]).ok()?.parse().ok()
}
234
235/// Find a numeric value after a key in raw bytes (e.g., "/Size 42").
236fn find_value_in_bytes(section: &[u8], key: &[u8]) -> Option<usize> {
237    let pos = find_bytes(section, key)?;
238    parse_number_from_bytes(&section[pos + key.len()..])
239}
240
241/// Find an object reference after a key in raw bytes (e.g., "/Root 1 0 R" → 1).
242fn find_ref_in_bytes(section: &[u8], key: &[u8]) -> Option<usize> {
243    let pos = find_bytes(section, key)?;
244    parse_number_from_bytes(&section[pos + key.len()..])
245}
246
/// Find the object number of the first `/Type /Page` object (never `/Type /Pages`).
///
/// For each `/Type /Page` occurrence the nearest preceding ` 0 obj` header is
/// located, and the object number is read as the run of ASCII digits directly in
/// front of it (ignoring whitespace between the number and the header tail).
/// Reading the digits directly, instead of the whole text since the last `\n`,
/// makes this work for PDFs that use CR-only line endings and for object headers
/// that share a line with earlier tokens.
///
/// Occurrences whose header cannot be resolved to a number are skipped and the
/// search continues. Returns `None` when no page object is found.
fn find_first_page_obj(text: &str) -> Option<usize> {
    const PAGE_MARKER: &str = "/Type /Page";
    const OBJ_HEADER_TAIL: &str = " 0 obj";

    let mut search_from = 0;
    while let Some(rel) = text[search_from..].find(PAGE_MARKER) {
        let marker_pos = search_from + rel;
        let marker_end = marker_pos + PAGE_MARKER.len();
        // Whatever happens below, the next search resumes after this marker.
        search_from = marker_end;

        // `/Type /Pages` is the page-tree node, not a page.
        if matches!(text.as_bytes().get(marker_end), Some(b's' | b'S')) {
            continue;
        }

        let Some(obj_pos) = text[..marker_pos].rfind(OBJ_HEADER_TAIL) else {
            continue;
        };

        // Take the trailing digit run of everything before " 0 obj". Digits are
        // ASCII, so the computed index is always a valid `str` boundary.
        let header = text[..obj_pos].trim_end();
        let digits_start = header
            .as_bytes()
            .iter()
            .rposition(|b| !b.is_ascii_digit())
            .map_or(0, |p| p + 1);
        if let Ok(obj_num) = header[digits_start..].parse::<usize>() {
            return Some(obj_num);
        }
    }
    None
}
270
/// Build the incremental update appended to the original PDF.
///
/// The original bytes are copied unchanged (a trailing newline is added if
/// missing) and the following objects are appended, in this order:
///
/// 1. the signature dictionary, holding a fixed-width `/ByteRange` placeholder
///    and a `/Contents` placeholder of `SIG_PLACEHOLDER_HEX_LEN` `'0'` chars;
/// 2. when `config.visible` is set, a Form XObject used as the widget appearance;
/// 3. the signature field widget, attached to `scan.first_page_obj`;
/// 4. a new Catalog object carrying an `/AcroForm` plus selected entries copied
///    from the original catalog;
/// 5. a cross-reference table for the new objects and a trailer whose `/Prev`
///    is `scan.startxref_offset`.
///
/// New object IDs start at `scan.size`.
///
/// Returns the complete PDF bytes and the byte offset of the `<` that opens the
/// `/Contents` placeholder. The body as written has no failing path; it always
/// returns `Ok`.
fn build_incremental_update(
    original: &[u8],
    scan: &PdfScanResult,
    config: &CertificationConfig,
    cert_der: &[u8],
) -> Result<(Vec<u8>, usize), FormeError> {
    let mut buf = Vec::from(original);

    // Ensure original ends with newline
    if !buf.ends_with(b"\n") {
        buf.push(b'\n');
    }

    // Object IDs for the appended objects, allocated consecutively from /Size.
    let next_id = scan.size;
    let sig_dict_id = next_id;
    let sig_field_id = next_id + 1;
    let new_catalog_id = next_id + 2;
    // If visible, allocate an extra object for the appearance stream XObject
    let ap_xobj_id = if config.visible {
        Some(next_id + 3)
    } else {
        None
    };
    // New trailer /Size: one past the highest object ID allocated above.
    let new_size = next_id + if config.visible { 4 } else { 3 };

    // Record xref entries: (obj_id, byte_offset)
    // Each offset is `buf.len()` captured immediately before the object is
    // written, so the order of the appends below must not change.
    let mut xref_entries: Vec<(usize, usize)> = Vec::new();

    // --- Signature Dictionary Object ---
    xref_entries.push((sig_dict_id, buf.len()));
    let date_str = format_pdf_date();

    // Build the sig dict with a placeholder ByteRange and Contents
    // ByteRange placeholder must be large enough — we use fixed-width formatting.
    // This literal must stay byte-identical to the needle that
    // `update_byte_range` searches for.
    let byte_range_placeholder = "/ByteRange [0 0000000000 0000000000 0000000000]";

    let mut sig_dict = format!(
        "{sig_dict_id} 0 obj\n<<\n/Type /Sig\n/Filter /Adobe.PPKLite\n/SubFilter /adbe.pkcs7.detached\n{byte_range_placeholder}\n/M ({date_str})\n"
    );

    // Optional descriptive entries; user-supplied text is escaped.
    if let Some(ref reason) = config.reason {
        sig_dict.push_str(&format!("/Reason ({})\n", escape_pdf_string(reason)));
    }
    if let Some(ref location) = config.location {
        sig_dict.push_str(&format!("/Location ({})\n", escape_pdf_string(location)));
    }
    if let Some(ref contact) = config.contact {
        sig_dict.push_str(&format!("/ContactInfo ({})\n", escape_pdf_string(contact)));
    }

    // Cert as hex string
    let cert_hex = hex_encode(cert_der);
    sig_dict.push_str(&format!("/Cert <{cert_hex}>\n"));

    sig_dict.push_str("/Contents <");
    buf.extend_from_slice(sig_dict.as_bytes());

    // Record position of the placeholder (including the '<').
    // The '<' is the last byte just written, hence `len() - 1`.
    let placeholder_offset = buf.len() - 1; // the '<' char

    // Write placeholder zeros
    buf.extend(std::iter::repeat_n(b'0', SIG_PLACEHOLDER_HEX_LEN));
    buf.extend_from_slice(b">\n>>\nendobj\n");

    // --- Appearance Stream XObject (visible signatures only) ---
    // Written before the field widget although its ID is higher; the xref
    // entries are sorted by ID further down.
    if let Some(ap_id) = ap_xobj_id {
        xref_entries.push((ap_id, buf.len()));

        // Box size defaults to 200 x 50 when not configured.
        let w = config.width.unwrap_or(200.0);
        let h = config.height.unwrap_or(50.0);

        // Extract signer name from certificate CN
        let signer_name =
            extract_cn_from_cert_der(cert_der).unwrap_or_else(|| "Unknown".to_string());
        let date_display = format_display_date();

        // Build appearance stream content: one text object per line, laid out
        // top-down starting `margin + font_size` below the top edge.
        let mut content = String::new();
        let font_size = 9.0_f64;
        let line_height = font_size + 3.0;
        let margin = 4.0_f64;
        let mut y_pos = h - margin - font_size;

        // "Digitally signed by"
        content.push_str(&format!(
            "BT /Helv {font_size:.1} Tf {margin:.2} {y_pos:.2} Td (Digitally signed by) Tj ET\n"
        ));
        y_pos -= line_height;

        // Signer name
        content.push_str(&format!(
            "BT /Helv {font_size:.1} Tf {margin:.2} {y_pos:.2} Td ({}) Tj ET\n",
            escape_pdf_string(&signer_name)
        ));
        y_pos -= line_height;

        // Date
        content.push_str(&format!(
            "BT /Helv {font_size:.1} Tf {margin:.2} {y_pos:.2} Td (Date: {date_display}) Tj ET\n"
        ));
        y_pos -= line_height;

        // Reason (if present)
        if let Some(ref reason) = config.reason {
            content.push_str(&format!(
                "BT /Helv {font_size:.1} Tf {margin:.2} {y_pos:.2} Td (Reason: {}) Tj ET\n",
                escape_pdf_string(reason)
            ));
            let _ = y_pos; // suppress unused warning on last iteration
        }
        // Reads `y_pos` once more so the final decrement above is not reported
        // as an unused assignment when no reason is configured.
        let _ = y_pos;

        // /Length counts only the content bytes; the newline written before
        // `endstream` below is not part of it.
        let content_bytes = content.as_bytes();
        let ap_obj = format!(
            "{ap_id} 0 obj\n<<\n/Type /XObject\n/Subtype /Form\n/BBox [0 0 {w:.2} {h:.2}]\n/Resources << /Font << /Helv << /Type /Font /Subtype /Type1 /BaseFont /Helvetica >> >> >>\n/Length {}\n>>\nstream\n",
            content_bytes.len()
        );
        buf.extend_from_slice(ap_obj.as_bytes());
        buf.extend_from_slice(content_bytes);
        buf.extend_from_slice(b"\nendstream\nendobj\n");
    }

    // --- Signature Field Widget ---
    xref_entries.push((sig_field_id, buf.len()));

    let sig_name = next_signature_name(original);

    // Visible signatures get a rectangle from the configured position and size
    // (defaults: origin 0,0 and 200 x 50); invisible ones get an empty rectangle.
    let rect = if config.visible {
        let x = config.x.unwrap_or(0.0);
        let y = config.y.unwrap_or(0.0);
        let w = config.width.unwrap_or(200.0);
        let h = config.height.unwrap_or(50.0);
        format!("[{x:.2} {y:.2} {:.2} {:.2}]", x + w, y + h)
    } else {
        "[0 0 0 0]".to_string()
    };

    // Link the appearance XObject only when one was written above.
    let ap_entry = if let Some(ap_id) = ap_xobj_id {
        format!("/AP << /N {ap_id} 0 R >>\n")
    } else {
        String::new()
    };

    // NOTE(review): `/F 132` = 4 + 128, which looks like the Print and Locked
    // annotation flags — confirm against the PDF specification.
    let sig_field = format!(
        "{sig_field_id} 0 obj\n<<\n/Type /Annot\n/Subtype /Widget\n/FT /Sig\n/T ({sig_name})\n/V {sig_dict_id} 0 R\n/Rect {rect}\n/P {page_ref} 0 R\n/F 132\n{ap_entry}>>\nendobj\n",
        page_ref = scan.first_page_obj
    );
    buf.extend_from_slice(sig_field.as_bytes());

    // --- Updated Catalog with AcroForm ---
    // A new catalog object is written under a fresh ID; the new trailer's /Root
    // points at it instead of the original catalog.
    xref_entries.push((new_catalog_id, buf.len()));

    // Read existing catalog to preserve its entries (especially /Pages reference)
    // We need to find the /Pages ref from the original catalog
    let original_lossy = String::from_utf8_lossy(original);
    let original_text: &str = &original_lossy;
    // NOTE(review): falls back to object 2 when the /Pages reference cannot be
    // found — a guess; confirm this is acceptable for inputs not produced here.
    let pages_ref = find_catalog_pages_ref(original_text, scan.root_obj).unwrap_or(2);

    // Merge existing AcroForm fields (from <TextField>, <Checkbox>, etc.) with the new signature field
    let existing_fields = find_existing_acroform_fields(original, scan.root_obj);
    let all_fields = if existing_fields.is_empty() {
        format!("{sig_field_id} 0 R")
    } else {
        let mut fields = existing_fields.join(" ");
        fields.push(' ');
        fields.push_str(&format!("{sig_field_id} 0 R"));
        fields
    };

    // Preserve existing AcroForm metadata (DA, NeedAppearances) from the original PDF
    let acroform_meta = find_existing_acroform_metadata(original, scan.root_obj);

    let mut acroform_entries = format!("/Fields [{all_fields}] /SigFlags 3");
    if acroform_meta.need_appearances {
        acroform_entries.push_str(" /NeedAppearances true");
    }
    if let Some(ref da) = acroform_meta.da {
        acroform_entries.push_str(&format!(" /DA ({})", escape_pdf_string(da)));
    }
    // Visible signatures use /Helv in their appearance stream, so the same font
    // resource is also declared in the AcroForm default resources.
    if config.visible {
        acroform_entries.push_str(
            " /DR << /Font << /Helv << /Type /Font /Subtype /Type1 /BaseFont /Helvetica >> >> >>",
        );
    }

    let mut catalog = format!(
        "{new_catalog_id} 0 obj\n<<\n/Type /Catalog\n/Pages {pages_ref} 0 R\n/AcroForm << {acroform_entries} >>\n"
    );

    // Only the catalog entries handled below are carried over into the new
    // catalog object.

    // Preserve /Lang if present
    if let Some(lang) = find_catalog_string(original_text, scan.root_obj, "/Lang") {
        catalog.push_str(&format!("/Lang ({lang})\n"));
    }

    // Preserve /MarkInfo if present (re-emitted as a fixed `/Marked true` dictionary)
    if catalog_has_key(original_text, scan.root_obj, "/MarkInfo") {
        catalog.push_str("/MarkInfo << /Marked true >>\n");
    }

    // Preserve /StructTreeRoot if present
    if let Some(struct_ref) = find_catalog_ref(original_text, scan.root_obj, "/StructTreeRoot") {
        catalog.push_str(&format!("/StructTreeRoot {struct_ref} 0 R\n"));
    }

    // Preserve /Metadata if present
    if let Some(meta_ref) = find_catalog_ref(original_text, scan.root_obj, "/Metadata") {
        catalog.push_str(&format!("/Metadata {meta_ref} 0 R\n"));
    }

    // Preserve /Names if present
    if let Some(names_ref) = find_catalog_ref(original_text, scan.root_obj, "/Names") {
        catalog.push_str(&format!("/Names {names_ref} 0 R\n"));
    }

    // Preserve /ViewerPreferences if present
    if let Some(vp_ref) = find_catalog_ref(original_text, scan.root_obj, "/ViewerPreferences") {
        catalog.push_str(&format!("/ViewerPreferences {vp_ref} 0 R\n"));
    }

    // Preserve /OutputIntents if present (for PDF/A)
    if let Some(oi_content) =
        find_catalog_array_content(original_text, scan.root_obj, "/OutputIntents")
    {
        catalog.push_str(&format!("/OutputIntents {oi_content}\n"));
    }

    catalog.push_str(">>\nendobj\n");
    buf.extend_from_slice(catalog.as_bytes());

    // --- Cross-Reference Table ---
    // Offset of the `xref` keyword; written after `startxref` in the trailer.
    let xref_offset = buf.len();
    buf.extend_from_slice(b"xref\n");

    // Write each entry as a separate subsection
    // Sort entries by object ID for proper xref table
    let mut sorted_entries = xref_entries.clone();
    sorted_entries.sort_by_key(|(id, _)| *id);

    // Group consecutive IDs into subsections
    let mut i = 0;
    while i < sorted_entries.len() {
        let start_id = sorted_entries[i].0;
        let mut count = 1;
        while i + count < sorted_entries.len() && sorted_entries[i + count].0 == start_id + count {
            count += 1;
        }
        // Subsection header: first object ID and number of entries.
        buf.extend_from_slice(format!("{start_id} {count}\n").as_bytes());
        for j in 0..count {
            let offset = sorted_entries[i + j].1;
            // Fixed 20-byte entry: 10-digit offset, 5-digit generation 0, `n`,
            // then space + newline.
            buf.extend_from_slice(format!("{offset:010} 00000 n \n").as_bytes());
        }
        i += count;
    }

    // --- Trailer ---
    // /Prev chains this update to the previous cross-reference section.
    buf.extend_from_slice(
        format!(
            "trailer\n<<\n/Size {new_size}\n/Root {new_catalog_id} 0 R\n/Prev {prev}\n>>\nstartxref\n{xref_offset}\n%%EOF\n",
            prev = scan.startxref_offset
        )
        .as_bytes(),
    );

    Ok((buf, placeholder_offset))
}
538
539/// Update the ByteRange placeholder with actual values.
540fn update_byte_range(
541    buf: &mut [u8],
542    before_sig: usize,
543    after_sig: usize,
544    total_len: usize,
545) -> Result<(), FormeError> {
546    // Find the ByteRange placeholder in the buffer
547    let needle = b"/ByteRange [0 0000000000 0000000000 0000000000]";
548    let pos = find_bytes(buf, needle).ok_or_else(|| {
549        FormeError::RenderError("ByteRange placeholder not found in output".to_string())
550    })?;
551
552    // Format the actual byte range with same total width
553    let br_str = format!(
554        "/ByteRange [0 {:>10} {:>10} {:>10}]",
555        before_sig,
556        after_sig,
557        total_len - after_sig
558    );
559    let br_bytes = br_str.as_bytes();
560
561    // Verify lengths match
562    if br_bytes.len() != needle.len() {
563        return Err(FormeError::RenderError(format!(
564            "ByteRange replacement length mismatch: {} vs {}",
565            br_bytes.len(),
566            needle.len()
567        )));
568    }
569
570    buf[pos..pos + br_bytes.len()].copy_from_slice(br_bytes);
571    Ok(())
572}
573
/// Return the start index of the first occurrence of `needle` in `haystack`.
///
/// An empty needle matches at index 0 (the original would panic, because
/// `slice::windows` rejects a window size of zero). Returns `None` when the
/// needle does not occur.
fn find_bytes(haystack: &[u8], needle: &[u8]) -> Option<usize> {
    if needle.is_empty() {
        return Some(0);
    }
    haystack
        .windows(needle.len())
        .position(|window| window == needle)
}
580
581/// Build a PKCS#7 SignedData structure (DER-encoded).
582///
583/// This is a minimal but valid CMS SignedData for PDF signatures:
584/// - version 1
585/// - digestAlgorithms: SHA-256
586/// - encapContentInfo: id-data (detached, no content)
587/// - certificates: the signing certificate
588/// - signerInfos: one signer with RSA signature
589fn build_pkcs7_signed_data(
590    cert_der: &[u8],
591    signature_bytes: &[u8],
592    _hash: &[u8],
593) -> Result<Vec<u8>, FormeError> {
594    // We build the DER manually since the `cms` crate API can be tricky.
595    // PKCS#7 SignedData structure:
596    //
597    // ContentInfo {
598    //   contentType: id-signedData (1.2.840.113549.1.7.2)
599    //   content: SignedData {
600    //     version: 1
601    //     digestAlgorithms: { sha-256 }
602    //     encapContentInfo: { id-data }  (detached)
603    //     certificates: [0] IMPLICIT { cert }
604    //     signerInfos: {
605    //       SignerInfo {
606    //         version: 1
607    //         issuerAndSerialNumber: { issuer, serial }
608    //         digestAlgorithm: sha-256
609    //         signatureAlgorithm: rsaEncryption
610    //         signature: <bytes>
611    //       }
612    //     }
613    //   }
614    // }
615
616    use der::Decode;
617    let cert = x509_cert::Certificate::from_der(cert_der)
618        .map_err(|e| FormeError::RenderError(format!("Failed to parse cert DER: {e}")))?;
619
620    let issuer_der = cert
621        .tbs_certificate
622        .issuer
623        .to_der()
624        .map_err(|e| FormeError::RenderError(format!("Failed to encode issuer: {e}")))?;
625    let serial_der = cert.tbs_certificate.serial_number.as_bytes();
626
627    // OIDs
628    let oid_signed_data: &[u8] = &[
629        0x06, 0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x07, 0x02,
630    ]; // 1.2.840.113549.1.7.2
631    let oid_data: &[u8] = &[
632        0x06, 0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x07, 0x01,
633    ]; // 1.2.840.113549.1.7.1
634    let oid_sha256: &[u8] = &[
635        0x06, 0x09, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x01,
636    ]; // 2.16.840.1.101.3.4.2.1
637    let oid_rsa: &[u8] = &[
638        0x06, 0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x01,
639    ]; // 1.2.840.113549.1.1.1
640
641    // Build SignerInfo
642    let signer_info = {
643        let mut si = Vec::new();
644        // version INTEGER 1
645        si.extend_from_slice(&der_integer(1));
646        // issuerAndSerialNumber SEQUENCE { issuer, serial }
647        let mut ias = Vec::new();
648        ias.extend_from_slice(&issuer_der);
649        ias.extend_from_slice(&der_integer_bytes(serial_der));
650        si.extend_from_slice(&der_sequence(&ias));
651        // digestAlgorithm AlgorithmIdentifier { sha-256, NULL }
652        let mut da = Vec::new();
653        da.extend_from_slice(oid_sha256);
654        da.extend_from_slice(&[0x05, 0x00]); // NULL
655        si.extend_from_slice(&der_sequence(&da));
656        // signatureAlgorithm AlgorithmIdentifier { rsaEncryption, NULL }
657        let mut sa = Vec::new();
658        sa.extend_from_slice(oid_rsa);
659        sa.extend_from_slice(&[0x05, 0x00]); // NULL
660        si.extend_from_slice(&der_sequence(&sa));
661        // signature OCTET STRING
662        si.extend_from_slice(&der_octet_string(signature_bytes));
663
664        der_sequence(&si)
665    };
666
667    // Build SignedData
668    let signed_data = {
669        let mut sd = Vec::new();
670        // version INTEGER 1
671        sd.extend_from_slice(&der_integer(1));
672        // digestAlgorithms SET OF { AlgorithmIdentifier }
673        let mut da_set_content = Vec::new();
674        let mut alg_id = Vec::new();
675        alg_id.extend_from_slice(oid_sha256);
676        alg_id.extend_from_slice(&[0x05, 0x00]);
677        da_set_content.extend_from_slice(&der_sequence(&alg_id));
678        sd.extend_from_slice(&der_set(&da_set_content));
679        // encapContentInfo SEQUENCE { id-data } (detached — no content)
680        let mut eci = Vec::new();
681        eci.extend_from_slice(oid_data);
682        sd.extend_from_slice(&der_sequence(&eci));
683        // certificates [0] IMPLICIT SET OF Certificate
684        sd.extend_from_slice(&der_context_constructed(0, cert_der));
685        // signerInfos SET OF SignerInfo
686        let mut si_set = Vec::new();
687        si_set.extend_from_slice(&signer_info);
688        sd.extend_from_slice(&der_set(&si_set));
689
690        der_sequence(&sd)
691    };
692
693    // Build ContentInfo
694    let content_info = {
695        let mut ci = Vec::new();
696        ci.extend_from_slice(oid_signed_data);
697        // [0] EXPLICIT SignedData
698        ci.extend_from_slice(&der_context_constructed(0, &signed_data));
699        der_sequence(&ci)
700    };
701
702    Ok(content_info)
703}
704
705// --- DER encoding helpers ---
706
/// DER-encode a signed integer as an `INTEGER` (tag `0x02`).
///
/// The content is the minimal big-endian two's-complement form: a leading
/// `0x00` is dropped only while the next byte has its high bit clear, and a
/// leading `0xFF` only while the next byte has its high bit set. The previous
/// version stripped every leading `0xFF` from negative values, so for example
/// -129 lost its sign byte and encoded as +127.
fn der_integer(value: i64) -> Vec<u8> {
    let bytes = value.to_be_bytes();

    // Skip redundant sign-extension bytes, always keeping at least one byte.
    let mut start = 0;
    while start + 1 < bytes.len() {
        let lead = bytes[start];
        let next_high_bit_set = bytes[start + 1] & 0x80 != 0;
        let redundant =
            (lead == 0x00 && !next_high_bit_set) || (lead == 0xFF && next_high_bit_set);
        if !redundant {
            break;
        }
        start += 1;
    }
    let content = &bytes[start..];

    let mut out = Vec::with_capacity(2 + content.len());
    out.push(0x02);
    // At most 8 content bytes, so the short length form always applies.
    out.push(content.len() as u8);
    out.extend_from_slice(content);
    out
}
733
734fn der_integer_bytes(bytes: &[u8]) -> Vec<u8> {
735    // INTEGER from raw bytes (for serial numbers)
736    let mut result = vec![0x02];
737    // If high bit is set, prepend a zero byte
738    if !bytes.is_empty() && bytes[0] & 0x80 != 0 {
739        result.extend_from_slice(&der_length(bytes.len() + 1));
740        result.push(0x00);
741    } else {
742        result.extend_from_slice(&der_length(bytes.len()));
743    }
744    result.extend_from_slice(bytes);
745    result
746}
747
748fn der_octet_string(data: &[u8]) -> Vec<u8> {
749    let mut result = vec![0x04];
750    result.extend_from_slice(&der_length(data.len()));
751    result.extend_from_slice(data);
752    result
753}
754
755fn der_sequence(content: &[u8]) -> Vec<u8> {
756    let mut result = vec![0x30];
757    result.extend_from_slice(&der_length(content.len()));
758    result.extend_from_slice(content);
759    result
760}
761
762fn der_set(content: &[u8]) -> Vec<u8> {
763    let mut result = vec![0x31];
764    result.extend_from_slice(&der_length(content.len()));
765    result.extend_from_slice(content);
766    result
767}
768
769fn der_context_constructed(tag: u8, content: &[u8]) -> Vec<u8> {
770    let mut result = vec![0xA0 | tag];
771    result.extend_from_slice(&der_length(content.len()));
772    result.extend_from_slice(content);
773    result
774}
775
/// Encode a DER length.
///
/// Lengths below `0x80` use the one-byte short form. Larger lengths use the long
/// form: `0x80 | n` followed by the `n` significant big-endian bytes of `len`.
/// As many bytes as `len` requires are emitted, so lengths of 2^32 and above are
/// no longer silently truncated to four bytes.
fn der_length(len: usize) -> Vec<u8> {
    if len < 0x80 {
        return vec![len as u8];
    }

    // `len >= 0x80`, so at least one significant byte remains after trimming.
    let be = len.to_be_bytes();
    let leading_zeros = be.iter().take_while(|&&b| b == 0).count();
    let significant = &be[leading_zeros..];

    let mut out = Vec::with_capacity(1 + significant.len());
    // At most `size_of::<usize>()` bytes, so the count fits in the low 7 bits.
    out.push(0x80 | significant.len() as u8);
    out.extend_from_slice(significant);
    out
}
795
/// Encode bytes as an uppercase hexadecimal string, two characters per byte.
fn hex_encode(data: &[u8]) -> String {
    const DIGITS: &[u8; 16] = b"0123456789ABCDEF";

    let mut hex = String::with_capacity(data.len() * 2);
    for &byte in data {
        hex.push(char::from(DIGITS[usize::from(byte >> 4)]));
        hex.push(char::from(DIGITS[usize::from(byte & 0x0F)]));
    }
    hex
}
800
/// Escape text for use inside a PDF literal string `( ... )`.
///
/// A backslash is inserted before each `(`, `)` and `\`; every other character
/// is copied through unchanged.
fn escape_pdf_string(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        if matches!(ch, '(' | ')' | '\\') {
            escaped.push('\\');
        }
        escaped.push(ch);
    }
    escaped
}
814
/// Current Unix timestamp in whole seconds.
///
/// This variant is compiled only for `wasm32` targets whose OS is `unknown`;
/// it takes the value of `js_sys::Date::now()`, divides it by 1000 and
/// truncates the result to `u64`.
#[cfg(all(target_arch = "wasm32", target_os = "unknown"))]
pub(super) fn current_timestamp_secs() -> u64 {
    let now: f64 = js_sys::Date::now();
    (now / 1000.0) as u64
}
820
821#[cfg(not(all(target_arch = "wasm32", target_os = "unknown")))]
822pub(super) fn current_timestamp_secs() -> u64 {
823    std::time::SystemTime::now()
824        .duration_since(std::time::UNIX_EPOCH)
825        .unwrap_or_default()
826        .as_secs()
827}
828
829/// Format current time as PDF date string: D:YYYYMMDDHHmmss+00'00'
830pub(super) fn format_pdf_date() -> String {
831    let now = current_timestamp_secs();
832
833    // Convert epoch seconds to date components (UTC)
834    let days = now / 86400;
835    let time_of_day = now % 86400;
836    let hours = time_of_day / 3600;
837    let minutes = (time_of_day % 3600) / 60;
838    let seconds = time_of_day % 60;
839
840    // Days since 1970-01-01 to year/month/day
841    let (year, month, day) = epoch_days_to_ymd(days);
842
843    format!("D:{year:04}{month:02}{day:02}{hours:02}{minutes:02}{seconds:02}+00'00'")
844}
845
846/// Convert days since 1970-01-01 to (year, month, day).
847pub(super) fn epoch_days_to_ymd(days: u64) -> (u64, u64, u64) {
848    // Civil calendar algorithm from Howard Hinnant
849    let z = days + 719468;
850    let era = z / 146097;
851    let doe = z - era * 146097;
852    let yoe = (doe - doe / 1460 + doe / 36524 - doe / 146096) / 365;
853    let y = yoe + era * 400;
854    let doy = doe - (365 * yoe + yoe / 4 - yoe / 100);
855    let mp = (5 * doy + 2) / 153;
856    let d = doy - (153 * mp + 2) / 5 + 1;
857    let m = if mp < 10 { mp + 3 } else { mp - 9 };
858    let y = if m <= 2 { y + 1 } else { y };
859    (y, m, d)
860}
861
862/// Format current time as a human-readable display date: YYYY-MM-DD HH:MM UTC
863fn format_display_date() -> String {
864    let now = current_timestamp_secs();
865
866    let days = now / 86400;
867    let time_of_day = now % 86400;
868    let hours = time_of_day / 3600;
869    let minutes = (time_of_day % 3600) / 60;
870    let (year, month, day) = epoch_days_to_ymd(days);
871
872    format!("{year:04}-{month:02}-{day:02} {hours:02}:{minutes:02} UTC")
873}
874
/// Read the header of a DER tag-length-value element.
///
/// Returns `(tag, content_length, header_size)`, where `tag` is the first
/// byte of `bytes` and `header_size` is the number of bytes taken by the tag
/// plus the length field. Both the short (single byte) and the long
/// (`0x80 | n` followed by `n` big-endian octets) length forms are accepted.
///
/// Returns `None` when `bytes` has fewer than two bytes, when the long form
/// declares zero or more than four length octets, or when the declared length
/// octets are not all present.
fn parse_der_tag_length(bytes: &[u8]) -> Option<(u8, usize, usize)> {
    let (tag, first) = match bytes {
        [tag, first, ..] => (*tag, *first),
        _ => return None,
    };

    // Short form: the high bit is clear and the byte is the length itself.
    if first & 0x80 == 0 {
        return Some((tag, usize::from(first), 2));
    }

    // Long form: the low seven bits give the number of length octets.
    let octet_count = usize::from(first & 0x7F);
    if !(1..=4).contains(&octet_count) {
        return None;
    }
    let octets = bytes.get(2..2 + octet_count)?;
    let content_len = octets
        .iter()
        .fold(0usize, |acc, &octet| (acc << 8) | usize::from(octet));

    Some((tag, content_len, 2 + octet_count))
}
899
900/// Extract the Common Name (CN) from a DER-encoded X.509 certificate's Subject.
901fn extract_cn_from_cert_der(cert_der: &[u8]) -> Option<String> {
902    use der::Decode;
903    let cert = x509_cert::Certificate::from_der(cert_der).ok()?;
904
905    // OID for CommonName: 2.5.4.3
906    let cn_oid = const_oid::ObjectIdentifier::new_unwrap("2.5.4.3");
907
908    // Walk the RDN sequence looking for CN
909    for rdn in cert.tbs_certificate.subject.0.iter() {
910        for atv in rdn.0.iter() {
911            if atv.oid == cn_oid {
912                // The value is an ANY — try to extract as UTF8String or PrintableString
913                let value_bytes = atv.value.to_der().ok()?;
914                let (tag, len, hdr) = parse_der_tag_length(&value_bytes)?;
915                if value_bytes.len() >= hdr + len {
916                    let s = std::str::from_utf8(&value_bytes[hdr..hdr + len]).ok()?;
917                    // Filter for UTF8String (0x0C), PrintableString (0x13), IA5String (0x16)
918                    if tag == 0x0C || tag == 0x13 || tag == 0x16 {
919                        return Some(s.to_string());
920                    }
921                }
922            }
923        }
924    }
925    None
926}
927
928// --- Catalog parsing helpers ---
929
930/// Find the /Pages reference in the original catalog object.
931fn find_catalog_pages_ref(text: &str, root_obj: usize) -> Option<usize> {
932    find_catalog_ref(text, root_obj, "/Pages")
933}
934
/// Find the object number that follows `key` inside object `obj_id`.
///
/// The object is located by its `"{obj_id} 0 obj"` header and extends to the
/// next `endobj`. The header must not be preceded by an ASCII digit, so a
/// search for object 2 does not land inside `12 0 obj`.
///
/// Returns the leading run of ASCII digits after `key` (and any whitespace)
/// parsed as a number, i.e. the `N` of an `N 0 R` reference. Returns `None`
/// when the object or the key is missing, or when the key is not followed by
/// digits.
fn find_catalog_ref(text: &str, obj_id: usize, key: &str) -> Option<usize> {
    /// Text of object `obj_id`, from its header up to (not including) `endobj`.
    fn object_content(text: &str, obj_id: usize) -> Option<&str> {
        let header = format!("{obj_id} 0 obj");
        let start = text
            .match_indices(header.as_str())
            .map(|(at, _)| at)
            // Reject matches that are the tail of a longer object number.
            .find(|&at| at == 0 || !text.as_bytes()[at - 1].is_ascii_digit())?;
        let section = &text[start..];
        let end = section.find("endobj")?;
        Some(&section[..end])
    }

    let content = object_content(text, obj_id)?;
    let key_pos = content.find(key)?;
    let value = content[key_pos + key.len()..].trim_start();

    // Parse the "N" of "N 0 R".
    let digits_end = value
        .find(|c: char| !c.is_ascii_digit())
        .unwrap_or(value.len());
    if digits_end == 0 {
        return None;
    }
    value[..digits_end].parse().ok()
}
955
/// Report whether the text `key` occurs anywhere inside object `obj_id`.
///
/// The object is located by its `"{obj_id} 0 obj"` header and extends to the
/// next `endobj`. The header must not be preceded by an ASCII digit, so a
/// search for object 3 does not land inside `13 0 obj`. Returns `false` when
/// the object cannot be found or has no `endobj`.
fn catalog_has_key(text: &str, obj_id: usize, key: &str) -> bool {
    /// Text of object `obj_id`, from its header up to (not including) `endobj`.
    fn object_content(text: &str, obj_id: usize) -> Option<&str> {
        let header = format!("{obj_id} 0 obj");
        let start = text
            .match_indices(header.as_str())
            .map(|(at, _)| at)
            // Reject matches that are the tail of a longer object number.
            .find(|&at| at == 0 || !text.as_bytes()[at - 1].is_ascii_digit())?;
        let section = &text[start..];
        let end = section.find("endobj")?;
        Some(&section[..end])
    }

    object_content(text, obj_id).map_or(false, |content| content.contains(key))
}
967
/// Find the literal-string value of `key` inside object `obj_id`
/// (e.g. `/Lang (en-US)` yields `en-US`).
///
/// The object is located by its `"{obj_id} 0 obj"` header and extends to the
/// next `endobj`. The header must not be preceded by an ASCII digit, so a
/// search for object 1 does not land inside `21 0 obj`.
///
/// The returned text is the raw content between the outer parentheses:
/// backslash escapes are kept as written, and the string ends at the matching
/// `)` — escaped parentheses and balanced nested pairs do not terminate it.
///
/// Returns `None` when the object or key is missing, when the value does not
/// start with `(`, or when no closing parenthesis is found.
fn find_catalog_string(text: &str, obj_id: usize, key: &str) -> Option<String> {
    /// Text of object `obj_id`, from its header up to (not including) `endobj`.
    fn object_content(text: &str, obj_id: usize) -> Option<&str> {
        let header = format!("{obj_id} 0 obj");
        let start = text
            .match_indices(header.as_str())
            .map(|(at, _)| at)
            // Reject matches that are the tail of a longer object number.
            .find(|&at| at == 0 || !text.as_bytes()[at - 1].is_ascii_digit())?;
        let section = &text[start..];
        let end = section.find("endobj")?;
        Some(&section[..end])
    }

    let content = object_content(text, obj_id)?;
    let key_pos = content.find(key)?;
    let body = content[key_pos + key.len()..]
        .trim_start()
        .strip_prefix('(')?;

    let mut depth = 0usize;
    let mut escaped = false;
    for (idx, ch) in body.char_indices() {
        if escaped {
            // The character after a backslash never opens or closes a string.
            escaped = false;
            continue;
        }
        match ch {
            '\\' => escaped = true,
            '(' => depth += 1,
            ')' if depth == 0 => return Some(body[..idx].to_string()),
            ')' => depth -= 1,
            _ => {}
        }
    }
    None
}
985
/// Metadata extracted from an existing AcroForm dictionary.
struct AcroFormMetadata {
    /// `true` when the text `/NeedAppearances true` was found after `/AcroForm`.
    need_appearances: bool,
    /// Raw content of the `/DA` (default appearance) literal string, if found.
    da: Option<String>,
}

/// Extract AcroForm metadata (`/NeedAppearances`, `/DA`) from the catalog
/// object `root_obj` of `pdf`.
///
/// The PDF bytes are read as lossy UTF-8. The catalog is located by its
/// `"{root_obj} 0 obj"` header and extends to the next `endobj`; the header
/// must not be preceded by an ASCII digit, so a search for object 1 does not
/// land inside `11 0 obj`. Everything from `/AcroForm` to the end of the
/// catalog is then scanned.
///
/// When the catalog or its `/AcroForm` key cannot be found, the result has
/// `need_appearances == false` and `da == None`.
fn find_existing_acroform_metadata(pdf: &[u8], root_obj: usize) -> AcroFormMetadata {
    /// Text of object `obj_id`, from its header up to (not including) `endobj`.
    fn object_content(text: &str, obj_id: usize) -> Option<&str> {
        let header = format!("{obj_id} 0 obj");
        let start = text
            .match_indices(header.as_str())
            .map(|(at, _)| at)
            // Reject matches that are the tail of a longer object number.
            .find(|&at| at == 0 || !text.as_bytes()[at - 1].is_ascii_digit())?;
        let section = &text[start..];
        let end = section.find("endobj")?;
        Some(&section[..end])
    }

    let text = String::from_utf8_lossy(pdf);
    let after_acroform = match object_content(&text, root_obj)
        .and_then(|content| content.find("/AcroForm").map(|at| &content[at..]))
    {
        Some(section) => section,
        None => {
            return AcroFormMetadata {
                need_appearances: false,
                da: None,
            }
        }
    };

    let need_appearances = after_acroform.contains("/NeedAppearances true");

    // Extract the /DA (default appearance) string, up to the first ')'.
    let da = after_acroform.find("/DA").and_then(|at| {
        let value = after_acroform[at + "/DA".len()..]
            .trim_start()
            .strip_prefix('(')?;
        value.find(')').map(|end| value[..end].to_string())
    });

    AcroFormMetadata {
        need_appearances,
        da,
    }
}
1047
/// Pick the next unused signature field name of the form `SignatureN`.
///
/// The PDF bytes are read as lossy UTF-8 and scanned for `/T (Signature…)`
/// entries; whenever the text between the prefix and the next `)` parses as a
/// `u32` it counts as an existing index. The result uses the largest index
/// found plus one, or `Signature1` when none is found.
fn next_signature_name(pdf: &[u8]) -> String {
    const PREFIX: &str = "/T (Signature";

    let text = String::from_utf8_lossy(pdf);
    let highest = text
        .match_indices(PREFIX)
        .filter_map(|(at, _)| {
            let tail = &text[at + PREFIX.len()..];
            let close = tail.find(')')?;
            tail[..close].parse::<u32>().ok()
        })
        .max()
        .unwrap_or(0);

    format!("Signature{}", highest + 1)
}
1065
/// Collect the AcroForm `/Fields` references from the catalog object
/// `root_obj` of `pdf`.
///
/// The PDF bytes are read as lossy UTF-8. The catalog is located by its
/// `"{root_obj} 0 obj"` header and extends to the next `endobj`; the header
/// must not be preceded by an ASCII digit, so a search for object 1 does not
/// land inside `21 0 obj`.
///
/// Returns one `"N 0 R"` string per reference in the `/Fields [...]` array
/// that follows `/AcroForm`. Parsing stops at the first entry that is not of
/// the form `N 0 R`. An empty vector is returned when the catalog, the
/// `/AcroForm` key, or a bracketed `/Fields` array cannot be found.
fn find_existing_acroform_fields(pdf: &[u8], root_obj: usize) -> Vec<String> {
    /// Text of object `obj_id`, from its header up to (not including) `endobj`.
    fn object_content(text: &str, obj_id: usize) -> Option<&str> {
        let header = format!("{obj_id} 0 obj");
        let start = text
            .match_indices(header.as_str())
            .map(|(at, _)| at)
            // Reject matches that are the tail of a longer object number.
            .find(|&at| at == 0 || !text.as_bytes()[at - 1].is_ascii_digit())?;
        let section = &text[start..];
        let end = section.find("endobj")?;
        Some(&section[..end])
    }

    /// Text between `[` and the first `]` of the `/Fields` array after `/AcroForm`.
    fn fields_array(catalog: &str) -> Option<&str> {
        let acroform = &catalog[catalog.find("/AcroForm")?..];
        let after_key = &acroform[acroform.find("/Fields")? + "/Fields".len()..];
        let array = after_key.trim_start().strip_prefix('[')?;
        Some(&array[..array.find(']')?])
    }

    let text = String::from_utf8_lossy(pdf);
    let mut remaining = match object_content(&text, root_obj).and_then(fields_array) {
        Some(inner) => inner.trim(),
        None => return Vec::new(),
    };

    // Parse consecutive "N 0 R" references.
    let mut fields = Vec::new();
    while !remaining.is_empty() {
        let digits_end = remaining
            .find(|c: char| !c.is_ascii_digit())
            .unwrap_or(remaining.len());
        if digits_end == 0 {
            break;
        }
        let (obj_num, tail) = remaining.split_at(digits_end);
        match tail.trim_start().strip_prefix("0 R") {
            Some(after) => {
                fields.push(format!("{obj_num} 0 R"));
                remaining = after.trim_start();
            }
            None => break,
        }
    }
    fields
}
1128
/// Find the array value of `key` inside object `obj_id` and return it as raw
/// text, brackets included (e.g. `"[5 0 R]"`).
///
/// The object is located by its `"{obj_id} 0 obj"` header and extends to the
/// next `endobj`. The header must not be preceded by an ASCII digit, so a
/// search for object 1 does not land inside `11 0 obj`.
///
/// The array ends at the `]` that balances its opening `[`, so nested arrays
/// are returned whole rather than cut at the first inner `]`.
///
/// Returns `None` when the object or key is missing, when the value does not
/// start with `[`, or when the brackets never balance.
fn find_catalog_array_content(text: &str, obj_id: usize, key: &str) -> Option<String> {
    /// Text of object `obj_id`, from its header up to (not including) `endobj`.
    fn object_content(text: &str, obj_id: usize) -> Option<&str> {
        let header = format!("{obj_id} 0 obj");
        let start = text
            .match_indices(header.as_str())
            .map(|(at, _)| at)
            // Reject matches that are the tail of a longer object number.
            .find(|&at| at == 0 || !text.as_bytes()[at - 1].is_ascii_digit())?;
        let section = &text[start..];
        let end = section.find("endobj")?;
        Some(&section[..end])
    }

    let content = object_content(text, obj_id)?;
    let key_pos = content.find(key)?;
    let array = content[key_pos + key.len()..].trim_start();
    if !array.starts_with('[') {
        return None;
    }

    // The first byte is '[', so `depth` is at least 1 before any ']' is seen.
    let mut depth = 0usize;
    for (idx, byte) in array.bytes().enumerate() {
        match byte {
            b'[' => depth += 1,
            b']' => {
                depth -= 1;
                if depth == 0 {
                    return Some(array[..=idx].to_string());
                }
            }
            _ => {}
        }
    }
    None
}