Skip to main content

pdf_objects/
parser.rs

1use std::collections::{BTreeMap, BTreeSet};
2
3use crate::crypto::{BytesKind, StandardSecurityHandler};
4use crate::document::build_document;
5use crate::error::{PdfError, PdfResult};
6use crate::pubsec::{PubSecCredential, open_pubsec};
7use crate::stream::decode_stream;
8use crate::types::{
9    ObjectRef, PdfDictionary, PdfFile, PdfObject, PdfStream, PdfString, PdfValue, XrefEntry,
10    XrefForm,
11};
12
/// Caller-supplied credential for opening an encrypted PDF.
///
/// Documents using `/Filter /Standard` are opened with a password;
/// documents using `/Filter /Adobe.PubSec` are opened with an X.509
/// certificate plus its private key (both DER-encoded). When the trailer
/// carries no `/Encrypt` entry the credential is never consulted, so the
/// empty `Password(b"")` is the natural default.
#[derive(Clone, Copy)]
pub enum PdfCredential<'a> {
    /// Password bytes for the standard security handler.
    Password(&'a [u8]),
    /// Recipient certificate and matching private key for the public-key
    /// security handler.
    Certificate {
        /// DER-encoded X.509 certificate.
        cert_der: &'a [u8],
        /// DER-encoded private key belonging to `cert_der`.
        private_key_der: &'a [u8],
    },
}

/// `Debug` is written by hand so that logging a credential can never
/// print the password or private-key bytes; only the variant name and the
/// certificate length are shown.
impl std::fmt::Debug for PdfCredential<'_> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Password(_) => f.write_str("PdfCredential::Password(<redacted>)"),
            Self::Certificate { cert_der, .. } => f
                .debug_struct("PdfCredential::Certificate")
                .field("cert_der_len", &cert_der.len())
                .field("private_key_der", &"<redacted>")
                .finish(),
        }
    }
}
26
27/// Parses an unencrypted PDF, or an encrypted PDF whose user password is
28/// empty. For encrypted PDFs that require a user- or owner-supplied
29/// password, use [`parse_pdf_with_password`]; for `/Filter /Adobe.PubSec`
30/// PDFs, use [`parse_pdf_with_certificate`].
31pub fn parse_pdf(bytes: &[u8]) -> PdfResult<crate::document::ParsedDocument> {
32    parse_pdf_with_credential(bytes, PdfCredential::Password(b""))
33}
34
35/// Parses an encrypted PDF with a caller-supplied password. The password
36/// is tried first as the user password, then as the owner password; if
37/// neither authenticates, the function returns
38/// [`PdfError::InvalidPassword`]. For unencrypted documents the password
39/// is ignored.
40pub fn parse_pdf_with_password(
41    bytes: &[u8],
42    password: &[u8],
43) -> PdfResult<crate::document::ParsedDocument> {
44    parse_pdf_with_credential(bytes, PdfCredential::Password(password))
45}
46
47/// Parses an Adobe.PubSec-encrypted PDF using a recipient X.509
48/// certificate (DER) and its matching PKCS#8 private key (DER). Returns
49/// [`PdfError::InvalidPassword`] when no recipient blob in the PDF
50/// unwraps with the supplied private key. For password-encrypted or
51/// unencrypted documents this returns
52/// [`PdfError::Unsupported`] — use [`parse_pdf_with_password`] /
53/// [`parse_pdf`] respectively.
54pub fn parse_pdf_with_certificate(
55    bytes: &[u8],
56    cert_der: &[u8],
57    private_key_der: &[u8],
58) -> PdfResult<crate::document::ParsedDocument> {
59    parse_pdf_with_credential(
60        bytes,
61        PdfCredential::Certificate {
62            cert_der,
63            private_key_der,
64        },
65    )
66}
67
68/// Generic entry point that accepts either credential variant. The
69/// password and certificate wrappers above thread their arguments
70/// through this function.
71pub fn parse_pdf_with_credential(
72    bytes: &[u8],
73    credential: PdfCredential,
74) -> PdfResult<crate::document::ParsedDocument> {
75    let version = parse_header(bytes)?;
76    let startxref = find_startxref(bytes)?;
77    let (xref, mut trailer, xref_form) = parse_xref_table(bytes, startxref)?;
78
79    let mut objects = BTreeMap::new();
80    let mut max_object_number = 0;
81    let mut compressed: Vec<(ObjectRef, u32, u32)> = Vec::new();
82
83    for (object_ref, entry) in &xref {
84        match entry {
85            XrefEntry::Free => {}
86            XrefEntry::Uncompressed { offset, .. } => {
87                if object_ref.object_number == 0 {
88                    continue;
89                }
90                let object = parse_indirect_object(bytes, *offset, Some(&xref))?;
91                max_object_number = max_object_number.max(object_ref.object_number);
92                objects.insert(*object_ref, object);
93            }
94            XrefEntry::Compressed {
95                stream_object_number,
96                index,
97            } => {
98                compressed.push((*object_ref, *stream_object_number, *index));
99            }
100        }
101    }
102
103    // Decrypt in place before materializing object streams: the ObjStm stream
104    // itself is encrypted, but once its bytes are decrypted the contained
105    // members are plaintext and materialize_object_streams can proceed as
106    // usual. Order matters — if we materialized first, each ObjStm's decoded
107    // body would still be ciphertext and we'd parse garbage.
108    decrypt_document_if_encrypted(&mut objects, &mut trailer, credential)?;
109
110    materialize_object_streams(&mut objects, &mut max_object_number, &compressed)?;
111
112    let file = PdfFile {
113        version,
114        objects,
115        trailer,
116        max_object_number,
117        xref_form,
118    };
119    build_document(file)
120}
121
122fn decrypt_document_if_encrypted(
123    objects: &mut BTreeMap<ObjectRef, PdfObject>,
124    trailer: &mut PdfDictionary,
125    credential: PdfCredential,
126) -> PdfResult<()> {
127    let encrypt_ref = match trailer.get("Encrypt") {
128        Some(PdfValue::Reference(object_ref)) => *object_ref,
129        Some(PdfValue::Dictionary(_)) => {
130            return Err(PdfError::Unsupported(
131                "direct (non-indirect) /Encrypt dictionaries are not supported".to_string(),
132            ));
133        }
134        Some(_) => {
135            return Err(PdfError::Corrupt(
136                "trailer /Encrypt is not a reference".to_string(),
137            ));
138        }
139        None => return Ok(()),
140    };
141
142    let encrypt_dict = match objects.get(&encrypt_ref) {
143        Some(PdfObject::Value(PdfValue::Dictionary(dict))) => dict.clone(),
144        _ => {
145            return Err(PdfError::Corrupt(
146                "trailer /Encrypt does not point at a dictionary".to_string(),
147            ));
148        }
149    };
150
151    let filter_name = encrypt_dict
152        .get("Filter")
153        .and_then(PdfValue::as_name)
154        .unwrap_or("");
155
156    let handler = match filter_name {
157        "Standard" => match credential {
158            PdfCredential::Password(password) => {
159                let id_first = extract_id_first(trailer)?;
160                StandardSecurityHandler::open(&encrypt_dict, &id_first, password)?
161                    .ok_or(PdfError::InvalidPassword)?
162            }
163            PdfCredential::Certificate { .. } => {
164                return Err(PdfError::Unsupported(
165                    "/Filter /Standard requires a password, not a certificate".to_string(),
166                ));
167            }
168        },
169        "Adobe.PubSec" => match credential {
170            PdfCredential::Certificate {
171                cert_der,
172                private_key_der,
173            } => open_pubsec(
174                &encrypt_dict,
175                &PubSecCredential {
176                    certificate_der: cert_der,
177                    private_key_der,
178                },
179            )?,
180            PdfCredential::Password(_) => {
181                return Err(PdfError::Unsupported(
182                    "/Filter /Adobe.PubSec requires a certificate, not a password".to_string(),
183                ));
184            }
185        },
186        other => {
187            return Err(PdfError::Unsupported(format!(
188                "encryption filter /{other} is not supported"
189            )));
190        }
191    };
192
193    let refs: Vec<ObjectRef> = objects.keys().copied().collect();
194    for object_ref in refs {
195        if object_ref == encrypt_ref {
196            // Strings and streams in the Encrypt dictionary itself are
197            // exempt from encryption (PDF 1.7 §7.6.1).
198            continue;
199        }
200        let object = objects
201            .get_mut(&object_ref)
202            .expect("ref obtained from map keys must still be present");
203        match object {
204            PdfObject::Stream(stream) => {
205                // Cross-reference streams are never encrypted; metadata
206                // streams are exempt when the document sets
207                // /EncryptMetadata false (Tr. ISO 32000-1 §7.6.1).
208                let type_name = stream.dict.get("Type").and_then(PdfValue::as_name);
209                let is_xref_stream = type_name == Some("XRef");
210                let is_exempt_metadata =
211                    !handler.encrypts_metadata() && type_name == Some("Metadata");
212                decrypt_strings_in_dict(&mut stream.dict, &handler, object_ref)?;
213                if !is_xref_stream && !is_exempt_metadata {
214                    stream.data =
215                        handler.decrypt_bytes(&stream.data, object_ref, BytesKind::Stream)?;
216                }
217            }
218            PdfObject::Value(value) => {
219                decrypt_strings_in_value(value, &handler, object_ref)?;
220            }
221        }
222    }
223
224    trailer.remove("Encrypt");
225    // Remove the Encrypt dictionary object itself so the writer never
226    // emits its now-decrypted /O, /U, /OE, /UE, /Perms fields as
227    // dangling unreferenced bytes (would leak the password verifiers).
228    objects.remove(&encrypt_ref);
229    Ok(())
230}
231
232fn extract_id_first(trailer: &PdfDictionary) -> PdfResult<Vec<u8>> {
233    match trailer.get("ID") {
234        Some(PdfValue::Array(entries)) => match entries.first() {
235            Some(PdfValue::String(value)) => Ok(value.0.clone()),
236            _ => Err(PdfError::Corrupt(
237                "trailer /ID[0] is not a string — cannot derive encryption key".to_string(),
238            )),
239        },
240        _ => Err(PdfError::Corrupt(
241            "encrypted PDF is missing the trailer /ID array required for key derivation"
242                .to_string(),
243        )),
244    }
245}
246
247fn decrypt_strings_in_value(
248    value: &mut PdfValue,
249    handler: &StandardSecurityHandler,
250    object_ref: ObjectRef,
251) -> PdfResult<()> {
252    match value {
253        PdfValue::String(string) => {
254            string.0 = handler.decrypt_bytes(&string.0, object_ref, BytesKind::String)?;
255        }
256        PdfValue::Array(items) => {
257            for item in items {
258                decrypt_strings_in_value(item, handler, object_ref)?;
259            }
260        }
261        PdfValue::Dictionary(dict) => {
262            decrypt_strings_in_dict(dict, handler, object_ref)?;
263        }
264        _ => {}
265    }
266    Ok(())
267}
268
269fn decrypt_strings_in_dict(
270    dict: &mut PdfDictionary,
271    handler: &StandardSecurityHandler,
272    object_ref: ObjectRef,
273) -> PdfResult<()> {
274    for value in dict.values_mut() {
275        decrypt_strings_in_value(value, handler, object_ref)?;
276    }
277    Ok(())
278}
279
280fn parse_header(bytes: &[u8]) -> PdfResult<String> {
281    if !bytes.starts_with(b"%PDF-") {
282        return Err(PdfError::Parse("missing PDF header".to_string()));
283    }
284    let line_end = bytes
285        .iter()
286        .position(|byte| *byte == b'\n' || *byte == b'\r')
287        .ok_or_else(|| PdfError::Parse("unterminated header".to_string()))?;
288    Ok(String::from_utf8_lossy(&bytes[5..line_end])
289        .trim()
290        .to_string())
291}
292
293fn find_startxref(bytes: &[u8]) -> PdfResult<usize> {
294    let marker = b"startxref";
295    let position = bytes
296        .windows(marker.len())
297        .rposition(|window| window == marker)
298        .ok_or_else(|| PdfError::Parse("missing startxref".to_string()))?;
299    let mut parser = Cursor::new(bytes, position + marker.len());
300    parser.skip_ws_and_comments();
301    parser.parse_usize()
302}
303
304fn parse_xref_table(
305    bytes: &[u8],
306    start_offset: usize,
307) -> PdfResult<(BTreeMap<ObjectRef, XrefEntry>, PdfDictionary, XrefForm)> {
308    let mut merged_entries: BTreeMap<ObjectRef, XrefEntry> = BTreeMap::new();
309    let mut newest_trailer: Option<PdfDictionary> = None;
310    // The form of the very first section we visit (the one at startxref)
311    // determines the output shape. Older sections reached via /Prev or
312    // /XRefStm may use the opposite form, but the writer mirrors the
313    // newest section's shape only.
314    let mut top_form: Option<XrefForm> = None;
315    let mut visited = BTreeSet::new();
316    let mut pending: Vec<usize> = vec![start_offset];
317
318    while let Some(offset) = pending.pop() {
319        if !visited.insert(offset) {
320            continue;
321        }
322        let section = parse_xref_section_at(bytes, offset)?;
323
324        // Newest-first: only insert entries not already present
325        for (object_ref, entry) in section.entries {
326            merged_entries.entry(object_ref).or_insert(entry);
327        }
328
329        if newest_trailer.is_none() {
330            newest_trailer = Some(section.trailer.clone());
331            top_form = Some(section.form);
332        }
333
334        if let Some(stm_offset) = section
335            .trailer
336            .get("XRefStm")
337            .and_then(PdfValue::as_integer)
338        {
339            pending.push(stm_offset as usize);
340        }
341        if let Some(prev_offset) = section.trailer.get("Prev").and_then(PdfValue::as_integer) {
342            pending.push(prev_offset as usize);
343        }
344    }
345
346    let trailer = newest_trailer
347        .ok_or_else(|| PdfError::Parse("xref chain produced no trailer".to_string()))?;
348    let form = top_form.unwrap_or(XrefForm::Classic);
349    Ok((merged_entries, trailer, form))
350}
351
/// One parsed cross-reference section, before it is merged with the rest
/// of the chain.
struct XrefSection {
    /// Entries declared by this section only.
    entries: BTreeMap<ObjectRef, XrefEntry>,
    /// The section's trailer dictionary; for an xref stream this is the
    /// stream's own dictionary.
    trailer: PdfDictionary,
    /// Whether the section was a classic table or an xref stream.
    form: XrefForm,
}
357
358fn parse_xref_section_at(bytes: &[u8], offset: usize) -> PdfResult<XrefSection> {
359    let mut probe = Cursor::new(bytes, offset);
360    probe.skip_ws_and_comments();
361    if probe.peek_keyword("xref") {
362        parse_classic_xref_section(bytes, offset)
363    } else {
364        parse_xref_stream_section(bytes, offset)
365    }
366}
367
368fn parse_classic_xref_section(bytes: &[u8], offset: usize) -> PdfResult<XrefSection> {
369    let mut cursor = Cursor::new(bytes, offset);
370    cursor.expect_keyword("xref")?;
371    let mut entries = BTreeMap::new();
372    loop {
373        cursor.skip_ws_and_comments();
374        if cursor.peek_keyword("trailer") {
375            break;
376        }
377        let start = cursor.parse_u32()?;
378        cursor.skip_ws_and_comments();
379        let count = cursor.parse_u32()?;
380        cursor.skip_line_breaks();
381        for index in 0..count {
382            let line = cursor.read_line()?;
383            if line.len() < 17 {
384                return Err(PdfError::Parse("invalid xref entry".to_string()));
385            }
386            let parts = String::from_utf8_lossy(line).trim().to_string();
387            let mut fields = parts.split_whitespace();
388            let entry_offset = fields
389                .next()
390                .ok_or_else(|| PdfError::Parse("invalid xref entry offset".to_string()))?
391                .parse::<usize>()
392                .map_err(|_| PdfError::Parse("invalid xref entry offset".to_string()))?;
393            let generation = fields
394                .next()
395                .ok_or_else(|| PdfError::Parse("invalid xref generation".to_string()))?
396                .parse::<u16>()
397                .map_err(|_| PdfError::Parse("invalid xref generation".to_string()))?;
398            let flag = fields
399                .next()
400                .ok_or_else(|| PdfError::Parse("invalid xref flag".to_string()))?;
401            let object_number = start
402                .checked_add(index)
403                .ok_or_else(|| PdfError::Parse("xref object number overflow".to_string()))?;
404            let entry = if flag == "n" {
405                XrefEntry::Uncompressed {
406                    offset: entry_offset,
407                    generation,
408                }
409            } else {
410                XrefEntry::Free
411            };
412            entries.insert(ObjectRef::new(object_number, generation), entry);
413        }
414    }
415    cursor.expect_keyword("trailer")?;
416    let trailer = match cursor.parse_value()? {
417        PdfValue::Dictionary(dictionary) => dictionary,
418        _ => return Err(PdfError::Parse("trailer is not a dictionary".to_string())),
419    };
420    Ok(XrefSection {
421        entries,
422        trailer,
423        form: XrefForm::Classic,
424    })
425}
426
427fn parse_xref_stream_section(bytes: &[u8], offset: usize) -> PdfResult<XrefSection> {
428    // The xref stream itself is read while the xref map is still being
429    // built, so there is no xref available to resolve indirect /Length
430    // references. Pass `None` and fall back to the endstream scan if the
431    // xref stream ever uses an indirect /Length (vanishingly rare).
432    let object = parse_indirect_object(bytes, offset, None)?;
433    let stream = match object {
434        PdfObject::Stream(stream) => stream,
435        PdfObject::Value(_) => {
436            return Err(PdfError::Parse(
437                "expected xref stream object at startxref offset".to_string(),
438            ));
439        }
440    };
441    if stream.dict.get("Type").and_then(PdfValue::as_name) != Some("XRef") {
442        return Err(PdfError::Parse(
443            "xref stream object has wrong Type".to_string(),
444        ));
445    }
446
447    let size = stream
448        .dict
449        .get("Size")
450        .and_then(PdfValue::as_integer)
451        .ok_or_else(|| PdfError::Corrupt("xref stream missing Size".to_string()))?
452        as u32;
453
454    let w = stream
455        .dict
456        .get("W")
457        .and_then(PdfValue::as_array)
458        .ok_or_else(|| PdfError::Corrupt("xref stream missing W".to_string()))?;
459    if w.len() != 3 {
460        return Err(PdfError::Corrupt(
461            "xref stream W must have three entries".to_string(),
462        ));
463    }
464    let w0 = w[0]
465        .as_integer()
466        .ok_or_else(|| PdfError::Corrupt("invalid W[0]".to_string()))? as usize;
467    let w1 = w[1]
468        .as_integer()
469        .ok_or_else(|| PdfError::Corrupt("invalid W[1]".to_string()))? as usize;
470    let w2 = w[2]
471        .as_integer()
472        .ok_or_else(|| PdfError::Corrupt("invalid W[2]".to_string()))? as usize;
473    let row_len = w0 + w1 + w2;
474    if row_len == 0 {
475        return Err(PdfError::Corrupt(
476            "xref stream row width is zero".to_string(),
477        ));
478    }
479
480    let index: Vec<(u32, u32)> = match stream.dict.get("Index") {
481        Some(PdfValue::Array(entries)) => {
482            if entries.len() % 2 != 0 {
483                return Err(PdfError::Corrupt(
484                    "xref stream Index must have an even number of entries".to_string(),
485                ));
486            }
487            let mut pairs = Vec::with_capacity(entries.len() / 2);
488            for chunk in entries.chunks(2) {
489                let first = chunk[0]
490                    .as_integer()
491                    .ok_or_else(|| PdfError::Corrupt("invalid Index entry".to_string()))?
492                    as u32;
493                let count = chunk[1]
494                    .as_integer()
495                    .ok_or_else(|| PdfError::Corrupt("invalid Index entry".to_string()))?
496                    as u32;
497                pairs.push((first, count));
498            }
499            pairs
500        }
501        Some(_) => {
502            return Err(PdfError::Corrupt(
503                "xref stream Index is not an array".to_string(),
504            ));
505        }
506        None => vec![(0, size)],
507    };
508
509    let decoded = decode_stream(&stream)?;
510    let expected_rows: u32 = index.iter().map(|(_, count)| *count).sum();
511    if decoded.len() < expected_rows as usize * row_len {
512        return Err(PdfError::Corrupt(
513            "xref stream body is shorter than declared entries".to_string(),
514        ));
515    }
516
517    let mut entries: BTreeMap<ObjectRef, XrefEntry> = BTreeMap::new();
518    let mut cursor = 0usize;
519    for (first, count) in index {
520        for i in 0..count {
521            let row = &decoded[cursor..cursor + row_len];
522            cursor += row_len;
523            let field_type = if w0 == 0 { 1u64 } else { read_be(&row[..w0])? };
524            let f2 = read_be(&row[w0..w0 + w1])?;
525            let f3 = read_be(&row[w0 + w1..])?;
526            let object_number = first + i;
527            let entry = match field_type {
528                0 => XrefEntry::Free,
529                1 => XrefEntry::Uncompressed {
530                    offset: f2 as usize,
531                    generation: f3 as u16,
532                },
533                2 => XrefEntry::Compressed {
534                    stream_object_number: f2 as u32,
535                    index: f3 as u32,
536                },
537                other => {
538                    return Err(PdfError::Unsupported(format!(
539                        "xref stream entry type {other} is not supported"
540                    )));
541                }
542            };
543            let generation = match entry {
544                XrefEntry::Uncompressed { generation, .. } => generation,
545                _ => 0,
546            };
547            entries.insert(ObjectRef::new(object_number, generation), entry);
548        }
549    }
550
551    Ok(XrefSection {
552        entries,
553        trailer: stream.dict,
554        form: XrefForm::Stream,
555    })
556}
557
558fn read_be(bytes: &[u8]) -> PdfResult<u64> {
559    if bytes.len() > 8 {
560        return Err(PdfError::Corrupt(
561            "xref stream field width exceeds 8 bytes".to_string(),
562        ));
563    }
564    let mut value: u64 = 0;
565    for byte in bytes {
566        value = (value << 8) | *byte as u64;
567    }
568    Ok(value)
569}
570
571fn materialize_object_streams(
572    objects: &mut BTreeMap<ObjectRef, PdfObject>,
573    max_object_number: &mut u32,
574    compressed: &[(ObjectRef, u32, u32)],
575) -> PdfResult<()> {
576    if compressed.is_empty() {
577        return Ok(());
578    }
579
580    let mut by_stream: BTreeMap<u32, Vec<(ObjectRef, u32)>> = BTreeMap::new();
581    for (object_ref, stream_obj_num, index) in compressed {
582        by_stream
583            .entry(*stream_obj_num)
584            .or_default()
585            .push((*object_ref, *index));
586    }
587
588    for (stream_obj_num, mut members) in by_stream {
589        let stream_ref = ObjectRef::new(stream_obj_num, 0);
590        let stream = match objects.get(&stream_ref) {
591            Some(PdfObject::Stream(stream)) => stream.clone(),
592            Some(PdfObject::Value(_)) => {
593                return Err(PdfError::Corrupt(format!(
594                    "object stream {stream_obj_num} is not a stream"
595                )));
596            }
597            None => {
598                return Err(PdfError::Corrupt(format!(
599                    "compressed entry references missing object stream {stream_obj_num}"
600                )));
601            }
602        };
603        if stream.dict.get("Type").and_then(PdfValue::as_name) != Some("ObjStm") {
604            return Err(PdfError::Corrupt(format!(
605                "object {stream_obj_num} is not marked as ObjStm"
606            )));
607        }
608        let n = stream
609            .dict
610            .get("N")
611            .and_then(PdfValue::as_integer)
612            .ok_or_else(|| PdfError::Corrupt("ObjStm missing N".to_string()))?
613            as usize;
614        let first = stream
615            .dict
616            .get("First")
617            .and_then(PdfValue::as_integer)
618            .ok_or_else(|| PdfError::Corrupt("ObjStm missing First".to_string()))?
619            as usize;
620
621        let decoded = decode_stream(&stream)?;
622        if first > decoded.len() {
623            return Err(PdfError::Corrupt(
624                "ObjStm First offset is past end of decoded data".to_string(),
625            ));
626        }
627
628        let header = &decoded[..first];
629        let mut header_cursor = Cursor::new(header, 0);
630        let mut entries: Vec<(u32, usize)> = Vec::with_capacity(n);
631        for _ in 0..n {
632            header_cursor.skip_ws_and_comments();
633            let obj_num = header_cursor.parse_u32()?;
634            header_cursor.skip_ws_and_comments();
635            let rel_offset = header_cursor.parse_usize()?;
636            entries.push((obj_num, rel_offset));
637        }
638
639        // Guard: a compressed entry's index must be in range.
640        members.sort_by_key(|(_, index)| *index);
641        for (member_ref, index) in members {
642            let idx = index as usize;
643            if idx >= entries.len() {
644                return Err(PdfError::Corrupt(format!(
645                    "ObjStm {stream_obj_num} has no index {idx}"
646                )));
647            }
648            let (declared_number, rel_offset) = entries[idx];
649            if declared_number != member_ref.object_number {
650                return Err(PdfError::Corrupt(format!(
651                    "ObjStm {stream_obj_num} index {idx} has number {declared_number} but xref expected {}",
652                    member_ref.object_number
653                )));
654            }
655            let absolute_offset = first
656                .checked_add(rel_offset)
657                .ok_or_else(|| PdfError::Corrupt("ObjStm offset overflow".to_string()))?;
658            if absolute_offset > decoded.len() {
659                return Err(PdfError::Corrupt(
660                    "ObjStm member offset is past end of decoded data".to_string(),
661                ));
662            }
663            let mut value_cursor = Cursor::new(&decoded, absolute_offset);
664            let value = value_cursor.parse_value()?;
665            if let PdfValue::Dictionary(dict) = &value {
666                if dict.get("Type").and_then(PdfValue::as_name) == Some("ObjStm") {
667                    return Err(PdfError::Unsupported(
668                        "nested object streams are not supported".to_string(),
669                    ));
670                }
671            }
672            *max_object_number = (*max_object_number).max(member_ref.object_number);
673            objects.insert(member_ref, PdfObject::Value(value));
674        }
675        // Drop the ObjStm container after its members are materialised.
676        // The container's compressed bytes mirror the pre-redaction state
677        // of every member dictionary that was packed into it; leaving it
678        // in `objects` would make the writer emit the original bytes
679        // even after the materialised members were modified by redaction.
680        objects.remove(&stream_ref);
681    }
682
683    Ok(())
684}
685
686fn parse_indirect_object(
687    bytes: &[u8],
688    offset: usize,
689    xref: Option<&BTreeMap<ObjectRef, XrefEntry>>,
690) -> PdfResult<PdfObject> {
691    let mut cursor = Cursor::new(bytes, offset);
692    let _object_number = cursor.parse_u32()?;
693    cursor.skip_ws_and_comments();
694    let _generation = cursor.parse_u16()?;
695    cursor.skip_ws_and_comments();
696    cursor.expect_keyword("obj")?;
697    cursor.skip_ws_and_comments();
698
699    let value = cursor.parse_value()?;
700    cursor.skip_ws_and_comments();
701    if matches!(value, PdfValue::Dictionary(_)) && cursor.peek_keyword("stream") {
702        let dict = match value {
703            PdfValue::Dictionary(dict) => dict,
704            _ => unreachable!(),
705        };
706        cursor.expect_keyword("stream")?;
707        cursor.consume_stream_line_break();
708        let stream_start = cursor.position;
709        // Prefer the Length entry from the stream dictionary to determine the
710        // data boundary. This prevents binary stream data that happens to
711        // contain the literal bytes "endstream" from being truncated. When
712        // /Length is an indirect reference we resolve it by following the
713        // xref entry for the referenced integer object; see
714        // `resolve_stream_length_ref`. A missing or unresolvable /Length
715        // falls back to scanning forward for `endstream`.
716        let length_hint = match dict.get("Length") {
717            Some(PdfValue::Integer(len)) if *len >= 0 => Some(*len as usize),
718            Some(PdfValue::Reference(target)) => {
719                xref.and_then(|map| resolve_stream_length_ref(bytes, map, *target))
720            }
721            _ => None,
722        };
723        let (data, endstream_pos) = match length_hint {
724            Some(len) if stream_start + len <= bytes.len() => {
725                // Verify the endstream keyword follows at the expected offset.
726                // Tolerate trailing EOL between data and keyword per PDF spec.
727                let mut check = stream_start + len;
728                while check < bytes.len() && matches!(bytes[check], b'\r' | b'\n') {
729                    check += 1;
730                }
731                if bytes.get(check..check + 9) == Some(b"endstream") {
732                    (bytes[stream_start..stream_start + len].to_vec(), check)
733                } else {
734                    // Length is wrong; fall back to scanning
735                    let pos = find_keyword(bytes, stream_start, b"endstream")
736                        .ok_or_else(|| PdfError::Parse("stream missing endstream".to_string()))?;
737                    (bytes[stream_start..pos].to_vec(), pos)
738                }
739            }
740            _ => {
741                let pos = find_keyword(bytes, stream_start, b"endstream")
742                    .ok_or_else(|| PdfError::Parse("stream missing endstream".to_string()))?;
743                (bytes[stream_start..pos].to_vec(), pos)
744            }
745        };
746        cursor.position = endstream_pos;
747        cursor.expect_keyword("endstream")?;
748        cursor.skip_ws_and_comments();
749        cursor.expect_keyword("endobj")?;
750        Ok(PdfObject::Stream(PdfStream { dict, data }))
751    } else {
752        cursor.expect_keyword("endobj")?;
753        Ok(PdfObject::Value(value))
754    }
755}
756
757/// Resolve an indirect `/Length` reference inside a stream dictionary to
758/// the plain non-negative integer it points at. Follows `target` through
759/// the xref table, parses the referenced object, and returns its integer
760/// value if and only if the resolved object is a plain integer value
761/// (not a stream, reference, or negative integer). Returns `None` when
762/// the target entry is missing, compressed, or the resolved value is not
763/// a usable length; the caller then falls back to scanning for
764/// `endstream`.
765fn resolve_stream_length_ref(
766    bytes: &[u8],
767    xref: &BTreeMap<ObjectRef, XrefEntry>,
768    target: ObjectRef,
769) -> Option<usize> {
770    let entry = xref.get(&target)?;
771    let offset = match entry {
772        XrefEntry::Uncompressed { offset, .. } => *offset,
773        // Compressed (ObjStm) length refs are exotic and have not shown up
774        // in the wild for stream /Length specifically; skip for now.
775        XrefEntry::Compressed { .. } | XrefEntry::Free => return None,
776    };
777    // Do not pass `xref` into the recursive parse — a /Length reference
778    // should point at a plain integer, and forbidding further recursion
779    // keeps a malformed cycle from spiralling.
780    let object = parse_indirect_object(bytes, offset, None).ok()?;
781    match object {
782        PdfObject::Value(PdfValue::Integer(len)) if len >= 0 => Some(len as usize),
783        _ => None,
784    }
785}
786
/// Find the first occurrence of `keyword` in `bytes` at or after `start`.
///
/// Returns the absolute offset of the match. Returns `None` when there is
/// no match, when `start` lies past the end of `bytes`, or when `keyword`
/// is empty.
fn find_keyword(bytes: &[u8], start: usize, keyword: &[u8]) -> Option<usize> {
    // `windows(0)` panics, and an empty marker is never a meaningful match.
    if keyword.is_empty() {
        return None;
    }
    // `get` rather than indexing: an out-of-range `start` yields `None`
    // instead of a slice-index panic.
    bytes
        .get(start..)?
        .windows(keyword.len())
        .position(|window| window == keyword)
        .map(|relative| start + relative)
}
793
/// Byte-level reader over an input buffer: a borrowed slice plus the
/// index of the next byte to examine.
struct Cursor<'src> {
    /// The full input the cursor walks over.
    bytes: &'src [u8],
    /// Index into `bytes` of the next unread byte.
    position: usize,
}
798
799impl<'a> Cursor<'a> {
800    fn new(bytes: &'a [u8], position: usize) -> Self {
801        Self { bytes, position }
802    }
803
804    fn eof(&self) -> bool {
805        self.position >= self.bytes.len()
806    }
807
808    fn current(&self) -> Option<u8> {
809        self.bytes.get(self.position).copied()
810    }
811
812    fn skip_ws_and_comments(&mut self) {
813        while let Some(byte) = self.current() {
814            match byte {
815                b' ' | b'\t' | b'\n' | b'\r' | 0x0C | 0x00 => self.position += 1,
816                b'%' => {
817                    while let Some(next) = self.current() {
818                        self.position += 1;
819                        if next == b'\n' || next == b'\r' {
820                            break;
821                        }
822                    }
823                }
824                _ => break,
825            }
826        }
827    }
828
829    fn skip_line_breaks(&mut self) {
830        while matches!(self.current(), Some(b'\n' | b'\r')) {
831            self.position += 1;
832        }
833    }
834
835    fn read_line(&mut self) -> PdfResult<&'a [u8]> {
836        if self.eof() {
837            return Err(PdfError::Parse("unexpected end of file".to_string()));
838        }
839        let start = self.position;
840        while let Some(byte) = self.current() {
841            if byte == b'\n' || byte == b'\r' {
842                let end = self.position;
843                self.skip_line_breaks();
844                return Ok(&self.bytes[start..end]);
845            }
846            self.position += 1;
847        }
848        Ok(&self.bytes[start..self.position])
849    }
850
851    fn peek_keyword(&self, keyword: &str) -> bool {
852        self.bytes
853            .get(self.position..self.position + keyword.len())
854            .map(|slice| slice == keyword.as_bytes())
855            .unwrap_or(false)
856    }
857
858    fn expect_keyword(&mut self, keyword: &str) -> PdfResult<()> {
859        self.skip_ws_and_comments();
860        if self.peek_keyword(keyword) {
861            self.position += keyword.len();
862            Ok(())
863        } else {
864            Err(PdfError::Parse(format!("expected keyword {keyword}")))
865        }
866    }
867
868    fn consume_stream_line_break(&mut self) {
869        if self.current() == Some(b'\r') {
870            self.position += 1;
871        }
872        if self.current() == Some(b'\n') {
873            self.position += 1;
874        }
875    }
876
877    fn parse_u32(&mut self) -> PdfResult<u32> {
878        let token = self.parse_token()?;
879        token
880            .parse::<u32>()
881            .map_err(|_| PdfError::Parse(format!("invalid integer token: {token}")))
882    }
883
884    fn parse_u16(&mut self) -> PdfResult<u16> {
885        let token = self.parse_token()?;
886        token
887            .parse::<u16>()
888            .map_err(|_| PdfError::Parse(format!("invalid integer token: {token}")))
889    }
890
891    fn parse_usize(&mut self) -> PdfResult<usize> {
892        let token = self.parse_token()?;
893        token
894            .parse::<usize>()
895            .map_err(|_| PdfError::Parse(format!("invalid offset token: {token}")))
896    }
897
898    fn parse_token(&mut self) -> PdfResult<String> {
899        self.skip_ws_and_comments();
900        let start = self.position;
901        while let Some(byte) = self.current() {
902            if is_delimiter(byte) || is_whitespace(byte) {
903                break;
904            }
905            self.position += 1;
906        }
907        if self.position == start {
908            return Err(PdfError::Parse("expected token".to_string()));
909        }
910        Ok(String::from_utf8_lossy(&self.bytes[start..self.position]).to_string())
911    }
912
913    fn parse_value(&mut self) -> PdfResult<PdfValue> {
914        self.skip_ws_and_comments();
915        match self.current() {
916            Some(b'/') => self.parse_name(),
917            Some(b'(') => self.parse_literal_string(),
918            Some(b'[') => self.parse_array(),
919            Some(b'<') if self.bytes.get(self.position + 1) == Some(&b'<') => {
920                self.parse_dictionary()
921            }
922            Some(b'<') => self.parse_hex_string(),
923            Some(b't') if self.peek_keyword("true") => {
924                self.position += 4;
925                Ok(PdfValue::Bool(true))
926            }
927            Some(b'f') if self.peek_keyword("false") => {
928                self.position += 5;
929                Ok(PdfValue::Bool(false))
930            }
931            Some(b'n') if self.peek_keyword("null") => {
932                self.position += 4;
933                Ok(PdfValue::Null)
934            }
935            Some(_) => self.parse_number_or_reference(),
936            None => Err(PdfError::Parse("unexpected end of file".to_string())),
937        }
938    }
939
940    fn parse_name(&mut self) -> PdfResult<PdfValue> {
941        self.position += 1;
942        let mut raw = Vec::new();
943        while let Some(byte) = self.current() {
944            if is_delimiter(byte) || is_whitespace(byte) {
945                break;
946            }
947            if byte == b'#' {
948                let high =
949                    self.bytes.get(self.position + 1).copied().ok_or_else(|| {
950                        PdfError::Parse("truncated #XX escape in name".to_string())
951                    })?;
952                let low =
953                    self.bytes.get(self.position + 2).copied().ok_or_else(|| {
954                        PdfError::Parse("truncated #XX escape in name".to_string())
955                    })?;
956                let decoded = u8::from_str_radix(&format!("{}{}", high as char, low as char), 16)
957                    .map_err(|_| {
958                    PdfError::Parse("invalid #XX hex escape in name".to_string())
959                })?;
960                raw.push(decoded);
961                self.position += 3;
962            } else {
963                raw.push(byte);
964                self.position += 1;
965            }
966        }
967        Ok(PdfValue::Name(String::from_utf8_lossy(&raw).to_string()))
968    }
969
970    fn parse_literal_string(&mut self) -> PdfResult<PdfValue> {
971        self.position += 1;
972        let mut output = Vec::new();
973        let mut depth = 1usize;
974        while let Some(byte) = self.current() {
975            self.position += 1;
976            match byte {
977                b'\\' => {
978                    let escaped = self
979                        .current()
980                        .ok_or_else(|| PdfError::Parse("unterminated string escape".to_string()))?;
981                    self.position += 1;
982                    match escaped {
983                        b'n' => output.push(b'\n'),
984                        b'r' => output.push(b'\r'),
985                        b't' => output.push(b'\t'),
986                        b'b' => output.push(0x08),
987                        b'f' => output.push(0x0C),
988                        b'(' | b')' | b'\\' => output.push(escaped),
989                        b'\n' => {}
990                        b'\r' => {
991                            if self.current() == Some(b'\n') {
992                                self.position += 1;
993                            }
994                        }
995                        b'0'..=b'7' => {
996                            let mut octal = vec![escaped];
997                            for _ in 0..2 {
998                                match self.current() {
999                                    Some(next @ b'0'..=b'7') => {
1000                                        octal.push(next);
1001                                        self.position += 1;
1002                                    }
1003                                    _ => break,
1004                                }
1005                            }
1006                            // PDF spec: octal value is taken modulo 256
1007                            let value =
1008                                u16::from_str_radix(std::str::from_utf8(&octal).unwrap_or("0"), 8)
1009                                    .unwrap_or(0);
1010                            output.push((value % 256) as u8);
1011                        }
1012                        other => output.push(other),
1013                    }
1014                }
1015                b'(' => {
1016                    depth += 1;
1017                    output.push(byte);
1018                }
1019                b')' => {
1020                    depth -= 1;
1021                    if depth == 0 {
1022                        return Ok(PdfValue::String(PdfString(output)));
1023                    }
1024                    output.push(byte);
1025                }
1026                _ => output.push(byte),
1027            }
1028        }
1029        Err(PdfError::Parse("unterminated literal string".to_string()))
1030    }
1031
1032    fn parse_hex_string(&mut self) -> PdfResult<PdfValue> {
1033        self.position += 1;
1034        let start = self.position;
1035        while self.current() != Some(b'>') {
1036            if self.eof() {
1037                return Err(PdfError::Parse("unterminated hex string".to_string()));
1038            }
1039            self.position += 1;
1040        }
1041        let raw = String::from_utf8_lossy(&self.bytes[start..self.position])
1042            .chars()
1043            .filter(|character| !character.is_whitespace())
1044            .collect::<String>();
1045        self.position += 1;
1046        let mut chars = raw.chars().collect::<Vec<_>>();
1047        if chars.len() % 2 != 0 {
1048            chars.push('0');
1049        }
1050        let mut bytes = Vec::with_capacity(chars.len() / 2);
1051        for pair in chars.chunks(2) {
1052            let value = u8::from_str_radix(&pair.iter().collect::<String>(), 16)
1053                .map_err(|_| PdfError::Parse("invalid hex string".to_string()))?;
1054            bytes.push(value);
1055        }
1056        Ok(PdfValue::String(PdfString(bytes)))
1057    }
1058
1059    fn parse_array(&mut self) -> PdfResult<PdfValue> {
1060        self.position += 1;
1061        let mut values = Vec::new();
1062        loop {
1063            self.skip_ws_and_comments();
1064            match self.current() {
1065                Some(b']') => {
1066                    self.position += 1;
1067                    break;
1068                }
1069                Some(_) => values.push(self.parse_value()?),
1070                None => return Err(PdfError::Parse("unterminated array".to_string())),
1071            }
1072        }
1073        Ok(PdfValue::Array(values))
1074    }
1075
1076    fn parse_dictionary(&mut self) -> PdfResult<PdfValue> {
1077        self.position += 2;
1078        let mut dictionary = PdfDictionary::new();
1079        loop {
1080            self.skip_ws_and_comments();
1081            if self.current() == Some(b'>') && self.bytes.get(self.position + 1) == Some(&b'>') {
1082                self.position += 2;
1083                break;
1084            }
1085            let key = match self.parse_name()? {
1086                PdfValue::Name(name) => name,
1087                _ => unreachable!(),
1088            };
1089            let value = self.parse_value()?;
1090            dictionary.insert(key, value);
1091        }
1092        Ok(PdfValue::Dictionary(dictionary))
1093    }
1094
1095    fn parse_number_or_reference(&mut self) -> PdfResult<PdfValue> {
1096        let first_token = self.parse_token()?;
1097        if first_token.contains('.') || first_token.contains(['e', 'E']) {
1098            return first_token
1099                .parse::<f64>()
1100                .map(PdfValue::Number)
1101                .map_err(|_| PdfError::Parse(format!("invalid number token: {first_token}")));
1102        }
1103
1104        let checkpoint = self.position;
1105        self.skip_ws_and_comments();
1106        if let Ok(second_token) = self.parse_token() {
1107            self.skip_ws_and_comments();
1108            if self.current() == Some(b'R')
1109                && second_token
1110                    .chars()
1111                    .all(|character| character.is_ascii_digit())
1112            {
1113                self.position += 1;
1114                return Ok(PdfValue::Reference(ObjectRef::new(
1115                    first_token
1116                        .parse::<u32>()
1117                        .map_err(|_| PdfError::Parse("invalid reference object".to_string()))?,
1118                    second_token
1119                        .parse::<u16>()
1120                        .map_err(|_| PdfError::Parse("invalid reference generation".to_string()))?,
1121                )));
1122            }
1123        }
1124        self.position = checkpoint;
1125        first_token
1126            .parse::<i64>()
1127            .map(PdfValue::Integer)
1128            .or_else(|_| first_token.parse::<f64>().map(PdfValue::Number))
1129            .map_err(|_| PdfError::Parse(format!("invalid number token: {first_token}")))
1130    }
1131}
1132
/// Whether `byte` is one of the bytes this parser treats as whitespace:
/// NUL, tab, line feed, form feed, carriage return, or space.
fn is_whitespace(byte: u8) -> bool {
    const WHITESPACE: [u8; 6] = [0x00, b'\t', b'\n', 0x0C, b'\r', b' '];
    WHITESPACE.contains(&byte)
}
1136
/// Whether `byte` is one of the delimiter bytes that end a token:
/// `(` `)` `<` `>` `[` `]` `{` `}` `/` `%`.
fn is_delimiter(byte: u8) -> bool {
    b"()<>[]{}/%".contains(&byte)
}
1143
1144#[cfg(test)]
1145mod tests {
1146    use super::{parse_pdf, parse_pdf_with_certificate, parse_pdf_with_password};
1147    use crate::error::PdfError;
1148    use crate::types::{PdfObject, PdfValue};
1149
1150    #[test]
1151    fn parses_simple_pdf_fixture() {
1152        let bytes = include_bytes!("../../../tests/fixtures/simple-text.pdf");
1153        let document = parse_pdf(bytes).expect("fixture should parse");
1154        assert_eq!(document.pages.len(), 1);
1155    }
1156
1157    #[test]
1158    fn parses_incremental_update_fixture() {
1159        let bytes = include_bytes!("../../../tests/fixtures/incremental-update.pdf");
1160        let document = parse_pdf(bytes).expect("incremental fixture should parse");
1161        assert_eq!(document.pages.len(), 1);
1162
1163        // The updated content stream (object 4) should contain "Updated Secret",
1164        // not "Original Secret"
1165        let content_refs = &document.pages[0].content_refs;
1166        assert!(!content_refs.is_empty());
1167        let content_obj = document.file.objects.get(&content_refs[0]).unwrap();
1168        let stream_data = match content_obj {
1169            PdfObject::Stream(stream) => String::from_utf8_lossy(&stream.data),
1170            _ => panic!("expected stream object for page content"),
1171        };
1172        assert!(
1173            stream_data.contains("Updated Secret"),
1174            "content stream should contain updated text"
1175        );
1176        assert!(
1177            !stream_data.contains("Original Secret"),
1178            "content stream should not contain original text"
1179        );
1180    }
1181
1182    #[test]
1183    fn circular_prev_chain_does_not_loop() {
1184        // Build a minimal PDF where Prev points back to the same xref offset.
1185        // The parser should de-duplicate the offset via its visited-set and
1186        // parse the tree successfully instead of returning an error.
1187        let mut pdf = Vec::new();
1188        pdf.extend_from_slice(b"%PDF-1.4\n");
1189
1190        // Object 1: catalog
1191        let obj1_offset = pdf.len();
1192        pdf.extend_from_slice(b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n");
1193
1194        // Object 2: pages
1195        let obj2_offset = pdf.len();
1196        pdf.extend_from_slice(b"2 0 obj\n<< /Type /Pages /Count 0 /Kids [] >>\nendobj\n");
1197
1198        let xref_offset = pdf.len();
1199        pdf.extend_from_slice(b"xref\n0 3\n");
1200        pdf.extend_from_slice(b"0000000000 65535 f \n");
1201        pdf.extend_from_slice(format!("{:010} 00000 n \n", obj1_offset).as_bytes());
1202        pdf.extend_from_slice(format!("{:010} 00000 n \n", obj2_offset).as_bytes());
1203        pdf.extend_from_slice(b"trailer\n");
1204        // Prev points back to this same xref offset — circular
1205        pdf.extend_from_slice(
1206            format!("<< /Size 3 /Root 1 0 R /Prev {} >>\n", xref_offset).as_bytes(),
1207        );
1208        pdf.extend_from_slice(format!("startxref\n{}\n%%EOF\n", xref_offset).as_bytes());
1209
1210        let document = parse_pdf(&pdf).expect("circular Prev should be tolerated");
1211        assert_eq!(document.pages.len(), 0);
1212    }
1213
1214    #[test]
1215    fn stream_length_indirect_reference_is_resolved() {
1216        // Minimal PDF whose page content stream has `/Length 5 0 R`, where
1217        // object 5 is a plain integer. The stream's payload includes the
1218        // literal bytes "endstream" so the fallback endstream scan would
1219        // underflow; resolving the indirect /Length reads the exact bytes.
1220        let payload = b"--endstream--HIDDEN";
1221        let mut pdf = Vec::new();
1222        pdf.extend_from_slice(b"%PDF-1.4\n");
1223
1224        let obj1_offset = pdf.len();
1225        pdf.extend_from_slice(b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n");
1226
1227        let obj2_offset = pdf.len();
1228        pdf.extend_from_slice(b"2 0 obj\n<< /Type /Pages /Count 1 /Kids [3 0 R] >>\nendobj\n");
1229
1230        let obj3_offset = pdf.len();
1231        pdf.extend_from_slice(
1232            b"3 0 obj\n<< /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] /Resources << >> /Contents 4 0 R >>\nendobj\n",
1233        );
1234
1235        let obj4_offset = pdf.len();
1236        pdf.extend_from_slice(b"4 0 obj\n<< /Length 5 0 R >>\nstream\n");
1237        pdf.extend_from_slice(payload);
1238        pdf.extend_from_slice(b"\nendstream\nendobj\n");
1239
1240        let obj5_offset = pdf.len();
1241        pdf.extend_from_slice(format!("5 0 obj\n{}\nendobj\n", payload.len()).as_bytes());
1242
1243        let xref_offset = pdf.len();
1244        pdf.extend_from_slice(b"xref\n0 6\n");
1245        pdf.extend_from_slice(b"0000000000 65535 f \n");
1246        pdf.extend_from_slice(format!("{:010} 00000 n \n", obj1_offset).as_bytes());
1247        pdf.extend_from_slice(format!("{:010} 00000 n \n", obj2_offset).as_bytes());
1248        pdf.extend_from_slice(format!("{:010} 00000 n \n", obj3_offset).as_bytes());
1249        pdf.extend_from_slice(format!("{:010} 00000 n \n", obj4_offset).as_bytes());
1250        pdf.extend_from_slice(format!("{:010} 00000 n \n", obj5_offset).as_bytes());
1251        pdf.extend_from_slice(b"trailer\n<< /Size 6 /Root 1 0 R >>\n");
1252        pdf.extend_from_slice(format!("startxref\n{}\n%%EOF\n", xref_offset).as_bytes());
1253
1254        let document = parse_pdf(&pdf).expect("indirect-length fixture should parse");
1255        let content_refs = &document.pages[0].content_refs;
1256        let content_obj = document.file.objects.get(&content_refs[0]).unwrap();
1257        let data = match content_obj {
1258            PdfObject::Stream(stream) => &stream.data,
1259            _ => panic!("expected stream object for page content"),
1260        };
1261        assert_eq!(
1262            data.as_slice(),
1263            payload,
1264            "resolved indirect /Length should yield the exact original payload bytes"
1265        );
1266    }
1267
1268    #[test]
1269    fn parses_uncompressed_xref_stream() {
1270        // Minimal PDF using an xref stream with no filters and no predictor.
1271        // W = [1 2 1] means type(1) + offset(2) + generation(1).
1272        let mut pdf: Vec<u8> = Vec::new();
1273        pdf.extend_from_slice(b"%PDF-1.5\n");
1274
1275        let obj1_offset = pdf.len();
1276        pdf.extend_from_slice(b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n");
1277        let obj2_offset = pdf.len();
1278        pdf.extend_from_slice(b"2 0 obj\n<< /Type /Pages /Count 0 /Kids [] >>\nendobj\n");
1279
1280        // Build the xref stream body: four 4-byte rows for objects 0..3.
1281        // Row layout: type(1) | offset(2) | generation(1).
1282        let row_for = |t: u8, off: u16, generation: u8| {
1283            let mut row = [0u8; 4];
1284            row[0] = t;
1285            row[1] = (off >> 8) as u8;
1286            row[2] = off as u8;
1287            row[3] = generation;
1288            row
1289        };
1290        let mut body = Vec::new();
1291        body.extend_from_slice(&row_for(0, 0, 0xFF)); // object 0 free
1292        body.extend_from_slice(&row_for(1, obj1_offset as u16, 0));
1293        body.extend_from_slice(&row_for(1, obj2_offset as u16, 0));
1294        body.extend_from_slice(&row_for(1, 0, 0)); // self (object 3), placeholder; we will overwrite after knowing offset
1295
1296        let xref_obj_offset = pdf.len();
1297        // Overwrite object 3 self-offset in body now that we know it.
1298        let self_offset = xref_obj_offset as u16;
1299        body[12] = 1;
1300        body[13] = (self_offset >> 8) as u8;
1301        body[14] = self_offset as u8;
1302        body[15] = 0;
1303
1304        let stream_dict = format!(
1305            "<< /Type /XRef /Size 4 /W [1 2 1] /Root 1 0 R /Length {} >>",
1306            body.len()
1307        );
1308        pdf.extend_from_slice(format!("3 0 obj\n{stream_dict}\nstream\n").as_bytes());
1309        pdf.extend_from_slice(&body);
1310        pdf.extend_from_slice(b"\nendstream\nendobj\n");
1311        pdf.extend_from_slice(format!("startxref\n{}\n%%EOF\n", xref_obj_offset).as_bytes());
1312
1313        let document = parse_pdf(&pdf).expect("xref stream fixture should parse");
1314        assert_eq!(document.pages.len(), 0);
1315        // Object 1 and 2 must be materialized.
1316        assert!(document.file.objects.len() >= 2);
1317    }
1318
1319    #[test]
1320    fn parses_object_stream_via_xref_stream() {
1321        use flate2::{Compression, write::ZlibEncoder};
1322        use std::io::Write;
1323
1324        // Pages tree is compressed inside an ObjStm.
1325        // Layout:
1326        //   1: Catalog (uncompressed)
1327        //   2: Pages (compressed in ObjStm 3, index 0)
1328        //   3: ObjStm (uncompressed, flate-compressed body)
1329        //   4: xref stream (uncompressed)
1330        let mut pdf: Vec<u8> = Vec::new();
1331        pdf.extend_from_slice(b"%PDF-1.5\n");
1332
1333        let obj1_offset = pdf.len();
1334        pdf.extend_from_slice(b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n");
1335
1336        // Object 3 is an ObjStm holding object 2.
1337        let member_payload = b"<< /Type /Pages /Count 0 /Kids [] >>";
1338        let header = b"2 0 ";
1339        let first = header.len();
1340        let mut decompressed = Vec::new();
1341        decompressed.extend_from_slice(header);
1342        decompressed.extend_from_slice(member_payload);
1343
1344        let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
1345        encoder.write_all(&decompressed).unwrap();
1346        let compressed = encoder.finish().unwrap();
1347
1348        let obj3_offset = pdf.len();
1349        let objstm_dict = format!(
1350            "<< /Type /ObjStm /N 1 /First {} /Filter /FlateDecode /Length {} >>",
1351            first,
1352            compressed.len()
1353        );
1354        pdf.extend_from_slice(format!("3 0 obj\n{objstm_dict}\nstream\n").as_bytes());
1355        pdf.extend_from_slice(&compressed);
1356        pdf.extend_from_slice(b"\nendstream\nendobj\n");
1357
1358        // Build xref stream entries for objects 0..5:
1359        // 0 free, 1 uncompressed, 2 compressed (stream=3, index=0),
1360        // 3 uncompressed (ObjStm), 4 uncompressed (xref stream itself).
1361        let row_for = |t: u8, a: u32, b: u16| {
1362            let mut row = [0u8; 5];
1363            row[0] = t;
1364            row[1] = (a >> 16) as u8;
1365            row[2] = (a >> 8) as u8;
1366            row[3] = a as u8;
1367            row[4] = b as u8;
1368            row
1369        };
1370
1371        let obj4_offset = pdf.len();
1372        let mut body = Vec::new();
1373        body.extend_from_slice(&row_for(0, 0, 0xFF));
1374        body.extend_from_slice(&row_for(1, obj1_offset as u32, 0));
1375        body.extend_from_slice(&row_for(2, 3, 0));
1376        body.extend_from_slice(&row_for(1, obj3_offset as u32, 0));
1377        body.extend_from_slice(&row_for(1, obj4_offset as u32, 0));
1378
1379        let stream_dict = format!(
1380            "<< /Type /XRef /Size 5 /W [1 3 1] /Root 1 0 R /Length {} >>",
1381            body.len()
1382        );
1383        pdf.extend_from_slice(format!("4 0 obj\n{stream_dict}\nstream\n").as_bytes());
1384        pdf.extend_from_slice(&body);
1385        pdf.extend_from_slice(b"\nendstream\nendobj\n");
1386        pdf.extend_from_slice(format!("startxref\n{}\n%%EOF\n", obj4_offset).as_bytes());
1387
1388        let document = parse_pdf(&pdf).expect("ObjStm fixture should parse");
1389        assert_eq!(document.pages.len(), 0);
1390        // Pages dictionary should be materialized.
1391        let pages_ref = document.catalog.pages_ref;
1392        let pages_dict = document.file.get_dictionary(pages_ref).unwrap();
1393        assert_eq!(
1394            pages_dict.get("Type").and_then(|v| v.as_name()),
1395            Some("Pages")
1396        );
1397    }
1398
1399    #[test]
1400    fn rejects_nested_object_stream() {
1401        use flate2::{Compression, write::ZlibEncoder};
1402        use std::io::Write;
1403
1404        // A compressed member is itself an ObjStm dictionary → must fail.
1405        let mut pdf: Vec<u8> = Vec::new();
1406        pdf.extend_from_slice(b"%PDF-1.5\n");
1407
1408        let obj1_offset = pdf.len();
1409        pdf.extend_from_slice(b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n");
1410
1411        let member_payload = b"<< /Type /ObjStm /N 0 /First 0 /Length 0 >>";
1412        let header = b"2 0 ";
1413        let first = header.len();
1414        let mut decompressed = Vec::new();
1415        decompressed.extend_from_slice(header);
1416        decompressed.extend_from_slice(member_payload);
1417
1418        let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
1419        encoder.write_all(&decompressed).unwrap();
1420        let compressed = encoder.finish().unwrap();
1421
1422        let obj3_offset = pdf.len();
1423        let objstm_dict = format!(
1424            "<< /Type /ObjStm /N 1 /First {} /Filter /FlateDecode /Length {} >>",
1425            first,
1426            compressed.len()
1427        );
1428        pdf.extend_from_slice(format!("3 0 obj\n{objstm_dict}\nstream\n").as_bytes());
1429        pdf.extend_from_slice(&compressed);
1430        pdf.extend_from_slice(b"\nendstream\nendobj\n");
1431
1432        let row_for = |t: u8, a: u32, b: u16| {
1433            let mut row = [0u8; 5];
1434            row[0] = t;
1435            row[1] = (a >> 16) as u8;
1436            row[2] = (a >> 8) as u8;
1437            row[3] = a as u8;
1438            row[4] = b as u8;
1439            row
1440        };
1441
1442        let obj4_offset = pdf.len();
1443        let mut body = Vec::new();
1444        body.extend_from_slice(&row_for(0, 0, 0xFF));
1445        body.extend_from_slice(&row_for(1, obj1_offset as u32, 0));
1446        body.extend_from_slice(&row_for(2, 3, 0));
1447        body.extend_from_slice(&row_for(1, obj3_offset as u32, 0));
1448        body.extend_from_slice(&row_for(1, obj4_offset as u32, 0));
1449
1450        let stream_dict = format!(
1451            "<< /Type /XRef /Size 5 /W [1 3 1] /Root 1 0 R /Length {} >>",
1452            body.len()
1453        );
1454        pdf.extend_from_slice(format!("4 0 obj\n{stream_dict}\nstream\n").as_bytes());
1455        pdf.extend_from_slice(&body);
1456        pdf.extend_from_slice(b"\nendstream\nendobj\n");
1457        pdf.extend_from_slice(format!("startxref\n{}\n%%EOF\n", obj4_offset).as_bytes());
1458
1459        match parse_pdf(&pdf) {
1460            Err(PdfError::Unsupported(message)) => {
1461                assert!(message.contains("nested object streams"), "got: {message}")
1462            }
1463            other => panic!("expected Unsupported, got: {other:?}"),
1464        }
1465    }
1466
1467    /// Build a minimal V=2/R=3 RC4-encrypted PDF with the supplied user /
1468    /// owner passwords; encrypt a single content stream whose plaintext is
1469    /// returned alongside the bytes. Reused by all the RC4-encryption
1470    /// regression tests so the only per-test variable is which password
1471    /// the caller supplies to `parse_pdf_with_password`.
1472    fn build_rc4_encrypted_pdf(
1473        user_password: &[u8],
1474        owner_password: &[u8],
1475    ) -> (Vec<u8>, &'static [u8]) {
1476        use crate::crypto::SecurityRevision;
1477        use crate::crypto::test_helpers::{
1478            compute_file_key, compute_o, compute_u_r3, object_key, rc4,
1479        };
1480
1481        let id_first: [u8; 16] = [
1482            0x6e, 0x05, 0xb1, 0x20, 0x63, 0x94, 0x69, 0x1f, 0x22, 0x2c, 0x32, 0xac, 0x61, 0x8b,
1483            0xe6, 0x8d,
1484        ];
1485        let permissions: i32 = -4;
1486        let key_length_bytes = 16;
1487
1488        let owner_entry = compute_o(
1489            owner_password,
1490            user_password,
1491            SecurityRevision::R3,
1492            key_length_bytes,
1493        );
1494        let file_key = compute_file_key(
1495            user_password,
1496            &owner_entry,
1497            permissions,
1498            &id_first,
1499            key_length_bytes,
1500        );
1501        let u_entry = compute_u_r3(&file_key, &id_first);
1502
1503        let escape_literal = |bytes: &[u8]| -> Vec<u8> {
1504            let mut out = Vec::with_capacity(bytes.len() + 2);
1505            out.push(b'(');
1506            for &byte in bytes {
1507                match byte {
1508                    b'(' | b')' | b'\\' => {
1509                        out.push(b'\\');
1510                        out.push(byte);
1511                    }
1512                    _ => out.push(byte),
1513                }
1514            }
1515            out.push(b')');
1516            out
1517        };
1518
1519        let content_plain: &'static [u8] = b"BT\n/F1 24 Tf\n72 700 Td\n(CIPHERED SECRET) Tj\nET\n";
1520        let content_cipher = rc4(&object_key(&file_key, 4, 0), content_plain);
1521
1522        let mut pdf: Vec<u8> = Vec::new();
1523        pdf.extend_from_slice(b"%PDF-1.4\n");
1524
1525        let catalog_offset = pdf.len();
1526        pdf.extend_from_slice(b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n");
1527
1528        let pages_offset = pdf.len();
1529        pdf.extend_from_slice(b"2 0 obj\n<< /Type /Pages /Count 1 /Kids [3 0 R] >>\nendobj\n");
1530
1531        let page_offset = pdf.len();
1532        pdf.extend_from_slice(
1533            b"3 0 obj\n<< /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] \
1534              /Resources << /Font << /F1 5 0 R >> >> /Contents 4 0 R >>\nendobj\n",
1535        );
1536
1537        let content_offset = pdf.len();
1538        pdf.extend_from_slice(
1539            format!("4 0 obj\n<< /Length {} >>\nstream\n", content_cipher.len()).as_bytes(),
1540        );
1541        pdf.extend_from_slice(&content_cipher);
1542        pdf.extend_from_slice(b"\nendstream\nendobj\n");
1543
1544        let font_offset = pdf.len();
1545        pdf.extend_from_slice(
1546            b"5 0 obj\n<< /Type /Font /Subtype /Type1 /BaseFont /Helvetica \
1547              /Encoding /WinAnsiEncoding >>\nendobj\n",
1548        );
1549
1550        let encrypt_offset = pdf.len();
1551        pdf.extend_from_slice(b"6 0 obj\n<< /Filter /Standard /V 2 /R 3 /Length 128 ");
1552        pdf.extend_from_slice(format!("/P {permissions} ").as_bytes());
1553        pdf.extend_from_slice(b"/O ");
1554        pdf.extend_from_slice(&escape_literal(&owner_entry));
1555        pdf.extend_from_slice(b" /U ");
1556        pdf.extend_from_slice(&escape_literal(&u_entry));
1557        pdf.extend_from_slice(b" >>\nendobj\n");
1558
1559        let xref_offset = pdf.len();
1560        pdf.extend_from_slice(b"xref\n0 7\n");
1561        pdf.extend_from_slice(b"0000000000 65535 f \n");
1562        for offset in [
1563            catalog_offset,
1564            pages_offset,
1565            page_offset,
1566            content_offset,
1567            font_offset,
1568            encrypt_offset,
1569        ] {
1570            pdf.extend_from_slice(format!("{offset:010} 00000 n \n").as_bytes());
1571        }
1572        pdf.extend_from_slice(b"trailer\n<< /Size 7 /Root 1 0 R /Encrypt 6 0 R /ID [");
1573        pdf.extend_from_slice(&escape_literal(&id_first));
1574        pdf.extend_from_slice(&escape_literal(&id_first));
1575        pdf.extend_from_slice(b"] >>\n");
1576        pdf.extend_from_slice(format!("startxref\n{xref_offset}\n%%EOF\n").as_bytes());
1577
1578        (pdf, content_plain)
1579    }
1580
1581    fn assert_decrypts_content_stream(document: &crate::document::ParsedDocument, expected: &[u8]) {
1582        assert_eq!(document.pages.len(), 1);
1583        assert!(
1584            !document.file.trailer.contains_key("Encrypt"),
1585            "trailer /Encrypt must be stripped once the document is decrypted in place"
1586        );
1587        let content_ref = document.pages[0].content_refs[0];
1588        let stream = match document.file.get_object(content_ref).unwrap() {
1589            PdfObject::Stream(stream) => stream,
1590            _ => panic!("page content must be a stream"),
1591        };
1592        assert_eq!(stream.data, expected);
1593    }
1594
1595    #[test]
1596    fn parses_rc4_encrypted_pdf_with_empty_password() {
1597        // Real-world "encrypted to prevent editing but openable by anyone"
1598        // PDFs ship with an empty user password. The regression target
1599        // here is that parse_pdf (the no-argument entry point) still opens
1600        // them without a caller-supplied password.
1601        let (pdf, plain) = build_rc4_encrypted_pdf(b"", b"arbitrary-owner-password");
1602        let document = parse_pdf(&pdf).expect("empty-password PDF should decrypt");
1603        assert_decrypts_content_stream(&document, plain);
1604    }
1605
1606    #[test]
1607    fn parses_rc4_encrypted_pdf_with_user_password() {
1608        let (pdf, plain) = build_rc4_encrypted_pdf(b"userpw", b"ownerpw");
1609        let document =
1610            parse_pdf_with_password(&pdf, b"userpw").expect("correct user password should decrypt");
1611        assert_decrypts_content_stream(&document, plain);
1612    }
1613
1614    #[test]
1615    fn parses_rc4_encrypted_pdf_with_owner_password() {
1616        let (pdf, plain) = build_rc4_encrypted_pdf(b"userpw", b"ownerpw");
1617        let document = parse_pdf_with_password(&pdf, b"ownerpw")
1618            .expect("correct owner password should decrypt");
1619        assert_decrypts_content_stream(&document, plain);
1620    }
1621
1622    #[test]
1623    fn rejects_wrong_password_with_invalid_password_error() {
1624        let (pdf, _) = build_rc4_encrypted_pdf(b"userpw", b"ownerpw");
1625        let err =
1626            parse_pdf_with_password(&pdf, b"wrongpw").expect_err("wrong password must not decrypt");
1627        assert_eq!(err, PdfError::InvalidPassword);
1628    }
1629
1630    #[test]
1631    fn parses_rc4_encrypted_pdf_with_utf8_password() {
1632        let password = "pässwörd".as_bytes();
1633        let (pdf, plain) = build_rc4_encrypted_pdf(password, b"ownerpw");
1634        let document =
1635            parse_pdf_with_password(&pdf, password).expect("UTF-8 user password should decrypt");
1636        assert_decrypts_content_stream(&document, plain);
1637    }
1638
1639    /// Build a minimal V=4/R=4 AES-128 encrypted PDF with the supplied
1640    /// user / owner passwords and `/EncryptMetadata` flag. Reused by all
1641    /// the AES encryption regression tests so the only per-test variable
1642    /// is which password the caller supplies.
1643    fn build_aes_128_encrypted_pdf(
1644        user_password: &[u8],
1645        owner_password: &[u8],
1646        encrypt_metadata: bool,
1647    ) -> (Vec<u8>, &'static [u8]) {
1648        use crate::crypto::SecurityRevision;
1649        use crate::crypto::test_helpers::{
1650            aes_128_cbc_encrypt, compute_file_key_r4, compute_o, compute_u_r3, object_key_aes,
1651        };
1652
1653        let id_first: [u8; 16] = [
1654            0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88,
1655            0x99, 0x00,
1656        ];
1657        let permissions: i32 = -4;
1658
1659        let owner_entry = compute_o(owner_password, user_password, SecurityRevision::R4, 16);
1660        let file_key = compute_file_key_r4(
1661            user_password,
1662            &owner_entry,
1663            permissions,
1664            &id_first,
1665            encrypt_metadata,
1666        );
1667        let u_entry = compute_u_r3(&file_key, &id_first);
1668
1669        // The IV for each encrypted string / stream is arbitrary. Use
1670        // object-number-derived patterns so the two fixtures we produce
1671        // here do not collide on a block.
1672        let content_iv = [0x42u8; 16];
1673        let content_plain: &'static [u8] =
1674            b"BT\n/F1 24 Tf\n72 700 Td\n(AES SECRET REMOVED) Tj\nET\n";
1675        let content_key = object_key_aes(&file_key, 4, 0);
1676        let content_cipher = aes_128_cbc_encrypt(&content_key, &content_iv, content_plain);
1677
1678        let escape_literal = |bytes: &[u8]| -> Vec<u8> {
1679            let mut out = Vec::with_capacity(bytes.len() + 2);
1680            out.push(b'(');
1681            for &byte in bytes {
1682                match byte {
1683                    b'(' | b')' | b'\\' => {
1684                        out.push(b'\\');
1685                        out.push(byte);
1686                    }
1687                    _ => out.push(byte),
1688                }
1689            }
1690            out.push(b')');
1691            out
1692        };
1693
1694        let mut pdf: Vec<u8> = Vec::new();
1695        pdf.extend_from_slice(b"%PDF-1.5\n");
1696
1697        let catalog_offset = pdf.len();
1698        pdf.extend_from_slice(b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n");
1699
1700        let pages_offset = pdf.len();
1701        pdf.extend_from_slice(b"2 0 obj\n<< /Type /Pages /Count 1 /Kids [3 0 R] >>\nendobj\n");
1702
1703        let page_offset = pdf.len();
1704        pdf.extend_from_slice(
1705            b"3 0 obj\n<< /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] \
1706              /Resources << /Font << /F1 5 0 R >> >> /Contents 4 0 R >>\nendobj\n",
1707        );
1708
1709        let content_offset = pdf.len();
1710        pdf.extend_from_slice(
1711            format!("4 0 obj\n<< /Length {} >>\nstream\n", content_cipher.len()).as_bytes(),
1712        );
1713        pdf.extend_from_slice(&content_cipher);
1714        pdf.extend_from_slice(b"\nendstream\nendobj\n");
1715
1716        let font_offset = pdf.len();
1717        pdf.extend_from_slice(
1718            b"5 0 obj\n<< /Type /Font /Subtype /Type1 /BaseFont /Helvetica \
1719              /Encoding /WinAnsiEncoding >>\nendobj\n",
1720        );
1721
1722        let encrypt_offset = pdf.len();
1723        pdf.extend_from_slice(
1724            b"6 0 obj\n<< /Filter /Standard /V 4 /R 4 /Length 128 \
1725              /CF << /StdCF << /CFM /AESV2 /Length 16 /AuthEvent /DocOpen >> >> \
1726              /StmF /StdCF /StrF /StdCF ",
1727        );
1728        pdf.extend_from_slice(format!("/P {permissions} ").as_bytes());
1729        if !encrypt_metadata {
1730            pdf.extend_from_slice(b"/EncryptMetadata false ");
1731        }
1732        pdf.extend_from_slice(b"/O ");
1733        pdf.extend_from_slice(&escape_literal(&owner_entry));
1734        pdf.extend_from_slice(b" /U ");
1735        pdf.extend_from_slice(&escape_literal(&u_entry));
1736        pdf.extend_from_slice(b" >>\nendobj\n");
1737
1738        let xref_offset = pdf.len();
1739        pdf.extend_from_slice(b"xref\n0 7\n");
1740        pdf.extend_from_slice(b"0000000000 65535 f \n");
1741        for offset in [
1742            catalog_offset,
1743            pages_offset,
1744            page_offset,
1745            content_offset,
1746            font_offset,
1747            encrypt_offset,
1748        ] {
1749            pdf.extend_from_slice(format!("{offset:010} 00000 n \n").as_bytes());
1750        }
1751        pdf.extend_from_slice(b"trailer\n<< /Size 7 /Root 1 0 R /Encrypt 6 0 R /ID [");
1752        pdf.extend_from_slice(&escape_literal(&id_first));
1753        pdf.extend_from_slice(&escape_literal(&id_first));
1754        pdf.extend_from_slice(b"] >>\n");
1755        pdf.extend_from_slice(format!("startxref\n{xref_offset}\n%%EOF\n").as_bytes());
1756
1757        (pdf, content_plain)
1758    }
1759
1760    #[test]
1761    fn parses_aes_128_encrypted_pdf_with_empty_password() {
1762        let (pdf, plain) = build_aes_128_encrypted_pdf(b"", b"arbitrary-owner-password", true);
1763        let document = parse_pdf(&pdf).expect("empty-password AES-128 PDF should decrypt");
1764        assert_decrypts_content_stream(&document, plain);
1765    }
1766
1767    #[test]
1768    fn parses_aes_128_encrypted_pdf_with_user_password() {
1769        let (pdf, plain) = build_aes_128_encrypted_pdf(b"userpw", b"ownerpw", true);
1770        let document = parse_pdf_with_password(&pdf, b"userpw")
1771            .expect("correct user password should decrypt AES-128 PDF");
1772        assert_decrypts_content_stream(&document, plain);
1773    }
1774
1775    #[test]
1776    fn parses_aes_128_encrypted_pdf_with_owner_password() {
1777        let (pdf, plain) = build_aes_128_encrypted_pdf(b"userpw", b"ownerpw", true);
1778        let document = parse_pdf_with_password(&pdf, b"ownerpw")
1779            .expect("correct owner password should decrypt AES-128 PDF");
1780        assert_decrypts_content_stream(&document, plain);
1781    }
1782
1783    #[test]
1784    fn aes_128_rejects_wrong_password() {
1785        let (pdf, _) = build_aes_128_encrypted_pdf(b"userpw", b"ownerpw", true);
1786        let err = parse_pdf_with_password(&pdf, b"wrongpw")
1787            .expect_err("wrong password must not decrypt AES-128 PDF");
1788        assert_eq!(err, PdfError::InvalidPassword);
1789    }
1790
1791    /// Build a minimal V=5/R=6 AES-256 encrypted PDF. Reused by all the
1792    /// AES-256 regression tests so the only per-test variable is which
1793    /// password the caller supplies.
1794    fn build_aes_256_encrypted_pdf(
1795        user_password: &[u8],
1796        owner_password: &[u8],
1797        revision: crate::crypto::SecurityRevision,
1798    ) -> (Vec<u8>, &'static [u8]) {
1799        use crate::crypto::test_helpers::{
1800            aes_256_cbc_encrypt, compute_v5_o_and_oe, compute_v5_u_and_ue,
1801        };
1802
1803        let permissions: i32 = -4;
1804        let file_key = [0x13u8; 32];
1805        let u_validation_salt = [0xAAu8; 8];
1806        let u_key_salt = [0xBBu8; 8];
1807        let o_validation_salt = [0xCCu8; 8];
1808        let o_key_salt = [0xDDu8; 8];
1809
1810        let (u_entry, ue_entry) = compute_v5_u_and_ue(
1811            user_password,
1812            &u_validation_salt,
1813            &u_key_salt,
1814            &file_key,
1815            revision,
1816        );
1817        let u_vector: [u8; 48] = u_entry.as_slice().try_into().expect("U is 48 bytes");
1818        let (o_entry, oe_entry) = compute_v5_o_and_oe(
1819            owner_password,
1820            &o_validation_salt,
1821            &o_key_salt,
1822            &u_vector,
1823            &file_key,
1824            revision,
1825        );
1826
1827        let content_iv = [0x42u8; 16];
1828        let content_plain: &'static [u8] = b"BT\n/F1 24 Tf\n72 700 Td\n(AES-256 SECRET) Tj\nET\n";
1829        let content_cipher = aes_256_cbc_encrypt(&file_key, &content_iv, content_plain);
1830
1831        let escape_literal = |bytes: &[u8]| -> Vec<u8> {
1832            let mut out = Vec::with_capacity(bytes.len() + 2);
1833            out.push(b'(');
1834            for &byte in bytes {
1835                match byte {
1836                    b'(' | b')' | b'\\' => {
1837                        out.push(b'\\');
1838                        out.push(byte);
1839                    }
1840                    _ => out.push(byte),
1841                }
1842            }
1843            out.push(b')');
1844            out
1845        };
1846
1847        let mut pdf: Vec<u8> = Vec::new();
1848        pdf.extend_from_slice(b"%PDF-2.0\n");
1849
1850        let catalog_offset = pdf.len();
1851        pdf.extend_from_slice(b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n");
1852
1853        let pages_offset = pdf.len();
1854        pdf.extend_from_slice(b"2 0 obj\n<< /Type /Pages /Count 1 /Kids [3 0 R] >>\nendobj\n");
1855
1856        let page_offset = pdf.len();
1857        pdf.extend_from_slice(
1858            b"3 0 obj\n<< /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] \
1859              /Resources << /Font << /F1 5 0 R >> >> /Contents 4 0 R >>\nendobj\n",
1860        );
1861
1862        let content_offset = pdf.len();
1863        pdf.extend_from_slice(
1864            format!("4 0 obj\n<< /Length {} >>\nstream\n", content_cipher.len()).as_bytes(),
1865        );
1866        pdf.extend_from_slice(&content_cipher);
1867        pdf.extend_from_slice(b"\nendstream\nendobj\n");
1868
1869        let font_offset = pdf.len();
1870        pdf.extend_from_slice(
1871            b"5 0 obj\n<< /Type /Font /Subtype /Type1 /BaseFont /Helvetica \
1872              /Encoding /WinAnsiEncoding >>\nendobj\n",
1873        );
1874
1875        let r_value = match revision {
1876            crate::crypto::SecurityRevision::R5 => 5,
1877            crate::crypto::SecurityRevision::R6 => 6,
1878            _ => panic!("V=5 fixture requires R=5 or R=6"),
1879        };
1880
1881        let encrypt_offset = pdf.len();
1882        pdf.extend_from_slice(
1883            format!(
1884                "6 0 obj\n<< /Filter /Standard /V 5 /R {r_value} /Length 256 \
1885                  /CF << /StdCF << /CFM /AESV3 /Length 32 /AuthEvent /DocOpen >> >> \
1886                  /StmF /StdCF /StrF /StdCF /P {permissions} "
1887            )
1888            .as_bytes(),
1889        );
1890        pdf.extend_from_slice(b"/O ");
1891        pdf.extend_from_slice(&escape_literal(&o_entry));
1892        pdf.extend_from_slice(b" /U ");
1893        pdf.extend_from_slice(&escape_literal(&u_entry));
1894        pdf.extend_from_slice(b" /OE ");
1895        pdf.extend_from_slice(&escape_literal(&oe_entry));
1896        pdf.extend_from_slice(b" /UE ");
1897        pdf.extend_from_slice(&escape_literal(&ue_entry));
1898        pdf.extend_from_slice(b" >>\nendobj\n");
1899
1900        let xref_offset = pdf.len();
1901        pdf.extend_from_slice(b"xref\n0 7\n");
1902        pdf.extend_from_slice(b"0000000000 65535 f \n");
1903        for offset in [
1904            catalog_offset,
1905            pages_offset,
1906            page_offset,
1907            content_offset,
1908            font_offset,
1909            encrypt_offset,
1910        ] {
1911            pdf.extend_from_slice(format!("{offset:010} 00000 n \n").as_bytes());
1912        }
1913        // V=5 still requires /ID in the trailer even though it is not
1914        // consumed by the key-derivation algorithm.
1915        let id_literal: [u8; 16] = [
1916            0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88, 0x99, 0xAA, 0xBB, 0xCC, 0xDD, 0xEE,
1917            0xFF, 0x00,
1918        ];
1919        pdf.extend_from_slice(b"trailer\n<< /Size 7 /Root 1 0 R /Encrypt 6 0 R /ID [");
1920        pdf.extend_from_slice(&escape_literal(&id_literal));
1921        pdf.extend_from_slice(&escape_literal(&id_literal));
1922        pdf.extend_from_slice(b"] >>\n");
1923        pdf.extend_from_slice(format!("startxref\n{xref_offset}\n%%EOF\n").as_bytes());
1924
1925        (pdf, content_plain)
1926    }
1927
1928    #[test]
1929    fn parses_aes_256_r6_encrypted_pdf_with_user_password() {
1930        let (pdf, plain) =
1931            build_aes_256_encrypted_pdf(b"userpw", b"ownerpw", crate::crypto::SecurityRevision::R6);
1932        let document = parse_pdf_with_password(&pdf, b"userpw")
1933            .expect("correct user password should decrypt AES-256 R=6 PDF");
1934        assert_decrypts_content_stream(&document, plain);
1935    }
1936
1937    #[test]
1938    fn parses_aes_256_r6_encrypted_pdf_with_owner_password() {
1939        let (pdf, plain) =
1940            build_aes_256_encrypted_pdf(b"userpw", b"ownerpw", crate::crypto::SecurityRevision::R6);
1941        let document = parse_pdf_with_password(&pdf, b"ownerpw")
1942            .expect("correct owner password should decrypt AES-256 R=6 PDF");
1943        assert_decrypts_content_stream(&document, plain);
1944    }
1945
1946    #[test]
1947    fn parses_aes_256_r5_encrypted_pdf_with_empty_password() {
1948        let (pdf, plain) =
1949            build_aes_256_encrypted_pdf(b"", b"ownerpw", crate::crypto::SecurityRevision::R5);
1950        let document = parse_pdf(&pdf).expect("empty-password AES-256 R=5 PDF should decrypt");
1951        assert_decrypts_content_stream(&document, plain);
1952    }
1953
1954    #[test]
1955    fn aes_256_rejects_wrong_password() {
1956        let (pdf, _) =
1957            build_aes_256_encrypted_pdf(b"userpw", b"ownerpw", crate::crypto::SecurityRevision::R6);
1958        let err = parse_pdf_with_password(&pdf, b"wrongpw")
1959            .expect_err("wrong password must not decrypt AES-256 PDF");
1960        assert_eq!(err, PdfError::InvalidPassword);
1961    }
1962
1963    #[test]
1964    fn parses_aes_128_with_encrypt_metadata_false() {
1965        // EncryptMetadata=false changes the file-key derivation (Algorithm 2
1966        // step 5 appends 0xFFFFFFFF), so the whole decryption path fails if
1967        // we do not honour the flag.
1968        let (pdf, plain) = build_aes_128_encrypted_pdf(b"", b"ownerpw", false);
1969        let document =
1970            parse_pdf(&pdf).expect("empty-password AES-128 PDF should decrypt with metadata off");
1971        assert_decrypts_content_stream(&document, plain);
1972    }
1973
1974    #[test]
1975    fn decryption_drops_original_encrypt_dictionary_object() {
1976        // After successful decryption the parser strips the trailer's
1977        // /Encrypt reference. The Encrypt dictionary object itself must
1978        // also be removed from `objects` so the writer never re-emits its
1979        // /O, /U, /OE, /UE, /Perms fields as dangling unreferenced bytes.
1980        let (pdf, _) = build_aes_128_encrypted_pdf(b"", b"ownerpw", true);
1981        let document = parse_pdf(&pdf).expect("encrypted PDF should decrypt");
1982        for (object_ref, object) in &document.file.objects {
1983            if let PdfObject::Value(PdfValue::Dictionary(dict)) = object {
1984                let has_o = dict.contains_key("O");
1985                let has_u = dict.contains_key("U");
1986                let has_filter_standard =
1987                    dict.get("Filter").and_then(PdfValue::as_name) == Some("Standard");
1988                assert!(
1989                    !(has_o && has_u && has_filter_standard),
1990                    "Encrypt dictionary at {} {} survived parse",
1991                    object_ref.object_number,
1992                    object_ref.generation
1993                );
1994            }
1995        }
1996    }
1997
1998    #[test]
1999    fn materialize_drops_objstm_containers() {
2000        // After ObjStm members are materialised into top-level objects the
2001        // container itself must be dropped from `objects`. Otherwise the
2002        // writer would re-emit the container's compressed bytes, leaking
2003        // the pre-redaction state of every member dictionary.
2004        let bytes = include_bytes!("../../../tests/fixtures/xref-object-stream.pdf");
2005        let document = parse_pdf(bytes).expect("xref+ObjStm fixture should parse");
2006        for (object_ref, object) in &document.file.objects {
2007            if let PdfObject::Stream(stream) = object {
2008                let type_name = stream.dict.get("Type").and_then(PdfValue::as_name);
2009                assert_ne!(
2010                    type_name,
2011                    Some("ObjStm"),
2012                    "ObjStm container at {} {} survived parse",
2013                    object_ref.object_number,
2014                    object_ref.generation
2015                );
2016            }
2017        }
2018    }
2019
2020    /// Output of [`build_pubsec_encrypted_pdf`]: the encrypted PDF, the
2021    /// recipient's DER-encoded certificate, the recipient's DER-encoded
2022    /// PKCS#8 private key, and the plaintext content stream the test
2023    /// asserts the parser recovers.
2024    struct PubSecFixture {
2025        pdf: Vec<u8>,
2026        cert_der: Vec<u8>,
2027        private_key_der: Vec<u8>,
2028        plaintext: Vec<u8>,
2029    }
2030
2031    /// Build a minimal Adobe.PubSec encrypted PDF for the requested
2032    /// SubFilter (`adbe.pkcs7.s4` → V=4 / AES-128, or
2033    /// `adbe.pkcs7.s5` → V=5 / AES-256). Generates a deterministic
2034    /// RSA-2048 keypair and self-signed cert from a fixed PRNG seed so
2035    /// the fixture bytes are reproducible across test runs without
2036    /// committing any private key material.
2037    fn build_pubsec_encrypted_pdf(sub_filter: &str) -> PubSecFixture {
2038        use cms::builder::{
2039            ContentEncryptionAlgorithm, EnvelopedDataBuilder, KeyEncryptionInfo,
2040            KeyTransRecipientInfoBuilder,
2041        };
2042        use cms::cert::IssuerAndSerialNumber;
2043        use cms::content_info::ContentInfo;
2044        use cms::enveloped_data::RecipientIdentifier;
2045        use const_oid::ObjectIdentifier;
2046        use der::asn1::{Any, PrintableString, SetOfVec};
2047        use der::{Decode, Encode};
2048        use rand_chacha::ChaCha8Rng;
2049        use rand_core::SeedableRng;
2050        use rsa::pkcs1v15::SigningKey;
2051        use rsa::pkcs8::{EncodePrivateKey, EncodePublicKey};
2052        use rsa::{RsaPrivateKey, RsaPublicKey};
2053        use sha2::Sha256;
2054        use spki::SubjectPublicKeyInfoOwned;
2055        use std::time::Duration;
2056        use x509_cert::Certificate;
2057        use x509_cert::attr::AttributeTypeAndValue;
2058        use x509_cert::builder::{Builder, CertificateBuilder, Profile};
2059        use x509_cert::name::{Name, RdnSequence, RelativeDistinguishedName};
2060        use x509_cert::serial_number::SerialNumber;
2061        use x509_cert::time::Validity;
2062
2063        let mut rng = ChaCha8Rng::from_seed([0x42u8; 32]);
2064        let private_key = RsaPrivateKey::new(&mut rng, 2048).expect("RSA-2048 keygen must succeed");
2065        let public_key = RsaPublicKey::from(&private_key);
2066        let private_key_der = private_key
2067            .to_pkcs8_der()
2068            .expect("PKCS#8 encode")
2069            .as_bytes()
2070            .to_vec();
2071
2072        // Build a minimal self-signed X.509 certificate.
2073        let serial_number = SerialNumber::from(0x01020304u32);
2074        let validity = Validity::from_now(Duration::from_secs(3600 * 24 * 30))
2075            .expect("validity computation must succeed");
2076        let cn = AttributeTypeAndValue {
2077            oid: const_oid::db::rfc4519::CN,
2078            value: Any::from(
2079                &PrintableString::new(b"open-redact-pdf-test-recipient").expect("printable string"),
2080            ),
2081        };
2082        let rdn_set = SetOfVec::try_from(vec![cn]).expect("rdn set");
2083        let mut subject = RdnSequence::default();
2084        subject.0.push(RelativeDistinguishedName::from(rdn_set));
2085        let subject_name =
2086            Name::from_der(&subject.to_der().expect("subject encode")).expect("subject re-decode");
2087
2088        let signer: SigningKey<Sha256> = SigningKey::new(private_key.clone());
2089        let pub_key_der = public_key.to_public_key_der().expect("RSA public key DER");
2090        let pub_key_info =
2091            SubjectPublicKeyInfoOwned::try_from(pub_key_der.as_bytes()).expect("SPKI from DER");
2092        let cert_builder = CertificateBuilder::new(
2093            Profile::Root,
2094            serial_number.clone(),
2095            validity,
2096            subject_name.clone(),
2097            pub_key_info.clone(),
2098            &signer,
2099        )
2100        .expect("CertificateBuilder::new");
2101        let certificate: Certificate = cert_builder.build().expect("cert build");
2102        let cert_der = certificate.to_der().expect("cert DER");
2103
2104        // Random 20-byte seed + 4-byte permissions (all 0xFF = full access).
2105        let mut seed_and_perms = [0u8; 24];
2106        rsa::rand_core::RngCore::fill_bytes(&mut rng, &mut seed_and_perms);
2107        seed_and_perms[20..24].copy_from_slice(&[0xFFu8, 0xFF, 0xFF, 0xFF]);
2108
2109        // CMS EnvelopedData wrapping (seed || perms) for the recipient.
2110        let recipient_identifier =
2111            RecipientIdentifier::IssuerAndSerialNumber(IssuerAndSerialNumber {
2112                issuer: certificate.tbs_certificate.issuer.clone(),
2113                serial_number: certificate.tbs_certificate.serial_number.clone(),
2114            });
2115        let recipient_info_builder = KeyTransRecipientInfoBuilder::new(
2116            recipient_identifier,
2117            KeyEncryptionInfo::Rsa(public_key.clone()),
2118            &mut rng,
2119        )
2120        .expect("KeyTransRecipientInfoBuilder::new");
2121
2122        let mut enveloped_builder = EnvelopedDataBuilder::new(
2123            None,
2124            &seed_and_perms,
2125            ContentEncryptionAlgorithm::Aes128Cbc,
2126            None,
2127        )
2128        .expect("EnvelopedDataBuilder::new");
2129        // Separate RNG instance for the EnvelopedData build step: the
2130        // KeyTransRecipientInfoBuilder still holds an exclusive borrow on
2131        // the primary rng until it is consumed inside the final
2132        // build_with_rng call below.
2133        let mut envelope_rng = ChaCha8Rng::from_seed([0xA5u8; 32]);
2134        let enveloped_data = enveloped_builder
2135            .add_recipient_info(recipient_info_builder)
2136            .expect("add_recipient_info")
2137            .build_with_rng(&mut envelope_rng)
2138            .expect("build_with_rng");
2139
2140        // Wrap in ContentInfo (the outer ASN.1 structure).
2141        const ID_ENVELOPED: ObjectIdentifier = ObjectIdentifier::new_unwrap("1.2.840.113549.1.7.3");
2142        let enveloped_der = enveloped_data.to_der().expect("envelope DER");
2143        let content_info = ContentInfo {
2144            content_type: ID_ENVELOPED,
2145            content: Any::from_der(&enveloped_der).expect("Any from envelope DER"),
2146        };
2147        let recipient_blob = content_info.to_der().expect("content_info DER");
2148
2149        // Derive file key per spec.
2150        let plaintext_content: Vec<u8> =
2151            b"BT\n/F1 24 Tf\n72 700 Td\n(PUBSEC SECRET) Tj\nET\n".to_vec();
2152        let (file_key, content_cipher, sub_filter_str, v_value, r_value, length_bits, cfm_name) =
2153            match sub_filter {
2154                "adbe.pkcs7.s5" => {
2155                    use crate::crypto::test_helpers::aes_256_cbc_encrypt;
2156                    use sha2::Digest as _;
2157                    let mut hasher = sha2::Sha256::new();
2158                    hasher.update(&seed_and_perms[..20]);
2159                    hasher.update(&recipient_blob);
2160                    hasher.update(&seed_and_perms[20..24]);
2161                    let file_key: [u8; 32] = hasher.finalize().into();
2162                    let iv = [0x55u8; 16];
2163                    let cipher = aes_256_cbc_encrypt(&file_key, &iv, &plaintext_content);
2164                    (
2165                        file_key.to_vec(),
2166                        cipher,
2167                        "adbe.pkcs7.s5",
2168                        5i32,
2169                        5i32,
2170                        256i32,
2171                        "AESV3",
2172                    )
2173                }
2174                "adbe.pkcs7.s4" => {
2175                    use crate::crypto::test_helpers::{aes_128_cbc_encrypt, object_key_aes};
2176                    use sha1::{Digest as _, Sha1};
2177                    let mut hasher = Sha1::new();
2178                    hasher.update(&seed_and_perms[..20]);
2179                    hasher.update(&recipient_blob);
2180                    hasher.update(&seed_and_perms[20..24]);
2181                    let hash = hasher.finalize();
2182                    let file_key: [u8; 16] = hash[..16].try_into().expect("16 bytes");
2183                    let object_key = object_key_aes(&file_key, 4, 0);
2184                    let iv = [0x77u8; 16];
2185                    let cipher = aes_128_cbc_encrypt(&object_key, &iv, &plaintext_content);
2186                    (
2187                        file_key.to_vec(),
2188                        cipher,
2189                        "adbe.pkcs7.s4",
2190                        4i32,
2191                        4i32,
2192                        128i32,
2193                        "AESV2",
2194                    )
2195                }
2196                other => panic!("unsupported sub_filter for fixture builder: {other}"),
2197            };
2198        let _ = (file_key, length_bits); // silence unused warning paths
2199
2200        // Hex-encode the recipient blob for embedding as a PDF byte
2201        // string inside the /Recipients array.
2202        let blob_hex_string = {
2203            let mut s = String::from("<");
2204            for byte in &recipient_blob {
2205                s.push_str(&format!("{byte:02X}"));
2206            }
2207            s.push('>');
2208            s
2209        };
2210
2211        let mut pdf: Vec<u8> = Vec::new();
2212        pdf.extend_from_slice(b"%PDF-1.7\n");
2213
2214        let catalog_offset = pdf.len();
2215        pdf.extend_from_slice(b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n");
2216        let pages_offset = pdf.len();
2217        pdf.extend_from_slice(b"2 0 obj\n<< /Type /Pages /Count 1 /Kids [3 0 R] >>\nendobj\n");
2218        let page_offset = pdf.len();
2219        pdf.extend_from_slice(
2220            b"3 0 obj\n<< /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] \
2221              /Resources << /Font << /F1 5 0 R >> >> /Contents 4 0 R >>\nendobj\n",
2222        );
2223        let content_offset = pdf.len();
2224        pdf.extend_from_slice(
2225            format!("4 0 obj\n<< /Length {} >>\nstream\n", content_cipher.len()).as_bytes(),
2226        );
2227        pdf.extend_from_slice(&content_cipher);
2228        pdf.extend_from_slice(b"\nendstream\nendobj\n");
2229        let font_offset = pdf.len();
2230        pdf.extend_from_slice(
2231            b"5 0 obj\n<< /Type /Font /Subtype /Type1 /BaseFont /Helvetica \
2232              /Encoding /WinAnsiEncoding >>\nendobj\n",
2233        );
2234
2235        let encrypt_offset = pdf.len();
2236        if v_value == 5 {
2237            pdf.extend_from_slice(
2238                format!(
2239                    "6 0 obj\n<< /Filter /Adobe.PubSec /SubFilter /{sub_filter_str} \
2240                 /V {v_value} /R {r_value} /Length {length_bits} \
2241                 /CF << /DefaultCryptFilter << /CFM /{cfm_name} /Length 32 \
2242                 /AuthEvent /DocOpen /Recipients [{blob_hex_string}] >> >> \
2243                 /StmF /DefaultCryptFilter /StrF /DefaultCryptFilter \
2244                 /EncryptMetadata true >>\nendobj\n"
2245                )
2246                .as_bytes(),
2247            );
2248        } else {
2249            // V=4 stores /Recipients at the top level, not per-CF.
2250            pdf.extend_from_slice(
2251                format!(
2252                    "6 0 obj\n<< /Filter /Adobe.PubSec /SubFilter /{sub_filter_str} \
2253                 /V {v_value} /R {r_value} /Length {length_bits} \
2254                 /CF << /DefaultCryptFilter << /CFM /{cfm_name} /Length 16 \
2255                 /AuthEvent /DocOpen >> >> \
2256                 /StmF /DefaultCryptFilter /StrF /DefaultCryptFilter \
2257                 /Recipients [{blob_hex_string}] /EncryptMetadata true >>\nendobj\n"
2258                )
2259                .as_bytes(),
2260            );
2261        }
2262
2263        let xref_offset = pdf.len();
2264        pdf.extend_from_slice(b"xref\n0 7\n");
2265        pdf.extend_from_slice(b"0000000000 65535 f \n");
2266        for offset in [
2267            catalog_offset,
2268            pages_offset,
2269            page_offset,
2270            content_offset,
2271            font_offset,
2272            encrypt_offset,
2273        ] {
2274            pdf.extend_from_slice(format!("{offset:010} 00000 n \n").as_bytes());
2275        }
2276        pdf.extend_from_slice(
2277            b"trailer\n<< /Size 7 /Root 1 0 R /Encrypt 6 0 R /ID [<00112233445566778899AABBCCDDEEFF><00112233445566778899AABBCCDDEEFF>] >>\n",
2278        );
2279        pdf.extend_from_slice(format!("startxref\n{xref_offset}\n%%EOF\n").as_bytes());
2280
2281        PubSecFixture {
2282            pdf,
2283            cert_der,
2284            private_key_der,
2285            plaintext: plaintext_content,
2286        }
2287    }
2288
2289    #[test]
2290    fn parses_pubsec_s5_encrypted_pdf() {
2291        let fixture = build_pubsec_encrypted_pdf("adbe.pkcs7.s5");
2292        let document =
2293            parse_pdf_with_certificate(&fixture.pdf, &fixture.cert_der, &fixture.private_key_der)
2294                .expect("PubSec s5 PDF should decrypt with matching certificate");
2295        assert_decrypts_content_stream(&document, &fixture.plaintext);
2296    }
2297
2298    #[test]
2299    fn parses_pubsec_s4_encrypted_pdf() {
2300        let fixture = build_pubsec_encrypted_pdf("adbe.pkcs7.s4");
2301        let document =
2302            parse_pdf_with_certificate(&fixture.pdf, &fixture.cert_der, &fixture.private_key_der)
2303                .expect("PubSec s4 PDF should decrypt with matching certificate");
2304        assert_decrypts_content_stream(&document, &fixture.plaintext);
2305    }
2306
2307    #[test]
2308    fn pubsec_rejects_password_credential() {
2309        let fixture = build_pubsec_encrypted_pdf("adbe.pkcs7.s5");
2310        let err = parse_pdf_with_password(&fixture.pdf, b"any-password")
2311            .expect_err("PubSec PDF must reject a password credential");
2312        match err {
2313            PdfError::Unsupported(message) => {
2314                assert!(
2315                    message.contains("certificate"),
2316                    "error should mention certificate, got: {message}"
2317                );
2318            }
2319            other => panic!("expected Unsupported, got {other:?}"),
2320        }
2321    }
2322
2323    #[test]
2324    fn pubsec_s5_rejects_unknown_certificate() {
2325        // Build a fixture for one keypair, then attempt to open with a
2326        // different keypair's cert. The right blob is present in the
2327        // PDF but no recipient matches the supplied cert / key.
2328        use der::asn1::{Any, PrintableString, SetOfVec};
2329        use der::{Decode, Encode};
2330        use rand_chacha::ChaCha8Rng;
2331        use rand_core::SeedableRng;
2332        use rsa::RsaPrivateKey;
2333        use rsa::pkcs1v15::SigningKey;
2334        use rsa::pkcs8::{EncodePrivateKey, EncodePublicKey};
2335        use sha2::Sha256;
2336        use spki::SubjectPublicKeyInfoOwned;
2337        use std::time::Duration;
2338        use x509_cert::attr::AttributeTypeAndValue;
2339        use x509_cert::builder::{Builder, CertificateBuilder, Profile};
2340        use x509_cert::name::{Name, RdnSequence, RelativeDistinguishedName};
2341        use x509_cert::serial_number::SerialNumber;
2342        use x509_cert::time::Validity;
2343
2344        let fixture = build_pubsec_encrypted_pdf("adbe.pkcs7.s5");
2345
2346        // Different seed → different keypair.
2347        let mut rng = ChaCha8Rng::from_seed([0x99u8; 32]);
2348        let other_private = RsaPrivateKey::new(&mut rng, 2048).expect("other RSA-2048 keygen");
2349        let other_public = rsa::RsaPublicKey::from(&other_private);
2350        let other_pkcs8 = other_private
2351            .to_pkcs8_der()
2352            .expect("PKCS#8 encode")
2353            .as_bytes()
2354            .to_vec();
2355
2356        let cn = AttributeTypeAndValue {
2357            oid: const_oid::db::rfc4519::CN,
2358            value: Any::from(&PrintableString::new(b"unrelated-cert").expect("printable string")),
2359        };
2360        let rdn_set = SetOfVec::try_from(vec![cn]).expect("rdn set");
2361        let mut subject = RdnSequence::default();
2362        subject.0.push(RelativeDistinguishedName::from(rdn_set));
2363        let subject_name =
2364            Name::from_der(&subject.to_der().expect("subject encode")).expect("subject re-decode");
2365        let signer: SigningKey<Sha256> = SigningKey::new(other_private.clone());
2366        let other_pub_der = other_public
2367            .to_public_key_der()
2368            .expect("RSA public key DER");
2369        let pub_key_info =
2370            SubjectPublicKeyInfoOwned::try_from(other_pub_der.as_bytes()).expect("SPKI from DER");
2371        let cert_builder = CertificateBuilder::new(
2372            Profile::Root,
2373            SerialNumber::from(0x55u32),
2374            Validity::from_now(Duration::from_secs(3600 * 24 * 30)).expect("validity"),
2375            subject_name,
2376            pub_key_info,
2377            &signer,
2378        )
2379        .expect("CertificateBuilder::new");
2380        let other_cert: x509_cert::Certificate = cert_builder.build().expect("cert build");
2381        let other_cert_der = other_cert.to_der().expect("cert DER");
2382
2383        let err = parse_pdf_with_certificate(&fixture.pdf, &other_cert_der, &other_pkcs8)
2384            .expect_err("unrelated certificate must not unlock the PubSec PDF");
2385        assert_eq!(err, PdfError::InvalidPassword);
2386    }
2387
2388    #[test]
2389    fn standard_pdf_rejects_certificate_credential() {
2390        let (pdf, _) = build_aes_128_encrypted_pdf(b"", b"ownerpw", true);
2391        // Any DER-shaped buffers will do: dispatcher rejects before the
2392        // PubSec code ever inspects them.
2393        let err = parse_pdf_with_certificate(&pdf, &[0x30, 0x00], &[0x30, 0x00])
2394            .expect_err("Standard-encrypted PDF must reject a certificate credential");
2395        match err {
2396            PdfError::Unsupported(message) => {
2397                assert!(
2398                    message.contains("password"),
2399                    "error should mention password, got: {message}"
2400                );
2401            }
2402            other => panic!("expected Unsupported, got {other:?}"),
2403        }
2404    }
2405}