Skip to main content

pdf_objects/
crypto.rs

1//! PDF Standard Security Handler (encryption) — decryption side only.
2//!
3//! This module implements enough of the PDF 1.7 / PDF 2.0 Standard Security
4//! Handler to decrypt documents produced by:
5//!
6//! - revisions 2 and 3 (V=1 or V=2, RC4 with a 40-bit or 128-bit key),
7//! - revision 4 with V=4 crypt filters naming `/V2` (RC4-128) or
8//!   `/AESV2` (AES-128-CBC) as the stream and string method,
9//! - revisions 5 and 6 (V=5, AES-256-CBC) via the `/AESV3` crypt filter —
10//!   R=5 uses a plain SHA-256 verifier (the vulnerable Extension Level 3
11//!   form) and R=6 uses the ISO 32000-2 iterative Algorithm 2.B hash.
12//!
13//! Authentication runs against either the user password or the owner
14//! password. The empty user password is accepted as a special case of
15//! the general user-password path.
16//!
17//! Public-key security handlers are not yet implemented and still fail
18//! up front with `PdfError::Unsupported`. They can be layered on top
19//! without changing this module's public surface.
20
21use aes::cipher::{BlockDecrypt, BlockEncrypt, KeyInit, generic_array::GenericArray};
22use aes::{Aes128, Aes256};
23use md5::{Digest, Md5};
24use sha2::{Sha256, Sha384, Sha512};
25
26use crate::error::{PdfError, PdfResult};
27use crate::types::{ObjectRef, PdfDictionary, PdfValue};
28
/// Adobe's 32-byte password padding string (PDF 1.7, algorithm 2).
///
/// `pad_password` appends a prefix of this constant to any password
/// shorter than 32 bytes, and `authenticate_user_password` uses the whole
/// string as the known plaintext behind the `/U` verifier.
const PASSWORD_PADDING: [u8; 32] = [
    0x28, 0xBF, 0x4E, 0x5E, 0x4E, 0x75, 0x8A, 0x41, 0x64, 0x00, 0x4E, 0x56, 0xFF, 0xFA, 0x01, 0x08,
    0x2E, 0x2E, 0x00, 0xB6, 0xD0, 0x68, 0x3E, 0x80, 0x2F, 0x0C, 0xA9, 0xFE, 0x64, 0x53, 0x69, 0x7A,
];
34
/// Revision (`/R`) of the Standard Security Handler named by the
/// `/Encrypt` dictionary.
///
/// `StandardSecurityHandler::open` maps the integers 2 through 6 onto
/// these variants and reports every other value as unsupported.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SecurityRevision {
    /// `/R 2`: the file key is a single MD5 with no rehash rounds, and all
    /// 32 bytes of `/U` are compared during authentication.
    R2,
    /// `/R 3`: the file key is rehashed 50 times and `/U` is verified
    /// through 20 chained RC4 passes over its first 16 bytes.
    R3,
    /// `/R 4`: same derivation as `R3`, and additionally the only RC4/AES-128
    /// revision for which `/EncryptMetadata` is honoured.
    R4,
    /// `/R 5`: V=5 documents whose verifier is one plain SHA-256.
    R5,
    /// `/R 6`: V=5 documents whose verifier is the iterative Algorithm 2.B
    /// hash.
    R6,
}
43
/// Which crypt filter method applies to a given piece of ciphertext.
///
/// V=1/2 documents always use [`CryptMethod::V2`] (RC4) for everything.
/// V=4 documents name a crypt filter per kind (`/StmF`, `/StrF`, `/EFF`);
/// each may point at `/Identity` (no encryption), a V2 filter (RC4), or
/// an AESV2 filter (AES-128-CBC).
/// V=5 documents name the `/AESV3` filter (AES-256-CBC).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum CryptMethod {
    /// No encryption: `decrypt_bytes` returns the input unchanged. Chosen
    /// for the reserved `/Identity` filter name and for `/CFM /None`.
    Identity,
    /// RC4 keyed with the per-object key from `object_key`.
    V2,
    /// AES-128-CBC keyed with the per-object key from `object_key`, which
    /// mixes in the extra `sAlT` suffix for this method.
    AesV2,
    /// AES-256-CBC keyed directly with the file key (no per-object key).
    AesV3,
}
58
/// Which slot the ciphertext belongs to. Drives the crypt-method choice
/// (string vs stream) on V=4 documents and is a no-op on V=1/2.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BytesKind {
    /// String data: `decrypt_bytes` uses the handler's string method
    /// (resolved from `/StrF`).
    String,
    /// Stream data: `decrypt_bytes` uses the handler's stream method
    /// (resolved from `/StmF`).
    Stream,
}
66
/// Decryption state for a document protected by the Standard Security
/// Handler. Built by [`StandardSecurityHandler::open`] once a password
/// has authenticated.
#[derive(Debug, Clone)]
pub struct StandardSecurityHandler {
    /// File encryption key: derived from the password for V=1/2/4, or
    /// unwrapped from `/UE` / `/OE` for V=5.
    file_key: Vec<u8>,
    /// Crypt method applied to [`BytesKind::String`] data.
    string_method: CryptMethod,
    /// Crypt method applied to [`BytesKind::Stream`] data.
    stream_method: CryptMethod,
    /// Effective `/EncryptMetadata` flag. The dictionary entry is read
    /// only for R=4 documents and for V=5 documents, defaulting to `true`
    /// when absent; every other document always gets `true`. When
    /// `false`, streams with `/Type /Metadata` and `/Subtype /XML` are
    /// meant to skip decryption.
    // NOTE(review): that skip is not performed anywhere in this module;
    // presumably the parser applies it via `encrypts_metadata()` — confirm.
    encrypt_metadata: bool,
}
78
79impl StandardSecurityHandler {
80    /// Builds a decryption handler from the `/Encrypt` dictionary and the
81    /// trailer's first `/ID` string, authenticating the supplied password.
82    /// Returns `None` if the password does not authenticate.
83    pub fn open(
84        encrypt_dict: &PdfDictionary,
85        id_first: &[u8],
86        password: &[u8],
87    ) -> PdfResult<Option<Self>> {
88        let filter = encrypt_dict
89            .get("Filter")
90            .and_then(PdfValue::as_name)
91            .unwrap_or("");
92        if filter != "Standard" {
93            return Err(PdfError::Unsupported(format!(
94                "encryption filter /{filter} is not supported"
95            )));
96        }
97        let v = encrypt_dict
98            .get("V")
99            .and_then(PdfValue::as_integer)
100            .unwrap_or(0);
101        let r = encrypt_dict
102            .get("R")
103            .and_then(PdfValue::as_integer)
104            .unwrap_or(0);
105        let revision = match r {
106            2 => SecurityRevision::R2,
107            3 => SecurityRevision::R3,
108            4 => SecurityRevision::R4,
109            5 => SecurityRevision::R5,
110            6 => SecurityRevision::R6,
111            other => {
112                return Err(PdfError::Unsupported(format!(
113                    "Standard security handler revision {other} is not supported (only R=2..R=6 handled)"
114                )));
115            }
116        };
117
118        // V=5 is a separate code path: the file key is stored encrypted
119        // in /OE /UE rather than derived algorithmically from /P + /ID.
120        if v == 5 {
121            return open_v5(encrypt_dict, revision, password);
122        }
123
124        let (string_method, stream_method, key_length_bytes) = match v {
125            1 | 2 => {
126                let bits = encrypt_dict
127                    .get("Length")
128                    .and_then(PdfValue::as_integer)
129                    .unwrap_or(40);
130                if bits % 8 != 0 || !(40..=128).contains(&bits) {
131                    return Err(PdfError::Corrupt(format!(
132                        "invalid /Length {bits} in Encrypt dictionary"
133                    )));
134                }
135                (CryptMethod::V2, CryptMethod::V2, (bits / 8) as usize)
136            }
137            4 => {
138                // V=4: crypt filters decide the method per slot. The file
139                // key is always 128-bit (16 bytes).
140                let (strf, stmf) = resolve_v4_crypt_filters(encrypt_dict)?;
141                (strf, stmf, 16)
142            }
143            other => {
144                return Err(PdfError::Unsupported(format!(
145                    "Standard security handler V={other} is not supported (only V=1, V=2, V=4, and V=5 handled)"
146                )));
147            }
148        };
149
150        // V=4's Algorithm 2 step 5: when /EncryptMetadata is explicitly
151        // false, 0xFFFFFFFF is appended before the 50-round rehash.
152        let encrypt_metadata = if matches!(revision, SecurityRevision::R4) {
153            encrypt_dict
154                .get("EncryptMetadata")
155                .and_then(PdfValue::as_bool)
156                .unwrap_or(true)
157        } else {
158            true
159        };
160
161        let o = pdf_string_bytes(encrypt_dict, "O")?;
162        let u = pdf_string_bytes(encrypt_dict, "U")?;
163        let p = encrypt_dict
164            .get("P")
165            .and_then(PdfValue::as_integer)
166            .ok_or_else(|| PdfError::Corrupt("Encrypt dictionary missing /P".to_string()))?;
167        if o.len() != 32 || u.len() != 32 {
168            return Err(PdfError::Corrupt(
169                "Encrypt /O and /U must each be 32 bytes".to_string(),
170            ));
171        }
172
173        // First try the supplied password as the user password.
174        let user_file_key = compute_file_key(
175            password,
176            &o,
177            p as i32,
178            id_first,
179            key_length_bytes,
180            revision,
181            encrypt_metadata,
182        );
183        if authenticate_user_password(&user_file_key, revision, &u, id_first) {
184            return Ok(Some(Self {
185                file_key: user_file_key,
186                string_method,
187                stream_method,
188                encrypt_metadata,
189            }));
190        }
191
192        // Then try it as the owner password: Algorithm 7 recovers the
193        // padded user password from /O, after which we redo the user-
194        // password authentication with that recovered value. The file key
195        // used for object decryption is always derived from the user
196        // password — the owner password is only a way of recovering it.
197        let recovered_user_password =
198            recover_user_password_from_owner(password, &o, revision, key_length_bytes);
199        let owner_file_key = compute_file_key(
200            &recovered_user_password,
201            &o,
202            p as i32,
203            id_first,
204            key_length_bytes,
205            revision,
206            encrypt_metadata,
207        );
208        if authenticate_user_password(&owner_file_key, revision, &u, id_first) {
209            return Ok(Some(Self {
210                file_key: owner_file_key,
211                string_method,
212                stream_method,
213                encrypt_metadata,
214            }));
215        }
216
217        Ok(None)
218    }
219
    /// Returns the effective `/EncryptMetadata` flag: `false` when this
    /// handler was configured with `/EncryptMetadata false`, and `true`
    /// otherwise. The parser is expected to leave `/Type /Metadata`
    /// streams undecrypted when this returns `false`.
    pub fn encrypts_metadata(&self) -> bool {
        self.encrypt_metadata
    }
226
227    /// Decrypts `bytes` produced for the indirect object `(num, gen)`.
228    /// The crypt method is chosen per `kind` — strings use `/StrF`,
229    /// streams use `/StmF`. Returns the ciphertext unchanged for
230    /// `/Identity` filters; returns an error for malformed AES input
231    /// (wrong length, bad PKCS#7 padding).
232    pub fn decrypt_bytes(
233        &self,
234        bytes: &[u8],
235        object_ref: ObjectRef,
236        kind: BytesKind,
237    ) -> PdfResult<Vec<u8>> {
238        let method = match kind {
239            BytesKind::String => self.string_method,
240            BytesKind::Stream => self.stream_method,
241        };
242        match method {
243            CryptMethod::Identity => Ok(bytes.to_vec()),
244            CryptMethod::V2 => Ok(rc4(&self.object_key(object_ref, method), bytes)),
245            CryptMethod::AesV2 => aes_128_cbc_decrypt(&self.object_key(object_ref, method), bytes),
246            CryptMethod::AesV3 => {
247                // AES-256-CBC in V=5 uses the file key directly — there is
248                // no per-object key mixing, the `sAlT` suffix, or any
249                // object-number-derived material.
250                aes_256_cbc_decrypt(&self.file_key, bytes)
251            }
252        }
253    }
254
255    fn object_key(&self, object_ref: ObjectRef, method: CryptMethod) -> Vec<u8> {
256        // Algorithm 1 / 1a. Append the 4-byte ASCII suffix "sAlT" when
257        // the method is AES so keys derived for the same object under
258        // different methods never collide.
259        let suffix_len = if matches!(method, CryptMethod::AesV2) {
260            9
261        } else {
262            5
263        };
264        let mut material = Vec::with_capacity(self.file_key.len() + suffix_len);
265        material.extend_from_slice(&self.file_key);
266        let num = object_ref.object_number.to_le_bytes();
267        material.push(num[0]);
268        material.push(num[1]);
269        material.push(num[2]);
270        let generation = object_ref.generation.to_le_bytes();
271        material.push(generation[0]);
272        material.push(generation[1]);
273        if matches!(method, CryptMethod::AesV2) {
274            material.extend_from_slice(b"sAlT");
275        }
276        let digest = md5_bytes(&material);
277        let truncated_len = (self.file_key.len() + 5).min(16);
278        digest[..truncated_len].to_vec()
279    }
280}
281
282fn open_v5(
283    encrypt_dict: &PdfDictionary,
284    revision: SecurityRevision,
285    password: &[u8],
286) -> PdfResult<Option<StandardSecurityHandler>> {
287    if !matches!(revision, SecurityRevision::R5 | SecurityRevision::R6) {
288        return Err(PdfError::Unsupported(format!(
289            "V=5 Encrypt dictionary requires R=5 or R=6, got {revision:?}"
290        )));
291    }
292    let (strf, stmf) = resolve_v5_crypt_filters(encrypt_dict)?;
293
294    let encrypt_metadata = encrypt_dict
295        .get("EncryptMetadata")
296        .and_then(PdfValue::as_bool)
297        .unwrap_or(true);
298
299    let o = pdf_string_bytes(encrypt_dict, "O")?;
300    let u = pdf_string_bytes(encrypt_dict, "U")?;
301    let oe = pdf_string_bytes(encrypt_dict, "OE")?;
302    let ue = pdf_string_bytes(encrypt_dict, "UE")?;
303    if o.len() != 48 || u.len() != 48 {
304        return Err(PdfError::Corrupt(
305            "V=5 Encrypt /O and /U must each be 48 bytes".to_string(),
306        ));
307    }
308    if oe.len() != 32 || ue.len() != 32 {
309        return Err(PdfError::Corrupt(
310            "V=5 Encrypt /OE and /UE must each be 32 bytes".to_string(),
311        ));
312    }
313
314    // Passwords are UTF-8, truncated to 127 bytes per spec.
315    let truncated_password = &password[..password.len().min(127)];
316
317    // User-password attempt: hash(password || u_validation_salt) must
318    // match u[0..32]; intermediate key = hash(password || u_key_salt).
319    let u_validation_salt = &u[32..40];
320    let u_key_salt = &u[40..48];
321    let user_hash = pdf_2_b_hash(truncated_password, u_validation_salt, None, revision);
322    if user_hash[..32] == u[..32] {
323        let intermediate_key = pdf_2_b_hash(truncated_password, u_key_salt, None, revision);
324        let file_key = aes_256_cbc_decrypt_no_pad(&intermediate_key[..32], &[0u8; 16], &ue)?;
325        return Ok(Some(StandardSecurityHandler {
326            file_key,
327            string_method: strf,
328            stream_method: stmf,
329            encrypt_metadata,
330        }));
331    }
332
333    // Owner-password attempt: the hash inputs additionally include the
334    // first 48 bytes of /U, binding the owner verifier to the user
335    // record.
336    let o_validation_salt = &o[32..40];
337    let o_key_salt = &o[40..48];
338    let owner_hash = pdf_2_b_hash(
339        truncated_password,
340        o_validation_salt,
341        Some(&u[..48]),
342        revision,
343    );
344    if owner_hash[..32] == o[..32] {
345        let intermediate_key =
346            pdf_2_b_hash(truncated_password, o_key_salt, Some(&u[..48]), revision);
347        let file_key = aes_256_cbc_decrypt_no_pad(&intermediate_key[..32], &[0u8; 16], &oe)?;
348        return Ok(Some(StandardSecurityHandler {
349            file_key,
350            string_method: strf,
351            stream_method: stmf,
352            encrypt_metadata,
353        }));
354    }
355
356    Ok(None)
357}
358
359/// Algorithm 2.B (R=6) — with an R=5 short-circuit at the initial
360/// SHA-256. Returns the full 32-byte hash used by [`open_v5`] for
361/// both the verifier check and the intermediate key step.
362fn pdf_2_b_hash(
363    password: &[u8],
364    salt: &[u8],
365    user_vector: Option<&[u8]>,
366    revision: SecurityRevision,
367) -> Vec<u8> {
368    let mut hasher = Sha256::new();
369    hasher.update(password);
370    hasher.update(salt);
371    if let Some(vector) = user_vector {
372        hasher.update(vector);
373    }
374    let mut k: Vec<u8> = hasher.finalize().to_vec();
375
376    if matches!(revision, SecurityRevision::R5) {
377        return k;
378    }
379
380    // R=6 inner loop (ISO 32000-2 §7.6.4.3.3 Algorithm 2.B).
381    let user_vector = user_vector.unwrap_or(&[]);
382    let mut round: u32 = 0;
383    loop {
384        // K1 = (password || K || user_vector) repeated 64 times.
385        let mut k1 = Vec::with_capacity((password.len() + k.len() + user_vector.len()) * 64);
386        for _ in 0..64 {
387            k1.extend_from_slice(password);
388            k1.extend_from_slice(&k);
389            k1.extend_from_slice(user_vector);
390        }
391
392        // AES-128-CBC encrypt K1 without any padding (K1 is always a
393        // multiple of 16 because len(K) ∈ {32, 48, 64} and password +
394        // user_vector contribute an integer number of blocks after
395        // the 64× repetition — in practice implementations pad K1 by
396        // repetition, not PKCS#7, so no padding is added here).
397        let key: [u8; 16] = k[..16].try_into().expect("K is at least 32 bytes");
398        let iv: [u8; 16] = k[16..32].try_into().expect("K is at least 32 bytes");
399        let encrypted = aes_128_cbc_encrypt_no_pad(&key, &iv, &k1);
400
401        // Sum first-16 bytes mod 3 to choose the next hash function.
402        let selector: u32 = encrypted[..16]
403            .iter()
404            .map(|byte| u32::from(*byte % 3))
405            .sum::<u32>()
406            % 3;
407        k = match selector {
408            0 => Sha256::digest(&encrypted).to_vec(),
409            1 => Sha384::digest(&encrypted).to_vec(),
410            _ => Sha512::digest(&encrypted).to_vec(),
411        };
412
413        let last_byte = *encrypted.last().expect("AES output is non-empty");
414        round += 1;
415        if round >= 64 && u32::from(last_byte) <= round.saturating_sub(32) {
416            break;
417        }
418    }
419
420    k.truncate(32);
421    k
422}
423
424fn resolve_v5_crypt_filters(encrypt_dict: &PdfDictionary) -> PdfResult<(CryptMethod, CryptMethod)> {
425    let strf = encrypt_dict
426        .get("StrF")
427        .and_then(PdfValue::as_name)
428        .unwrap_or("Identity");
429    let stmf = encrypt_dict
430        .get("StmF")
431        .and_then(PdfValue::as_name)
432        .unwrap_or("Identity");
433    let cf = encrypt_dict.get("CF").and_then(|value| match value {
434        PdfValue::Dictionary(dict) => Some(dict),
435        _ => None,
436    });
437    Ok((
438        resolve_crypt_filter_method(cf, strf)?,
439        resolve_crypt_filter_method(cf, stmf)?,
440    ))
441}
442
443fn resolve_v4_crypt_filters(encrypt_dict: &PdfDictionary) -> PdfResult<(CryptMethod, CryptMethod)> {
444    let strf = encrypt_dict
445        .get("StrF")
446        .and_then(PdfValue::as_name)
447        .unwrap_or("Identity");
448    let stmf = encrypt_dict
449        .get("StmF")
450        .and_then(PdfValue::as_name)
451        .unwrap_or("Identity");
452    let cf = encrypt_dict.get("CF").and_then(|value| match value {
453        PdfValue::Dictionary(dict) => Some(dict),
454        _ => None,
455    });
456    Ok((
457        resolve_crypt_filter_method(cf, strf)?,
458        resolve_crypt_filter_method(cf, stmf)?,
459    ))
460}
461
462fn resolve_crypt_filter_method(cf: Option<&PdfDictionary>, name: &str) -> PdfResult<CryptMethod> {
463    // The spec reserves the `Identity` filter name for "no encryption"
464    // and specifies that it never appears in /CF; treat it as a pass-
465    // through without consulting the dictionary.
466    if name == "Identity" {
467        return Ok(CryptMethod::Identity);
468    }
469    let subfilter = cf
470        .and_then(|dict| dict.get(name))
471        .and_then(|value| match value {
472            PdfValue::Dictionary(dict) => Some(dict),
473            _ => None,
474        })
475        .ok_or_else(|| {
476            PdfError::Corrupt(format!(
477                "Encrypt /CF is missing the crypt filter entry /{name}"
478            ))
479        })?;
480    let cfm = subfilter
481        .get("CFM")
482        .and_then(PdfValue::as_name)
483        .ok_or_else(|| {
484            PdfError::Corrupt(format!("crypt filter /{name} is missing the /CFM entry"))
485        })?;
486    match cfm {
487        "V2" => Ok(CryptMethod::V2),
488        "AESV2" => Ok(CryptMethod::AesV2),
489        "AESV3" => Ok(CryptMethod::AesV3),
490        "None" => Ok(CryptMethod::Identity),
491        other => Err(PdfError::Unsupported(format!(
492            "crypt filter method /{other} is not supported (only /V2, /AESV2, and /AESV3 handled)"
493        ))),
494    }
495}
496
497/// Decrypts AES-128-CBC ciphertext whose first 16 bytes are the IV and
498/// whose payload is PKCS#7-padded. Used for V=4 /AESV2 streams and
499/// strings.
500fn aes_128_cbc_decrypt(key: &[u8], data: &[u8]) -> PdfResult<Vec<u8>> {
501    if key.len() != 16 {
502        return Err(PdfError::Corrupt(format!(
503            "AES-128 object key must be 16 bytes, got {}",
504            key.len()
505        )));
506    }
507    if data.len() < 32 || data.len() % 16 != 0 {
508        return Err(PdfError::Corrupt(format!(
509            "AES-128-CBC ciphertext must be at least 32 bytes and a multiple of 16; got {}",
510            data.len()
511        )));
512    }
513    let cipher = Aes128::new_from_slice(key)
514        .map_err(|error| PdfError::Corrupt(format!("AES-128 key rejected by cipher: {error}")))?;
515    let mut prev_block: [u8; 16] = data[..16].try_into().expect("slice is 16 bytes");
516    let mut output = Vec::with_capacity(data.len() - 16);
517    for chunk in data[16..].chunks(16) {
518        let mut block = GenericArray::clone_from_slice(chunk);
519        cipher.decrypt_block(&mut block);
520        for (plain_byte, iv_byte) in block.iter_mut().zip(prev_block.iter()) {
521            *plain_byte ^= iv_byte;
522        }
523        output.extend_from_slice(block.as_slice());
524        prev_block.copy_from_slice(chunk);
525    }
526    strip_pkcs7(output)
527}
528
529/// Decrypts AES-256-CBC ciphertext whose first 16 bytes are the IV and
530/// whose payload is PKCS#7-padded. Used by V=5 `/AESV3` strings and
531/// streams.
532fn aes_256_cbc_decrypt(key: &[u8], data: &[u8]) -> PdfResult<Vec<u8>> {
533    if key.len() != 32 {
534        return Err(PdfError::Corrupt(format!(
535            "AES-256 file key must be 32 bytes, got {}",
536            key.len()
537        )));
538    }
539    if data.len() < 32 || data.len() % 16 != 0 {
540        return Err(PdfError::Corrupt(format!(
541            "AES-256-CBC ciphertext must be at least 32 bytes and a multiple of 16; got {}",
542            data.len()
543        )));
544    }
545    let cipher = Aes256::new_from_slice(key)
546        .map_err(|error| PdfError::Corrupt(format!("AES-256 key rejected by cipher: {error}")))?;
547    let mut prev_block: [u8; 16] = data[..16].try_into().expect("slice is 16 bytes");
548    let mut output = Vec::with_capacity(data.len() - 16);
549    for chunk in data[16..].chunks(16) {
550        let mut block = GenericArray::clone_from_slice(chunk);
551        cipher.decrypt_block(&mut block);
552        for (plain_byte, iv_byte) in block.iter_mut().zip(prev_block.iter()) {
553            *plain_byte ^= iv_byte;
554        }
555        output.extend_from_slice(block.as_slice());
556        prev_block.copy_from_slice(chunk);
557    }
558    strip_pkcs7(output)
559}
560
561/// Decrypts AES-256-CBC ciphertext with a caller-supplied IV and no
562/// PKCS#7 unpadding. Used by Algorithm 2.A to recover the 32-byte file
563/// key from `/OE` / `/UE` (which are fixed 32-byte, two-block
564/// ciphertexts with an all-zero IV).
565fn aes_256_cbc_decrypt_no_pad(key: &[u8], iv: &[u8], data: &[u8]) -> PdfResult<Vec<u8>> {
566    if key.len() != 32 {
567        return Err(PdfError::Corrupt(format!(
568            "AES-256 key must be 32 bytes, got {}",
569            key.len()
570        )));
571    }
572    if iv.len() != 16 {
573        return Err(PdfError::Corrupt(format!(
574            "AES-256-CBC IV must be 16 bytes, got {}",
575            iv.len()
576        )));
577    }
578    if data.is_empty() || data.len() % 16 != 0 {
579        return Err(PdfError::Corrupt(format!(
580            "AES-256-CBC payload must be a non-empty multiple of 16 bytes; got {}",
581            data.len()
582        )));
583    }
584    let cipher = Aes256::new_from_slice(key)
585        .map_err(|error| PdfError::Corrupt(format!("AES-256 key rejected by cipher: {error}")))?;
586    let mut prev_block: [u8; 16] = iv.try_into().expect("iv length validated");
587    let mut output = Vec::with_capacity(data.len());
588    for chunk in data.chunks(16) {
589        let mut block = GenericArray::clone_from_slice(chunk);
590        cipher.decrypt_block(&mut block);
591        for (plain_byte, iv_byte) in block.iter_mut().zip(prev_block.iter()) {
592            *plain_byte ^= iv_byte;
593        }
594        output.extend_from_slice(block.as_slice());
595        prev_block.copy_from_slice(chunk);
596    }
597    Ok(output)
598}
599
600/// Encrypts the Algorithm 2.B K1 buffer with AES-128-CBC. The buffer is
601/// already a multiple of 16 bytes; the spec does not apply PKCS#7
602/// padding on this inner loop — this helper therefore expects a
603/// block-aligned input and rejects anything else.
604fn aes_128_cbc_encrypt_no_pad(key: &[u8; 16], iv: &[u8; 16], data: &[u8]) -> Vec<u8> {
605    // The callers in this module feed only block-aligned inputs; any
606    // unaligned tail is a programming error, not a runtime one.
607    assert!(
608        data.len() % 16 == 0,
609        "Algorithm 2.B K1 must be block-aligned, got {}",
610        data.len()
611    );
612    let cipher = Aes128::new_from_slice(key).expect("key length validated at compile time");
613    let mut output = Vec::with_capacity(data.len());
614    let mut prev: [u8; 16] = *iv;
615    for chunk in data.chunks(16) {
616        let mut buf = [0u8; 16];
617        for ((b, plain), iv_byte) in buf.iter_mut().zip(chunk.iter()).zip(prev.iter()) {
618            *b = plain ^ iv_byte;
619        }
620        let mut block = GenericArray::clone_from_slice(&buf);
621        cipher.encrypt_block(&mut block);
622        output.extend_from_slice(block.as_slice());
623        prev.copy_from_slice(block.as_slice());
624    }
625    output
626}
627
628fn strip_pkcs7(mut data: Vec<u8>) -> PdfResult<Vec<u8>> {
629    let Some(&pad) = data.last() else {
630        return Err(PdfError::Corrupt(
631            "AES-128-CBC plaintext is empty — missing PKCS#7 padding".to_string(),
632        ));
633    };
634    if pad == 0 || pad > 16 || (pad as usize) > data.len() {
635        return Err(PdfError::Corrupt(format!(
636            "AES-128-CBC PKCS#7 padding byte {pad} is out of range"
637        )));
638    }
639    let new_len = data.len() - pad as usize;
640    if !data[new_len..].iter().all(|byte| *byte == pad) {
641        return Err(PdfError::Corrupt(
642            "AES-128-CBC PKCS#7 padding bytes do not match".to_string(),
643        ));
644    }
645    data.truncate(new_len);
646    Ok(data)
647}
648
649fn pdf_string_bytes(dict: &PdfDictionary, key: &str) -> PdfResult<Vec<u8>> {
650    match dict.get(key) {
651        Some(PdfValue::String(s)) => Ok(s.0.clone()),
652        Some(_) => Err(PdfError::Corrupt(format!("Encrypt /{key} is not a string"))),
653        None => Err(PdfError::Corrupt(format!(
654            "Encrypt dictionary missing /{key}"
655        ))),
656    }
657}
658
659fn compute_file_key(
660    password: &[u8],
661    o_entry: &[u8],
662    permissions: i32,
663    id_first: &[u8],
664    key_length_bytes: usize,
665    revision: SecurityRevision,
666    encrypt_metadata: bool,
667) -> Vec<u8> {
668    // Algorithm 2 (PDF 1.7 section 7.6.3.3):
669    //   1. Pad the password to 32 bytes.
670    let padded = pad_password(password);
671    let mut hasher = Md5::new();
672    hasher.update(padded);
673    //   2. Append /O.
674    hasher.update(o_entry);
675    //   3. Append /P (4 bytes little-endian).
676    hasher.update(permissions.to_le_bytes());
677    //   4. Append the first element of /ID.
678    hasher.update(id_first);
679    //   5. (R>=4 only) When /EncryptMetadata is explicitly false, append
680    //      0xFFFFFFFF. R<=3 skips this step.
681    if matches!(revision, SecurityRevision::R4) && !encrypt_metadata {
682        hasher.update([0xFFu8; 4]);
683    }
684    let mut digest = hasher.finalize_reset();
685
686    // Algorithm 2, step 6: for R>=3, re-MD5 the first n bytes 50 times.
687    if matches!(revision, SecurityRevision::R3 | SecurityRevision::R4) {
688        for _ in 0..50 {
689            hasher.update(&digest[..key_length_bytes]);
690            digest = hasher.finalize_reset();
691        }
692    }
693    digest[..key_length_bytes].to_vec()
694}
695
696fn pad_password(password: &[u8]) -> [u8; 32] {
697    let mut out = [0u8; 32];
698    let take = password.len().min(32);
699    out[..take].copy_from_slice(&password[..take]);
700    if take < 32 {
701        out[take..].copy_from_slice(&PASSWORD_PADDING[..32 - take]);
702    }
703    out
704}
705
706fn recover_user_password_from_owner(
707    owner_password: &[u8],
708    o_entry: &[u8],
709    revision: SecurityRevision,
710    key_length_bytes: usize,
711) -> Vec<u8> {
712    // Algorithm 7 (PDF 1.7 §7.6.3.4). Symmetric inverse of Algorithm 3:
713    //   1. Pad the owner password and MD5 it.
714    //   2. For R>=3 re-hash 50 times.
715    //   3. Truncate to `key_length_bytes` — this is the RC4 key used on /O.
716    //   4. For R=2, RC4-decrypt /O once with that key.
717    //      For R>=3, RC4-decrypt /O 20 times with keys (base XOR i) for i
718    //      decreasing from 19 down to 0.
719    //   5. The result is the padded user password.
720    let padded = pad_password(owner_password);
721    let mut hasher = Md5::new();
722    hasher.update(padded);
723    let mut digest = hasher.finalize_reset();
724    if matches!(revision, SecurityRevision::R3 | SecurityRevision::R4) {
725        for _ in 0..50 {
726            hasher.update(&digest[..key_length_bytes]);
727            digest = hasher.finalize_reset();
728        }
729    }
730    let base_key = digest[..key_length_bytes].to_vec();
731
732    match revision {
733        SecurityRevision::R2 => rc4(&base_key, o_entry),
734        SecurityRevision::R3 | SecurityRevision::R4 => {
735            let mut buffer = o_entry.to_vec();
736            for i in (0u8..=19).rev() {
737                let key: Vec<u8> = base_key.iter().map(|byte| byte ^ i).collect();
738                buffer = rc4(&key, &buffer);
739            }
740            buffer
741        }
742        SecurityRevision::R5 | SecurityRevision::R6 => {
743            unreachable!("V=5 takes open_v5; Algorithm 7 is not applicable to R=5 / R=6")
744        }
745    }
746}
747
748fn authenticate_user_password(
749    file_key: &[u8],
750    revision: SecurityRevision,
751    u_entry: &[u8],
752    id_first: &[u8],
753) -> bool {
754    match revision {
755        SecurityRevision::R2 => {
756            // Algorithm 4: encrypt the password padding with the file key; the
757            // full 32 bytes must equal /U.
758            let encrypted = rc4(file_key, &PASSWORD_PADDING);
759            encrypted == u_entry
760        }
761        SecurityRevision::R5 | SecurityRevision::R6 => {
762            unreachable!("V=5 takes open_v5; Algorithm 5 is not applicable to R=5 / R=6")
763        }
764        SecurityRevision::R3 | SecurityRevision::R4 => {
765            // Algorithm 5.
766            let mut hasher = Md5::new();
767            hasher.update(PASSWORD_PADDING);
768            hasher.update(id_first);
769            let seed = hasher.finalize();
770            let mut buffer = rc4(file_key, &seed);
771            for i in 1u8..=19 {
772                let key: Vec<u8> = file_key.iter().map(|byte| byte ^ i).collect();
773                buffer = rc4(&key, &buffer);
774            }
775            // The first 16 bytes of /U must match the buffer; the remaining
776            // 16 bytes are arbitrary padding.
777            buffer.as_slice() == &u_entry[..16]
778        }
779    }
780}
781
782fn md5_bytes(input: &[u8]) -> [u8; 16] {
783    let mut hasher = Md5::new();
784    hasher.update(input);
785    hasher.finalize().into()
786}
787
/// RC4 stream cipher: XORs `data` with the keystream derived from `key`.
///
/// Encryption and decryption are the same operation.
///
/// # Panics
///
/// Panics if `key` is empty.
fn rc4(key: &[u8], data: &[u8]) -> Vec<u8> {
    // Key scheduling: start from the identity permutation and shuffle it
    // under control of the key bytes.
    let mut state = [0u8; 256];
    for (slot, value) in state.iter_mut().zip(0u8..=255) {
        *slot = value;
    }
    let mut swap_index: u8 = 0;
    for position in 0..256usize {
        swap_index = swap_index
            .wrapping_add(state[position])
            .wrapping_add(key[position % key.len()]);
        state.swap(position, usize::from(swap_index));
    }

    // Keystream generation: one keystream byte per input byte.
    let mut i: u8 = 0;
    let mut j: u8 = 0;
    data.iter()
        .map(|&byte| {
            i = i.wrapping_add(1);
            j = j.wrapping_add(state[usize::from(i)]);
            state.swap(usize::from(i), usize::from(j));
            let pick = state[usize::from(i)].wrapping_add(state[usize::from(j)]);
            byte ^ state[usize::from(pick)]
        })
        .collect()
}
810
811#[cfg(test)]
812pub(crate) mod test_helpers {
813    //! Expose the low-level primitives so parser tests can build a tiny
814    //! encrypted PDF end-to-end — pick an arbitrary `/O`, derive a file key
815    //! from the empty password, encrypt each object's data with per-object
816    //! RC4, and then round-trip it through `parse_pdf`.
817
818    use super::*;
819
    /// Public forwarder to the parent module's private `rc4` so fixture
    /// builders outside this file can reach it. `key` and `data` are passed
    /// through unchanged and the parent's output is returned as-is.
    pub fn rc4(key: &[u8], data: &[u8]) -> Vec<u8> {
        super::rc4(key, data)
    }
823
824    pub fn compute_file_key(
825        password: &[u8],
826        o_entry: &[u8],
827        permissions: i32,
828        id_first: &[u8],
829        key_length_bytes: usize,
830    ) -> Vec<u8> {
831        // Callers that do not care about the revision use the R=3 variant,
832        // which matches the write side of the existing RC4 fixtures.
833        super::compute_file_key(
834            password,
835            o_entry,
836            permissions,
837            id_first,
838            key_length_bytes,
839            SecurityRevision::R3,
840            true,
841        )
842    }
843
844    pub fn compute_file_key_with_revision(
845        password: &[u8],
846        o_entry: &[u8],
847        permissions: i32,
848        id_first: &[u8],
849        key_length_bytes: usize,
850        revision: SecurityRevision,
851    ) -> Vec<u8> {
852        super::compute_file_key(
853            password,
854            o_entry,
855            permissions,
856            id_first,
857            key_length_bytes,
858            revision,
859            true,
860        )
861    }
862
863    /// R=4 variant of the file-key derivation, exposed so AES-128 test
864    /// fixtures can build a matching file key and `/U` entry. Mirrors
865    /// [`compute_file_key`] but honours `encrypt_metadata` so the
866    /// Algorithm 2 step-5 branch (append 0xFFFFFFFF) can be exercised.
867    pub fn compute_file_key_r4(
868        password: &[u8],
869        o_entry: &[u8],
870        permissions: i32,
871        id_first: &[u8],
872        encrypt_metadata: bool,
873    ) -> Vec<u8> {
874        super::compute_file_key(
875            password,
876            o_entry,
877            permissions,
878            id_first,
879            16,
880            SecurityRevision::R4,
881            encrypt_metadata,
882        )
883    }
884
885    /// Produce the 32-byte `/U` value that corresponds to the empty user
886    /// password under revision 3. The first 16 bytes are the RC4 output
887    /// from algorithm 5; the remaining 16 bytes are arbitrary padding
888    /// (here zeroed, which real writers often do).
889    pub fn compute_u_r3(file_key: &[u8], id_first: &[u8]) -> Vec<u8> {
890        let mut hasher = Md5::new();
891        hasher.update(PASSWORD_PADDING);
892        hasher.update(id_first);
893        let seed = hasher.finalize();
894        let mut buffer = super::rc4(file_key, &seed);
895        for i in 1u8..=19 {
896            let key: Vec<u8> = file_key.iter().map(|byte| byte ^ i).collect();
897            buffer = super::rc4(&key, &buffer);
898        }
899        buffer.resize(32, 0);
900        buffer
901    }
902
903    /// Build the `/O` value for the Encrypt dictionary, given the owner
904    /// and user passwords and the security revision. Algorithm 3 — the
905    /// write-side inverse of Algorithm 7, used by tests to construct
906    /// synthetic encrypted PDFs with both owner and user passwords
907    /// populated.
908    pub fn compute_o(
909        owner_password: &[u8],
910        user_password: &[u8],
911        revision: SecurityRevision,
912        key_length_bytes: usize,
913    ) -> Vec<u8> {
914        let padded_owner = pad_password(owner_password);
915        let mut hasher = Md5::new();
916        hasher.update(padded_owner);
917        let mut digest = hasher.finalize_reset();
918        if matches!(revision, SecurityRevision::R3 | SecurityRevision::R4) {
919            for _ in 0..50 {
920                hasher.update(&digest[..key_length_bytes]);
921                digest = hasher.finalize_reset();
922            }
923        }
924        let base_key = digest[..key_length_bytes].to_vec();
925
926        let padded_user = pad_password(user_password);
927        match revision {
928            SecurityRevision::R2 => super::rc4(&base_key, &padded_user),
929            SecurityRevision::R3 | SecurityRevision::R4 => {
930                let mut buffer = super::rc4(&base_key, &padded_user);
931                for i in 1u8..=19 {
932                    let key: Vec<u8> = base_key.iter().map(|byte| byte ^ i).collect();
933                    buffer = super::rc4(&key, &buffer);
934                }
935                buffer
936            }
937            SecurityRevision::R5 | SecurityRevision::R6 => {
938                panic!("compute_o is not applicable to V=5 — use compute_v5_u / compute_v5_o")
939            }
940        }
941    }
942
943    /// Build the per-object RC4 key in exactly the same way the handler
944    /// does, so tests can encrypt a known plaintext and then check that
945    /// the parser's decryption path inverts the transform.
946    pub fn object_key(file_key: &[u8], object_number: u32, generation: u16) -> Vec<u8> {
947        let mut material = Vec::with_capacity(file_key.len() + 5);
948        material.extend_from_slice(file_key);
949        let num = object_number.to_le_bytes();
950        material.push(num[0]);
951        material.push(num[1]);
952        material.push(num[2]);
953        let gen_bytes = generation.to_le_bytes();
954        material.push(gen_bytes[0]);
955        material.push(gen_bytes[1]);
956        let digest = super::md5_bytes(&material);
957        let truncated_len = (file_key.len() + 5).min(16);
958        digest[..truncated_len].to_vec()
959    }
960
961    /// AES variant of [`object_key`]: appends the literal `sAlT` suffix
962    /// before the MD5 so the V=4 /AESV2 path derives a distinct key
963    /// from the RC4 path for the same indirect object.
964    pub fn object_key_aes(file_key: &[u8], object_number: u32, generation: u16) -> Vec<u8> {
965        let mut material = Vec::with_capacity(file_key.len() + 9);
966        material.extend_from_slice(file_key);
967        let num = object_number.to_le_bytes();
968        material.push(num[0]);
969        material.push(num[1]);
970        material.push(num[2]);
971        let gen_bytes = generation.to_le_bytes();
972        material.push(gen_bytes[0]);
973        material.push(gen_bytes[1]);
974        material.extend_from_slice(b"sAlT");
975        let digest = super::md5_bytes(&material);
976        let truncated_len = (file_key.len() + 5).min(16);
977        digest[..truncated_len].to_vec()
978    }
979
980    /// Compute the 48-byte V=5 `/U` entry plus the 32-byte `/UE` entry
981    /// given the user password, 8-byte validation salt, 8-byte key salt,
982    /// and 32-byte file key. Only used by tests to build synthetic V=5
983    /// fixtures.
984    pub fn compute_v5_u_and_ue(
985        user_password: &[u8],
986        validation_salt: &[u8; 8],
987        key_salt: &[u8; 8],
988        file_key: &[u8; 32],
989        revision: SecurityRevision,
990    ) -> (Vec<u8>, Vec<u8>) {
991        let verifier = super::pdf_2_b_hash(user_password, validation_salt, None, revision);
992        let mut u = Vec::with_capacity(48);
993        u.extend_from_slice(&verifier[..32]);
994        u.extend_from_slice(validation_salt);
995        u.extend_from_slice(key_salt);
996
997        let intermediate = super::pdf_2_b_hash(user_password, key_salt, None, revision);
998        let ue = aes_256_cbc_encrypt_no_pad(&intermediate[..32], &[0u8; 16], file_key);
999        (u, ue)
1000    }
1001
1002    /// Compute the 48-byte V=5 `/O` entry plus the 32-byte `/OE` entry
1003    /// given the owner password, 8-byte validation salt, 8-byte key salt,
1004    /// the 48-byte `/U` vector (typically the user hash + salts), and the
1005    /// 32-byte file key. Only used by tests to build synthetic V=5
1006    /// fixtures.
1007    pub fn compute_v5_o_and_oe(
1008        owner_password: &[u8],
1009        validation_salt: &[u8; 8],
1010        key_salt: &[u8; 8],
1011        u_vector: &[u8; 48],
1012        file_key: &[u8; 32],
1013        revision: SecurityRevision,
1014    ) -> (Vec<u8>, Vec<u8>) {
1015        let verifier =
1016            super::pdf_2_b_hash(owner_password, validation_salt, Some(u_vector), revision);
1017        let mut o = Vec::with_capacity(48);
1018        o.extend_from_slice(&verifier[..32]);
1019        o.extend_from_slice(validation_salt);
1020        o.extend_from_slice(key_salt);
1021
1022        let intermediate = super::pdf_2_b_hash(owner_password, key_salt, Some(u_vector), revision);
1023        let oe = aes_256_cbc_encrypt_no_pad(&intermediate[..32], &[0u8; 16], file_key);
1024        (o, oe)
1025    }
1026
1027    /// AES-256-CBC encrypt used by V=5 content streams and strings. The
1028    /// ciphertext is prefixed with the 16-byte IV and PKCS#7-padded to a
1029    /// 16-byte block boundary — this matches exactly what the parser's
1030    /// decryption path expects.
1031    pub fn aes_256_cbc_encrypt(key: &[u8], iv: &[u8; 16], plaintext: &[u8]) -> Vec<u8> {
1032        assert_eq!(key.len(), 32, "AES-256 key must be 32 bytes");
1033        let cipher = Aes256::new_from_slice(key).expect("key length validated");
1034        let pad_len = 16 - (plaintext.len() % 16);
1035        let mut padded = Vec::with_capacity(plaintext.len() + pad_len);
1036        padded.extend_from_slice(plaintext);
1037        padded.extend(std::iter::repeat_n(pad_len as u8, pad_len));
1038        let mut output = Vec::with_capacity(16 + padded.len());
1039        output.extend_from_slice(iv);
1040        let mut prev: [u8; 16] = *iv;
1041        for chunk in padded.chunks(16) {
1042            let mut buf = [0u8; 16];
1043            for ((b, plain), iv_byte) in buf.iter_mut().zip(chunk.iter()).zip(prev.iter()) {
1044                *b = plain ^ iv_byte;
1045            }
1046            let mut block = GenericArray::clone_from_slice(&buf);
1047            cipher.encrypt_block(&mut block);
1048            output.extend_from_slice(block.as_slice());
1049            prev.copy_from_slice(block.as_slice());
1050        }
1051        output
1052    }
1053
1054    fn aes_256_cbc_encrypt_no_pad(key: &[u8], iv: &[u8; 16], data: &[u8]) -> Vec<u8> {
1055        assert_eq!(key.len(), 32, "AES-256 key must be 32 bytes");
1056        assert!(data.len() % 16 == 0, "plaintext must be block-aligned");
1057        let cipher = Aes256::new_from_slice(key).expect("key length validated");
1058        let mut output = Vec::with_capacity(data.len());
1059        let mut prev: [u8; 16] = *iv;
1060        for chunk in data.chunks(16) {
1061            let mut buf = [0u8; 16];
1062            for ((b, plain), iv_byte) in buf.iter_mut().zip(chunk.iter()).zip(prev.iter()) {
1063                *b = plain ^ iv_byte;
1064            }
1065            let mut block = GenericArray::clone_from_slice(&buf);
1066            cipher.encrypt_block(&mut block);
1067            output.extend_from_slice(block.as_slice());
1068            prev.copy_from_slice(block.as_slice());
1069        }
1070        output
1071    }
1072
1073    /// Encrypt `plaintext` with AES-128-CBC, PKCS#7-padded, and prefix
1074    /// the 16-byte IV — matching exactly what the parser's decryption
1075    /// path expects. Used by tests to build synthetic V=4 fixtures.
1076    pub fn aes_128_cbc_encrypt(key: &[u8], iv: &[u8; 16], plaintext: &[u8]) -> Vec<u8> {
1077        use aes::cipher::BlockEncrypt;
1078
1079        assert_eq!(key.len(), 16, "AES-128 key must be 16 bytes");
1080        let cipher = Aes128::new_from_slice(key).expect("key length validated");
1081
1082        // Pad with PKCS#7, always appending at least one byte of padding.
1083        let pad_len = 16 - (plaintext.len() % 16);
1084        let mut padded = Vec::with_capacity(plaintext.len() + pad_len);
1085        padded.extend_from_slice(plaintext);
1086        padded.extend(std::iter::repeat_n(pad_len as u8, pad_len));
1087
1088        let mut output = Vec::with_capacity(16 + padded.len());
1089        output.extend_from_slice(iv);
1090        let mut prev: [u8; 16] = *iv;
1091        for chunk in padded.chunks(16) {
1092            let mut block = [0u8; 16];
1093            for ((b, plain), iv_byte) in block.iter_mut().zip(chunk.iter()).zip(prev.iter()) {
1094                *b = plain ^ iv_byte;
1095            }
1096            let mut arr = GenericArray::clone_from_slice(&block);
1097            cipher.encrypt_block(&mut arr);
1098            output.extend_from_slice(arr.as_slice());
1099            prev.copy_from_slice(arr.as_slice());
1100        }
1101        output
1102    }
1103}
1104
1105#[cfg(test)]
1106mod tests {
1107    use super::*;
1108
1109    #[test]
1110    fn rc4_empty_input_returns_empty() {
1111        assert_eq!(rc4(b"Key", b""), Vec::<u8>::new());
1112    }
1113
1114    #[test]
1115    fn rc4_matches_known_vector() {
1116        // RFC 6229 test vector: key "Key", data "Plaintext".
1117        let key = b"Key";
1118        let plaintext = b"Plaintext";
1119        let encrypted = rc4(key, plaintext);
1120        // Decrypting with the same keystream yields the original bytes.
1121        let decrypted = rc4(key, &encrypted);
1122        assert_eq!(decrypted, plaintext);
1123        // The ciphertext should match the well-known RFC 6229 output.
1124        assert_eq!(
1125            encrypted,
1126            [0xBB, 0xF3, 0x16, 0xE8, 0xD9, 0x40, 0xAF, 0x0A, 0xD3]
1127        );
1128    }
1129
1130    #[test]
1131    fn pad_password_short_pads_with_padding_string() {
1132        let padded = pad_password(b"ab");
1133        assert_eq!(padded[0], b'a');
1134        assert_eq!(padded[1], b'b');
1135        assert_eq!(padded[2], PASSWORD_PADDING[0]);
1136        assert_eq!(padded[31], PASSWORD_PADDING[29]);
1137    }
1138
1139    #[test]
1140    fn pad_password_truncates_to_32_bytes() {
1141        let long = vec![b'x'; 64];
1142        let padded = pad_password(&long);
1143        assert_eq!(padded, [b'x'; 32]);
1144    }
1145
1146    fn build_encrypt_dict_r3(
1147        o_entry: Vec<u8>,
1148        u_entry: Vec<u8>,
1149        permissions: i32,
1150    ) -> PdfDictionary {
1151        let mut dict = PdfDictionary::default();
1152        dict.insert("Filter".to_string(), PdfValue::Name("Standard".to_string()));
1153        dict.insert("V".to_string(), PdfValue::Integer(2));
1154        dict.insert("R".to_string(), PdfValue::Integer(3));
1155        dict.insert("Length".to_string(), PdfValue::Integer(128));
1156        dict.insert(
1157            "O".to_string(),
1158            PdfValue::String(crate::types::PdfString(o_entry)),
1159        );
1160        dict.insert(
1161            "U".to_string(),
1162            PdfValue::String(crate::types::PdfString(u_entry)),
1163        );
1164        dict.insert("P".to_string(), PdfValue::Integer(permissions as i64));
1165        dict
1166    }
1167
1168    fn build_r3_handler_inputs(
1169        user_password: &[u8],
1170        owner_password: &[u8],
1171        id_first: &[u8],
1172    ) -> (PdfDictionary, Vec<u8>) {
1173        let key_length_bytes = 16;
1174        let permissions: i32 = -4;
1175        let o = test_helpers::compute_o(
1176            owner_password,
1177            user_password,
1178            SecurityRevision::R3,
1179            key_length_bytes,
1180        );
1181        let file_key = test_helpers::compute_file_key(
1182            user_password,
1183            &o,
1184            permissions,
1185            id_first,
1186            key_length_bytes,
1187        );
1188        let u = test_helpers::compute_u_r3(&file_key, id_first);
1189        (build_encrypt_dict_r3(o, u, permissions), file_key)
1190    }
1191
1192    #[test]
1193    fn open_authenticates_user_password() {
1194        let id_first = b"synthetic-id-0123456789abcdef";
1195        let (dict, expected_file_key) = build_r3_handler_inputs(b"userpw", b"ownerpw", id_first);
1196        let handler = StandardSecurityHandler::open(&dict, id_first, b"userpw")
1197            .expect("open succeeds")
1198            .expect("user password authenticates");
1199        assert_eq!(handler.file_key, expected_file_key);
1200    }
1201
1202    #[test]
1203    fn open_authenticates_owner_password() {
1204        let id_first = b"synthetic-id-0123456789abcdef";
1205        let (dict, expected_file_key) = build_r3_handler_inputs(b"userpw", b"ownerpw", id_first);
1206        let handler = StandardSecurityHandler::open(&dict, id_first, b"ownerpw")
1207            .expect("open succeeds")
1208            .expect("owner password authenticates");
1209        // File key must match the one derived from the user password — the
1210        // owner password is only a way of recovering it.
1211        assert_eq!(handler.file_key, expected_file_key);
1212    }
1213
1214    #[test]
1215    fn open_rejects_wrong_password() {
1216        let id_first = b"synthetic-id-0123456789abcdef";
1217        let (dict, _) = build_r3_handler_inputs(b"userpw", b"ownerpw", id_first);
1218        let result = StandardSecurityHandler::open(&dict, id_first, b"wrongpw")
1219            .expect("open does not fail, only reports authentication");
1220        assert!(result.is_none());
1221    }
1222
1223    #[test]
1224    fn open_accepts_utf8_password() {
1225        let id_first = b"synthetic-id-0123456789abcdef";
1226        let password = "pässwörd".as_bytes();
1227        let (dict, _) = build_r3_handler_inputs(password, b"ownerpw", id_first);
1228        let handler = StandardSecurityHandler::open(&dict, id_first, password)
1229            .expect("open succeeds")
1230            .expect("UTF-8 password authenticates");
1231        assert_eq!(handler.file_key.len(), 16);
1232    }
1233
1234    fn build_encrypt_dict_v4_aesv2(
1235        o_entry: Vec<u8>,
1236        u_entry: Vec<u8>,
1237        permissions: i32,
1238        encrypt_metadata: Option<bool>,
1239    ) -> PdfDictionary {
1240        let mut std_cf = PdfDictionary::default();
1241        std_cf.insert("CFM".to_string(), PdfValue::Name("AESV2".to_string()));
1242        std_cf.insert("Length".to_string(), PdfValue::Integer(16));
1243        std_cf.insert(
1244            "AuthEvent".to_string(),
1245            PdfValue::Name("DocOpen".to_string()),
1246        );
1247
1248        let mut cf = PdfDictionary::default();
1249        cf.insert("StdCF".to_string(), PdfValue::Dictionary(std_cf));
1250
1251        let mut dict = PdfDictionary::default();
1252        dict.insert("Filter".to_string(), PdfValue::Name("Standard".to_string()));
1253        dict.insert("V".to_string(), PdfValue::Integer(4));
1254        dict.insert("R".to_string(), PdfValue::Integer(4));
1255        dict.insert("Length".to_string(), PdfValue::Integer(128));
1256        dict.insert("CF".to_string(), PdfValue::Dictionary(cf));
1257        dict.insert("StmF".to_string(), PdfValue::Name("StdCF".to_string()));
1258        dict.insert("StrF".to_string(), PdfValue::Name("StdCF".to_string()));
1259        dict.insert(
1260            "O".to_string(),
1261            PdfValue::String(crate::types::PdfString(o_entry)),
1262        );
1263        dict.insert(
1264            "U".to_string(),
1265            PdfValue::String(crate::types::PdfString(u_entry)),
1266        );
1267        dict.insert("P".to_string(), PdfValue::Integer(permissions as i64));
1268        if let Some(value) = encrypt_metadata {
1269            dict.insert("EncryptMetadata".to_string(), PdfValue::Bool(value));
1270        }
1271        dict
1272    }
1273
1274    fn build_v4_handler_inputs(
1275        user_password: &[u8],
1276        owner_password: &[u8],
1277        id_first: &[u8],
1278        encrypt_metadata: Option<bool>,
1279    ) -> (PdfDictionary, Vec<u8>) {
1280        let permissions: i32 = -4;
1281        let o = test_helpers::compute_o(owner_password, user_password, SecurityRevision::R4, 16);
1282        let file_key = test_helpers::compute_file_key_r4(
1283            user_password,
1284            &o,
1285            permissions,
1286            id_first,
1287            encrypt_metadata.unwrap_or(true),
1288        );
1289        let u = test_helpers::compute_u_r3(&file_key, id_first);
1290        (
1291            build_encrypt_dict_v4_aesv2(o, u, permissions, encrypt_metadata),
1292            file_key,
1293        )
1294    }
1295
1296    #[test]
1297    fn open_v4_aesv2_handler_authenticates_user_password() {
1298        let id_first = b"v4-synthetic-id-0123456789";
1299        let (dict, expected_file_key) =
1300            build_v4_handler_inputs(b"userpw", b"ownerpw", id_first, None);
1301        let handler = StandardSecurityHandler::open(&dict, id_first, b"userpw")
1302            .expect("open succeeds")
1303            .expect("user password authenticates on V=4");
1304        assert_eq!(handler.file_key, expected_file_key);
1305        assert_eq!(handler.string_method, CryptMethod::AesV2);
1306        assert_eq!(handler.stream_method, CryptMethod::AesV2);
1307        assert!(handler.encrypt_metadata);
1308    }
1309
1310    #[test]
1311    fn open_v4_aesv2_handler_authenticates_owner_password() {
1312        let id_first = b"v4-synthetic-id-0123456789";
1313        let (dict, expected_file_key) =
1314            build_v4_handler_inputs(b"userpw", b"ownerpw", id_first, None);
1315        let handler = StandardSecurityHandler::open(&dict, id_first, b"ownerpw")
1316            .expect("open succeeds")
1317            .expect("owner password authenticates on V=4");
1318        assert_eq!(handler.file_key, expected_file_key);
1319    }
1320
1321    #[test]
1322    fn open_v4_honours_encrypt_metadata_false() {
1323        let id_first = b"v4-metadata-id";
1324        let (dict, _) = build_v4_handler_inputs(b"", b"ownerpw", id_first, Some(false));
1325        let handler = StandardSecurityHandler::open(&dict, id_first, b"")
1326            .expect("open succeeds")
1327            .expect("empty password authenticates");
1328        assert!(!handler.encrypts_metadata());
1329    }
1330
1331    #[test]
1332    fn open_v4_identity_crypt_filter_is_passthrough() {
1333        let id_first = b"v4-identity-id";
1334        let (dict_v4, _) = build_v4_handler_inputs(b"", b"ownerpw", id_first, None);
1335        let mut dict = dict_v4;
1336        dict.insert("StrF".to_string(), PdfValue::Name("Identity".to_string()));
1337        dict.insert("StmF".to_string(), PdfValue::Name("Identity".to_string()));
1338
1339        let handler = StandardSecurityHandler::open(&dict, id_first, b"")
1340            .expect("open succeeds")
1341            .expect("empty password authenticates");
1342        assert_eq!(handler.string_method, CryptMethod::Identity);
1343        assert_eq!(handler.stream_method, CryptMethod::Identity);
1344
1345        let ciphertext = b"hello";
1346        let plaintext = handler
1347            .decrypt_bytes(ciphertext, ObjectRef::new(4, 0), BytesKind::Stream)
1348            .expect("identity passes bytes through");
1349        assert_eq!(plaintext, ciphertext);
1350    }
1351
1352    #[test]
1353    fn open_v4_rejects_unsupported_cfm() {
1354        let id_first = b"v4-unsupported-id";
1355
1356        let (dict_v4, _) = build_v4_handler_inputs(b"", b"ownerpw", id_first, None);
1357        let mut dict = dict_v4;
1358        let mut std_cf = PdfDictionary::default();
1359        // `AESV4` is not defined in any PDF version — it should be
1360        // rejected as unsupported. We previously tested /AESV3 here, but
1361        // V=5 has since landed, so /AESV3 is now a legal method name (it
1362        // just happens to be spec-invalid under V=4).
1363        std_cf.insert("CFM".to_string(), PdfValue::Name("AESV4".to_string()));
1364        std_cf.insert("Length".to_string(), PdfValue::Integer(32));
1365        let mut cf = PdfDictionary::default();
1366        cf.insert("StdCF".to_string(), PdfValue::Dictionary(std_cf));
1367        dict.insert("CF".to_string(), PdfValue::Dictionary(cf));
1368
1369        let error = StandardSecurityHandler::open(&dict, id_first, b"")
1370            .expect_err("unknown CFM must be rejected as unsupported");
1371        assert!(matches!(error, PdfError::Unsupported(_)), "got {error:?}");
1372    }
1373
1374    #[test]
1375    fn aes_128_cbc_round_trip() {
1376        let key = [0x11u8; 16];
1377        let iv = [0x22u8; 16];
1378        let plaintext = b"redact me, please";
1379        let ciphertext = test_helpers::aes_128_cbc_encrypt(&key, &iv, plaintext);
1380        let decrypted = aes_128_cbc_decrypt(&key, &ciphertext).expect("round trip succeeds");
1381        assert_eq!(decrypted, plaintext);
1382    }
1383
1384    #[test]
1385    fn aes_128_cbc_rejects_bad_pkcs7_padding() {
1386        let key = [0x11u8; 16];
1387        let iv = [0x22u8; 16];
1388        let plaintext = b"abcdef";
1389        let mut ciphertext = test_helpers::aes_128_cbc_encrypt(&key, &iv, plaintext);
1390        // Flip the last ciphertext byte so the plaintext padding becomes
1391        // invalid (with high probability) after decryption.
1392        let last = ciphertext.len() - 1;
1393        ciphertext[last] ^= 0xFF;
1394        let error =
1395            aes_128_cbc_decrypt(&key, &ciphertext).expect_err("corrupted padding must be rejected");
1396        assert!(matches!(error, PdfError::Corrupt(_)), "got {error:?}");
1397    }
1398
1399    #[test]
1400    fn aes_128_cbc_rejects_short_ciphertext() {
1401        let key = [0x11u8; 16];
1402        let error = aes_128_cbc_decrypt(&key, &[0u8; 16])
1403            .expect_err("ciphertext shorter than IV+1 block must be rejected");
1404        assert!(matches!(error, PdfError::Corrupt(_)), "got {error:?}");
1405    }
1406
1407    fn build_encrypt_dict_v5_aesv3(
1408        o: Vec<u8>,
1409        u: Vec<u8>,
1410        oe: Vec<u8>,
1411        ue: Vec<u8>,
1412        permissions: i32,
1413        perms: Option<Vec<u8>>,
1414        revision: SecurityRevision,
1415    ) -> PdfDictionary {
1416        let mut std_cf = PdfDictionary::default();
1417        std_cf.insert("CFM".to_string(), PdfValue::Name("AESV3".to_string()));
1418        std_cf.insert("Length".to_string(), PdfValue::Integer(32));
1419        std_cf.insert(
1420            "AuthEvent".to_string(),
1421            PdfValue::Name("DocOpen".to_string()),
1422        );
1423
1424        let mut cf = PdfDictionary::default();
1425        cf.insert("StdCF".to_string(), PdfValue::Dictionary(std_cf));
1426
1427        let r_value = match revision {
1428            SecurityRevision::R5 => 5,
1429            SecurityRevision::R6 => 6,
1430            _ => panic!("test helper only supports R5 / R6"),
1431        };
1432
1433        let mut dict = PdfDictionary::default();
1434        dict.insert("Filter".to_string(), PdfValue::Name("Standard".to_string()));
1435        dict.insert("V".to_string(), PdfValue::Integer(5));
1436        dict.insert("R".to_string(), PdfValue::Integer(r_value));
1437        dict.insert("Length".to_string(), PdfValue::Integer(256));
1438        dict.insert("CF".to_string(), PdfValue::Dictionary(cf));
1439        dict.insert("StmF".to_string(), PdfValue::Name("StdCF".to_string()));
1440        dict.insert("StrF".to_string(), PdfValue::Name("StdCF".to_string()));
1441        dict.insert(
1442            "O".to_string(),
1443            PdfValue::String(crate::types::PdfString(o)),
1444        );
1445        dict.insert(
1446            "U".to_string(),
1447            PdfValue::String(crate::types::PdfString(u)),
1448        );
1449        dict.insert(
1450            "OE".to_string(),
1451            PdfValue::String(crate::types::PdfString(oe)),
1452        );
1453        dict.insert(
1454            "UE".to_string(),
1455            PdfValue::String(crate::types::PdfString(ue)),
1456        );
1457        dict.insert("P".to_string(), PdfValue::Integer(permissions as i64));
1458        if let Some(value) = perms {
1459            dict.insert(
1460                "Perms".to_string(),
1461                PdfValue::String(crate::types::PdfString(value)),
1462            );
1463        }
1464        dict
1465    }
1466
1467    fn build_v5_handler_inputs(
1468        user_password: &[u8],
1469        owner_password: &[u8],
1470        revision: SecurityRevision,
1471    ) -> (PdfDictionary, [u8; 32]) {
1472        let file_key = [0x13u8; 32];
1473        let u_validation_salt = [0xAAu8; 8];
1474        let u_key_salt = [0xBBu8; 8];
1475        let o_validation_salt = [0xCCu8; 8];
1476        let o_key_salt = [0xDDu8; 8];
1477
1478        let (u, ue) = test_helpers::compute_v5_u_and_ue(
1479            user_password,
1480            &u_validation_salt,
1481            &u_key_salt,
1482            &file_key,
1483            revision,
1484        );
1485        let u_vector: [u8; 48] = u.as_slice().try_into().expect("U is 48 bytes");
1486        let (o, oe) = test_helpers::compute_v5_o_and_oe(
1487            owner_password,
1488            &o_validation_salt,
1489            &o_key_salt,
1490            &u_vector,
1491            &file_key,
1492            revision,
1493        );
1494
1495        (
1496            build_encrypt_dict_v5_aesv3(o, u, oe, ue, -4, None, revision),
1497            file_key,
1498        )
1499    }
1500
1501    #[test]
1502    fn open_v5_r6_authenticates_user_password() {
1503        let (dict, expected_file_key) =
1504            build_v5_handler_inputs(b"userpw", b"ownerpw", SecurityRevision::R6);
1505        let handler = StandardSecurityHandler::open(&dict, b"", b"userpw")
1506            .expect("open succeeds")
1507            .expect("user password authenticates on V=5 / R=6");
1508        assert_eq!(handler.file_key, expected_file_key);
1509        assert_eq!(handler.string_method, CryptMethod::AesV3);
1510        assert_eq!(handler.stream_method, CryptMethod::AesV3);
1511    }
1512
1513    #[test]
1514    fn open_v5_r6_authenticates_owner_password() {
1515        let (dict, expected_file_key) =
1516            build_v5_handler_inputs(b"userpw", b"ownerpw", SecurityRevision::R6);
1517        let handler = StandardSecurityHandler::open(&dict, b"", b"ownerpw")
1518            .expect("open succeeds")
1519            .expect("owner password authenticates on V=5 / R=6");
1520        assert_eq!(handler.file_key, expected_file_key);
1521    }
1522
1523    #[test]
1524    fn open_v5_r6_rejects_wrong_password() {
1525        let (dict, _) = build_v5_handler_inputs(b"userpw", b"ownerpw", SecurityRevision::R6);
1526        let result = StandardSecurityHandler::open(&dict, b"", b"wrongpw")
1527            .expect("open does not fail, only reports authentication");
1528        assert!(result.is_none());
1529    }
1530
1531    #[test]
1532    fn open_v5_r5_authenticates_user_password() {
1533        let (dict, expected_file_key) =
1534            build_v5_handler_inputs(b"userpw", b"ownerpw", SecurityRevision::R5);
1535        let handler = StandardSecurityHandler::open(&dict, b"", b"userpw")
1536            .expect("open succeeds")
1537            .expect("user password authenticates on V=5 / R=5");
1538        assert_eq!(handler.file_key, expected_file_key);
1539    }
1540
1541    #[test]
1542    fn open_v5_r5_empty_password_authenticates() {
1543        let (dict, _) = build_v5_handler_inputs(b"", b"ownerpw", SecurityRevision::R5);
1544        let handler = StandardSecurityHandler::open(&dict, b"", b"")
1545            .expect("open succeeds")
1546            .expect("empty password authenticates on V=5 / R=5");
1547        assert_eq!(handler.string_method, CryptMethod::AesV3);
1548    }
1549
1550    #[test]
1551    fn aes_256_cbc_round_trip_through_handler() {
1552        let key = [0x13u8; 32];
1553        let iv = [0x77u8; 16];
1554        let plaintext = b"top secret V=5 content";
1555        let ciphertext = test_helpers::aes_256_cbc_encrypt(&key, &iv, plaintext);
1556        let decrypted = aes_256_cbc_decrypt(&key, &ciphertext).expect("round trip succeeds");
1557        assert_eq!(decrypted, plaintext);
1558    }
1559
1560    #[test]
1561    fn open_r2_authenticates_owner_password() {
1562        // Algorithm 4 / 7 divergence from R=3: single RC4 round for /O,
1563        // full 32-byte /U match.
1564        let id_first = b"r2-synthetic-id";
1565        let user_password = b"u2";
1566        let owner_password = b"o2";
1567        let key_length_bytes = 5; // 40-bit key, matching R=2 default.
1568        let permissions: i32 = -4;
1569        let o = test_helpers::compute_o(
1570            owner_password,
1571            user_password,
1572            SecurityRevision::R2,
1573            key_length_bytes,
1574        );
1575        let file_key = test_helpers::compute_file_key_with_revision(
1576            user_password,
1577            &o,
1578            permissions,
1579            id_first,
1580            key_length_bytes,
1581            SecurityRevision::R2,
1582        );
1583        // Algorithm 4: /U is RC4(file_key, PASSWORD_PADDING).
1584        let u = test_helpers::rc4(&file_key, &PASSWORD_PADDING);
1585
1586        let mut dict = PdfDictionary::default();
1587        dict.insert("Filter".to_string(), PdfValue::Name("Standard".to_string()));
1588        dict.insert("V".to_string(), PdfValue::Integer(1));
1589        dict.insert("R".to_string(), PdfValue::Integer(2));
1590        dict.insert("Length".to_string(), PdfValue::Integer(40));
1591        dict.insert(
1592            "O".to_string(),
1593            PdfValue::String(crate::types::PdfString(o)),
1594        );
1595        dict.insert(
1596            "U".to_string(),
1597            PdfValue::String(crate::types::PdfString(u)),
1598        );
1599        dict.insert("P".to_string(), PdfValue::Integer(permissions as i64));
1600
1601        let handler = StandardSecurityHandler::open(&dict, id_first, owner_password)
1602            .expect("open succeeds")
1603            .expect("owner password authenticates on R=2");
1604        assert_eq!(handler.file_key, file_key);
1605    }
1606}