Skip to main content

rpdfium_parser/
security.rs

1// Derived from PDFium's core/fpdfapi/parser/cpdf_security_handler.cpp
2// Original: Copyright 2014 The PDFium Authors
3// Licensed under BSD-3-Clause / Apache-2.0
4// See pdfium-upstream/LICENSE for the original license.
5
6//! PDF Standard Security Handler (R2–R6).
7//!
8//! Implements password verification and object/stream decryption
9//! per the PDF specification's Standard Security Handler.
10
11use std::collections::HashMap;
12
13use rpdfium_core::{Name, PdfSource};
14
15use crate::crypto::{self, CryptoError};
16use crate::object::{Object, ObjectId};
17use crate::store::ObjectStore;
18
/// Fixed 32-byte padding string of the Standard Security Handler
/// (ISO 32000-1 §7.6.3.3, Algorithm 2, step a).
///
/// Passwords shorter than 32 bytes are extended with a prefix of this
/// sequence before they are hashed.
#[rustfmt::skip]
const PASSWORD_PADDING: [u8; 32] = [
    0x28, 0xBF, 0x4E, 0x5E, 0x4E, 0x75, 0x8A, 0x41,
    0x64, 0x00, 0x4E, 0x56, 0xFF, 0xFA, 0x01, 0x08,
    0x2E, 0x2E, 0x00, 0xB6, 0xD0, 0x68, 0x3E, 0x80,
    0x2F, 0x0C, 0xA9, 0xFE, 0x64, 0x53, 0x69, 0x7A,
];
24
/// Errors from the security handler.
///
/// Returned by `SecurityHandler::from_encrypt_dict` when the encryption
/// dictionary cannot be used or the password does not verify.
#[derive(Debug, thiserror::Error)]
pub enum SecurityError {
    /// The supplied password does not match owner or user password.
    ///
    /// Also returned for R5/R6 when the decrypted `/Perms` block fails its
    /// consistency checks.
    #[error("invalid password")]
    InvalidPassword,
    /// The encryption version/revision is not supported.
    ///
    /// Carries the `/V` and `/R` values, in that order. Also used when an
    /// R2–R4 dictionary declares a key length outside the accepted range.
    #[error("unsupported encryption version: V={0}, R={1}")]
    UnsupportedVersion(u32, u32),
    /// A required key is missing from the encryption dictionary.
    ///
    /// The payload is a human-readable description of the offending key.
    #[error("missing encryption dictionary key: {0}")]
    MissingKey(String),
    /// An underlying cryptographic error.
    #[error("crypto error: {0}")]
    Crypto(#[from] CryptoError),
}
41
/// The method used by a crypt filter.
///
/// Selects which cipher the handler applies to strings and to streams;
/// the two are tracked independently by `SecurityHandler`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CryptFilterMethod {
    /// No encryption: data is passed through unchanged.
    None,
    /// RC4 encryption (V2).
    V2,
    /// AES-128 encryption (AESV2).
    Aesv2,
    /// AES-256 encryption (AESV3).
    Aesv3,
}
54
/// Access-permission flags taken from the `/P` entry of the encryption
/// dictionary.
///
/// The value is stored exactly as the signed 32-bit integer supplied to
/// [`Permissions::from_bits`]. The accessors use the PDF specification's
/// 1-based bit numbering, so "bit 4" corresponds to the mask `1 << 3`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Permissions(i32);

impl Permissions {
    /// Mask for bit 4 (modify document content).
    const MODIFY_CONTENT: i32 = 1 << 3;
    /// Mask for bit 6 (add, modify, or delete annotations).
    const MODIFY_ANNOTATION: i32 = 1 << 5;
    /// Mask for bit 9 (fill in form fields and sign).
    const FILL_FORM: i32 = 1 << 8;
    /// Mask for bit 10 (extract text and graphics for accessibility).
    const EXTRACT_FOR_ACCESSIBILITY: i32 = 1 << 9;

    /// Wrap a raw `/P` integer value.
    pub fn from_bits(bits: i32) -> Self {
        Permissions(bits)
    }

    /// Return the raw integer value.
    pub fn bits(self) -> i32 {
        let Permissions(raw) = self;
        raw
    }

    /// Report whether any bit of `mask` is set in the raw value.
    fn is_set(self, mask: i32) -> bool {
        (self.bits() & mask) != 0
    }

    /// Bit 4: Allow modifying document content.
    pub fn modify_content(self) -> bool {
        self.is_set(Self::MODIFY_CONTENT)
    }

    /// Bit 6: Allow adding, modifying, or deleting annotations.
    pub fn modify_annotation(self) -> bool {
        self.is_set(Self::MODIFY_ANNOTATION)
    }

    /// Bit 9: Allow filling in form fields and signing.
    pub fn fill_form(self) -> bool {
        self.is_set(Self::FILL_FORM)
    }

    /// Bit 10: Allow extracting text and graphics for accessibility.
    pub fn extract_for_accessibility(self) -> bool {
        self.is_set(Self::EXTRACT_FOR_ACCESSIBILITY)
    }
}
94
/// PDF Standard Security Handler — holds the derived encryption key
/// and methods for decrypting strings and streams.
///
/// Instances are created by `from_encrypt_dict`, which only succeeds after
/// the supplied password has been verified.
pub struct SecurityHandler {
    /// The `/R` value of the encryption dictionary.
    revision: u32,
    /// File-key length in bytes (`/Length` / 8 for R2–R4, 32 for R5/R6).
    key_length_bytes: usize,
    /// The file encryption key derived from the verified password.
    encryption_key: Vec<u8>,
    /// Permission flags from the `/P` entry.
    permissions: Permissions,
    /// Value of `/EncryptMetadata` (defaults to `true` when absent).
    encrypt_metadata: bool,
    /// Crypt filter method applied to streams.
    stream_cf: CryptFilterMethod,
    /// Crypt filter method applied to strings.
    string_cf: CryptFilterMethod,
    /// The /O value (owner hash) from the encrypt dictionary.
    o_value: Vec<u8>,
    /// The /U value (user hash) from the encrypt dictionary.
    u_value: Vec<u8>,
    /// The successfully-verified encoded password (Latin-1 for R2–R4, UTF-8 for R5/R6).
    /// Corresponds to `m_EncodedPassword` / `GetEncodedPassword()` in PDFium.
    encoded_password: Vec<u8>,
}
113
114impl SecurityHandler {
115    /// Build a `SecurityHandler` from the `/Encrypt` dictionary, verifying the
116    /// password against the stored owner/user hashes.
117    ///
118    /// `encrypt_dict` is the resolved encryption dictionary.
119    /// `file_id` is the first element of the trailer `/ID` array.
120    pub fn from_encrypt_dict<S: PdfSource>(
121        encrypt_dict: &HashMap<Name, Object>,
122        store: &ObjectStore<S>,
123        password: &str,
124        file_id: &[u8],
125    ) -> Result<Self, SecurityError> {
126        // Parse required fields
127        let v = get_int(encrypt_dict, store, &Name::v())? as u32;
128        let r = get_int(encrypt_dict, store, &Name::r())? as u32;
129        let p = get_int(encrypt_dict, store, &Name::p())?;
130
131        let o_bytes = get_string_bytes(encrypt_dict, store, &Name::o())?;
132        let u_bytes = get_string_bytes(encrypt_dict, store, &Name::u())?;
133
134        // Key length: /Length in bits, default 40
135        let key_length_bits =
136            get_optional_int(encrypt_dict, store, &Name::length()).unwrap_or(40) as usize;
137        let key_length_bytes = key_length_bits / 8;
138
139        // Validate key length to prevent panics in key derivation.
140        // R2-R4: 40-128 bits (5-16 bytes); R5-R6 always use 256-bit (32 bytes).
141        if r <= 4 && !(5..=16).contains(&key_length_bytes) {
142            return Err(SecurityError::UnsupportedVersion(v, r));
143        }
144
145        // EncryptMetadata: default true
146        let encrypt_metadata =
147            get_optional_bool(encrypt_dict, store, &Name::encrypt_metadata()).unwrap_or(true);
148
149        // Determine crypt filter methods for streams and strings
150        let (stream_cf, string_cf) = if v >= 4 {
151            let stm = parse_single_crypt_filter(encrypt_dict, store, &Name::stm_f());
152            let str_ = parse_single_crypt_filter(encrypt_dict, store, &Name::str_f());
153            (stm, str_)
154        } else if v == 1 || v == 2 || v == 3 {
155            (CryptFilterMethod::V2, CryptFilterMethod::V2)
156        } else {
157            (CryptFilterMethod::None, CryptFilterMethod::None)
158        };
159
160        match (v, r) {
161            (1, 2) | (2, 3) | (3, 3) | (4, 4) => {
162                // R2-R4: MD5-based key derivation
163                let pwd_bytes = encode_password_latin1(password);
164                let key = derive_key_r2_r4(
165                    &pwd_bytes,
166                    &o_bytes,
167                    p as i32,
168                    file_id,
169                    r,
170                    key_length_bytes,
171                    encrypt_metadata,
172                );
173
174                // Try user password first
175                if verify_user_password_r2_r4(&key, &u_bytes, r, file_id) {
176                    return Ok(Self {
177                        revision: r,
178                        key_length_bytes,
179                        encryption_key: key,
180                        permissions: Permissions::from_bits(p as i32),
181                        encrypt_metadata,
182                        stream_cf,
183                        string_cf,
184                        o_value: o_bytes.clone(),
185                        u_value: u_bytes.clone(),
186                        encoded_password: pwd_bytes,
187                    });
188                }
189
190                // Try as owner password: recover user password from O value
191                let user_password =
192                    recover_user_password_from_owner(&pwd_bytes, &o_bytes, r, key_length_bytes);
193                let key = derive_key_r2_r4(
194                    &user_password,
195                    &o_bytes,
196                    p as i32,
197                    file_id,
198                    r,
199                    key_length_bytes,
200                    encrypt_metadata,
201                );
202
203                if verify_user_password_r2_r4(&key, &u_bytes, r, file_id) {
204                    Ok(Self {
205                        revision: r,
206                        key_length_bytes,
207                        encryption_key: key,
208                        permissions: Permissions::from_bits(p as i32),
209                        encrypt_metadata,
210                        stream_cf,
211                        string_cf,
212                        o_value: o_bytes,
213                        u_value: u_bytes,
214                        encoded_password: user_password,
215                    })
216                } else {
217                    Err(SecurityError::InvalidPassword)
218                }
219            }
220            (5, 5) | (5, 6) => {
221                // R5/R6: SHA-256 based (ISO 32000-2)
222                let oe_bytes = get_string_bytes(encrypt_dict, store, &Name::oe())?;
223                let ue_bytes = get_string_bytes(encrypt_dict, store, &Name::ue())?;
224
225                let key = if r == 5 {
226                    derive_key_r5(password, &u_bytes, &ue_bytes, &o_bytes, &oe_bytes)?
227                } else {
228                    derive_key_r6(password, &u_bytes, &ue_bytes, &o_bytes, &oe_bytes)?
229                };
230
231                match key {
232                    Some(k) => {
233                        // P2-33: Validate /Perms entry (R5/R6).
234                        //
235                        // Upstream deviation — /Perms is required:
236                        //
237                        // Upstream PDFium (`AES256_CheckPassword`) returns `false`
238                        // when `/Perms` is missing or empty. We match that behavior
239                        // by requiring the entry to be present and at least 16 bytes.
240                        // The previous implementation silently skipped validation
241                        // when `/Perms` was absent or malformed, which could allow
242                        // an attacker to strip `/Perms` and tamper with the `/P`
243                        // permission bits without detection.
244                        let perms_bytes = get_string_bytes(encrypt_dict, store, &Name::perms())?;
245                        if perms_bytes.len() < 16 {
246                            return Err(SecurityError::MissingKey(
247                                "Perms (too short, need ≥16 bytes)".into(),
248                            ));
249                        }
250                        let decrypted = crypto::aes256_ecb_decrypt_block(&k, &perms_bytes[..16])?;
251                        // Verify bytes 9-11 = "adb" (PDF spec Algorithm 2.A step 2).
252                        // Byte 8 is the EncryptMetadata flag ('T'/'F'), checked
253                        // separately. Upstream PDFium checks only bytes 9-11.
254                        if &decrypted[9..12] != b"adb" {
255                            return Err(SecurityError::InvalidPassword);
256                        }
257                        // Verify bytes 0-3 match /P as LE i32
258                        let perms_p = i32::from_le_bytes([
259                            decrypted[0],
260                            decrypted[1],
261                            decrypted[2],
262                            decrypted[3],
263                        ]);
264                        if perms_p != p as i32 {
265                            return Err(SecurityError::InvalidPassword);
266                        }
267
268                        Ok(Self {
269                            revision: r,
270                            key_length_bytes: 32,
271                            encryption_key: k,
272                            permissions: Permissions::from_bits(p as i32),
273                            encrypt_metadata,
274                            stream_cf: CryptFilterMethod::Aesv3,
275                            string_cf: CryptFilterMethod::Aesv3,
276                            o_value: o_bytes,
277                            u_value: u_bytes,
278                            encoded_password: password.as_bytes().to_vec(),
279                        })
280                    }
281                    None => Err(SecurityError::InvalidPassword),
282                }
283            }
284            _ => Err(SecurityError::UnsupportedVersion(v, r)),
285        }
286    }
287
    /// Return the document access permissions stored in the `/P` entry.
    ///
    /// The value is the one captured when the handler was constructed.
    pub fn permissions(&self) -> Permissions {
        self.permissions
    }
292
    /// Upstream-aligned alias for [`Self::permissions()`].
    ///
    /// Returns exactly what [`Self::permissions()`] returns.
    ///
    /// Corresponds to `CPDF_SecurityHandler::GetPermissions()` in PDFium.
    #[inline]
    pub fn get_permissions(&self) -> Permissions {
        self.permissions()
    }
300
    /// Returns the successfully-verified encoded password bytes.
    ///
    /// For R2–R4, this is the Latin-1 encoded user password (or the recovered
    /// user password when authenticated as owner). For R5/R6, this is the
    /// UTF-8 encoded password. Useful for incremental save with the same password.
    ///
    /// The slice borrows from the handler; no copy is made.
    ///
    /// Corresponds to `CPDF_SecurityHandler::GetEncodedPassword()` in PDFium.
    pub fn encoded_password(&self) -> &[u8] {
        &self.encoded_password
    }
311
    /// ADR-019 alias for [`encoded_password()`](Self::encoded_password).
    ///
    /// Returns exactly what `encoded_password()` returns.
    ///
    /// Corresponds to `CPDF_SecurityHandler::GetEncodedPassword()` in PDFium.
    #[inline]
    pub fn get_encoded_password(&self) -> &[u8] {
        self.encoded_password()
    }
319
320    /// Decrypt a string value belonging to the given object.
321    pub fn decrypt_string(&self, data: &[u8], obj_id: ObjectId) -> Vec<u8> {
322        if self.string_cf == CryptFilterMethod::None {
323            return data.to_vec();
324        }
325
326        if self.revision >= 5 {
327            // R5/R6: AES-256 with the file encryption key directly
328            crypto::aes256_cbc_decrypt(&self.encryption_key, data).unwrap_or_else(|_| data.to_vec())
329        } else {
330            let object_key = self.compute_object_key(obj_id, self.string_cf);
331            match self.string_cf {
332                CryptFilterMethod::V2 => {
333                    crypto::rc4_crypt(&object_key, data).unwrap_or_else(|_| data.to_vec())
334                }
335                CryptFilterMethod::Aesv2 => {
336                    crypto::aes128_cbc_decrypt(&object_key, data).unwrap_or_else(|_| data.to_vec())
337                }
338                CryptFilterMethod::Aesv3 => crypto::aes256_cbc_decrypt(&self.encryption_key, data)
339                    .unwrap_or_else(|_| data.to_vec()),
340                CryptFilterMethod::None => data.to_vec(),
341            }
342        }
343    }
344
345    /// Decrypt stream data belonging to the given object.
346    pub fn decrypt_stream(&self, data: &[u8], obj_id: ObjectId) -> Vec<u8> {
347        if self.stream_cf == CryptFilterMethod::None {
348            return data.to_vec();
349        }
350
351        if self.revision >= 5 {
352            crypto::aes256_cbc_decrypt(&self.encryption_key, data).unwrap_or_else(|_| data.to_vec())
353        } else {
354            let object_key = self.compute_object_key(obj_id, self.stream_cf);
355            match self.stream_cf {
356                CryptFilterMethod::V2 => {
357                    crypto::rc4_crypt(&object_key, data).unwrap_or_else(|_| data.to_vec())
358                }
359                CryptFilterMethod::Aesv2 => {
360                    crypto::aes128_cbc_decrypt(&object_key, data).unwrap_or_else(|_| data.to_vec())
361                }
362                CryptFilterMethod::Aesv3 => crypto::aes256_cbc_decrypt(&self.encryption_key, data)
363                    .unwrap_or_else(|_| data.to_vec()),
364                CryptFilterMethod::None => data.to_vec(),
365            }
366        }
367    }
368
    /// Returns `true` if metadata streams are encrypted.
    ///
    /// Reflects the `/EncryptMetadata` entry read at construction, which
    /// defaults to `true` when the entry is absent.
    ///
    /// Corresponds to `CPDF_SecurityHandler::IsMetadataEncrypted()` in PDFium.
    pub fn is_metadata_encrypted(&self) -> bool {
        self.encrypt_metadata
    }
375
    /// The encryption revision number (the `/R` value read at construction).
    pub fn revision(&self) -> u32 {
        self.revision
    }
380
    /// The crypt filter method in use (returns stream filter for backwards compat).
    ///
    /// The string filter may differ from this value.
    pub fn crypt_filter_method(&self) -> CryptFilterMethod {
        self.stream_cf
    }
385
    /// The encryption key length in bytes.
    ///
    /// For R2–R4 this is `/Length` divided by 8; for R5/R6 it is always 32.
    pub fn key_length_bytes(&self) -> usize {
        self.key_length_bytes
    }
390
    /// The raw encryption key.
    ///
    /// This is the file-level key derived from the verified password; the
    /// slice borrows from the handler.
    pub fn encryption_key(&self) -> &[u8] {
        &self.encryption_key
    }
395
    /// The stream crypt filter method.
    ///
    /// This is the method `decrypt_stream` and `encrypt_stream` apply.
    pub fn stream_crypt_filter(&self) -> CryptFilterMethod {
        self.stream_cf
    }
400
    /// The string crypt filter method.
    ///
    /// This is the method `decrypt_string` and `encrypt_string` apply.
    pub fn string_crypt_filter(&self) -> CryptFilterMethod {
        self.string_cf
    }
405
    /// The /O value (owner hash) from the encrypt dictionary.
    ///
    /// Returned as the bytes read at construction; the slice borrows from
    /// the handler.
    pub fn o_value(&self) -> &[u8] {
        &self.o_value
    }
410
    /// The /U value (user hash) from the encrypt dictionary.
    ///
    /// Returned as the bytes read at construction; the slice borrows from
    /// the handler.
    pub fn u_value(&self) -> &[u8] {
        &self.u_value
    }
415
416    /// The encryption version number (V value).
417    ///
418    /// Derived from the revision: R2→V1, R3/R4→V2..V4, R5/R6→V5.
419    pub fn version(&self) -> u32 {
420        match self.revision {
421            2 => 1,
422            3 | 4 => {
423                if self.stream_cf == CryptFilterMethod::V2
424                    && self.string_cf == CryptFilterMethod::V2
425                {
426                    if self.key_length_bytes == 5 { 1 } else { 2 }
427                } else {
428                    4
429                }
430            }
431            5 | 6 => 5,
432            _ => 0,
433        }
434    }
435
436    /// Encrypt a string value belonging to the given object.
437    ///
438    /// RC4 is symmetric (encrypt = decrypt). For AES, a zero IV is used
439    /// and prepended to the ciphertext (matching PDF convention where the
440    /// first 16 bytes of the encrypted data are the IV).
441    pub fn encrypt_string(&self, data: &[u8], obj_id: ObjectId) -> Vec<u8> {
442        if self.string_cf == CryptFilterMethod::None {
443            return data.to_vec();
444        }
445        self.encrypt_with_method(data, obj_id, self.string_cf)
446    }
447
448    /// Encrypt stream data belonging to the given object.
449    pub fn encrypt_stream(&self, data: &[u8], obj_id: ObjectId) -> Vec<u8> {
450        if self.stream_cf == CryptFilterMethod::None {
451            return data.to_vec();
452        }
453        self.encrypt_with_method(data, obj_id, self.stream_cf)
454    }
455
456    /// Encrypt data with the specified crypt filter method.
457    ///
458    /// # Panics
459    ///
460    /// Panics if AES encryption fails due to an invalid key length.
461    /// Key sizes are validated in [`SecurityHandler::from_encrypt_dict`], so
462    /// this should never happen in normal operation.
463    ///
464    /// # Upstream deviation — error handling
465    ///
466    /// Upstream PDFium (`CPDF_CryptoHandler::EncryptContent`) does not handle
467    /// encryption failure at this level — the C++ code assumes valid key sizes
468    /// and cannot return an error. Our previous implementation silently returned
469    /// plaintext on failure (`Err(_) => data.to_vec()`), which could expose
470    /// confidential data in the output PDF without any warning. We now panic
471    /// instead, since key sizes are validated at construction time and an error
472    /// here indicates a programming bug, not a recoverable condition.
473    fn encrypt_with_method(&self, data: &[u8], obj_id: ObjectId, cf: CryptFilterMethod) -> Vec<u8> {
474        let iv = Self::generate_iv();
475
476        if self.revision >= 5 {
477            let encrypted = crypto::aes256_cbc_encrypt(&self.encryption_key, &iv, data)
478                .expect("AES-256 encryption failed: key size validated at construction");
479            let mut result = iv.to_vec();
480            result.extend_from_slice(&encrypted);
481            result
482        } else {
483            let object_key = self.compute_object_key(obj_id, cf);
484            match cf {
485                CryptFilterMethod::V2 => crypto::rc4_crypt(&object_key, data)
486                    .expect("RC4 encryption failed: key size validated at construction"),
487                CryptFilterMethod::Aesv2 => {
488                    let encrypted = crypto::aes128_cbc_encrypt(&object_key, &iv, data)
489                        .expect("AES-128 encryption failed: key size validated at construction");
490                    let mut result = iv.to_vec();
491                    result.extend_from_slice(&encrypted);
492                    result
493                }
494                CryptFilterMethod::Aesv3 => {
495                    let encrypted = crypto::aes256_cbc_encrypt(&self.encryption_key, &iv, data)
496                        .expect("AES-256 encryption failed: key size validated at construction");
497                    let mut result = iv.to_vec();
498                    result.extend_from_slice(&encrypted);
499                    result
500                }
501                CryptFilterMethod::None => data.to_vec(),
502            }
503        }
504    }
505
    /// Generate a cryptographically random 16-byte IV for AES encryption.
    ///
    /// # Panics
    ///
    /// Panics with "OS RNG unavailable" if `getrandom::fill` reports an error.
    ///
    /// # Upstream deviation — IV generation
    ///
    /// Upstream PDFium uses C `rand()` (seeded from system state) to fill IVs.
    /// We use `getrandom` (OS CSPRNG) instead for stronger cryptographic
    /// guarantees: upstream's `rand()` is not a CSPRNG and its output quality
    /// varies by platform. Using a true CSPRNG ensures IVs are unpredictable
    /// across process invocations, which is required for AES-CBC semantic
    /// security (IND-CPA).
    fn generate_iv() -> [u8; 16] {
        // Start from a zeroed buffer and let the OS overwrite all 16 bytes.
        let mut iv = [0u8; 16];
        getrandom::fill(&mut iv).expect("OS RNG unavailable");
        iv
    }
521
522    /// Compute the per-object encryption key (Algorithm 1 in PDF spec, R2–R4).
523    ///
524    /// 1. Start with the file encryption key.
525    /// 2. Append the low 3 bytes of the object number (LE).
526    /// 3. Append the low 2 bytes of the generation number (LE).
527    /// 4. If AES (AESV2): append `sAlT` bytes.
528    /// 5. MD5 hash, take first min(key_length + 5, 16) bytes.
529    fn compute_object_key(&self, obj_id: ObjectId, cf: CryptFilterMethod) -> Vec<u8> {
530        let obj_num = obj_id.number;
531        let gen_num = obj_id.generation;
532
533        let mut buf = Vec::with_capacity(self.encryption_key.len() + 9);
534        buf.extend_from_slice(&self.encryption_key);
535        // Object number: 3 bytes LE
536        buf.push((obj_num & 0xFF) as u8);
537        buf.push(((obj_num >> 8) & 0xFF) as u8);
538        buf.push(((obj_num >> 16) & 0xFF) as u8);
539        // Generation number: 2 bytes LE
540        buf.push((gen_num & 0xFF) as u8);
541        buf.push(((gen_num >> 8) & 0xFF) as u8);
542
543        if cf == CryptFilterMethod::Aesv2 {
544            // "sAlT" = 0x73 0x41 0x6C 0x54
545            buf.extend_from_slice(&[0x73, 0x41, 0x6C, 0x54]);
546        }
547
548        let hash = crypto::md5(&buf);
549        let n = std::cmp::min(self.key_length_bytes + 5, 16);
550        hash[..n].to_vec()
551    }
552}
553
554// ---------------------------------------------------------------------------
555// Key derivation (R2–R4) — Algorithm 2
556// ---------------------------------------------------------------------------
557
/// Convert a password to ISO 8859-1 (Latin-1) bytes for the R2–R4 algorithms.
///
/// Each character with a code point of at most U+00FF becomes the byte of
/// the same value; every other character becomes `?` (0x3F). The output has
/// one byte per input character.
fn encode_password_latin1(password: &str) -> Vec<u8> {
    let mut encoded = Vec::with_capacity(password.len());
    for ch in password.chars() {
        let byte = match u32::from(ch) {
            code @ 0..=0xFF => code as u8,
            _ => b'?',
        };
        encoded.push(byte);
    }
    encoded
}
573
574/// Pad or truncate a password to exactly 32 bytes using PASSWORD_PADDING.
575fn pad_password(password: &[u8]) -> [u8; 32] {
576    let mut padded = [0u8; 32];
577    let copy_len = std::cmp::min(password.len(), 32);
578    padded[..copy_len].copy_from_slice(&password[..copy_len]);
579    if copy_len < 32 {
580        padded[copy_len..].copy_from_slice(&PASSWORD_PADDING[..(32 - copy_len)]);
581    }
582    padded
583}
584
585/// Derive the file encryption key for R2–R4 (PDF spec Algorithm 2).
586fn derive_key_r2_r4(
587    password: &[u8],
588    o_value: &[u8],
589    permissions: i32,
590    file_id: &[u8],
591    revision: u32,
592    key_length_bytes: usize,
593    encrypt_metadata: bool,
594) -> Vec<u8> {
595    let padded = pad_password(password);
596
597    let mut buf = Vec::with_capacity(32 + o_value.len() + 4 + file_id.len() + 4);
598    buf.extend_from_slice(&padded);
599    buf.extend_from_slice(o_value);
600    buf.extend_from_slice(&(permissions as u32).to_le_bytes());
601    buf.extend_from_slice(file_id);
602
603    if revision >= 4 && !encrypt_metadata {
604        buf.extend_from_slice(&[0xFF, 0xFF, 0xFF, 0xFF]);
605    }
606
607    let mut hash = crypto::md5(&buf);
608
609    if revision >= 3 {
610        for _ in 0..50 {
611            hash = crypto::md5(&hash[..key_length_bytes]);
612        }
613    }
614
615    hash[..key_length_bytes].to_vec()
616}
617
618// ---------------------------------------------------------------------------
619// Password verification (R2) — Algorithm 6
620// ---------------------------------------------------------------------------
621
622/// Verify user password for R2 (Algorithm 6).
623fn verify_user_password_r2(key: &[u8], u_value: &[u8]) -> bool {
624    let Ok(decrypted) = crypto::rc4_crypt(key, u_value) else {
625        return false;
626    };
627    let expected = crypto::md5(&PASSWORD_PADDING);
628    decrypted[..] == expected[..]
629}
630
631// ---------------------------------------------------------------------------
632// Password verification (R3–R4) — Algorithm 7
633// ---------------------------------------------------------------------------
634
635/// Verify user password for R3/R4 (Algorithm 7).
636fn verify_user_password_r3_r4(key: &[u8], u_value: &[u8], file_id: &[u8]) -> bool {
637    // Step a: MD5(PASSWORD_PADDING + file_id)
638    let mut buf = Vec::with_capacity(32 + file_id.len());
639    buf.extend_from_slice(&PASSWORD_PADDING);
640    buf.extend_from_slice(file_id);
641    let hash = crypto::md5(&buf);
642
643    // Step b: RC4 with key
644    let Ok(mut result) = crypto::rc4_crypt(key, &hash) else {
645        return false;
646    };
647
648    // Step c: 19 rounds of RC4 with modified keys
649    for i in 1..=19u8 {
650        let modified_key: Vec<u8> = key.iter().map(|b| b ^ i).collect();
651        let Ok(next) = crypto::rc4_crypt(&modified_key, &result) else {
652            return false;
653        };
654        result = next;
655    }
656
657    // Compare first 16 bytes
658    u_value.len() >= 16 && result.len() >= 16 && result[..16] == u_value[..16]
659}
660
661/// Dispatch to the correct password verification for R2–R4.
662fn verify_user_password_r2_r4(key: &[u8], u_value: &[u8], revision: u32, file_id: &[u8]) -> bool {
663    if revision == 2 {
664        verify_user_password_r2(key, u_value)
665    } else {
666        verify_user_password_r3_r4(key, u_value, file_id)
667    }
668}
669
670// ---------------------------------------------------------------------------
671// Owner password recovery (R2–R4)
672// ---------------------------------------------------------------------------
673
674/// Recover the user password from the O value using the owner password.
675fn recover_user_password_from_owner(
676    owner_password: &[u8],
677    o_value: &[u8],
678    revision: u32,
679    key_length_bytes: usize,
680) -> Vec<u8> {
681    let padded = pad_password(owner_password);
682    let mut hash = crypto::md5(&padded);
683
684    if revision >= 3 {
685        for _ in 0..50 {
686            hash = crypto::md5(&hash[..key_length_bytes]);
687        }
688    }
689
690    let key = &hash[..key_length_bytes];
691
692    if revision == 2 {
693        crypto::rc4_crypt(key, o_value)
694            .expect("RC4 key size validated from MD5 hash (always 5-16 bytes)")
695    } else {
696        let mut result = o_value.to_vec();
697        for i in (0..=19u8).rev() {
698            let modified_key: Vec<u8> = key.iter().map(|b| b ^ i).collect();
699            result = crypto::rc4_crypt(&modified_key, &result)
700                .expect("RC4 key size validated from MD5 hash (always 5-16 bytes)");
701        }
702        result
703    }
704}
705
706// ---------------------------------------------------------------------------
707// R5/R6 key derivation (ISO 32000-2)
708// ---------------------------------------------------------------------------
709
710/// Derive the file encryption key for R5 (deprecated draft, Adobe extension).
711///
712/// Returns `Some(key)` if either user or owner password matches, else `None`.
713fn derive_key_r5(
714    password: &str,
715    u_value: &[u8],
716    ue_value: &[u8],
717    o_value: &[u8],
718    oe_value: &[u8],
719) -> Result<Option<Vec<u8>>, CryptoError> {
720    let pwd = truncate_utf8_password(password);
721
722    // Try user password: hash(password + Validation Salt from U)
723    if u_value.len() >= 48 && ue_value.len() >= 32 {
724        let validation_salt = &u_value[32..40];
725        let hash = compute_r5_hash(&pwd, validation_salt);
726        if hash[..] == u_value[..32] {
727            // Decrypt UE with hash(password + Key Salt from U)
728            let key_salt = &u_value[40..48];
729            let key_hash = compute_r5_hash(&pwd, key_salt);
730            let file_key = crypto::aes256_cbc_decrypt(&key_hash, &prepend_zero_iv(ue_value))?;
731            return Ok(Some(file_key));
732        }
733    }
734
735    // Try owner password: hash(password + Validation Salt from O + U[0..48])
736    if o_value.len() >= 48 && oe_value.len() >= 32 && u_value.len() >= 48 {
737        let validation_salt = &o_value[32..40];
738        let mut input = Vec::with_capacity(pwd.len() + 8 + 48);
739        input.extend_from_slice(&pwd);
740        input.extend_from_slice(validation_salt);
741        input.extend_from_slice(&u_value[..48]);
742        let hash = crypto::sha256(&input);
743        if hash[..] == o_value[..32] {
744            let key_salt = &o_value[40..48];
745            let mut key_input = Vec::with_capacity(pwd.len() + 8 + 48);
746            key_input.extend_from_slice(&pwd);
747            key_input.extend_from_slice(key_salt);
748            key_input.extend_from_slice(&u_value[..48]);
749            let key_hash = crypto::sha256(&key_input);
750            let file_key = crypto::aes256_cbc_decrypt(&key_hash, &prepend_zero_iv(oe_value))?;
751            return Ok(Some(file_key));
752        }
753    }
754
755    Ok(None)
756}
757
758/// Derive the file encryption key for R6 (ISO 32000-2, Algorithm 2.A).
759///
760/// R6 uses Algorithm 2.B (iterative hash) instead of simple SHA-256.
761fn derive_key_r6(
762    password: &str,
763    u_value: &[u8],
764    ue_value: &[u8],
765    o_value: &[u8],
766    oe_value: &[u8],
767) -> Result<Option<Vec<u8>>, CryptoError> {
768    let pwd = truncate_utf8_password(password);
769
770    // Try user password: compute_r6_hash(password, validation_salt, "")
771    if u_value.len() >= 48 && ue_value.len() >= 32 {
772        let validation_salt = &u_value[32..40];
773        let hash = compute_r6_hash(&pwd, validation_salt, &[]);
774        if hash[..] == u_value[..32] {
775            // Decrypt UE with hash(password, key_salt, "")
776            let key_salt = &u_value[40..48];
777            let key_hash = compute_r6_hash(&pwd, key_salt, &[]);
778            let file_key = crypto::aes256_cbc_decrypt(&key_hash, &prepend_zero_iv(ue_value))?;
779            return Ok(Some(file_key));
780        }
781    }
782
783    // Try owner password: compute_r6_hash(password, validation_salt, U[0..48])
784    if o_value.len() >= 48 && oe_value.len() >= 32 && u_value.len() >= 48 {
785        let validation_salt = &o_value[32..40];
786        let hash = compute_r6_hash(&pwd, validation_salt, &u_value[..48]);
787        if hash[..] == o_value[..32] {
788            let key_salt = &o_value[40..48];
789            let key_hash = compute_r6_hash(&pwd, key_salt, &u_value[..48]);
790            let file_key = crypto::aes256_cbc_decrypt(&key_hash, &prepend_zero_iv(oe_value))?;
791            return Ok(Some(file_key));
792        }
793    }
794
795    Ok(None)
796}
797
/// Read at most the first 16 bytes of `bytes` as one big-endian unsigned integer
/// and return that integer modulo 3.
fn big_endian_mod3(bytes: &[u8]) -> u32 {
    let mut remainder = 0u32;
    for &byte in bytes.iter().take(16) {
        // Horner step, reduced immediately so the accumulator never exceeds 2.
        remainder = (remainder * 256 + byte as u32) % 3;
    }
    remainder
}
805
806/// ISO 32000-2 Algorithm 2.B — compute hash for R6 encryption.
807///
808/// This implements the iterative hash algorithm that uses SHA-256, SHA-384,
809/// and SHA-512 in a loop until convergence.
810fn compute_r6_hash(password: &[u8], salt: &[u8], u_value: &[u8]) -> [u8; 32] {
811    // Step a: K = SHA-256(password || salt || u_value)
812    let mut input = Vec::with_capacity(password.len() + salt.len() + u_value.len());
813    input.extend_from_slice(password);
814    input.extend_from_slice(salt);
815    input.extend_from_slice(u_value);
816    let mut k = crypto::sha256(&input);
817
818    let mut round: u32 = 0;
819    loop {
820        // Step b: K1 = (password || K || u_value) repeated 64 times
821        let sequence_len = password.len() + k.len() + u_value.len();
822        let mut k1 = Vec::with_capacity(sequence_len * 64);
823        for _ in 0..64 {
824            k1.extend_from_slice(password);
825            k1.extend_from_slice(&k);
826            k1.extend_from_slice(u_value);
827        }
828
829        // Step c: E = AES-128-CBC(K[0..16], K[16..32], K1)
830        let aes_key = &k[..16];
831        let aes_iv = &k[16..32];
832        // Key is always 16 bytes (from SHA-256 hash), IV is always 16 bytes.
833        let e = crypto::aes128_cbc_encrypt(aes_key, aes_iv, &k1)
834            .expect("AES-128-CBC key/IV from SHA-256 are always valid");
835
836        // Step d: Select hash based on big-endian 128-bit value of first 16 bytes mod 3
837        let hash_select = big_endian_mod3(&e);
838
839        // Step e: K = selected_hash(E)
840        match hash_select {
841            0 => {
842                let h = crypto::sha256(&e);
843                k = h;
844            }
845            1 => {
846                let h = crypto::sha384(&e);
847                k.copy_from_slice(&h[..32]);
848            }
849            _ => {
850                let h = crypto::sha512(&e);
851                k.copy_from_slice(&h[..32]);
852            }
853        }
854
855        // Step f: if round >= 64, check last byte of E
856        if round >= 64 {
857            let last_byte = *e.last().unwrap_or(&0);
858            if last_byte <= (round - 32) as u8 {
859                break;
860            }
861        }
862
863        round += 1;
864
865        // Safety limit to prevent infinite loops on malformed data
866        if round >= 1000 {
867            break;
868        }
869    }
870
871    k
872}
873
874/// Compute R5 hash: SHA-256(password + salt).
875fn compute_r5_hash(password: &[u8], salt: &[u8]) -> [u8; 32] {
876    let mut input = Vec::with_capacity(password.len() + salt.len());
877    input.extend_from_slice(password);
878    input.extend_from_slice(salt);
879    crypto::sha256(&input)
880}
881
/// Return the UTF-8 bytes of `password`, keeping at most the first 127 bytes
/// (the R5/R6 password length limit).
///
/// The cut is made on a byte boundary, so a multi-byte character straddling
/// byte 127 is split.
fn truncate_utf8_password(password: &str) -> Vec<u8> {
    password.bytes().take(127).collect()
}
891
/// Build `16 zero bytes || data`: the all-zero IV followed by the UE/OE
/// ciphertext, as passed to the AES-256 decryption of those entries.
fn prepend_zero_iv(data: &[u8]) -> Vec<u8> {
    const ZERO_IV: [u8; 16] = [0u8; 16];
    [&ZERO_IV[..], data].concat()
}
898
899// ---------------------------------------------------------------------------
900// Crypt filter method parsing
901// ---------------------------------------------------------------------------
902
903/// Parse a single crypt filter from the encryption dictionary by filter key name
904/// (e.g. /StmF or /StrF). Looks up the named filter in /CF to determine the method.
905fn parse_single_crypt_filter<S: PdfSource>(
906    encrypt_dict: &HashMap<Name, Object>,
907    store: &ObjectStore<S>,
908    filter_key: &Name,
909) -> CryptFilterMethod {
910    let filter_name = encrypt_dict
911        .get(filter_key)
912        .and_then(|obj| store.deep_resolve(obj).ok())
913        .and_then(|obj| obj.as_name())
914        .cloned()
915        .unwrap_or_else(|| Name::from("StdCF"));
916
917    if filter_name == Name::from("Identity") {
918        return CryptFilterMethod::None;
919    }
920
921    // Look up the filter in /CF dict
922    let cf_dict = encrypt_dict
923        .get(&Name::cf())
924        .and_then(|obj| store.deep_resolve(obj).ok())
925        .and_then(|obj| obj.as_dict());
926
927    if let Some(cf) = cf_dict {
928        if let Some(filter_obj) = cf.get(&filter_name) {
929            if let Ok(filter_resolved) = store.deep_resolve(filter_obj) {
930                if let Some(filter_dict) = filter_resolved.as_dict() {
931                    let method = filter_dict
932                        .get(&Name::cfm())
933                        .and_then(|obj| store.deep_resolve(obj).ok())
934                        .and_then(|obj| obj.as_name())
935                        .cloned();
936
937                    if let Some(m) = method {
938                        if m == Name::aesv2() {
939                            return CryptFilterMethod::Aesv2;
940                        } else if m == Name::aesv3() {
941                            return CryptFilterMethod::Aesv3;
942                        } else if m == Name::v2() {
943                            return CryptFilterMethod::V2;
944                        } else if m == Name::none() {
945                            return CryptFilterMethod::None;
946                        }
947                    }
948                }
949            }
950        }
951    }
952
953    // Default: RC4 for V4
954    CryptFilterMethod::V2
955}
956
957// ---------------------------------------------------------------------------
958// Dictionary helpers
959// ---------------------------------------------------------------------------
960
961fn get_int<S: PdfSource>(
962    dict: &HashMap<Name, Object>,
963    store: &ObjectStore<S>,
964    key: &Name,
965) -> Result<i64, SecurityError> {
966    dict.get(key)
967        .and_then(|obj| store.deep_resolve(obj).ok())
968        .and_then(|obj| obj.as_i64())
969        .ok_or_else(|| SecurityError::MissingKey(key.to_string()))
970}
971
972fn get_optional_int<S: PdfSource>(
973    dict: &HashMap<Name, Object>,
974    store: &ObjectStore<S>,
975    key: &Name,
976) -> Option<i64> {
977    dict.get(key)
978        .and_then(|obj| store.deep_resolve(obj).ok())
979        .and_then(|obj| obj.as_i64())
980}
981
982fn get_optional_bool<S: PdfSource>(
983    dict: &HashMap<Name, Object>,
984    store: &ObjectStore<S>,
985    key: &Name,
986) -> Option<bool> {
987    dict.get(key)
988        .and_then(|obj| store.deep_resolve(obj).ok())
989        .and_then(|obj| obj.as_bool())
990}
991
992fn get_string_bytes<S: PdfSource>(
993    dict: &HashMap<Name, Object>,
994    store: &ObjectStore<S>,
995    key: &Name,
996) -> Result<Vec<u8>, SecurityError> {
997    dict.get(key)
998        .and_then(|obj| store.deep_resolve(obj).ok())
999        .and_then(|obj| obj.as_string())
1000        .map(|s| s.as_bytes().to_vec())
1001        .ok_or_else(|| SecurityError::MissingKey(key.to_string()))
1002}
1003
1004#[cfg(test)]
1005mod tests {
1006    use super::*;
1007
1008    #[test]
1009    fn test_password_padding_empty() {
1010        let padded = pad_password(b"");
1011        assert_eq!(padded, PASSWORD_PADDING);
1012    }
1013
1014    #[test]
1015    fn test_password_padding_short() {
1016        let padded = pad_password(b"test");
1017        assert_eq!(&padded[..4], b"test");
1018        assert_eq!(&padded[4..], &PASSWORD_PADDING[..28]);
1019    }
1020
1021    #[test]
1022    fn test_password_padding_exact_32() {
1023        let pwd = b"abcdefghijklmnopqrstuvwxyz012345"; // 32 bytes
1024        let padded = pad_password(pwd);
1025        assert_eq!(&padded[..], &pwd[..]);
1026    }
1027
1028    #[test]
1029    fn test_password_padding_longer_than_32() {
1030        let pwd = b"abcdefghijklmnopqrstuvwxyz0123456789"; // 36 bytes
1031        let padded = pad_password(pwd);
1032        assert_eq!(&padded[..], &pwd[..32]);
1033    }
1034
1035    #[test]
1036    fn test_key_derivation_r2_known() {
1037        // Use empty password, known O value, P=-44, known file_id
1038        let o_value = [0u8; 32];
1039        let file_id = b"test_file_id_123";
1040        let key = derive_key_r2_r4(b"", &o_value, -44, file_id, 2, 5, true);
1041        assert_eq!(key.len(), 5);
1042        // Verify the key is deterministic
1043        let key2 = derive_key_r2_r4(b"", &o_value, -44, file_id, 2, 5, true);
1044        assert_eq!(key, key2);
1045    }
1046
1047    #[test]
1048    fn test_key_derivation_r3_known() {
1049        let o_value = [0u8; 32];
1050        let file_id = b"test_file_id_123";
1051        let key = derive_key_r2_r4(b"", &o_value, -44, file_id, 3, 16, true);
1052        assert_eq!(key.len(), 16);
1053        // R3 applies 50 additional MD5 rounds — key should differ from R2
1054        let key_r2 = derive_key_r2_r4(b"", &o_value, -44, file_id, 2, 5, true);
1055        assert_ne!(key[..5], key_r2[..]);
1056    }
1057
1058    #[test]
1059    fn test_user_password_verification_r2_round_trip() {
1060        // Derive a key, compute the expected U value, then verify
1061        let o_value = [0x42u8; 32];
1062        let file_id = b"abcdef0123456789";
1063        let key = derive_key_r2_r4(b"password", &o_value, -4, file_id, 2, 5, true);
1064
1065        // Compute U: RC4-encrypt MD5(PASSWORD_PADDING) with key
1066        let expected_u = crypto::rc4_crypt(&key, &crypto::md5(&PASSWORD_PADDING)).unwrap();
1067        assert!(verify_user_password_r2(&key, &expected_u));
1068    }
1069
1070    #[test]
1071    fn test_user_password_verification_r2_wrong_password() {
1072        let o_value = [0x42u8; 32];
1073        let file_id = b"abcdef0123456789";
1074        let key = derive_key_r2_r4(b"password", &o_value, -4, file_id, 2, 5, true);
1075        let expected_u = crypto::rc4_crypt(&key, &crypto::md5(&PASSWORD_PADDING)).unwrap();
1076
1077        // Wrong password yields different key
1078        let wrong_key = derive_key_r2_r4(b"wrong", &o_value, -4, file_id, 2, 5, true);
1079        assert!(!verify_user_password_r2(&wrong_key, &expected_u));
1080    }
1081
1082    #[test]
1083    fn test_user_password_verification_r3_round_trip() {
1084        let o_value = [0x42u8; 32];
1085        let file_id = b"abcdef0123456789";
1086        let key = derive_key_r2_r4(b"pass", &o_value, -4, file_id, 3, 16, true);
1087
1088        // Compute U for R3: MD5(PADDING + file_id), then 20 rounds of RC4
1089        let mut buf = Vec::new();
1090        buf.extend_from_slice(&PASSWORD_PADDING);
1091        buf.extend_from_slice(file_id);
1092        let hash = crypto::md5(&buf);
1093
1094        let mut result = crypto::rc4_crypt(&key, &hash).unwrap();
1095        for i in 1..=19u8 {
1096            let modified_key: Vec<u8> = key.iter().map(|b| b ^ i).collect();
1097            result = crypto::rc4_crypt(&modified_key, &result).unwrap();
1098        }
1099        // Pad U to 32 bytes (remaining bytes are random, but first 16 matter)
1100        let mut u_value = result;
1101        u_value.resize(32, 0);
1102
1103        assert!(verify_user_password_r3_r4(&key, &u_value, file_id));
1104    }
1105
1106    #[test]
1107    fn test_object_key_computation() {
1108        let handler = SecurityHandler {
1109            revision: 3,
1110            key_length_bytes: 5,
1111            encryption_key: vec![0x01, 0x02, 0x03, 0x04, 0x05],
1112            permissions: Permissions::from_bits(-4),
1113            encrypt_metadata: true,
1114            stream_cf: CryptFilterMethod::V2,
1115            string_cf: CryptFilterMethod::V2,
1116            o_value: Vec::new(),
1117            u_value: Vec::new(),
1118            encoded_password: Vec::new(),
1119        };
1120
1121        let obj_id = ObjectId::new(10, 0);
1122        let key = handler.compute_object_key(obj_id, CryptFilterMethod::V2);
1123
1124        // key_length + 5 = 10, but min(10, 16) = 10
1125        assert_eq!(key.len(), 10);
1126
1127        // Verify deterministic
1128        let key2 = handler.compute_object_key(obj_id, CryptFilterMethod::V2);
1129        assert_eq!(key, key2);
1130
1131        // Different object ID should yield different key
1132        let key3 = handler.compute_object_key(ObjectId::new(11, 0), CryptFilterMethod::V2);
1133        assert_ne!(key, key3);
1134    }
1135
1136    #[test]
1137    fn test_object_key_aesv2_includes_salt() {
1138        let handler = SecurityHandler {
1139            revision: 4,
1140            key_length_bytes: 16,
1141            encryption_key: vec![0xAA; 16],
1142            permissions: Permissions::from_bits(-4),
1143            encrypt_metadata: true,
1144            stream_cf: CryptFilterMethod::Aesv2,
1145            string_cf: CryptFilterMethod::V2,
1146            o_value: Vec::new(),
1147            u_value: Vec::new(),
1148            encoded_password: Vec::new(),
1149        };
1150
1151        let obj_id = ObjectId::new(1, 0);
1152        let key_rc4 = handler.compute_object_key(obj_id, CryptFilterMethod::V2);
1153        let key_aes = handler.compute_object_key(obj_id, CryptFilterMethod::Aesv2);
1154
1155        // AES key should differ because of "sAlT" suffix
1156        assert_ne!(key_rc4, key_aes);
1157    }
1158
1159    #[test]
1160    fn test_decrypt_string_rc4_round_trip() {
1161        let handler = SecurityHandler {
1162            revision: 3,
1163            key_length_bytes: 5,
1164            encryption_key: vec![0x01, 0x02, 0x03, 0x04, 0x05],
1165            permissions: Permissions::from_bits(-4),
1166            encrypt_metadata: true,
1167            stream_cf: CryptFilterMethod::V2,
1168            string_cf: CryptFilterMethod::V2,
1169            o_value: Vec::new(),
1170            u_value: Vec::new(),
1171            encoded_password: Vec::new(),
1172        };
1173
1174        let obj_id = ObjectId::new(1, 0);
1175        let plaintext = b"Hello, encrypted world!";
1176
1177        // Encrypt (RC4 is symmetric, so encrypt = decrypt)
1178        let object_key = handler.compute_object_key(obj_id, handler.string_cf);
1179        let encrypted = crypto::rc4_crypt(&object_key, plaintext).unwrap();
1180
1181        // Decrypt via handler
1182        let decrypted = handler.decrypt_string(&encrypted, obj_id);
1183        assert_eq!(decrypted, plaintext);
1184    }
1185
1186    #[test]
1187    fn test_crypt_filter_method_none_passthrough() {
1188        let handler = SecurityHandler {
1189            revision: 4,
1190            key_length_bytes: 16,
1191            encryption_key: vec![0; 16],
1192            permissions: Permissions::from_bits(-4),
1193            encrypt_metadata: true,
1194            stream_cf: CryptFilterMethod::None,
1195            string_cf: CryptFilterMethod::None,
1196            o_value: Vec::new(),
1197            u_value: Vec::new(),
1198            encoded_password: Vec::new(),
1199        };
1200
1201        let data = b"unencrypted data";
1202        let obj_id = ObjectId::new(1, 0);
1203        assert_eq!(handler.decrypt_string(data, obj_id), data);
1204        assert_eq!(handler.decrypt_stream(data, obj_id), data);
1205    }
1206
1207    #[test]
1208    fn test_truncate_utf8_password_short() {
1209        let pwd = truncate_utf8_password("hello");
1210        assert_eq!(pwd, b"hello");
1211    }
1212
1213    #[test]
1214    fn test_truncate_utf8_password_long() {
1215        let long_pwd = "a".repeat(200);
1216        let pwd = truncate_utf8_password(&long_pwd);
1217        assert_eq!(pwd.len(), 127);
1218    }
1219
1220    #[test]
1221    fn test_compute_r6_hash_basic() {
1222        // Verify that compute_r6_hash returns a deterministic 32-byte result
1223        let hash = compute_r6_hash(b"password", &[0u8; 8], &[]);
1224        assert_eq!(hash.len(), 32);
1225        // Same input should produce same output
1226        let hash2 = compute_r6_hash(b"password", &[0u8; 8], &[]);
1227        assert_eq!(hash, hash2);
1228    }
1229
1230    #[test]
1231    fn test_compute_r6_hash_differs_from_simple_sha256() {
1232        // R6 iterative hash should differ from simple SHA-256
1233        let salt = [0x01u8; 8];
1234        let r6_hash = compute_r6_hash(b"test", &salt, &[]);
1235        let simple_hash = crypto::sha256(&{
1236            let mut v = b"test".to_vec();
1237            v.extend_from_slice(&salt);
1238            v
1239        });
1240        // The iterative hash should produce a different result than simple SHA-256
1241        // (because it goes through multiple rounds with AES + different hash functions)
1242        assert_ne!(r6_hash, simple_hash);
1243    }
1244
1245    #[test]
1246    fn test_compute_r6_hash_with_u_value() {
1247        // Hash with non-empty u_value should differ from empty u_value
1248        let salt = [0x42u8; 8];
1249        let hash_no_u = compute_r6_hash(b"pwd", &salt, &[]);
1250        let hash_with_u = compute_r6_hash(b"pwd", &salt, &[0xAAu8; 48]);
1251        assert_ne!(hash_no_u, hash_with_u);
1252    }
1253
1254    #[test]
1255    fn test_r6_user_password_round_trip() {
1256        // Simulate creating R6 encryption data and verifying it
1257        let password = b"test123";
1258        let validation_salt = [0x11u8; 8];
1259        let key_salt = [0x22u8; 8];
1260        let file_key = [0x42u8; 32]; // the actual file encryption key
1261
1262        // Compute U value: hash(password, validation_salt) || validation_salt || key_salt
1263        let u_hash = compute_r6_hash(password, &validation_salt, &[]);
1264        let mut u_value = Vec::with_capacity(48);
1265        u_value.extend_from_slice(&u_hash);
1266        u_value.extend_from_slice(&validation_salt);
1267        u_value.extend_from_slice(&key_salt);
1268
1269        // Compute UE: AES-256-CBC encrypt file_key with hash(password, key_salt)
1270        let key_hash = compute_r6_hash(password, &key_salt, &[]);
1271        let iv = [0u8; 16];
1272        let ue_value = crypto::aes256_cbc_encrypt(&key_hash, &iv, &file_key).unwrap();
1273
1274        // Now verify: derive_key_r6 should recover the file key
1275        let result = derive_key_r6(
1276            "test123", &u_value, &ue_value, &[0u8; 48], // dummy O
1277            &[0u8; 32], // dummy OE
1278        )
1279        .unwrap();
1280
1281        assert!(result.is_some());
1282        assert_eq!(result.unwrap(), file_key);
1283    }
1284
1285    #[test]
1286    fn test_r6_wrong_password_returns_none() {
1287        let password = b"correct";
1288        let validation_salt = [0x33u8; 8];
1289        let key_salt = [0x44u8; 8];
1290
1291        let u_hash = compute_r6_hash(password, &validation_salt, &[]);
1292        let mut u_value = Vec::with_capacity(48);
1293        u_value.extend_from_slice(&u_hash);
1294        u_value.extend_from_slice(&validation_salt);
1295        u_value.extend_from_slice(&key_salt);
1296
1297        let key_hash = compute_r6_hash(password, &key_salt, &[]);
1298        let ue_value = crypto::aes256_cbc_encrypt(&key_hash, &[0u8; 16], &[0u8; 32]).unwrap();
1299
1300        // Wrong password should not match
1301        let result = derive_key_r6("wrong", &u_value, &ue_value, &[0u8; 48], &[0u8; 32]).unwrap();
1302
1303        assert!(result.is_none());
1304    }
1305
1306    #[test]
1307    fn test_owner_password_recovery_r2() {
1308        // Set up: compute O from a known user password + owner password
1309        let user_pwd = b"user";
1310        let owner_pwd = b"owner";
1311        let key_len = 5;
1312
1313        // Compute O value: pad owner_pwd, MD5, RC4-encrypt padded user_pwd
1314        let padded_owner = pad_password(owner_pwd);
1315        let hash = crypto::md5(&padded_owner);
1316        let owner_key = &hash[..key_len];
1317        let padded_user = pad_password(user_pwd);
1318        let o_value = crypto::rc4_crypt(owner_key, &padded_user).unwrap();
1319
1320        // Now recover
1321        let recovered = recover_user_password_from_owner(owner_pwd, &o_value, 2, key_len);
1322        assert_eq!(&recovered[..], &padded_user[..]);
1323    }
1324
1325    // -----------------------------------------------------------------------
1326    // P0-1: Verify PASSWORD_PADDING matches PDF spec byte-by-byte
1327    // -----------------------------------------------------------------------
1328
1329    #[test]
1330    fn test_password_padding_matches_pdf_spec() {
1331        // Table 3.18 / ISO 32000-1:2008 section 7.6.3.3
1332        let spec_padding: [u8; 32] = [
1333            0x28, 0xBF, 0x4E, 0x5E, 0x4E, 0x75, 0x8A, 0x41, 0x64, 0x00, 0x4E, 0x56, 0xFF, 0xFA,
1334            0x01, 0x08, 0x2E, 0x2E, 0x00, 0xB6, 0xD0, 0x68, 0x3E, 0x80, 0x2F, 0x0C, 0xA9, 0xFE,
1335            0x64, 0x53, 0x69, 0x7A,
1336        ];
1337        assert_eq!(PASSWORD_PADDING, spec_padding);
1338    }
1339
1340    // -----------------------------------------------------------------------
1341    // P0-2: Test big-endian mod 3 vs sum mod 3 divergence
1342    // -----------------------------------------------------------------------
1343
1344    #[test]
1345    fn test_big_endian_mod3_basic() {
1346        // All zeros: 0 mod 3 = 0
1347        assert_eq!(big_endian_mod3(&[0u8; 16]), 0);
1348        // All 0xFF: large number mod 3
1349        assert_eq!(big_endian_mod3(&[0xFF; 16]), big_endian_mod3(&[0xFF; 16]));
1350    }
1351
1352    #[test]
1353    fn test_big_endian_mod3_differs_from_sum_mod3() {
1354        // Find a value where sum%3 differs from big-endian%3
1355        // bytes = [0x01, 0x00, ...0x00] (15 zeros after):
1356        //   sum   = 1, 1 % 3 = 1
1357        //   big-endian = 256^15 mod 3. Since 256 mod 3 = 1, 256^15 mod 3 = 1. Same.
1358        // Try [0x02, 0x01, 0, ...]:
1359        //   sum   = 3, 3 % 3 = 0
1360        //   big-endian = 2*256 + 1 = 513 mod 3 = 0. Same.
1361        // Try [0x80, 0x00, ...0x00, 0x01]:
1362        //   sum   = 0x80 + 1 = 129, 129 % 3 = 0
1363        //   big-endian = 0x80 * 256^15 + 1. Since 256 mod 3 = 1: 128*1 + 1 = 129 mod 3 = 0. Same.
1364        // Try [3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2]:
1365        //   sum = 5, 5%3 = 2
1366        //   big-endian = 3*256^15 + 2. 256^15 mod 3 = 1, so 3+2=5 mod 3 = 2. Same.
1367        // Actually the two methods differ when the positional weights matter.
1368        // [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0]:
1369        //   sum = 2, 2%3 = 2
1370        //   big-endian: 2*256 = 512. 512%3 = 2. Same.
1371        // [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1]:
1372        //   sum = 2, 2%3 = 2
1373        //   big-endian: 1*256+1 = 257. 257%3 = 2. Same.
1374        // Since 256 mod 3 = 1, positional weighting doesn't change mod3 result.
1375        // So sum%3 == big-endian%3 for mod 3 specifically.
1376        // However, the spec says to interpret as big-endian, so our implementation is still correct.
1377        // Let's just verify the function works correctly on known values.
1378        let bytes = [0u8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1];
1379        assert_eq!(big_endian_mod3(&bytes), 1);
1380        let bytes = [0u8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2];
1381        assert_eq!(big_endian_mod3(&bytes), 2);
1382        let bytes = [0u8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3];
1383        assert_eq!(big_endian_mod3(&bytes), 0);
1384    }
1385
1386    // -----------------------------------------------------------------------
1387    // P2-31: Test separate StrF/StmF
1388    // -----------------------------------------------------------------------
1389
1390    #[test]
1391    fn test_separate_strf_stmf_decrypt() {
1392        // Create a handler where streams use AES but strings use RC4
1393        let handler = SecurityHandler {
1394            revision: 4,
1395            key_length_bytes: 16,
1396            encryption_key: vec![0xBB; 16],
1397            permissions: Permissions::from_bits(-4),
1398            encrypt_metadata: true,
1399            stream_cf: CryptFilterMethod::Aesv2,
1400            string_cf: CryptFilterMethod::V2,
1401            o_value: Vec::new(),
1402            u_value: Vec::new(),
1403            encoded_password: Vec::new(),
1404        };
1405
1406        // Verify accessor returns stream_cf
1407        assert_eq!(handler.crypt_filter_method(), CryptFilterMethod::Aesv2);
1408
1409        let obj_id = ObjectId::new(1, 0);
1410
1411        // String uses RC4 (V2) — round-trip
1412        let plaintext = b"hello";
1413        let obj_key_str = handler.compute_object_key(obj_id, CryptFilterMethod::V2);
1414        let encrypted_str = crypto::rc4_crypt(&obj_key_str, plaintext).unwrap();
1415        let decrypted = handler.decrypt_string(&encrypted_str, obj_id);
1416        assert_eq!(decrypted, plaintext);
1417
1418        // Stream uses AESV2 — verify the object key includes sAlT
1419        let obj_key_stm = handler.compute_object_key(obj_id, CryptFilterMethod::Aesv2);
1420        assert_ne!(obj_key_str, obj_key_stm); // sAlT suffix makes them differ
1421    }
1422
1423    // -----------------------------------------------------------------------
1424    // P2-32: Test Latin-1 password encoding
1425    // -----------------------------------------------------------------------
1426
1427    #[test]
1428    fn test_encode_password_latin1_ascii() {
1429        let encoded = encode_password_latin1("hello");
1430        assert_eq!(encoded, b"hello");
1431    }
1432
1433    #[test]
1434    fn test_encode_password_latin1_latin_chars() {
1435        // e-acute (U+00E9) is within Latin-1 range
1436        let encoded = encode_password_latin1("\u{00E9}");
1437        assert_eq!(encoded, vec![0xE9]);
1438    }
1439
1440    #[test]
1441    fn test_encode_password_latin1_non_latin() {
1442        // CJK character (U+4E2D) is outside Latin-1 range, replaced with '?'
1443        let encoded = encode_password_latin1("\u{4E2D}");
1444        assert_eq!(encoded, vec![0x3F]);
1445    }
1446
1447    #[test]
1448    fn test_encode_password_latin1_mixed() {
1449        // Mix of ASCII, Latin-1, and non-Latin-1
1450        let encoded = encode_password_latin1("a\u{00FC}\u{4E2D}b");
1451        assert_eq!(encoded, vec![b'a', 0xFC, 0x3F, b'b']);
1452    }
1453
1454    // -----------------------------------------------------------------------
    // P2-33: /Perms validation test, preceded by more password encoding/truncation edge cases
1456    // -----------------------------------------------------------------------
1457
1458    #[test]
1459    fn test_encode_password_latin1_empty() {
1460        let encoded = encode_password_latin1("");
1461        assert!(encoded.is_empty());
1462    }
1463
1464    #[test]
1465    fn test_encode_password_latin1_ascii_alphanumeric() {
1466        let encoded = encode_password_latin1("ABC123");
1467        assert_eq!(encoded, b"ABC123");
1468    }
1469
1470    #[test]
1471    fn test_encode_password_latin1_0xff() {
1472        // U+00FF (y-diaeresis) is the last character in Latin-1 range
1473        let encoded = encode_password_latin1("\u{00FF}");
1474        assert_eq!(encoded, vec![0xFF]);
1475    }
1476
1477    #[test]
1478    fn test_encode_password_latin1_over_range() {
1479        // U+0100 (Latin Extended-A) is just outside Latin-1 range
1480        let encoded = encode_password_latin1("\u{0100}");
1481        assert_eq!(encoded, vec![0x3F]); // '?' fallback
1482    }
1483
1484    #[test]
1485    fn test_truncate_utf8_password_empty() {
1486        let pwd = truncate_utf8_password("");
1487        assert!(pwd.is_empty());
1488    }
1489
1490    #[test]
1491    fn test_truncate_utf8_password_exact_127() {
1492        let input = "a".repeat(127);
1493        let pwd = truncate_utf8_password(&input);
1494        assert_eq!(pwd.len(), 127);
1495        assert_eq!(pwd, input.as_bytes());
1496    }
1497
1498    #[test]
1499    fn test_truncate_utf8_password_over_127() {
1500        let input = "a".repeat(128);
1501        let pwd = truncate_utf8_password(&input);
1502        assert_eq!(pwd.len(), 127);
1503    }
1504
1505    #[test]
1506    fn test_perms_validation_construct_and_verify() {
1507        // Build a valid /Perms block per PDF spec Algorithm 2.A:
1508        //   [0..4]  = /P value as LE i32
1509        //   [8]     = 'T' (EncryptMetadata=true) or 'F'
1510        //   [9..12] = "adb"
1511        let permissions: i32 = -4;
1512        let mut perms_plain = [0u8; 16];
1513        perms_plain[0..4].copy_from_slice(&permissions.to_le_bytes());
1514        // Bytes 4-7: can be anything
1515        perms_plain[8] = b'T'; // EncryptMetadata flag
1516        perms_plain[9] = b'a';
1517        perms_plain[10] = b'd';
1518        perms_plain[11] = b'b';
1519
1520        // Encrypt with AES-256-ECB (single block, no padding)
1521        let file_key = [0x42u8; 32];
1522        // Use aes256_ecb_encrypt_block equivalent: encrypt then decrypt round-trip
1523        // For this test, we encrypt using the raw AES block cipher
1524        use aes::cipher::{BlockEncrypt, KeyInit};
1525        let cipher = aes::Aes256::new_from_slice(&file_key).unwrap();
1526        let mut block = aes::cipher::generic_array::GenericArray::clone_from_slice(&perms_plain);
1527        cipher.encrypt_block(&mut block);
1528        let encrypted_perms: [u8; 16] = block.into();
1529
1530        // Now decrypt and validate (simulating what from_encrypt_dict does)
1531        let decrypted = crypto::aes256_ecb_decrypt_block(&file_key, &encrypted_perms).unwrap();
1532
1533        // Byte 8 = 'T' (EncryptMetadata), bytes 9-11 = "adb"
1534        assert_eq!(decrypted[8], b'T');
1535        assert_eq!(&decrypted[9..12], b"adb");
1536        let recovered_p =
1537            i32::from_le_bytes([decrypted[0], decrypted[1], decrypted[2], decrypted[3]]);
1538        assert_eq!(recovered_p, permissions);
1539    }
1540
1541    #[test]
1542    fn test_get_permissions_alias_delegates_to_permissions() {
1543        let handler = SecurityHandler {
1544            revision: 3,
1545            key_length_bytes: 5,
1546            encryption_key: vec![0x01; 5],
1547            permissions: Permissions::from_bits(-4),
1548            encrypt_metadata: true,
1549            stream_cf: CryptFilterMethod::V2,
1550            string_cf: CryptFilterMethod::V2,
1551            o_value: Vec::new(),
1552            u_value: Vec::new(),
1553            encoded_password: b"secret".to_vec(),
1554        };
1555        assert_eq!(handler.get_permissions(), handler.permissions());
1556    }
1557
1558    #[test]
1559    fn test_encoded_password_stored_and_accessible() {
1560        let handler = SecurityHandler {
1561            revision: 3,
1562            key_length_bytes: 5,
1563            encryption_key: vec![0x01; 5],
1564            permissions: Permissions::from_bits(-4),
1565            encrypt_metadata: true,
1566            stream_cf: CryptFilterMethod::V2,
1567            string_cf: CryptFilterMethod::V2,
1568            o_value: Vec::new(),
1569            u_value: Vec::new(),
1570            encoded_password: b"my_password".to_vec(),
1571        };
1572        assert_eq!(handler.encoded_password(), b"my_password");
1573    }
1574
1575    // -----------------------------------------------------------------------
1576    // Encrypted PDF round-trip tests (P3: integration)
1577    // -----------------------------------------------------------------------
1578
1579    /// Verify SecurityHandler stores encoded_password for R2/R4 user password path.
1580    ///
1581    /// Derives a key for a known password, computes the matching U value for R3,
1582    /// constructs a SecurityHandler directly, and verifies that `encoded_password()`
1583    /// returns the same Latin-1-encoded bytes used during key derivation.
1584    #[test]
1585    fn test_security_handler_stores_encoded_password_user_path() {
1586        // Derive key for a known password
1587        let password = "test";
1588        let pwd_bytes = encode_password_latin1(password);
1589        let o_value = [0x42u8; 32];
1590        let file_id = b"abcdef0123456789";
1591        let key = derive_key_r2_r4(&pwd_bytes, &o_value, -4, file_id, 3, 5, true);
1592
1593        // Compute U value for R3: MD5(PASSWORD_PADDING + file_id), then 20 rounds of RC4
1594        let mut buf = Vec::new();
1595        buf.extend_from_slice(&PASSWORD_PADDING);
1596        buf.extend_from_slice(file_id);
1597        let hash = crypto::md5(&buf);
1598        let mut result = crypto::rc4_crypt(&key, &hash).unwrap();
1599        for i in 1..=19u8 {
1600            let modified: Vec<u8> = key.iter().map(|b| b ^ i).collect();
1601            result = crypto::rc4_crypt(&modified, &result).unwrap();
1602        }
1603        let mut u_value = result;
1604        u_value.resize(32, 0);
1605
1606        // Construct a SecurityHandler directly with the same components
1607        let handler = SecurityHandler {
1608            revision: 3,
1609            key_length_bytes: 5,
1610            encryption_key: key,
1611            permissions: Permissions::from_bits(-4),
1612            encrypt_metadata: true,
1613            stream_cf: CryptFilterMethod::V2,
1614            string_cf: CryptFilterMethod::V2,
1615            o_value: o_value.to_vec(),
1616            u_value,
1617            encoded_password: pwd_bytes.clone(),
1618        };
1619
1620        assert_eq!(handler.encoded_password(), pwd_bytes.as_slice());
1621        // "test" encodes as plain ASCII bytes in Latin-1
1622        assert_eq!(handler.encoded_password(), b"test");
1623    }
1624
1625    /// Verify that `get_permissions()` alias returns the same value as `permissions()`.
1626    ///
1627    /// Uses a non-trivial permission value (-3904) to confirm both accessors agree
1628    /// and that `Permissions::bits()` round-trips correctly through the alias.
1629    #[test]
1630    fn test_security_handler_get_permissions_alias_r3() {
1631        let handler = SecurityHandler {
1632            revision: 3,
1633            key_length_bytes: 5,
1634            encryption_key: vec![0x01; 5],
1635            permissions: Permissions::from_bits(-3904),
1636            encrypt_metadata: true,
1637            stream_cf: CryptFilterMethod::V2,
1638            string_cf: CryptFilterMethod::V2,
1639            o_value: Vec::new(),
1640            u_value: Vec::new(),
1641            encoded_password: b"owner".to_vec(),
1642        };
1643        assert_eq!(handler.get_permissions().bits(), -3904);
1644        assert_eq!(handler.get_permissions(), handler.permissions());
1645    }
1646}