Skip to main content

pdf_objects/
crypto.rs

1//! PDF Standard Security Handler (encryption) — decryption side only.
2//!
3//! This module implements just enough of the PDF 1.7 Standard Security
4//! Handler to decrypt documents produced by revisions 2 and 3 (RC4 with a
5//! 40-bit or 128-bit key) when the user password is empty. That covers the
6//! large majority of "encrypted to prevent editing but openable by anyone"
7//! PDFs that real-world documents ship with.
8//!
9//! AES (V=4 / V=5, R=4..6), non-empty user passwords, and public-key
10//! security handlers are not implemented here and still fail up front with
11//! `PdfError::Unsupported`. They can be layered on top without changing
12//! this module's public surface.
13
14use md5::{Digest, Md5};
15
16use crate::error::{PdfError, PdfResult};
17use crate::types::{ObjectRef, PdfDictionary, PdfValue};
18
19/// Adobe's 32-byte password padding string (PDF 1.7, algorithm 2).
20const PASSWORD_PADDING: [u8; 32] = [
21    0x28, 0xBF, 0x4E, 0x5E, 0x4E, 0x75, 0x8A, 0x41, 0x64, 0x00, 0x4E, 0x56, 0xFF, 0xFA, 0x01, 0x08,
22    0x2E, 0x2E, 0x00, 0xB6, 0xD0, 0x68, 0x3E, 0x80, 0x2F, 0x0C, 0xA9, 0xFE, 0x64, 0x53, 0x69, 0x7A,
23];
24
/// Revision (`/R`) of the Standard security handler that this module can
/// process. Any other revision is rejected before a value of this type is
/// ever constructed.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum SecurityRevision {
    /// `/R 2`: the user password is verified with algorithm 4.
    R2,
    /// `/R 3`: the user password is verified with algorithm 5.
    R3,
}
30
/// Decryption state for a document protected by the Standard security
/// handler. A value only exists once a password has authenticated.
#[derive(Clone, Debug)]
pub struct StandardSecurityHandler {
    /// File-level encryption key; every per-object RC4 key is derived from it.
    file_key: Vec<u8>,
}
35
36impl StandardSecurityHandler {
37    /// Builds a decryption handler from the `/Encrypt` dictionary and the
38    /// trailer's first `/ID` string, authenticating the supplied password.
39    /// Returns `None` if the password does not authenticate.
40    pub fn open(
41        encrypt_dict: &PdfDictionary,
42        id_first: &[u8],
43        password: &[u8],
44    ) -> PdfResult<Option<Self>> {
45        let filter = encrypt_dict
46            .get("Filter")
47            .and_then(PdfValue::as_name)
48            .unwrap_or("");
49        if filter != "Standard" {
50            return Err(PdfError::Unsupported(format!(
51                "encryption filter /{filter} is not supported"
52            )));
53        }
54        let v = encrypt_dict
55            .get("V")
56            .and_then(PdfValue::as_integer)
57            .unwrap_or(0);
58        let r = encrypt_dict
59            .get("R")
60            .and_then(PdfValue::as_integer)
61            .unwrap_or(0);
62        let revision = match r {
63            2 => SecurityRevision::R2,
64            3 => SecurityRevision::R3,
65            other => {
66                return Err(PdfError::Unsupported(format!(
67                    "Standard security handler revision {other} is not supported (only R=2 and R=3 handled)"
68                )));
69            }
70        };
71        if !(1..=2).contains(&v) {
72            return Err(PdfError::Unsupported(format!(
73                "Standard security handler V={v} is not supported (only V=1 and V=2 handled)"
74            )));
75        }
76
77        let key_length_bits = encrypt_dict
78            .get("Length")
79            .and_then(PdfValue::as_integer)
80            .unwrap_or(40);
81        if key_length_bits % 8 != 0 || !(40..=128).contains(&key_length_bits) {
82            return Err(PdfError::Corrupt(format!(
83                "invalid /Length {key_length_bits} in Encrypt dictionary"
84            )));
85        }
86        let key_length_bytes = (key_length_bits / 8) as usize;
87
88        let o = pdf_string_bytes(encrypt_dict, "O")?;
89        let u = pdf_string_bytes(encrypt_dict, "U")?;
90        let p = encrypt_dict
91            .get("P")
92            .and_then(PdfValue::as_integer)
93            .ok_or_else(|| PdfError::Corrupt("Encrypt dictionary missing /P".to_string()))?;
94        if o.len() != 32 || u.len() != 32 {
95            return Err(PdfError::Corrupt(
96                "Encrypt /O and /U must each be 32 bytes".to_string(),
97            ));
98        }
99
100        let file_key = compute_file_key(password, &o, p as i32, id_first, key_length_bytes);
101        if !authenticate_user_password(&file_key, revision, &u, id_first) {
102            return Ok(None);
103        }
104        Ok(Some(Self { file_key }))
105    }
106
107    /// Decrypts `bytes` produced for the indirect object `(num, gen)` under
108    /// RC4 with the per-object key described in PDF 1.7 algorithm 1.
109    pub fn decrypt_bytes(&self, bytes: &[u8], object_ref: ObjectRef) -> Vec<u8> {
110        let object_key = self.object_key(object_ref);
111        rc4(&object_key, bytes)
112    }
113
114    fn object_key(&self, object_ref: ObjectRef) -> Vec<u8> {
115        let mut material = Vec::with_capacity(self.file_key.len() + 5);
116        material.extend_from_slice(&self.file_key);
117        let num = object_ref.object_number.to_le_bytes();
118        material.push(num[0]);
119        material.push(num[1]);
120        material.push(num[2]);
121        let generation = object_ref.generation.to_le_bytes();
122        material.push(generation[0]);
123        material.push(generation[1]);
124        let digest = md5_bytes(&material);
125        let truncated_len = (self.file_key.len() + 5).min(16);
126        digest[..truncated_len].to_vec()
127    }
128}
129
130fn pdf_string_bytes(dict: &PdfDictionary, key: &str) -> PdfResult<Vec<u8>> {
131    match dict.get(key) {
132        Some(PdfValue::String(s)) => Ok(s.0.clone()),
133        Some(_) => Err(PdfError::Corrupt(format!("Encrypt /{key} is not a string"))),
134        None => Err(PdfError::Corrupt(format!(
135            "Encrypt dictionary missing /{key}"
136        ))),
137    }
138}
139
140fn compute_file_key(
141    password: &[u8],
142    o_entry: &[u8],
143    permissions: i32,
144    id_first: &[u8],
145    key_length_bytes: usize,
146) -> Vec<u8> {
147    // Algorithm 2 (PDF 1.7 section 7.6.3.3):
148    //   1. Pad the password to 32 bytes.
149    let padded = pad_password(password);
150    let mut hasher = Md5::new();
151    hasher.update(padded);
152    //   2. Append /O.
153    hasher.update(o_entry);
154    //   3. Append /P (4 bytes little-endian).
155    hasher.update(permissions.to_le_bytes());
156    //   4. Append the first element of /ID.
157    hasher.update(id_first);
158    //   (Step 5 — append 0xFFFFFFFF when /EncryptMetadata is false — is an
159    //   R=4+ rule; our MVP only handles R<=3 so skip it.)
160    let mut digest = hasher.finalize_reset();
161
162    // Algorithm 2, step 6: for R>=3, re-MD5 the first n bytes 50 times.
163    for _ in 0..50 {
164        hasher.update(&digest[..key_length_bytes]);
165        digest = hasher.finalize_reset();
166    }
167    digest[..key_length_bytes].to_vec()
168}
169
170fn pad_password(password: &[u8]) -> [u8; 32] {
171    let mut out = [0u8; 32];
172    let take = password.len().min(32);
173    out[..take].copy_from_slice(&password[..take]);
174    if take < 32 {
175        out[take..].copy_from_slice(&PASSWORD_PADDING[..32 - take]);
176    }
177    out
178}
179
180fn authenticate_user_password(
181    file_key: &[u8],
182    revision: SecurityRevision,
183    u_entry: &[u8],
184    id_first: &[u8],
185) -> bool {
186    match revision {
187        SecurityRevision::R2 => {
188            // Algorithm 4: encrypt the password padding with the file key; the
189            // full 32 bytes must equal /U.
190            let encrypted = rc4(file_key, &PASSWORD_PADDING);
191            encrypted == u_entry
192        }
193        SecurityRevision::R3 => {
194            // Algorithm 5.
195            let mut hasher = Md5::new();
196            hasher.update(PASSWORD_PADDING);
197            hasher.update(id_first);
198            let seed = hasher.finalize();
199            let mut buffer = rc4(file_key, &seed);
200            for i in 1u8..=19 {
201                let key: Vec<u8> = file_key.iter().map(|byte| byte ^ i).collect();
202                buffer = rc4(&key, &buffer);
203            }
204            // The first 16 bytes of /U must match the buffer; the remaining
205            // 16 bytes are arbitrary padding.
206            buffer.as_slice() == &u_entry[..16]
207        }
208    }
209}
210
211fn md5_bytes(input: &[u8]) -> [u8; 16] {
212    let mut hasher = Md5::new();
213    hasher.update(input);
214    hasher.finalize().into()
215}
216
/// Applies the RC4 stream cipher to `data` under `key` and returns the
/// result. The cipher is its own inverse, so this both encrypts and decrypts.
///
/// # Panics
///
/// Panics if `key` is empty (the key schedule indexes `key` modulo its
/// length).
fn rc4(key: &[u8], data: &[u8]) -> Vec<u8> {
    // Key scheduling: start from the identity permutation, then shuffle it
    // under control of the key bytes.
    let mut state = [0u8; 256];
    for (slot, value) in state.iter_mut().zip(0u8..=255) {
        *slot = value;
    }
    let mut j = 0u8;
    for i in 0..state.len() {
        j = j.wrapping_add(state[i]).wrapping_add(key[i % key.len()]);
        state.swap(i, usize::from(j));
    }

    // Keystream generation, XORed onto the input byte by byte.
    let (mut i, mut j) = (0u8, 0u8);
    data.iter()
        .map(|&byte| {
            i = i.wrapping_add(1);
            j = j.wrapping_add(state[usize::from(i)]);
            state.swap(usize::from(i), usize::from(j));
            let pick = state[usize::from(i)].wrapping_add(state[usize::from(j)]);
            byte ^ state[usize::from(pick)]
        })
        .collect()
}
239
#[cfg(test)]
pub(crate) mod test_helpers {
    //! Test-only re-exports of the low-level primitives, so parser tests can
    //! assemble a small encrypted PDF end-to-end: choose an arbitrary `/O`,
    //! derive a file key from the empty password, RC4-encrypt each object's
    //! data under its per-object key, and round-trip it through `parse_pdf`.

    use super::*;

    /// Forwards to the module-private RC4 implementation.
    pub fn rc4(key: &[u8], data: &[u8]) -> Vec<u8> {
        super::rc4(key, data)
    }

    /// Forwards to the module-private file-key derivation.
    pub fn compute_file_key(
        password: &[u8],
        o_entry: &[u8],
        permissions: i32,
        id_first: &[u8],
        key_length_bytes: usize,
    ) -> Vec<u8> {
        super::compute_file_key(password, o_entry, permissions, id_first, key_length_bytes)
    }

    /// Builds the 32-byte `/U` value matching `file_key` under revision 3.
    /// The first 16 bytes are the RC4 output of algorithm 5; the trailing
    /// 16 bytes are arbitrary padding, zero-filled here.
    pub fn compute_u_r3(file_key: &[u8], id_first: &[u8]) -> Vec<u8> {
        let mut hasher = Md5::new();
        hasher.update(PASSWORD_PADDING);
        hasher.update(id_first);
        let seed = hasher.finalize();

        let mut u_entry = (1u8..=19).fold(super::rc4(file_key, &seed), |buffer, round| {
            let round_key: Vec<u8> = file_key.iter().map(|byte| byte ^ round).collect();
            super::rc4(&round_key, &buffer)
        });
        u_entry.resize(32, 0);
        u_entry
    }

    /// Derives the per-object RC4 key the same way the handler does, so a
    /// test can encrypt a known plaintext and check that the parser's
    /// decryption path inverts it.
    pub fn object_key(file_key: &[u8], object_number: u32, generation: u16) -> Vec<u8> {
        let number = object_number.to_le_bytes();
        let gen_bytes = generation.to_le_bytes();
        // File key, then the low 3 bytes of the object number, then the
        // 2-byte generation, all little-endian.
        let material = [file_key, &number[..3], &gen_bytes[..]].concat();
        let digest = super::md5_bytes(&material);
        let truncated_len = (file_key.len() + 5).min(16);
        digest[..truncated_len].to_vec()
    }
}
299
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn rc4_empty_input_returns_empty() {
        assert_eq!(rc4(b"Key", b""), Vec::<u8>::new());
    }

    #[test]
    fn rc4_matches_known_vector() {
        // Widely published RC4 example vectors (key "Key" / data "Plaintext"
        // and key "Wiki" / data "pedia"). They are not taken from RFC 6229,
        // whose vectors use binary keys.
        let key = b"Key";
        let plaintext = b"Plaintext";
        let encrypted = rc4(key, plaintext);
        // RC4 is symmetric: applying it again restores the original bytes.
        let decrypted = rc4(key, &encrypted);
        assert_eq!(decrypted, plaintext);
        assert_eq!(
            encrypted,
            [0xBB, 0xF3, 0x16, 0xE8, 0xD9, 0x40, 0xAF, 0x0A, 0xD3]
        );
        assert_eq!(rc4(b"Wiki", b"pedia"), [0x10, 0x21, 0xBF, 0x04, 0x20]);
    }

    #[test]
    fn pad_password_short_pads_with_padding_string() {
        let padded = pad_password(b"ab");
        assert_eq!(padded[0], b'a');
        assert_eq!(padded[1], b'b');
        assert_eq!(padded[2], PASSWORD_PADDING[0]);
        assert_eq!(padded[31], PASSWORD_PADDING[29]);
    }

    #[test]
    fn pad_password_empty_is_padding_string() {
        assert_eq!(pad_password(b""), PASSWORD_PADDING);
    }

    #[test]
    fn pad_password_long_is_truncated_to_32_bytes() {
        // Bytes past the 32nd are ignored and no padding is appended.
        let long = [0x41u8; 40];
        assert_eq!(pad_password(&long), [0x41u8; 32]);
    }
}
333}