pdf_oxide 0.3.38

The fastest Rust PDF library with text extraction: 0.8ms mean, 100% pass rate on 3,830 PDFs. 5× faster than pdf_extract, 17× faster than oxidize_pdf. Extract, create, and edit PDFs.
Documentation
//! Encryption handler for writing encrypted PDFs.
//!
//! This module provides the EncryptionWriteHandler which encrypts PDF objects
//! (strings and streams) when saving an encrypted PDF.

use super::aes;
use super::algorithms;
use super::rc4;
use super::Algorithm;
use md5::{Digest, Md5};

/// Handler for encrypting PDF objects during write operations.
///
/// Holds the document-level encryption key together with the selected
/// [`Algorithm`]. Per-object keys are derived from these on demand whenever a
/// string or stream is encrypted.
pub struct EncryptionWriteHandler {
    /// The base (document-level) encryption key. `new` derives it from the
    /// password material; `from_key` stores the caller-supplied bytes as-is.
    encryption_key: Vec<u8>,
    /// The encryption algorithm in use; it selects both the cipher and the
    /// per-object key derivation path.
    algorithm: Algorithm,
    /// Whether metadata streams should be encrypted. The value is stored and
    /// exposed through a getter; the encrypt methods of this type do not
    /// consult it themselves.
    encrypt_metadata: bool,
}

impl EncryptionWriteHandler {
    /// Build a handler by deriving the document encryption key from the
    /// supplied password material.
    ///
    /// The revision and key length passed to the key derivation are both
    /// taken from `algorithm`.
    ///
    /// # Arguments
    /// * `user_password` - The user password for the document
    /// * `owner_hash` - The computed owner password hash (O value)
    /// * `permissions` - The permission bits (P value)
    /// * `file_id` - The first element of the file ID array
    /// * `algorithm` - The encryption algorithm to use
    /// * `encrypt_metadata` - Whether to encrypt metadata
    pub fn new(
        user_password: &[u8],
        owner_hash: &[u8],
        permissions: i32,
        file_id: &[u8],
        algorithm: Algorithm,
        encrypt_metadata: bool,
    ) -> Self {
        // Only the revision (R) half of the (V, R) pair feeds the derivation.
        let revision = Self::get_version_revision(algorithm).1;

        let derived_key = algorithms::compute_encryption_key(
            user_password,
            owner_hash,
            permissions,
            file_id,
            revision,
            algorithm.key_length(),
            encrypt_metadata,
        );

        Self::from_key(derived_key, algorithm, encrypt_metadata)
    }

    /// Create a handler from an already computed encryption key.
    ///
    /// This is useful when the key has already been derived during
    /// EncryptDict construction.
    pub fn from_key(encryption_key: Vec<u8>, algorithm: Algorithm, encrypt_metadata: bool) -> Self {
        Self {
            encryption_key,
            algorithm,
            encrypt_metadata,
        }
    }

    /// Map an algorithm to its `(V, R)` version/revision pair.
    ///
    /// `Algorithm::None` maps to `(0, 0)`.
    fn get_version_revision(algorithm: Algorithm) -> (u32, u32) {
        // V: version of the encryption scheme.
        let version = match algorithm {
            Algorithm::None => 0,
            Algorithm::RC4_40 => 1,
            Algorithm::Rc4_128 => 2,
            Algorithm::Aes128 => 4,
            Algorithm::Aes256 => 5,
        };

        // R: revision of the security handler.
        let revision = match algorithm {
            Algorithm::None => 0,
            Algorithm::RC4_40 => 2,
            Algorithm::Rc4_128 => 3,
            Algorithm::Aes128 => 4,
            Algorithm::Aes256 => 6,
        };

        (version, revision)
    }

    /// Derive the encryption key for one indirect object.
    ///
    /// PDF Spec: Algorithm 1 - Encryption key derivation for individual objects
    ///
    /// For revisions below 5 the base key is extended with the low three
    /// bytes of the object number and the two bytes of the generation number
    /// (both little-endian), plus the `sAlT` marker for AES, and hashed with
    /// MD5. The digest is truncated to `min(n + 5, 16)` bytes, where `n` is
    /// the base key length. For revision 5 and above (AES-256) the base key
    /// is returned unchanged.
    fn derive_object_key(&self, obj_num: u32, gen_num: u16) -> Vec<u8> {
        let revision = Self::get_version_revision(self.algorithm).1;
        if revision >= 5 {
            // AES-256 uses the document key directly for every object.
            return self.encryption_key.clone();
        }

        let obj_bytes = obj_num.to_le_bytes();
        let gen_bytes = gen_num.to_le_bytes();

        let mut md5 = Md5::new();
        md5.update(&self.encryption_key);
        // Only the three low-order bytes of the object number take part.
        md5.update(&obj_bytes[..3]);
        md5.update(gen_bytes);
        if self.algorithm.is_aes() {
            md5.update(b"sAlT");
        }
        let digest = md5.finalize();

        // Truncate to n + 5 bytes, capped at the 16-byte MD5 output.
        let truncated_len = std::cmp::min(self.encryption_key.len() + 5, 16);
        digest[..truncated_len].to_vec()
    }

    /// Encrypt a string that belongs to a specific object.
    ///
    /// With `Algorithm::None` the input is returned as an unmodified copy.
    /// Otherwise the per-object key is derived and the data is encrypted
    /// with it; for AES the result starts with the 16-byte IV.
    ///
    /// # Arguments
    /// * `data` - The plaintext string data
    /// * `obj_num` - The object number containing this string
    /// * `gen_num` - The generation number
    ///
    /// # Returns
    /// The encrypted data
    pub fn encrypt_string(&self, data: &[u8], obj_num: u32, gen_num: u16) -> Vec<u8> {
        if matches!(self.algorithm, Algorithm::None) {
            data.to_vec()
        } else {
            let object_key = self.derive_object_key(obj_num, gen_num);
            self.encrypt_with_key(&object_key, data)
        }
    }

    /// Encrypt a stream that belongs to a specific object.
    ///
    /// With `Algorithm::None` the input is returned as an unmodified copy.
    /// For AES encryption, a random 16-byte IV is prepended to the ciphertext.
    /// This method does not look at the `encrypt_metadata` flag; deciding
    /// whether a given stream should be encrypted is left to the caller.
    ///
    /// # Arguments
    /// * `data` - The plaintext stream data
    /// * `obj_num` - The object number containing this stream
    /// * `gen_num` - The generation number
    ///
    /// # Returns
    /// The encrypted data (with IV prepended for AES)
    pub fn encrypt_stream(&self, data: &[u8], obj_num: u32, gen_num: u16) -> Vec<u8> {
        match self.algorithm {
            Algorithm::None => data.to_vec(),
            Algorithm::RC4_40 | Algorithm::Rc4_128 | Algorithm::Aes128 | Algorithm::Aes256 => {
                let object_key = self.derive_object_key(obj_num, gen_num);
                self.encrypt_with_key(&object_key, data)
            },
        }
    }

    /// Encrypt `data` with `key` using the handler's algorithm.
    ///
    /// * `Algorithm::None` returns a copy of the input.
    /// * The RC4 variants return the RC4 output directly.
    /// * The AES variants generate a fresh IV and return `IV || ciphertext`.
    ///
    /// If the AES routine reports an error, the input bytes are returned
    /// unmodified.
    ///
    /// NOTE(review): that fallback means plaintext would be emitted where a
    /// reader expects ciphertext. Surfacing the error to the caller would be
    /// safer, but requires changing this method's return type.
    fn encrypt_with_key(&self, key: &[u8], data: &[u8]) -> Vec<u8> {
        match self.algorithm {
            Algorithm::None => data.to_vec(),
            Algorithm::RC4_40 | Algorithm::Rc4_128 => rc4::rc4_crypt(key, data),
            Algorithm::Aes128 => {
                let iv = Self::generate_iv();
                aes::aes128_encrypt(key, &iv, data)
                    .map(|body| {
                        // Output layout: 16-byte IV followed by the ciphertext.
                        let mut framed = iv.to_vec();
                        framed.extend(body);
                        framed
                    })
                    .unwrap_or_else(|_| data.to_vec()) // Fallback on error
            },
            Algorithm::Aes256 => {
                let iv = Self::generate_iv();
                aes::aes256_encrypt(key, &iv, data)
                    .map(|body| {
                        // Output layout: 16-byte IV followed by the ciphertext.
                        let mut framed = iv.to_vec();
                        framed.extend(body);
                        framed
                    })
                    .unwrap_or_else(|_| data.to_vec()) // Fallback on error
            },
        }
    }

    /// Generate a random 16-byte IV for AES encryption.
    fn generate_iv() -> [u8; 16] {
        use md5::{Digest, Md5};

        // Generate a UUID and hash it for randomness
        let uuid = uuid::Uuid::new_v4();
        let uuid_bytes = uuid.as_bytes();

        let mut hasher = Md5::new();
        hasher.update(uuid_bytes);

        // Add timestamp for extra entropy
        let now = std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .unwrap_or_default();
        hasher.update(now.as_nanos().to_le_bytes());

        let hash = hasher.finalize();
        let mut iv = [0u8; 16];
        iv.copy_from_slice(&hash);
        iv
    }

    /// Get the encryption algorithm this handler was constructed with.
    ///
    /// The value is returned by copy.
    pub fn algorithm(&self) -> Algorithm {
        self.algorithm
    }

    /// Check if metadata should be encrypted.
    ///
    /// Returns the flag exactly as it was passed to the constructor.
    pub fn encrypt_metadata(&self) -> bool {
        self.encrypt_metadata
    }

    /// Get the base encryption key (for testing purposes).
    ///
    /// Only compiled in test builds, so the raw key is not reachable through
    /// this accessor in normal builds.
    #[cfg(test)]
    pub fn encryption_key(&self) -> &[u8] {
        &self.encryption_key
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Build a handler around a fixed key with metadata encryption enabled.
    fn handler_with(key: &[u8], algorithm: Algorithm) -> EncryptionWriteHandler {
        EncryptionWriteHandler::from_key(key.to_vec(), algorithm, true)
    }

    #[test]
    fn test_object_key_derivation_rc4() {
        // 40-bit base key
        let handler = handler_with(&[0x01, 0x02, 0x03, 0x04, 0x05], Algorithm::RC4_40);

        let baseline = handler.derive_object_key(1, 0);
        let other_object = handler.derive_object_key(2, 0);
        let other_generation = handler.derive_object_key(1, 1);

        // Changing either the object or the generation number changes the key
        assert_ne!(baseline, other_object);
        assert_ne!(baseline, other_generation);

        // Derived length is n + 5 (capped at 16): 5 + 5 = 10
        assert_eq!(baseline.len(), 10);
    }

    #[test]
    fn test_object_key_derivation_aes128() {
        // 128-bit base key
        let handler = handler_with(&[0u8; 16], Algorithm::Aes128);

        let first = handler.derive_object_key(1, 0);
        let second = handler.derive_object_key(2, 0);

        // Distinct objects get distinct keys
        assert_ne!(first, second);

        // min(16 + 5, 16) = 16 bytes
        assert_eq!(first.len(), 16);
    }

    #[test]
    fn test_object_key_derivation_aes256() {
        // 256-bit base key
        let base_key = vec![0u8; 32];
        let handler = handler_with(&base_key, Algorithm::Aes256);

        // For AES-256 (R>=5) the object key is the base key itself
        assert_eq!(handler.derive_object_key(1, 0), base_key);
    }

    #[test]
    fn test_rc4_encryption_roundtrip() {
        let handler = handler_with(&[0x01, 0x02, 0x03, 0x04, 0x05], Algorithm::RC4_40);

        let plaintext = b"Hello, encrypted world!";
        let ciphertext = handler.encrypt_string(plaintext, 1, 0);

        // RC4 is symmetric: applying it again with the same key decrypts
        let object_key = handler.derive_object_key(1, 0);
        let decrypted = rc4::rc4_crypt(&object_key, &ciphertext);

        assert_eq!(&decrypted, plaintext);
    }

    #[test]
    fn test_aes_encryption() {
        let handler = handler_with(&[0u8; 16], Algorithm::Aes128);

        let plaintext = b"Hello, AES encrypted world!";
        let output = handler.encrypt_stream(plaintext, 1, 0);

        // Output is the 16-byte IV followed by the encrypted data
        assert!(output.len() >= 16);
        let (iv, body) = output.split_at(16);

        // Decrypt with the same per-object key to verify the round trip
        let object_key = handler.derive_object_key(1, 0);
        let decrypted = aes::aes128_decrypt(&object_key, iv, body).unwrap();

        assert_eq!(&decrypted, plaintext);
    }
}