// oxidize-pdf 2.4.2
//
// A pure Rust PDF generation and manipulation library with zero external dependencies.
// Documentation
//! Cross-validation tests for encryption compatibility with other PDF libraries.
//!
//! These tests verify that oxidize-pdf can correctly validate passwords and
//! recover encryption keys from PDFs generated by different tools:
//! - qpdf (already covered in encryption_r5_real_pdf_test.rs and encryption_r6_real_pdf_test.rs)
//! - pypdf (Python library)
//!
//! This ensures interoperability with the wider PDF ecosystem.

use oxidize_pdf::encryption::{StandardSecurityHandler, UserPassword};

const FIXTURES_DIR: &str = "tests/fixtures";

/// Load a fixture file from `FIXTURES_DIR` and return its raw contents.
///
/// # Panics
///
/// Panics with the attempted path if the file cannot be read.
fn read_pdf_bytes(filename: &str) -> Vec<u8> {
    let path = format!("{}/{}", FIXTURES_DIR, filename);
    match std::fs::read(&path) {
        Ok(contents) => contents,
        Err(_) => panic!("Failed to read {}", path),
    }
}

/// Extract U entry bytes from PDF (48 bytes for R5/R6)
///
/// Returns `None` when no `/U` string entry (hex or literal) is found.
fn extract_u_entry(pdf_bytes: &[u8]) -> Option<Vec<u8>> {
    extract_string_entry(pdf_bytes, &["/U <", "/U<", "/U ("])
}

/// Extract UE entry bytes from PDF (32 bytes)
///
/// Returns `None` when no `/UE` string entry (hex or literal) is found.
fn extract_ue_entry(pdf_bytes: &[u8]) -> Option<Vec<u8>> {
    extract_string_entry(pdf_bytes, &["/UE <", "/UE<", "/UE ("])
}

/// Extract Perms entry bytes from PDF (16 bytes, R6 only)
///
/// Returns `None` when no `/Perms` string entry (hex or literal) is found.
fn extract_perms_entry(pdf_bytes: &[u8]) -> Option<Vec<u8>> {
    extract_string_entry(pdf_bytes, &["/Perms <", "/Perms<", "/Perms ("])
}

/// Locate a string-valued dictionary entry and decode it into raw bytes.
///
/// Every pattern is a key followed by the opening delimiter of its value
/// (`<` for a hex string, `(` for a literal string). Patterns are tried in
/// order and only the first occurrence of each pattern is examined. A hex
/// match without a closing `>` falls through to the next pattern.
fn extract_string_entry(pdf_bytes: &[u8], patterns: &[&str]) -> Option<Vec<u8>> {
    // Decode as Latin-1 (one char per byte) rather than lossy UTF-8: lossy
    // decoding replaces every byte >= 0x80 with U+FFFD, which corrupts binary
    // literal strings. With Latin-1 the decoder's `c as u8` restores each byte.
    let pdf_str: String = pdf_bytes.iter().map(|&byte| char::from(byte)).collect();

    for &pattern in patterns {
        if let Some(pos) = pdf_str.find(pattern) {
            // Each pattern ends with the opening delimiter; step back one
            // byte so the value slice starts on it.
            let value = &pdf_str[pos + pattern.len() - 1..];

            if let Some(hex) = value.strip_prefix('<') {
                if let Some(end) = hex.find('>') {
                    return Some(hex_to_bytes(&hex[..end]));
                }
            } else if value.starts_with('(') {
                return extract_literal_string(value);
            }
        }
    }
    None
}

/// Find the numeric value following the `/R` key in the PDF.
///
/// The key may be followed by a space, newline, tab or `/`. For each of those
/// forms only the first occurrence in the file is inspected; the run of ASCII
/// digits right after it is parsed as the revision. Returns `None` when no
/// form yields a parseable number.
fn extract_revision(pdf_bytes: &[u8]) -> Option<i32> {
    let text = String::from_utf8_lossy(pdf_bytes);

    ["/R ", "/R\n", "/R\t", "/R/"].iter().find_map(|&marker| {
        let (_, tail) = text.split_once(marker)?;
        // ASCII digits are one byte each, so the count doubles as a byte offset.
        let digit_count = tail.bytes().take_while(u8::is_ascii_digit).count();
        // An empty digit run fails to parse, which moves on to the next marker.
        tail[..digit_count].parse::<i32>().ok()
    })
}

/// Convert hex string to bytes
///
/// Characters that are not ASCII hex digits (whitespace, line breaks, ...)
/// are skipped. If the number of hex digits is odd, the final digit is
/// treated as if it were followed by `0`, as PDF hex strings require,
/// instead of panicking on an out-of-range slice.
fn hex_to_bytes(hex: &str) -> Vec<u8> {
    let nibbles: Vec<u8> = hex
        .chars()
        .filter_map(|c| c.to_digit(16))
        // `to_digit(16)` yields 0..=15, so narrowing to u8 is lossless.
        .map(|digit| digit as u8)
        .collect();

    nibbles
        .chunks(2)
        .map(|pair| {
            // A trailing lone digit is padded with a zero low nibble.
            let low = pair.get(1).copied().unwrap_or(0);
            (pair[0] << 4) | low
        })
        .collect()
}

/// Extract literal string from PDF (handles escapes)
///
/// `s` must begin with the opening `(`; otherwise `None` is returned.
/// Decoding stops at the matching closing `)` and anything after it is
/// ignored. Balanced unescaped parentheses inside the string are kept.
/// If the string is never closed, the bytes decoded so far are returned.
///
/// Escape handling:
/// - `\n`, `\r`, `\t`, `\b`, `\f`, `\(`, `\)`, `\\` map to their byte values.
/// - `\d`, `\dd`, `\ddd` (octal digits) produce one byte; overflow above
///   0o377 is ignored by keeping only the low 8 bits.
/// - A backslash directly before a line break (LF, CR or CRLF) is a line
///   continuation and produces nothing.
/// - A backslash before any other character is dropped and the character kept.
///
/// Each non-escaped char is narrowed with `as u8`, so callers that need exact
/// bytes should pass text in which every char is below U+0100.
fn extract_literal_string(s: &str) -> Option<Vec<u8>> {
    let body = s.strip_prefix('(')?;

    let mut bytes = Vec::new();
    let mut chars = body.chars().peekable();
    // Nesting level of unescaped parentheses; the opening `(` counts as one.
    let mut depth = 1u32;

    while let Some(c) = chars.next() {
        match c {
            '(' => {
                depth += 1;
                bytes.push(b'(');
            }
            ')' => {
                depth -= 1;
                if depth == 0 {
                    // Matching close of the outermost parenthesis: done.
                    break;
                }
                bytes.push(b')');
            }
            '\\' => match chars.next() {
                Some('n') => bytes.push(b'\n'),
                Some('r') => bytes.push(b'\r'),
                Some('t') => bytes.push(b'\t'),
                Some('b') => bytes.push(0x08),
                Some('f') => bytes.push(0x0C),
                Some('(') => bytes.push(b'('),
                Some(')') => bytes.push(b')'),
                Some('\\') => bytes.push(b'\\'),
                // Line continuation: the backslash and the line break vanish.
                Some('\n') => {}
                Some('\r') => {
                    if chars.peek() == Some(&'\n') {
                        chars.next();
                    }
                }
                Some(first @ '0'..='7') => {
                    let mut value = first as u32 - '0' as u32;
                    // Up to two more octal digits may follow.
                    for _ in 0..2 {
                        let next_digit = chars.peek().and_then(|next| next.to_digit(8));
                        match next_digit {
                            Some(digit) => {
                                value = value * 8 + digit;
                                chars.next();
                            }
                            None => break,
                        }
                    }
                    // High-order overflow (e.g. `\777`) is ignored.
                    bytes.push((value & 0xFF) as u8);
                }
                // Unknown escape (including `\8` and `\9`): drop the backslash.
                Some(other) => bytes.push(other as u8),
                None => {}
            },
            _ => bytes.push(c as u8),
        }
    }

    Some(bytes)
}

// ============================================================================
// pypdf Cross-Validation Tests (R6 - AES-256 with Algorithm 2.B)
// ============================================================================

/// Fails with regeneration instructions when the pypdf user-password fixture
/// is absent from `FIXTURES_DIR`.
#[test]
fn test_pypdf_fixture_exists() {
    let fixture = std::path::Path::new(FIXTURES_DIR).join("encrypted_pypdf_aes256_user.pdf");
    if !fixture.exists() {
        panic!("pypdf fixture not found. Run: python3 tools/scripts/generate_pypdf_encrypted.py");
    }
}

/// The pypdf fixture must report revision 6, carry a 48-byte U entry, accept
/// the correct user password and reject a wrong one.
#[test]
fn test_pypdf_r6_password_validation() {
    let fixture = read_pdf_bytes("encrypted_pypdf_aes256_user.pdf");

    // Sanity-check the fixture before exercising the handler.
    let revision = extract_revision(&fixture).expect("Failed to extract revision");
    assert_eq!(revision, 6, "pypdf should generate R6 encrypted PDFs");

    let user_entry = extract_u_entry(&fixture).expect("Failed to extract U entry");
    assert_eq!(user_entry.len(), 48, "R6 U entry should be 48 bytes");

    let security = StandardSecurityHandler::aes_256_r6();
    let good = UserPassword(String::from("pypdf_test"));
    let bad = UserPassword(String::from("wrong"));

    // The right password is accepted...
    let accepted = security
        .validate_r6_user_password(&good, &user_entry)
        .expect("Validation should not error");
    assert!(accepted, "pypdf R6: correct password should validate");

    // ...and a wrong one is rejected without raising an error.
    let wrongly_accepted = security
        .validate_r6_user_password(&bad, &user_entry)
        .expect("Validation should not error");
    assert!(!wrongly_accepted, "pypdf R6: wrong password should not validate");
}

/// The fixture encrypted with an empty user password must validate against
/// an empty `UserPassword`.
#[test]
fn test_pypdf_r6_empty_password() {
    let fixture = read_pdf_bytes("encrypted_pypdf_aes256_empty.pdf");
    let user_entry = extract_u_entry(&fixture).expect("Failed to extract U entry");

    let security = StandardSecurityHandler::aes_256_r6();
    let blank = UserPassword(String::new());

    let accepted = security
        .validate_r6_user_password(&blank, &user_entry)
        .expect("Validation should not error");
    assert!(accepted, "pypdf R6: empty password should validate");
}

/// Validates a non-ASCII user password against the pypdf "spanish" fixture.
///
/// Ignored by default: pypdf normalizes passwords with stringprep/SASLprep
/// (RFC 4013) while oxidize-pdf currently uses raw UTF-8, so the two may
/// disagree. This is a known limitation documented in CLAUDE.md, to be
/// addressed in a future release with full SASLprep support.
#[test]
#[ignore = "pypdf uses different Unicode normalization (SASLprep) - known limitation"]
fn test_pypdf_r6_unicode_password() {
    let fixture = read_pdf_bytes("encrypted_pypdf_aes256_spanish.pdf");
    let user_entry = extract_u_entry(&fixture).expect("Failed to extract U entry");

    let security = StandardSecurityHandler::aes_256_r6();
    let spanish = UserPassword(String::from("Contraseña123"));

    let accepted = security
        .validate_r6_user_password(&spanish, &user_entry)
        .expect("Validation should not error");
    assert!(accepted, "pypdf R6: Unicode password should validate");
}

/// Recovers the encryption key from the pypdf fixture's U and UE entries and
/// checks that both the UE entry and the recovered key are 32 bytes long.
#[test]
fn test_pypdf_r6_key_recovery() {
    let fixture = read_pdf_bytes("encrypted_pypdf_aes256_user.pdf");

    let user_entry = extract_u_entry(&fixture).expect("Failed to extract U entry");
    let user_key_entry = extract_ue_entry(&fixture).expect("Failed to extract UE entry");
    assert_eq!(user_key_entry.len(), 32, "UE entry should be 32 bytes");

    let security = StandardSecurityHandler::aes_256_r6();
    let user_password = UserPassword(String::from("pypdf_test"));

    let recovered = security
        .recover_r6_encryption_key(&user_password, &user_entry, &user_key_entry)
        .expect("Key recovery should succeed");
    assert_eq!(recovered.len(), 32, "Recovered key should be 32 bytes");
}

/// If the pypdf fixture carries a Perms entry it must be 16 bytes long; a
/// missing entry is only reported, not treated as a failure.
#[test]
fn test_pypdf_r6_perms_exists() {
    let fixture = read_pdf_bytes("encrypted_pypdf_aes256_user.pdf");

    match extract_perms_entry(&fixture) {
        Some(perms) => {
            assert_eq!(perms.len(), 16, "Perms entry should be 16 bytes");
            println!("pypdf R6: Perms entry found ({} bytes)", perms.len());
        }
        None => println!("Note: pypdf R6 PDF does not have Perms entry (may be R5)"),
    }
}

// ============================================================================
// Summary Test
// ============================================================================

/// Prints which cross-validation fixtures are present in `FIXTURES_DIR`.
///
/// Purely informational: it never fails, it only reports `found` or
/// `missing` for each fixture (visible with `--nocapture`).
#[test]
fn test_cross_validation_summary() {
    // Both branches used to print an empty string, so the summary showed
    // nothing; report an explicit status instead.
    let status = |filename: &str| {
        if std::path::Path::new(FIXTURES_DIR).join(filename).exists() {
            "found"
        } else {
            "missing"
        }
    };

    println!("\n=== Cross-Validation Summary ===\n");

    println!("qpdf R5:  {}", status("encrypted_aes256_r5_user.pdf"));
    println!("qpdf R6:  {}", status("encrypted_aes256_r6_user.pdf"));
    println!("pypdf R6: {}", status("encrypted_pypdf_aes256_user.pdf"));

    println!("\n================================\n");
}