//! pdf2john 0.1.1
//!
//! Extract a hash from an encrypted PDF for cracking with John the Ripper or Hashcat.
//!
//! Documentation
use std::fmt;
use std::path::Path;

use lopdf::Document;

/// Errors that can occur while extracting hash material from a PDF file.
#[derive(Debug)]
pub enum ExtractError {
    /// An underlying I/O error.
    Io(std::io::Error),
    /// An error reported by the `lopdf` library (for example while loading the document).
    Pdf(lopdf::Error),
    /// The document does not expose an encryption dictionary.
    NotEncrypted,
    /// A required entry is absent; the payload is the entry name (e.g. `"/U"`).
    MissingField(&'static str),
    /// An entry is present but does not have the expected shape; the payload is the entry name.
    InvalidField(&'static str),
}

impl fmt::Display for ExtractError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            ExtractError::Io(e) => write!(f, "I/O error: {e}"),
            ExtractError::Pdf(e) => write!(f, "PDF error: {e}"),
            ExtractError::NotEncrypted => write!(f, "File is not encrypted"),
            ExtractError::MissingField(name) => write!(f, "Missing field: {name}"),
            ExtractError::InvalidField(name) => write!(f, "Invalid field: {name}"),
        }
    }
}

// Marker impl using the default `source()`: the `Display` impl already embeds
// the message of any wrapped I/O or PDF error in its own output.
impl std::error::Error for ExtractError {}

impl From<std::io::Error> for ExtractError {
    fn from(e: std::io::Error) -> Self {
        ExtractError::Io(e)
    }
}

impl From<lopdf::Error> for ExtractError {
    fn from(e: lopdf::Error) -> Self {
        ExtractError::Pdf(e)
    }
}

/// Encryption parameters pulled out of a PDF, ready to be rendered as a
/// `$pdf$...` hash line by `format_hash`.
///
/// All values are plain data, so the type derives the common comparison,
/// cloning and debugging traits.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PdfHashExtractor {
    /// Integer value of the encryption dictionary's `/V` entry.
    pub algorithm: i64,
    /// Integer value of the encryption dictionary's `/R` entry.
    pub revision: i64,
    /// Integer value of the `/Length` entry; `40` when the entry is absent.
    pub length: i64,
    /// `/P` entry reinterpreted as a signed 32-bit value and widened to `i64`.
    pub permissions: i64,
    /// `/EncryptMetadata` flag; `true` unless the entry is an explicit boolean `false`.
    pub encrypt_metadata: bool,
    /// Bytes of the first element of the trailer's `/ID` array.
    pub document_id: Vec<u8>,
    /// Bytes of the `/U` entry, truncated to the revision's maximum length.
    pub user_password: Vec<u8>,
    /// Bytes of the `/O` entry, truncated to the revision's maximum length.
    pub owner_password: Vec<u8>,
    /// Bytes of the `/OE` entry (truncated the same way), if present.
    pub owner_encryption_seed: Option<Vec<u8>>,
    /// Bytes of the `/UE` entry (truncated the same way), if present.
    pub user_encryption_seed: Option<Vec<u8>>,
}

/// Returns how many leading bytes of a password-related string are kept for
/// the given encryption `revision`.
///
/// Revisions 2 through 4 keep 32 bytes; every other revision (including 5,
/// 6 and unknown values) keeps 48 bytes.
fn max_password_length(revision: i64) -> usize {
    if (2..=4).contains(&revision) {
        32
    } else {
        48
    }
}

/// Looks up `key` in `dict` and converts the value with `as_i64`.
///
/// Returns `None` when the key lookup fails or the conversion is rejected.
fn get_integer(dict: &lopdf::Dictionary, key: &[u8]) -> Option<i64> {
    let value = dict.get(key).ok()?;
    value.as_i64().ok()
}

/// Looks up `key` in `dict` and returns an owned copy of its bytes.
///
/// Only `lopdf::Object::String` values are accepted; a failed lookup or any
/// other object kind yields `None`.
fn get_bytes(dict: &lopdf::Dictionary, key: &[u8]) -> Option<Vec<u8>> {
    match dict.get(key) {
        Ok(lopdf::Object::String(raw, _)) => Some(raw.clone()),
        _ => None,
    }
}

impl PdfHashExtractor {
    pub fn from_file<P: AsRef<Path>>(path: P) -> Result<Self, ExtractError> {
        let doc = Document::load(path)?;

        let encrypt_dict = doc
            .get_encrypted()
            .map_err(|_| ExtractError::NotEncrypted)?;

        let algorithm = get_integer(encrypt_dict, b"V")
            .ok_or(ExtractError::MissingField("/V"))?;
        let revision = get_integer(encrypt_dict, b"R")
            .ok_or(ExtractError::MissingField("/R"))?;
        let length = get_integer(encrypt_dict, b"Length").unwrap_or(40);
        let raw_p = get_integer(encrypt_dict, b"P")
            .ok_or(ExtractError::MissingField("/P"))?;
        let permissions = (raw_p as i32) as i64;

        let encrypt_metadata = match encrypt_dict.get(b"EncryptMetadata") {
            Ok(lopdf::Object::Boolean(b)) => *b,
            _ => true,
        };

        let document_id = doc
            .trailer
            .get(b"ID")
            .map_err(|_| ExtractError::MissingField("/ID"))?
            .as_array()
            .map_err(|_| ExtractError::InvalidField("/ID"))?
            .first()
            .ok_or(ExtractError::InvalidField("/ID"))?
            .as_str()
            .map_err(|_| ExtractError::InvalidField("/ID"))?
            .to_vec();

        let max_len = max_password_length(revision);

        let u_data = get_bytes(encrypt_dict, b"U")
            .ok_or(ExtractError::MissingField("/U"))?;
        let user_password = u_data[..max_len.min(u_data.len())].to_vec();

        let o_data = get_bytes(encrypt_dict, b"O")
            .ok_or(ExtractError::MissingField("/O"))?;
        let owner_password = o_data[..max_len.min(o_data.len())].to_vec();

        let owner_encryption_seed = get_bytes(encrypt_dict, b"OE")
            .map(|d| d[..max_len.min(d.len())].to_vec());
        let user_encryption_seed = get_bytes(encrypt_dict, b"UE")
            .map(|d| d[..max_len.min(d.len())].to_vec());

        Ok(PdfHashExtractor {
            algorithm,
            revision,
            length,
            permissions,
            encrypt_metadata,
            document_id,
            user_password,
            owner_password,
            owner_encryption_seed,
            user_encryption_seed,
        })
    }

    pub fn format_hash(&self) -> String {
        let encrypt_metadata_flag = if self.encrypt_metadata { 1 } else { 0 };
        let id_hex = hex::encode(&self.document_id);
        let u_hex = hex::encode(&self.user_password);
        let o_hex = hex::encode(&self.owner_password);

        let mut result = format!(
            "$pdf${}*{}*{}*{}*{}*{}*{}*{}*{}*{}*{}",
            self.algorithm,
            self.revision,
            self.length,
            self.permissions,
            encrypt_metadata_flag,
            self.document_id.len(),
            id_hex,
            self.user_password.len(),
            u_hex,
            self.owner_password.len(),
            o_hex,
        );

        if let Some(ref oe) = self.owner_encryption_seed {
            let oe_hex = hex::encode(oe);
            result.push_str(&format!("*{}*{}", oe.len(), oe_hex));
        }

        if let Some(ref ue) = self.user_encryption_seed {
            let ue_hex = hex::encode(ue);
            result.push_str(&format!("*{}*{}", ue.len(), ue_hex));
        }

        result
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Extracts the hash from the PDF at `pdf_path` and compares it with the
    /// trimmed contents of the `expected` fixture text. `failure` is the panic
    /// message used when extraction fails.
    fn assert_fixture_hash(pdf_path: &str, expected: &str, failure: &str) {
        let hash = PdfHashExtractor::from_file(pdf_path)
            .expect(failure)
            .format_hash();
        assert_eq!(hash, expected.trim());
    }

    #[test]
    fn test_max_password_length() {
        // (revision, expected byte limit)
        let cases: [(i64, usize); 6] = [(2, 32), (3, 32), (4, 32), (5, 48), (6, 48), (99, 48)];
        for (revision, limit) in cases.iter() {
            assert_eq!(max_password_length(*revision), *limit);
        }
    }

    #[test]
    fn test_r2_rc4_40() {
        assert_fixture_hash(
            "docs/r2_rc4_40.pdf",
            include_str!("../docs/r2_rc4_40.txt"),
            "Failed to extract hash from r2_rc4_40.pdf",
        );
    }

    #[test]
    fn test_r3_rc4_128() {
        assert_fixture_hash(
            "docs/r3_rc4_128.pdf",
            include_str!("../docs/r3_rc4_128.txt"),
            "Failed to extract hash from r3_rc4_128.pdf",
        );
    }

    #[test]
    fn test_r4_aes_128() {
        assert_fixture_hash(
            "docs/r4_aes_128.pdf",
            include_str!("../docs/r4_aes_128.txt"),
            "Failed to extract hash from r4_aes_128.pdf",
        );
    }

    #[test]
    fn test_r6_aes_256() {
        assert_fixture_hash(
            "docs/r6_aes_256.pdf",
            include_str!("../docs/r6_aes_256.txt"),
            "Failed to extract hash from r6_aes_256.pdf",
        );
    }
}