use crate::{
error::{LupinError, Result},
SteganographyEngine,
};
use base64::{engine::general_purpose, Engine as _};
use log::debug;
/// Steganography engine for PDF files.
///
/// The type is a stateless unit struct; its helpers operate purely on the raw
/// bytes they are handed.
pub struct PdfEngine;

impl PdfEngine {
    /// Creates a new engine instance.
    pub fn new() -> Self {
        PdfEngine
    }

    /// Locates the last `%%EOF` marker in `pdf`.
    ///
    /// Returns the index one past the final byte of that marker, or `None`
    /// when the marker does not occur anywhere in `pdf`.
    fn find_eof_end(&self, pdf: &[u8]) -> Option<usize> {
        const MARKER: &[u8] = b"%%EOF";

        // Try every possible end offset from the back of the buffer; the
        // first prefix that ends with the marker belongs to its last
        // occurrence. The range is empty when `pdf` is shorter than the marker.
        (MARKER.len()..=pdf.len())
            .rev()
            .find(|&end| pdf[..end].ends_with(MARKER))
    }
}
impl Default for PdfEngine {
fn default() -> Self {
Self::new()
}
}
impl SteganographyEngine for PdfEngine {
    /// Leading bytes used to recognise a PDF file.
    fn magic_bytes(&self) -> &[u8] {
        b"%PDF"
    }

    /// Display name of the format handled by this engine.
    fn format_name(&self) -> &str {
        "PDF"
    }

    /// File extension of the format, including the leading dot.
    fn format_ext(&self) -> &str {
        ".pdf"
    }

    /// Hides `payload` in `source_data` by appending its Base64 encoding
    /// directly after the last `%%EOF` marker.
    ///
    /// Whitespace that followed the marker in `source_data` is not carried
    /// over: the output is everything up to and including the marker,
    /// immediately followed by the encoded payload.
    ///
    /// # Errors
    ///
    /// * [`LupinError::PdfNoEofMarker`] when `source_data` has no `%%EOF`.
    /// * [`LupinError::EmbedCollision`] when anything other than ASCII
    ///   whitespace already follows the last `%%EOF`.
    fn embed(&self, source_data: &[u8], payload: &[u8]) -> Result<Vec<u8>> {
        const EOF_MARKER: &[u8] = b"%%EOF";

        let eof_end = self
            .find_eof_end(source_data)
            .ok_or(LupinError::PdfNoEofMarker)?;
        debug!(
            "PDF: Found %%EOF at position {}",
            eof_end - EOF_MARKER.len()
        );

        // Refuse to overwrite existing trailing data. This is checked before
        // encoding so a rejected request does not pay for the Base64 work.
        let trailing = &source_data[eof_end..];
        if trailing.iter().any(|b| !b.is_ascii_whitespace()) {
            return Err(LupinError::EmbedCollision {
                source: std::io::Error::new(
                    std::io::ErrorKind::AlreadyExists,
                    "PDF: Already contains some data after %%EOF",
                ),
            });
        }

        let encoded_payload = general_purpose::STANDARD.encode(payload);
        let mut result = Vec::with_capacity(eof_end + encoded_payload.len());
        result.extend_from_slice(&source_data[..eof_end]);
        result.extend_from_slice(encoded_payload.as_bytes());
        Ok(result)
    }

    /// Recovers a payload previously appended after the last `%%EOF` marker.
    ///
    /// ASCII whitespace on either side of the Base64 text is ignored, so a
    /// file that gained a trailing newline after embedding still decodes.
    ///
    /// # Errors
    ///
    /// * [`LupinError::PdfNoEofMarker`] when `source_data` has no `%%EOF`.
    /// * [`LupinError::PdfNoHiddenData`] when only whitespace (or nothing)
    ///   follows the marker.
    /// * [`LupinError::PdfCorruptedData`] when the trailing bytes are not
    ///   valid standard Base64.
    fn extract(&self, source_data: &[u8]) -> Result<Vec<u8>> {
        const EOF_MARKER: &[u8] = b"%%EOF";

        // Share the marker search with `embed` so both agree on the anchor.
        let payload_start = self
            .find_eof_end(source_data)
            .ok_or(LupinError::PdfNoEofMarker)?;
        debug!(
            "PDF: Found %%EOF at position {}",
            payload_start - EOF_MARKER.len()
        );

        let tail = &source_data[payload_start..];
        let first = tail
            .iter()
            .position(|b| !b.is_ascii_whitespace())
            .ok_or(LupinError::PdfNoHiddenData)?;
        // `first` proves a non-whitespace byte exists, so the reverse search
        // always succeeds; the fallback only avoids an unwrap.
        let last = tail
            .iter()
            .rposition(|b| !b.is_ascii_whitespace())
            .unwrap_or(first);

        // Decode straight from the borrowed slice; no intermediate copy needed.
        general_purpose::STANDARD
            .decode(&tail[first..=last])
            .map_err(|_| LupinError::PdfCorruptedData)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Smallest PDF-shaped fixture used by the tests; it ends exactly at `%%EOF`.
    fn create_minimal_pdf() -> Vec<u8> {
        b"%PDF-1.4\n1 0 obj\n<<\n/Type /Catalog\n>>\nendobj\nxref\n0 1\n0000000000 65535 f\ntrailer\n<<\n/Size 1\n/Root 1 0 R\n>>\nstartxref\n73\n%%EOF".to_vec()
    }

    /// Truncated fixture that never reaches a `%%EOF` marker.
    fn create_invalid_pdf_no_eof() -> Vec<u8> {
        b"%PDF-1.4\n1 0 obj\n<<\n/Type /Catalog\n>>\nendobj".to_vec()
    }

    #[test]
    fn test_magic_bytes() {
        assert_eq!(PdfEngine::new().magic_bytes(), b"%PDF");
    }

    #[test]
    fn test_format_name() {
        assert_eq!(PdfEngine::new().format_name(), "PDF");
    }

    #[test]
    fn test_format_ext() {
        assert_eq!(PdfEngine::new().format_ext(), ".pdf");
    }

    #[test]
    fn test_find_eof_end() {
        let engine = PdfEngine::new();
        let pdf = create_minimal_pdf();

        // The fixture is 125 bytes long and the marker is its last 5 bytes.
        assert_eq!(engine.find_eof_end(&pdf), Some(125));
        assert_eq!(&pdf[120..125], b"%%EOF");
    }

    #[test]
    fn test_find_eof_end_multiple_eof() {
        let engine = PdfEngine::new();
        let mut pdf = create_minimal_pdf();
        pdf.extend_from_slice(b"\n%%EOF\nfake_data");

        // Only the last marker counts.
        assert_eq!(engine.find_eof_end(&pdf), Some(131));
        assert_eq!(&pdf[126..131], b"%%EOF");
        assert_eq!(&pdf[131..], b"\nfake_data");
    }

    #[test]
    fn test_find_eof_end_no_eof() {
        let engine = PdfEngine::new();
        assert_eq!(engine.find_eof_end(&create_invalid_pdf_no_eof()), None);
    }

    #[test]
    fn test_embed_success() {
        let engine = PdfEngine::new();
        let embedded = engine
            .embed(&create_minimal_pdf(), b"secret message")
            .unwrap();

        // 125 fixture bytes plus 20 bytes of Base64 text.
        assert_eq!(embedded.len(), 145);
        assert!(embedded.starts_with(b"%PDF"));
        assert!(embedded.ends_with(b"c2VjcmV0IG1lc3NhZ2U="));
    }

    #[test]
    fn test_embed_no_eof_marker() {
        let engine = PdfEngine::new();
        let err = engine
            .embed(&create_invalid_pdf_no_eof(), b"secret message")
            .unwrap_err();
        assert!(
            matches!(err, LupinError::PdfNoEofMarker),
            "Expected PdfNoEofMarker, got {:?}",
            err
        );
    }

    #[test]
    fn test_embed_empty_payload() {
        let engine = PdfEngine::new();
        let embedded = engine.embed(&create_minimal_pdf(), b"").unwrap();

        // Nothing is appended for an empty payload.
        assert_eq!(embedded.len(), 125);
    }

    #[test]
    fn test_embed_into_already_embedded_pdf() {
        let engine = PdfEngine::new();
        let mut pdf = create_minimal_pdf();
        pdf.extend_from_slice(b"c2VjcmV0IG1lc3NhZ2U=");

        let err = engine.embed(&pdf, "more secret".as_bytes()).unwrap_err();
        assert!(
            matches!(err, LupinError::EmbedCollision { .. }),
            "Expected EmbedCollision, got {:?}",
            err
        );
    }

    #[test]
    fn test_extract_success() {
        let engine = PdfEngine::new();
        let mut pdf = create_minimal_pdf();
        pdf.extend_from_slice(b"c2VjcmV0IG1lc3NhZ2U=");

        let extracted_payload = engine.extract(&pdf).unwrap();
        assert_eq!(extracted_payload, b"secret message");
    }

    #[test]
    fn test_extract_no_eof_marker() {
        let engine = PdfEngine::new();
        let err = engine.extract(&create_invalid_pdf_no_eof()).unwrap_err();
        assert!(
            matches!(err, LupinError::PdfNoEofMarker),
            "Expected PdfNoEofMarker, got {:?}",
            err
        );
    }

    #[test]
    fn test_extract_no_hidden_data() {
        let engine = PdfEngine::new();
        let err = engine.extract(&create_minimal_pdf()).unwrap_err();
        assert!(
            matches!(err, LupinError::PdfNoHiddenData),
            "Expected PdfNoHiddenData, got {:?}",
            err
        );
    }

    #[test]
    fn test_extract_corrupted_data() {
        let engine = PdfEngine::new();
        let mut pdf = create_minimal_pdf();
        pdf.extend_from_slice(b"invalid@base64!");

        let err = engine.extract(&pdf).unwrap_err();
        assert!(
            matches!(err, LupinError::PdfCorruptedData),
            "Expected PdfCorruptedData, got {:?}",
            err
        );
    }

    #[test]
    fn test_extract_with_whitespace() {
        let engine = PdfEngine::new();
        let mut embedded = create_minimal_pdf();
        // Whitespace between the marker and the payload must be skipped.
        embedded.extend_from_slice(b" \n\t");
        embedded.extend_from_slice(b"dGVzdCB3aXRoIHNwYWNlcw==");

        let extracted_payload = engine.extract(&embedded).unwrap();
        assert_eq!(extracted_payload, b"test with spaces");
    }

    #[test]
    fn test_round_trip_with_binary_data() {
        let engine = PdfEngine::new();
        let binary_payload = b"\x00\x01\x02\xff";

        let embedded = engine.embed(&create_minimal_pdf(), binary_payload).unwrap();
        let extracted = engine.extract(&embedded).unwrap();
        assert_eq!(extracted, binary_payload);
    }

    #[test]
    fn test_round_trip_with_unicode_data() {
        let engine = PdfEngine::new();
        let unicode_payload = "unicode: 🕵️ αβγ δεζ".as_bytes();

        let embedded = engine.embed(&create_minimal_pdf(), unicode_payload).unwrap();
        let extracted = engine.extract(&embedded).unwrap();
        assert_eq!(extracted, unicode_payload);
    }
}