rpdfium 7676.6.4

A faithful Rust port of Google's PDFium PDF rendering engine
Documentation
//! Workspace-level integration tests for rpdfium.
//!
//! These tests verify that types from sub-crates are accessible through
//! the facade and behave correctly.

use rpdfium::arc::ArcLibrary;
use rpdfium::{PdfString, PdfStringEncoding};

/// Checks that BOM-less ASCII bytes are reported as `PdfDocEncoding` and
/// that the lossy conversion yields the same text.
#[test]
fn test_pdf_string_encoding_detection_pdfdoc() {
    let raw: Vec<u8> = b"Hello, PDF!".to_vec();
    let pdf_string = PdfString::from_bytes(raw);

    assert_eq!(pdf_string.encoding(), PdfStringEncoding::PdfDocEncoding);
    assert_eq!(pdf_string.to_string_lossy(), "Hello, PDF!");
}

/// Checks that bytes starting with the `FE FF` byte-order mark are reported
/// as `Utf16Be` and that the payload converts to "AB".
#[test]
fn test_pdf_string_encoding_detection_utf16be() {
    // Byte-order mark first ...
    let mut raw: Vec<u8> = vec![0xFE, 0xFF];
    // ... then the two big-endian code units for 'A' and 'B'.
    raw.extend_from_slice(&[0x00, 0x41, 0x00, 0x42]);

    let pdf_string = PdfString::from_bytes(raw);

    assert_eq!(pdf_string.encoding(), PdfStringEncoding::Utf16Be);
    assert_eq!(pdf_string.to_string_lossy(), "AB");
}

/// Checks that a `PdfString` built from no bytes reports itself as empty
/// with a length of zero.
#[test]
fn test_pdf_string_empty() {
    let no_bytes = Vec::<u8>::new();
    let pdf_string = PdfString::from_bytes(no_bytes);

    assert!(pdf_string.is_empty());
    assert_eq!(pdf_string.len(), 0);
}

/// Smoke test: constructing a `Library` through the facade must not panic.
#[test]
fn test_library_creation() {
    // The binding is kept (rather than `_`) so the value lives to the end of
    // the test body.
    let _library = rpdfium::Library::new();
}

/// Compile-time check that `ArcLibrary` satisfies `Send + Sync + 'static`.
#[test]
fn test_arc_library_is_send_sync() {
    // Instantiating this generic only type-checks when the bounds hold; the
    // body does nothing at runtime.
    fn require_send_sync<T>()
    where
        T: Send + Sync + 'static,
    {
    }

    require_send_sync::<ArcLibrary>();
}

/// Smoke test: an `ArcLibrary` can be constructed and then cloned.
#[test]
fn test_arc_library_creation() {
    let original = ArcLibrary::new();

    // Cloning is part of the expected surface of the Arc-backed handle.
    let _duplicate = original.clone();
}

// ---------------------------------------------------------------------------
// End-to-end PDF parsing integration tests
// ---------------------------------------------------------------------------

use rpdfium::ObjectId;
use rpdfium_core::{Name, ParsingMode};
use rpdfium_parser::ObjectStore;

/// Build a minimal valid PDF for integration testing.
///
/// Layout of the returned bytes:
/// * `%PDF-1.4` header,
/// * object 1: the catalog, pointing at object 2,
/// * object 2: a page tree with no kids,
/// * a classic cross-reference table, trailer, `startxref` and `%%EOF`.
///
/// Every cross-reference entry is exactly 20 bytes long, as required by the
/// PDF specification: the file uses a single LF as its end-of-line marker, so
/// each entry ends in SPACE + LF.
fn build_minimal_pdf() -> Vec<u8> {
    let bodies: [&[u8]; 2] = [
        b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n",
        b"2 0 obj\n<< /Type /Pages /Kids [] /Count 0 >>\nendobj\n",
    ];

    let mut pdf = Vec::new();
    pdf.extend_from_slice(b"%PDF-1.4\n");

    // Remember where each object starts; the xref table lists these offsets.
    let mut offsets = Vec::with_capacity(bodies.len());
    for body in bodies.iter() {
        offsets.push(pdf.len());
        pdf.extend_from_slice(body);
    }

    // Object 0 is the head of the free list, so the table has one more entry
    // than there are in-use objects.
    let entry_count = offsets.len() + 1;

    let xref_offset = pdf.len();
    pdf.extend_from_slice(format!("xref\n0 {}\n", entry_count).as_bytes());
    pdf.extend_from_slice(b"0000000000 65535 f \n");
    for offset in &offsets {
        pdf.extend_from_slice(format!("{:010} 00000 n \n", offset).as_bytes());
    }

    pdf.extend_from_slice(b"trailer\n");
    pdf.extend_from_slice(format!("<< /Size {} /Root 1 0 R >>\n", entry_count).as_bytes());
    pdf.extend_from_slice(format!("startxref\n{}\n%%EOF", xref_offset).as_bytes());

    pdf
}

/// Build a PDF with a stream object for testing.
///
/// Layout of the returned bytes:
/// * `%PDF-1.4` header,
/// * object 1: the catalog, pointing at object 2,
/// * object 2: a page tree with one kid (object 3),
/// * object 3: a page with a 612x792 `/MediaBox` whose `/Contents` is object 4,
/// * object 4: an unfiltered content stream that draws the text "Hello",
/// * a classic cross-reference table, trailer, `startxref` and `%%EOF`.
///
/// Every cross-reference entry is exactly 20 bytes long, as required by the
/// PDF specification: the file uses a single LF as its end-of-line marker, so
/// each entry ends in SPACE + LF.
fn build_pdf_with_stream() -> Vec<u8> {
    let stream_content: &[u8] = b"BT /F1 12 Tf 100 700 Td (Hello) Tj ET";

    // Assemble the stream object up front so it can sit in the same list as
    // the fixed objects; `/Length` is taken from the payload itself.
    let mut stream_obj =
        format!("4 0 obj\n<< /Length {} >>\nstream\n", stream_content.len()).into_bytes();
    stream_obj.extend_from_slice(stream_content);
    stream_obj.extend_from_slice(b"\nendstream\nendobj\n");

    let bodies: [&[u8]; 4] = [
        b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n",
        b"2 0 obj\n<< /Type /Pages /Kids [3 0 R] /Count 1 >>\nendobj\n",
        b"3 0 obj\n<< /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] /Contents 4 0 R >>\nendobj\n",
        stream_obj.as_slice(),
    ];

    let mut pdf = Vec::new();
    pdf.extend_from_slice(b"%PDF-1.4\n");

    // Remember where each object starts; the xref table lists these offsets.
    let mut offsets = Vec::with_capacity(bodies.len());
    for body in bodies.iter() {
        offsets.push(pdf.len());
        pdf.extend_from_slice(body);
    }

    // Object 0 is the head of the free list, so the table has one more entry
    // than there are in-use objects.
    let entry_count = offsets.len() + 1;

    let xref_offset = pdf.len();
    pdf.extend_from_slice(format!("xref\n0 {}\n", entry_count).as_bytes());
    pdf.extend_from_slice(b"0000000000 65535 f \n");
    for offset in &offsets {
        pdf.extend_from_slice(format!("{:010} 00000 n \n", offset).as_bytes());
    }

    pdf.extend_from_slice(b"trailer\n");
    pdf.extend_from_slice(format!("<< /Size {} /Root 1 0 R >>\n", entry_count).as_bytes());
    pdf.extend_from_slice(format!("startxref\n{}\n%%EOF", xref_offset).as_bytes());

    pdf
}

/// End-to-end: the minimal PDF opens in strict mode and reports the expected
/// header version, trailer fields and object count.
#[test]
fn test_e2e_open_minimal_pdf() {
    let store = ObjectStore::open(build_minimal_pdf(), ParsingMode::Strict).unwrap();

    // Header.
    let expected_version = rpdfium_parser::PdfVersion::new(1, 4);
    assert_eq!(store.file_version(), expected_version);

    // Trailer dictionary.
    let expected_root = ObjectId::new(1, 0);
    assert_eq!(store.trailer().root, expected_root);
    assert_eq!(store.trailer().size, 3);

    // Two in-use objects: catalog and page tree.
    assert_eq!(store.object_count(), 2);
}

/// End-to-end: walk from the catalog to the page tree root by hand,
/// checking `/Type` on both dictionaries and `/Count` on the page tree.
#[test]
fn test_e2e_resolve_catalog_to_pages() {
    let store = ObjectStore::open(build_minimal_pdf(), ParsingMode::Strict).unwrap();

    // --- Catalog (object 1) -------------------------------------------------
    let root_obj = store.resolve(ObjectId::new(1, 0)).unwrap();
    let root_dict = root_obj.as_dict().unwrap();

    assert_eq!(
        root_dict
            .get(&Name::r#type())
            .unwrap()
            .as_name()
            .unwrap()
            .as_bytes(),
        b"Catalog"
    );

    // /Pages must be an indirect reference to object 2.
    let tree_id = root_dict
        .get(&Name::pages())
        .unwrap()
        .as_reference()
        .unwrap();
    assert_eq!(tree_id, ObjectId::new(2, 0));

    // --- Page tree root (object 2) ------------------------------------------
    let tree_obj = store.resolve(tree_id).unwrap();
    let tree_dict = tree_obj.as_dict().unwrap();

    assert_eq!(
        tree_dict
            .get(&Name::r#type())
            .unwrap()
            .as_name()
            .unwrap()
            .as_bytes(),
        b"Pages"
    );

    // The minimal document has no pages.
    let page_count = tree_dict.get(&Name::count()).unwrap();
    assert_eq!(page_count.as_i64(), Some(0));
}

/// End-to-end: `deep_resolve` applied to the catalog's `/Pages` reference
/// yields a dictionary.
#[test]
fn test_e2e_deep_resolve_through_reference() {
    let store = ObjectStore::open(build_minimal_pdf(), ParsingMode::Strict).unwrap();

    let root_obj = store.resolve(ObjectId::new(1, 0)).unwrap();
    let root_dict = root_obj.as_dict().unwrap();

    // Precondition: the entry really is an indirect reference, so the call
    // below has something to follow.
    let pages_entry = root_dict.get(&Name::pages()).unwrap();
    assert!(pages_entry.is_reference());

    let target = store.deep_resolve(pages_entry).unwrap();
    assert!(target.as_dict().is_some());
}

/// End-to-end: decode the content stream (object 4) of the stream PDF and
/// check that the expected operators and text are present.
#[test]
fn test_e2e_decode_content_stream() {
    let store = ObjectStore::open(build_pdf_with_stream(), ParsingMode::Strict).unwrap();

    // Object 4 is the page's content stream.
    let contents_obj = store.resolve(ObjectId::new(4, 0)).unwrap();
    let decoded = store.decode_stream(contents_obj).unwrap();

    let text = String::from_utf8_lossy(&decoded);
    for needle in ["BT", "Hello", "ET"].iter() {
        assert!(text.contains(*needle));
    }
}

/// End-to-end: follow catalog -> `/Pages` -> `/Kids[0]` in the stream PDF and
/// check the page's `/MediaBox`.
#[test]
fn test_e2e_page_tree_structure() {
    let store = ObjectStore::open(build_pdf_with_stream(), ParsingMode::Strict).unwrap();

    // Catalog.
    let root_obj = store.resolve(ObjectId::new(1, 0)).unwrap();
    let root_dict = root_obj.as_dict().unwrap();

    // Page tree root, fetched via `dict_resolve`. The outer unwrap handles
    // the `Result`, the inner one the "key present" `Option`.
    let tree_obj = store
        .dict_resolve(root_dict, &Name::pages())
        .unwrap()
        .unwrap();
    let tree_dict = tree_obj.as_dict().unwrap();

    // Exactly one kid is expected, and it must be an indirect reference.
    let kid_list = tree_dict
        .get(&Name::kids())
        .unwrap()
        .as_array()
        .unwrap();
    assert_eq!(kid_list.len(), 1);

    let first_page_id = kid_list[0].as_reference().unwrap();
    let first_page = store.resolve(first_page_id).unwrap();
    let first_page_dict = first_page.as_dict().unwrap();

    // /MediaBox is [0 0 612 792]; the test checks the arity and the upper
    // right corner.
    let bounds = first_page_dict
        .get(&Name::media_box())
        .unwrap()
        .as_array()
        .unwrap();
    assert_eq!(bounds.len(), 4);
    assert_eq!(bounds[2].as_i64(), Some(612));
    assert_eq!(bounds[3].as_i64(), Some(792));
}

/// End-to-end: bytes that are not a PDF produce an `Err` from
/// `ObjectStore::open` in strict mode.
#[test]
fn test_e2e_open_garbage_fails() {
    let garbage: Vec<u8> = b"not a pdf file".to_vec();

    let outcome = ObjectStore::open(garbage, ParsingMode::Strict);

    assert!(outcome.is_err());
}

/// End-to-end: a zero-length input produces an `Err` from
/// `ObjectStore::open` in strict mode.
#[test]
fn test_e2e_open_empty_fails() {
    let nothing = Vec::<u8>::new();

    let outcome = ObjectStore::open(nothing, ParsingMode::Strict);

    assert!(outcome.is_err());
}

/// `dict_resolve` succeeds with `None` when the requested key is not in the
/// dictionary (the minimal catalog has no `/Encrypt` entry).
#[test]
fn test_e2e_dict_resolve_missing_key() {
    let store = ObjectStore::open(build_minimal_pdf(), ParsingMode::Strict).unwrap();

    let root_obj = store.resolve(ObjectId::new(1, 0)).unwrap();
    let root_dict = root_obj.as_dict().unwrap();

    // The call itself must succeed; only the looked-up value is absent.
    let lookup = store.dict_resolve(root_dict, &Name::encrypt()).unwrap();
    assert!(lookup.is_none());
}

/// Resolving an object number that is not in the file yields an `Err`.
#[test]
fn test_e2e_resolve_nonexistent_object() {
    let store = ObjectStore::open(build_minimal_pdf(), ParsingMode::Strict).unwrap();

    // The minimal PDF only defines objects 1 and 2.
    let missing_id = ObjectId::new(999, 0);
    let outcome = store.resolve(missing_id);

    assert!(outcome.is_err());
}