spectre_parse 1.0.0

Lazy PDF parser — xref-only at open(), objects materialize on demand. Read-only. Powers the spectre_pdf extraction crate.
Documentation
//! Diagnose why a particular PDF's page tree walk returns nothing.

use spectre_parse::{Document, Object};

/// Prints a step-by-step trace of the page tree for the PDF named on the command line.
///
/// The trace follows trailer `/Root` -> catalog -> `/Pages`, printing the keys found at
/// each level, then inspects the first `/Kids` entry (when it is an indirect reference),
/// and finally prints the length of `Document::get_pages()` so both views can be compared.
///
/// # Panics
///
/// Panics via `expect` when the path argument is missing, the file cannot be read, the
/// document fails to open, or a required lookup (Root, catalog, Pages, first kid
/// dictionary) fails.
fn main() {
    // Collects a dictionary's keys as lossily-decoded UTF-8 strings for display.
    // A local macro keeps type inference identical at every call site.
    macro_rules! key_names {
        ($dict:expr) => {
            $dict
                .iter()
                .map(|(key, _)| String::from_utf8_lossy(key).into_owned())
                .collect::<Vec<_>>()
        };
    }

    let pdf_path = std::env::args().nth(1).expect("usage: trace_pages <pdf>");
    let raw = std::fs::read(&pdf_path).expect("read");
    let doc = Document::open(&raw).expect("open");

    println!("version: {}", doc.version);
    println!("xref entries: {}", doc.xref_size());

    // Trailer -> catalog.
    let trailer_root = doc
        .trailer
        .get(b"Root")
        .expect("Root")
        .as_reference()
        .expect("Root ref");
    println!("Root: {trailer_root:?}");

    let catalog = doc.get_dictionary(trailer_root).expect("catalog dict");
    let catalog_keys = key_names!(catalog);
    println!("Catalog keys: {catalog_keys:?}");

    // Catalog -> page tree root.
    let pages_ref = catalog
        .get(b"Pages")
        .expect("Pages")
        .as_reference()
        .expect("Pages ref");
    println!("Pages ref: {pages_ref:?}");

    let pages_root = doc.get_dictionary(pages_ref).expect("pages dict");
    let pages_keys = key_names!(pages_root);
    println!("Pages keys: {pages_keys:?}");

    if let Some(declared_count) = pages_root.get_optional(b"Count") {
        println!("/Count: {declared_count:?}");
    }

    // Only the first kid is examined; anything that is not an array / reference is skipped
    // silently so the final get_pages() line is still reached.
    if let Some(kids) = pages_root.get_optional(b"Kids") {
        if let Ok(kid_list) = kids.as_array() {
            println!("/Kids count: {}", kid_list.len());
            if let Some(head) = kid_list.first() {
                println!("first kid: {head:?}");
                if let Ok(kid_id) = head.as_reference() {
                    let kid_dict = doc.get_dictionary(kid_id).expect("first kid dict");
                    let kid_keys = key_names!(kid_dict);
                    println!("first kid keys: {kid_keys:?}");
                    let kid_type = kid_dict.get_optional(b"Type");
                    println!("first kid /Type: {kid_type:?}");
                }
            }
        }
    }

    let page_total = doc.get_pages().len();
    println!("get_pages() len: {page_total}");
}