spectre_parse 1.0.0

Lazy PDF parser — xref-only at open(), objects materialize on demand. Read-only. Powers the spectre_pdf extraction crate.
Documentation
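//! Diagnose named-destination resolution: print the catalog's `/Dests` and `/Names`
//! entries, then probe the `/Names/Dests` name tree for a few sample destination names.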
use spectre_parse::Document;

/// Opens the PDF named by the first command-line argument and prints diagnostics
/// about how its named destinations are stored.
///
/// Prints, in order: the document version, the catalog's keys, whether the catalog
/// has a `/Dests` entry, the `/Names` entry and its keys, the `/Names/Dests` entry
/// and its keys, and finally the result of looking up a few sample names in the
/// `/Names/Dests` tree.
///
/// # Panics
///
/// Panics if the path argument is missing, the file cannot be read or opened, the
/// catalog is unavailable, a referenced dictionary cannot be fetched, or `/Names`
/// or `/Names/Dests` is neither a reference nor an inline dictionary.
fn main() {
    // Renders a dictionary's raw byte keys as lossily-decoded strings for display.
    let key_names = |dict: &spectre_parse::Dictionary| -> Vec<String> {
        dict.keys()
            .map(|raw| String::from_utf8_lossy(raw).into_owned())
            .collect()
    };

    let pdf_path = std::env::args().nth(1).expect("usage: trace_names_tree <pdf>");
    let data = std::fs::read(&pdf_path).expect("read");
    let doc = Document::open(&data).expect("open");
    println!("opened {}", doc.version);

    let catalog = doc.catalog().expect("catalog");
    println!("catalog keys: {:?}", key_names(&catalog));

    match catalog.get_optional(b"Dests") {
        Some(dests) => println!("/Dests present: {dests:?}"),
        None => println!("/Dests absent"),
    }

    // Without /Names there is no name tree to inspect, so report and stop.
    let Some(names_entry) = catalog.get_optional(b"Names") else {
        println!("/Names absent");
        return;
    };
    println!("/Names: {names_entry:?}");
    // /Names may be stored inline or behind an indirect reference.
    let names_dict = match names_entry {
        spectre_parse::Object::Dictionary(inline) => inline.clone(),
        spectre_parse::Object::Reference(id) => doc.get_dictionary(*id).expect("Names dict"),
        _ => panic!("unexpected /Names shape"),
    };
    println!("Names keys: {:?}", key_names(&names_dict));

    // A /Names dictionary without /Dests is silently accepted: nothing more to print.
    let Some(dests_entry) = names_dict.get_optional(b"Dests") else {
        return;
    };
    println!("/Names/Dests: {dests_entry:?}");
    let dests_root = match dests_entry {
        spectre_parse::Object::Dictionary(inline) => inline.clone(),
        spectre_parse::Object::Reference(id) => doc.get_dictionary(*id).expect("Dests dict"),
        _ => panic!("unexpected /Names/Dests shape"),
    };
    println!("Dests-tree keys: {:?}", key_names(&dests_root));

    // Probe a handful of destination names that the sample document is expected to define.
    for name in ["contents", "what_s_new", "filing_requirements"] {
        let found = lookup(&doc, &dests_root, name.as_bytes());
        println!("lookup {:?}: {:?}", name, found);
    }
}

/// Searches the name tree rooted at `node` for `key` and returns a clone of the
/// associated value, or `None` if no entry matches.
///
/// The node's own `/Names` array is scanned first, as alternating key/value entries;
/// only keys that are `Object::String` are compared, and a trailing unpaired element
/// is ignored. If nothing matches, each `/Kids` entry that is a reference resolving
/// to a dictionary is searched in order, depth-first. Kids that are not references
/// or that fail to resolve are skipped.
///
/// Descent is capped at [`MAX_NAME_TREE_DEPTH`] levels so that a malformed file whose
/// `/Kids` references form a cycle cannot recurse without bound.
fn lookup(
    doc: &Document,
    node: &spectre_parse::Dictionary,
    key: &[u8],
) -> Option<spectre_parse::Object> {
    lookup_bounded(doc, node, key, MAX_NAME_TREE_DEPTH)
}

/// Maximum number of `/Kids` levels [`lookup`] descends below its starting node.
///
/// Chosen to be far deeper than any well-formed name tree should need while still
/// keeping recursion shallow enough to be safe on a default-sized stack.
const MAX_NAME_TREE_DEPTH: usize = 64;

/// Recursive worker for [`lookup`]; `levels_left` is how many more `/Kids` levels
/// may still be descended below `node`.
fn lookup_bounded(
    doc: &Document,
    node: &spectre_parse::Dictionary,
    key: &[u8],
    levels_left: usize,
) -> Option<spectre_parse::Object> {
    // Entries stored directly on this node: [key0, value0, key1, value1, ...].
    if let Some(names_obj) = node.get_optional(b"Names") {
        if let Ok(arr) = names_obj.as_array() {
            for pair in arr.chunks_exact(2) {
                if let spectre_parse::Object::String(k_bytes, _) = &pair[0] {
                    if k_bytes.as_slice() == key {
                        return Some(pair[1].clone());
                    }
                }
            }
        }
    }

    // Stop descending once the depth budget is spent (guards against cyclic /Kids).
    let levels_left = levels_left.checked_sub(1)?;

    if let Some(kids_obj) = node.get_optional(b"Kids") {
        if let Ok(kids) = kids_obj.as_array() {
            for kid in kids {
                let Ok(kid_id) = kid.as_reference() else { continue };
                let Ok(kid_dict) = doc.get_dictionary(kid_id) else { continue };
                if let Some(found) = lookup_bounded(doc, &kid_dict, key, levels_left) {
                    return Some(found);
                }
            }
        }
    }
    None
}