spectre_parse 1.0.0

Lazy PDF parser — xref-only at open(), objects materialize on demand. Read-only. Powers the spectre_pdf extraction crate.
Documentation
//! Cross-reference table. Built eagerly at `Document::open`; object
//! bodies are parsed on demand by looking up their entries here.

use std::collections::BTreeMap;

/// One row of the cross-reference table: where (or whether) the body of a
/// given object number can be found.
///
/// The type is `Copy`, so entries can be passed and stored by value.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum XrefEntry {
    /// Live object: `offset` is the byte position of `n g obj` in the
    /// source buffer; `generation` is the entry's gen (matches `(n g R)`).
    Normal { offset: u32, generation: u16 },
    /// Object lives inside an object-stream `container` at `index`.
    Compressed { container: u32, index: u32 },
    /// Entry marked free in the table; it carries no location data.
    Free,
}

/// Cross-reference table keyed by object number.
///
/// Entries are kept in a `BTreeMap`, so [`Xref::iter`] always yields them in
/// ascending object-number order.
#[derive(Debug, Clone, Default)]
pub struct Xref {
    /// Object number -> table entry.
    entries: BTreeMap<u32, XrefEntry>,
    /// `/Size` reported by the trailer.
    pub size: u32,
}

impl Xref {
    /// Creates an empty table with `size` set to zero.
    pub fn new() -> Self {
        Self {
            entries: BTreeMap::new(),
            size: 0,
        }
    }

    /// Records `entry` for `object_number`, overwriting any entry already
    /// stored under that number.
    pub fn insert(&mut self, object_number: u32, entry: XrefEntry) {
        // The displaced entry (if any) is intentionally discarded.
        let _replaced = self.entries.insert(object_number, entry);
    }

    /// Looks up the entry for `object_number`, or `None` if the table has
    /// no row for it.
    pub fn get(&self, object_number: u32) -> Option<&XrefEntry> {
        let table = &self.entries;
        table.get(&object_number)
    }

    /// Iterates over `(object_number, entry)` pairs in ascending
    /// object-number order.
    pub fn iter(&self) -> std::collections::btree_map::Iter<'_, u32, XrefEntry> {
        let table = &self.entries;
        table.iter()
    }

    /// Number of entries stored in the table (of any variant).
    pub fn len(&self) -> usize {
        let table = &self.entries;
        table.len()
    }

    /// Returns `true` when the table holds no entries.
    pub fn is_empty(&self) -> bool {
        let table = &self.entries;
        table.is_empty()
    }

    /// Folds an older table from the xref chain into this one.
    ///
    /// Entries already present in `self` are kept untouched; only object
    /// numbers missing from `self` are taken from `other` (the PDF rule that
    /// the newest xref wins when walking `Prev` chains). `size` becomes the
    /// larger of the two tables' sizes.
    pub fn merge_older(&mut self, other: Xref) {
        let Xref {
            entries: older_entries,
            size: older_size,
        } = other;

        self.size = self.size.max(older_size);

        for (object_number, older_entry) in older_entries {
            // `or_insert` only fills vacant slots, so newer entries survive.
            self.entries.entry(object_number).or_insert(older_entry);
        }
    }
}