spectre_parse 1.0.0

Lazy, read-only PDF parser: only the xref is read at open(), and objects materialize on demand. Powers the spectre_pdf extraction crate.
Documentation
//! Error types for the parser.

use std::fmt;

/// Shorthand for `std::result::Result` with this module's [`Error`] as the
/// error type.
pub type Result<T> = std::result::Result<T, Error>;

/// Top-level error type; the `E` in this module's `Result<T>` alias.
///
/// Variants are fine-grained so callers can pattern-match — e.g.
/// `NoOutline` downgrades to an empty TOC rather than surfacing.
///
/// Marked `#[non_exhaustive]`, so code outside this crate must include a
/// wildcard arm when matching.
#[derive(Debug, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub enum Error {
    /// Wraps a [`ParseError`]; produced by the `From<ParseError>` impl.
    Parse(ParseError),
    /// Wraps an [`XrefError`]; produced by the `From<XrefError>` impl.
    Xref(XrefError),
    /// Dictionary lookup miss; carries the key name.
    DictKey(String),
    /// Object reference points at an id with no xref entry.
    ///
    /// `Display` renders the two fields as `{0} {1} R` — presumably the
    /// object number followed by the generation number (TODO confirm).
    ObjectNotFound(u32, u16),
    /// A typed accessor was called on the wrong variant.
    Type {
        /// Name of the variant the accessor wanted.
        expected: &'static str,
        /// Name of the variant that was actually present.
        found: &'static str,
    },
    /// A stream's `/Filter` chain mentions an unimplemented codec.
    ///
    /// The payload is printed by `Display` as the filter name.
    UnsupportedFilter(String),
    /// Stream decompression failed (corrupt, truncated, …).
    ///
    /// The payload is a free-form message.
    Decompression(String),
    /// `/Outlines` not present in the catalog.
    NoOutline,
    /// I/O failure, carried as a message string.
    ///
    /// NOTE(review): presumably a `String` rather than `std::io::Error` so
    /// the enum can keep deriving `Clone`/`PartialEq`/`Eq` — confirm.
    Io(String),
}

/// Parse failures, wrapped by [`Error::Parse`].
///
/// Unlike [`Error`], this enum is not `#[non_exhaustive]`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ParseError {
    /// NOTE(review): presumably the leading PDF header was missing or
    /// malformed — confirm against the code that raises it.
    InvalidFileHeader,
    /// NOTE(review): presumably no `startxref` keyword could be located —
    /// confirm against the code that raises it.
    MissingStartXref,
    /// Carries the rejected offset value.
    ///
    /// NOTE(review): presumably a byte offset into the input — confirm.
    InvalidXrefOffset(usize),
    /// Bytes at the offset didn't parse as the expected production.
    Unexpected {
        /// Where the mismatch was detected.
        offset: usize,
        /// Name of the production the parser was looking for.
        expected: &'static str,
    },
    /// NOTE(review): presumably a malformed object stream — confirm
    /// against the code that raises it.
    InvalidObjectStream,
    /// Catch-all carrying a free-form message.
    Other(String),
}

/// Xref-specific failures are separated so callers can treat them as
/// recoverable (linearized PDFs commonly have a transient xref fault).
///
/// Wrapped by [`Error::Xref`]; every variant is field-less, so no
/// offending offset or id is carried with the error.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum XrefError {
    /// `startxref` value is past the end of the buffer.
    Start,
    /// `/Prev` xref offset is past the end of the buffer or negative.
    PrevStart,
    /// xref-stream stored at an unreachable offset.
    StreamStart,
    /// xref entry generation number doesn't match a reference.
    Generation,
}

impl fmt::Display for Error {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::Parse(p) => write!(f, "parse error: {p:?}"),
            Self::Xref(x) => write!(f, "xref error: {x:?}"),
            Self::DictKey(k) => write!(f, "missing required dictionary key \"{k}\""),
            Self::ObjectNotFound(n, g) => write!(f, "object {n} {g} R not found in xref"),
            Self::Type { expected, found } => {
                write!(f, "type error: expected {expected}, got {found}")
            }
            Self::UnsupportedFilter(name) => write!(f, "unsupported PDF filter: {name}"),
            Self::Decompression(msg) => write!(f, "decompression failed: {msg}"),
            Self::NoOutline => write!(f, "no document outline"),
            Self::Io(msg) => write!(f, "i/o error: {msg}"),
        }
    }
}

// Empty impl: every `std::error::Error` method keeps its default, so
// `source()` reports no underlying cause for any variant.
impl std::error::Error for Error {}

impl From<ParseError> for Error {
    fn from(p: ParseError) -> Self {
        Self::Parse(p)
    }
}

impl From<XrefError> for Error {
    fn from(x: XrefError) -> Self {
        Self::Xref(x)
    }
}