// pdfox 0.1.0
// A pure-Rust PDF library — create, parse, and render PDF documents with zero C dependencies.

//! Core PDF object types as defined in ISO 32000-1.
//! Every value in a PDF document is one of these variants.

use std::collections::BTreeMap;
use std::fmt::Write;

/// Indirect reference to a PDF object, written as `{id} {gen} R`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct ObjRef {
    /// Number printed first in the serialized reference.
    pub id: u32,
    /// Number printed second in the serialized reference.
    pub gen: u16,
}

impl ObjRef {
    /// Creates a reference to object `id` with `gen` set to 0.
    pub fn new(id: u32) -> Self {
        ObjRef { gen: 0, id }
    }

    /// Renders the reference in its textual `id gen R` form.
    pub fn serialize(&self) -> String {
        let ObjRef { id, gen } = *self;
        format!("{} {} R", id, gen)
    }
}

/// All PDF object variants.
///
/// Each variant carries the Rust value backing one kind of PDF object; the
/// textual form is produced by `PdfObject::serialize`.
#[derive(Debug, Clone)]
pub enum PdfObject {
    /// The `null` object.
    Null,
    /// A boolean, serialized as `true` or `false`.
    Boolean(bool),
    /// An integer number.
    Integer(i64),
    /// A real (floating-point) number.
    Real(f64),
    /// A name, stored without the leading `/` (it is added on serialization).
    Name(String),           // /Name
    /// A literal string, stored as raw bytes and escaped on serialization.
    StringLiteral(Vec<u8>), // (string)
    /// A hexadecimal string, stored as raw bytes and hex-encoded on serialization.
    HexString(Vec<u8>),     // <hex>
    /// An array of objects.
    Array(Vec<PdfObject>),
    /// A dictionary of name keys to objects.
    Dictionary(PdfDict),
    /// A stream: a dictionary plus raw bytes.
    Stream(PdfStream),
    /// An indirect reference to another object.
    Reference(ObjRef),
}

/// An ordered dictionary (BTreeMap for deterministic output)
#[derive(Debug, Clone, Default)]
pub struct PdfDict(pub BTreeMap<String, PdfObject>);

impl PdfDict {
    pub fn new() -> Self {
        Self(BTreeMap::new())
    }

    pub fn set(&mut self, key: impl Into<String>, value: PdfObject) -> &mut Self {
        self.0.insert(key.into(), value);
        self
    }

    pub fn get(&self, key: &str) -> Option<&PdfObject> {
        self.0.get(key)
    }

    pub fn serialize(&self) -> String {
        let mut out = String::from("<<\n");
        for (k, v) in &self.0 {
            write!(out, "/{} {}\n", k, v.serialize()).unwrap();
        }
        out.push_str(">>");
        out
    }
}

/// A PDF stream object: a dictionary followed by a raw byte payload.
#[derive(Debug, Clone)]
pub struct PdfStream {
    /// Stream dictionary; the constructors populate `Length` (and `Filter`
    /// for compressed streams).
    pub dict: PdfDict,
    /// Payload bytes, emitted verbatim between `stream` and `endstream`.
    pub data: Vec<u8>,
}

impl PdfStream {
    /// Wraps `data` as-is and records its byte count under `Length`.
    pub fn new(data: Vec<u8>) -> Self {
        let length = PdfObject::Integer(data.len() as i64);
        let mut dict = PdfDict::new();
        dict.set("Length", length);
        PdfStream { dict, data }
    }

    /// Zlib-compresses `data` and stores the compressed bytes.
    ///
    /// The dictionary gets `Length` (the compressed size) and
    /// `Filter = /FlateDecode`.
    ///
    /// # Panics
    ///
    /// Panics if the encoder reports an error while writing or finishing.
    pub fn new_compressed(data: Vec<u8>) -> Self {
        use flate2::write::ZlibEncoder;
        use flate2::Compression;
        use std::io::Write;

        let mut zlib = ZlibEncoder::new(Vec::new(), Compression::default());
        zlib.write_all(&data).unwrap();
        let deflated = zlib.finish().unwrap();

        let mut dict = PdfDict::new();
        dict.set("Length", PdfObject::Integer(deflated.len() as i64))
            .set("Filter", PdfObject::Name("FlateDecode".into()));
        PdfStream { dict, data: deflated }
    }

    /// Renders the stream as bytes: the serialized dictionary, the `stream`
    /// keyword, the payload, and the `endstream` keyword.
    pub fn serialize(&self) -> Vec<u8> {
        let header = self.dict.serialize();
        let parts: [&[u8]; 4] = [
            header.as_bytes(),
            b"\nstream\n",
            &self.data,
            b"\nendstream",
        ];
        parts.concat()
    }
}

impl PdfObject {
    pub fn name(s: impl Into<String>) -> Self {
        Self::Name(s.into())
    }

    pub fn string(s: impl Into<String>) -> Self {
        Self::StringLiteral(s.into().into_bytes())
    }

    pub fn array(items: Vec<PdfObject>) -> Self {
        Self::Array(items)
    }

    pub fn serialize(&self) -> String {
        match self {
            PdfObject::Null => "null".into(),
            PdfObject::Boolean(b) => if *b { "true".into() } else { "false".into() },
            PdfObject::Integer(n) => n.to_string(),
            PdfObject::Real(f) => {
                // PDF spec: avoid scientific notation, limit precision
                format!("{:.4}", f).trim_end_matches('0').trim_end_matches('.').to_string()
            }
            PdfObject::Name(n) => format!("/{}", pdf_name_escape(n)),
            PdfObject::StringLiteral(bytes) => {
                let s = pdf_string_escape(bytes);
                format!("({})", s)
            }
            PdfObject::HexString(bytes) => {
                let hex: String = bytes.iter().map(|b| format!("{:02X}", b)).collect();
                format!("<{}>", hex)
            }
            PdfObject::Array(items) => {
                let inner: Vec<String> = items.iter().map(|o| o.serialize()).collect();
                format!("[{}]", inner.join(" "))
            }
            PdfObject::Dictionary(dict) => dict.serialize(),
            PdfObject::Stream(s) => String::from_utf8_lossy(&s.serialize()).into_owned(),
            PdfObject::Reference(r) => r.serialize(),
        }
    }
}

/// Escapes `name` for use as the body of a PDF name object (the part after
/// the leading `/`).
///
/// Every byte of the UTF-8 encoding of `name` that is not a regular
/// character is written as `#XX` (two uppercase hex digits). That covers
/// bytes outside the printable ASCII range `!`..=`~`, the number sign `#`
/// itself, and the PDF delimiter characters `( ) < > [ ] { } / %`, which
/// would otherwise terminate the name early.
fn pdf_name_escape(name: &str) -> String {
    let mut out = String::with_capacity(name.len());
    for b in name.bytes() {
        let needs_hex = !b.is_ascii_graphic()
            || matches!(
                b,
                b'#' | b'(' | b')' | b'<' | b'>' | b'[' | b']' | b'{' | b'}' | b'/' | b'%'
            );
        if needs_hex {
            write!(out, "#{:02X}", b).expect("writing to a String cannot fail");
        } else {
            out.push(char::from(b));
        }
    }
    out
}

/// Escapes `bytes` for use inside a PDF literal string `( ... )`.
///
/// Parentheses and backslashes are backslash-escaped, CR and LF are written
/// as `\r` and `\n`, and every byte >= 0x80 is written as a three-digit octal
/// escape (`\ddd`). The octal form keeps the result pure ASCII: pushing such
/// a byte as a `char` would store it as a two-byte UTF-8 sequence and so
/// change the string's content once the output is written to the file.
/// All other bytes are copied through unchanged.
fn pdf_string_escape(bytes: &[u8]) -> String {
    let mut out = String::with_capacity(bytes.len());
    for &b in bytes {
        match b {
            b'(' => out.push_str("\\("),
            b')' => out.push_str("\\)"),
            b'\\' => out.push_str("\\\\"),
            b'\r' => out.push_str("\\r"),
            b'\n' => out.push_str("\\n"),
            // Always three digits so a following literal digit cannot be
            // read as part of the escape.
            0x80..=0xFF => {
                write!(out, "\\{:03o}", b).expect("writing to a String cannot fail")
            }
            _ => out.push(char::from(b)),
        }
    }
    out
}