pdfox 0.1.0

A pure-Rust PDF library — create, parse, and render PDF documents with zero C dependencies
Documentation
//! Low-level PDF byte writer.
//! Manages object allocation, the cross-reference table, and final serialization.

use crate::object::{ObjRef, PdfDict, PdfObject, PdfStream};

/// One row of the cross-reference table: where an object starts in the output
/// buffer, its generation number, and whether it has actually been written.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct XRefEntry {
    /// Byte offset of the object's `N G obj` header within the output buffer
    /// (left at 0 for entries that are not in use).
    offset: u64,
    /// Generation number printed in the entry's five-digit column.
    gen: u16,
    /// `true` once the object has been written (`n` entry); `false` for the
    /// reserved object 0 and for ids that were reserved but never written (`f` entry).
    in_use: bool,
}

/// Accumulates a PDF file in memory: the serialized objects plus the
/// bookkeeping needed to emit the cross-reference table on finalization.
pub struct PdfWriter {
    /// Raw output buffer holding every byte emitted so far
    buf: Vec<u8>,
    /// xref table: index = object id (entry 0 is the reserved free entry)
    xref: Vec<XRefEntry>,
    /// Next available object id (0 is reserved, so allocation starts at 1)
    next_id: u32,
}

impl PdfWriter {
    /// Creates an empty writer.
    ///
    /// The output buffer is pre-sized to 64 KiB and the cross-reference table
    /// starts with the mandatory free entry for object 0 (generation 65535),
    /// so the first id handed out by [`reserve`](Self::reserve) is 1.
    pub fn new() -> Self {
        Self {
            buf: Vec::with_capacity(64 * 1024),
            // Object 0 is always free
            xref: vec![XRefEntry { offset: 0, gen: 65535, in_use: false }],
            next_id: 1,
        }
    }

    /// Reserves a new object id without writing anything yet.
    ///
    /// The matching xref slot is created as "not in use"; it only becomes an
    /// in-use entry once the object is written with
    /// [`write_object`](Self::write_object) or [`write_stream`](Self::write_stream).
    /// An id that is reserved but never written is emitted as a free entry.
    pub fn reserve(&mut self) -> ObjRef {
        let id = self.next_id;
        self.next_id += 1;
        self.xref.push(XRefEntry { offset: 0, gen: 0, in_use: false });
        ObjRef::new(id)
    }

    /// Writes `obj` as the indirect object `r` at the current end of the buffer.
    ///
    /// # Panics
    ///
    /// Panics if `r` does not refer to an id previously handed out by this writer.
    pub fn write_object(&mut self, r: ObjRef, obj: &PdfObject) {
        self.begin_object(r);
        self.emit(obj.serialize().as_bytes());
        self.emit(b"\nendobj\n\n");
    }

    /// Writes `stream` as the indirect object `r` at the current end of the buffer.
    ///
    /// # Panics
    ///
    /// Panics if `r` does not refer to an id previously handed out by this writer.
    pub fn write_stream(&mut self, r: ObjRef, stream: &PdfStream) {
        self.begin_object(r);
        self.emit(&stream.serialize());
        self.emit(b"\nendobj\n\n");
    }

    /// Allocates an id and immediately writes `obj` under it, returning the reference.
    pub fn add_object(&mut self, obj: PdfObject) -> ObjRef {
        let r = self.reserve();
        self.write_object(r, &obj);
        r
    }

    /// Allocates an id and immediately writes `stream` under it, returning the reference.
    pub fn add_stream(&mut self, stream: PdfStream) -> ObjRef {
        let r = self.reserve();
        self.write_stream(r, &stream);
        r
    }

    /// Records the current buffer position as the offset of object `r` and
    /// emits its `N G obj` header line.
    fn begin_object(&mut self, r: ObjRef) {
        let offset = self.buf.len() as u64;
        let table_len = self.xref.len();
        match self.xref.get_mut(r.id as usize) {
            Some(slot) => *slot = XRefEntry { offset, gen: r.gen, in_use: true },
            // Same outcome as plain indexing (a panic), but with a message that
            // names the offending object instead of a bare out-of-bounds error.
            None => panic!(
                "object {} was not reserved by this writer (xref table has {} entries)",
                r.id, table_len
            ),
        }
        self.emit(format!("{} {} obj\n", r.id, r.gen).as_bytes());
    }

    /// Appends raw bytes to the output buffer.
    fn emit(&mut self, data: &[u8]) {
        self.buf.extend_from_slice(data);
    }

    /// Writes the PDF header.
    ///
    /// Call this before writing any object: recorded offsets are absolute
    /// positions in the buffer, so the header must already be part of it.
    pub fn write_header(&mut self) {
        // %PDF-1.7 + binary comment (4 bytes > 127 to signal binary file)
        self.emit(b"%PDF-1.7\n%\xe2\xe3\xcf\xd3\n\n");
    }

    /// Finalizes the document: writes the xref table and trailer and returns
    /// the complete PDF bytes.
    ///
    /// `root` is stored as the trailer's `/Root`; `info`, when present, as `/Info`.
    /// Equivalent to [`finalize_with_encrypt`](Self::finalize_with_encrypt) with
    /// no encryption dictionary.
    pub fn finalize(self, root: ObjRef, info: Option<ObjRef>) -> Vec<u8> {
        self.finalize_with_encrypt(root, info, None)
    }

    /// Like `finalize` but also writes an /Encrypt dictionary reference into the trailer.
    ///
    /// When `encrypt` is `Some`, the trailer additionally receives an `/ID`
    /// array made of two identical 16-byte hex strings. When it is `None`, the
    /// output is exactly what [`finalize`](Self::finalize) produces.
    pub fn finalize_with_encrypt(
        mut self,
        root: ObjRef,
        info: Option<ObjRef>,
        encrypt: Option<ObjRef>,
    ) -> Vec<u8> {
        // The xref section starts at the current end of the buffer; this is
        // the value `startxref` must report.
        let xref_offset = self.buf.len() as u64;
        let count = self.xref.len();

        // Cross-reference table: a single subsection covering ids 0..count.
        self.emit(format!("xref\n0 {}\n", count).as_bytes());
        // Every entry is a fixed-width 20-byte line.
        self.buf.reserve(count * 20);
        // `xref` and `buf` are distinct fields, so entries can be appended
        // directly without first collecting them into a temporary list.
        for entry in &self.xref {
            let kind = if entry.in_use { 'n' } else { 'f' };
            let line = format!("{:010} {:05} {} \n", entry.offset, entry.gen, kind);
            self.buf.extend_from_slice(line.as_bytes());
        }

        // Trailer dictionary
        let mut trailer = PdfDict::new();
        trailer.set("Size", PdfObject::Integer(count as i64));
        trailer.set("Root", PdfObject::Reference(root));
        if let Some(info_ref) = info {
            trailer.set("Info", PdfObject::Reference(info_ref));
        }
        if let Some(enc_ref) = encrypt {
            trailer.set("Encrypt", PdfObject::Reference(enc_ref));
            // Document ID: two identical 16-byte strings (simplified). The bytes
            // are the fixed sequence 0x00..=0x0F, so every document gets the same ID.
            // NOTE(review): presumably the encryption dictionary is derived from
            // this same ID elsewhere; confirm before changing it.
            let id_obj = PdfObject::HexString((0u8..16).collect());
            trailer.set("ID", PdfObject::Array(vec![id_obj.clone(), id_obj]));
        }

        self.emit(b"trailer\n");
        self.emit(trailer.serialize().as_bytes());
        self.emit(format!("\nstartxref\n{}\n%%EOF\n", xref_offset).as_bytes());
        self.buf
    }
}