micropdf 0.15.15

//! PDF Clean/Optimization FFI Module
//!
//! Provides PDF optimization, cleaning, linearization, and page rearrangement.
//! All functions operate on raw PDF byte data via the handle-based document store.

use crate::ffi::Handle;
use std::collections::{HashMap, HashSet};
use std::ffi::{CStr, CString, c_char};
use std::io::{Read, Write};
use std::ptr;

// ============================================================================
// Type Aliases
// ============================================================================

type ContextHandle = Handle;
type DocumentHandle = Handle;
type OutputHandle = Handle;

// ============================================================================
// Internal PDF byte-level helpers
// ============================================================================

/// Locate the earliest offset at which `pattern` occurs in `data`.
///
/// Returns `None` when `pattern` is empty, longer than `data`, or absent.
fn find_pattern(data: &[u8], pattern: &[u8]) -> Option<usize> {
    // `windows(0)` panics, so the empty pattern is rejected up front.
    if pattern.is_empty() {
        return None;
    }
    data.windows(pattern.len())
        .position(|candidate| candidate == pattern)
}

/// Collect the offsets of every (possibly overlapping) occurrence of
/// `pattern` in `data`, in ascending order.
///
/// An empty pattern, or one longer than `data`, yields an empty vector.
fn find_all_patterns(data: &[u8], pattern: &[u8]) -> Vec<usize> {
    // `windows(0)` panics, so the empty pattern is rejected up front.
    if pattern.is_empty() {
        return Vec::new();
    }
    data.windows(pattern.len())
        .enumerate()
        .filter_map(|(offset, candidate)| (candidate == pattern).then_some(offset))
        .collect()
}

/// Locate the offset of the final occurrence of `pattern` in `data`.
///
/// Returns `None` when `pattern` is empty, longer than `data`, or absent.
fn rfind_pattern(data: &[u8], pattern: &[u8]) -> Option<usize> {
    // `windows(0)` panics, so the empty pattern is rejected up front.
    if pattern.is_empty() {
        return None;
    }
    data.windows(pattern.len())
        .rposition(|candidate| candidate == pattern)
}

/// Extract a decimal integer starting at `pos`, skipping leading ASCII
/// whitespace.
///
/// An optional leading `-` is accepted. Returns `None` when `pos` is past the
/// end of `data`, when no digit follows the whitespace/sign, or when the value
/// does not fit in an `i32`.
fn extract_int_after(data: &[u8], pos: usize) -> Option<i32> {
    let rest = data.get(pos..)?;
    let skipped = rest
        .iter()
        .take_while(|b| b.is_ascii_whitespace())
        .count();
    let token = &rest[skipped..];

    let sign_len = usize::from(token.first() == Some(&b'-'));
    let digit_len = token[sign_len..]
        .iter()
        .take_while(|b| b.is_ascii_digit())
        .count();
    if digit_len == 0 {
        return None;
    }

    // Parse sign and digits together: parsing the magnitude first and negating
    // afterwards cannot represent `i32::MIN`, whose magnitude overflows `i32`.
    std::str::from_utf8(&token[..sign_len + digit_len])
        .ok()?
        .parse()
        .ok()
}

/// Find the matching `>>` for the `<<` located at `start`.
///
/// Returns the offset of the first `>` of the closing delimiter, or `None`
/// when `start` does not point at `<<` or the dictionary is never closed.
///
/// Nested dictionaries are tracked by depth. Hex strings (`<...>`) and literal
/// strings (`(...)`) are skipped as opaque units, so a `>` that terminates a
/// hex string directly before `>>` (as in `<</ID<ab>>>`) or a `>>` inside a
/// literal string is not mistaken for a dictionary delimiter.
fn find_dict_end(data: &[u8], start: usize) -> Option<usize> {
    /// Return the index just past the literal string opening at `open`
    /// (which must point at `(`), honouring backslash escapes and balanced
    /// nested parentheses. Returns `data.len()` if the string is unterminated.
    fn skip_literal_string(data: &[u8], open: usize) -> usize {
        let mut nesting = 0usize;
        let mut i = open;
        while i < data.len() {
            match data[i] {
                // The escaped byte is consumed by the extra increment below.
                b'\\' => i += 1,
                b'(' => nesting += 1,
                b')' => {
                    nesting -= 1;
                    if nesting == 0 {
                        return i + 1;
                    }
                }
                _ => {}
            }
            i += 1;
        }
        data.len()
    }

    if !data.get(start..)?.starts_with(b"<<") {
        return None;
    }

    let mut depth = 0i32;
    let mut i = start;
    while i + 1 < data.len() {
        match (data[i], data[i + 1]) {
            (b'<', b'<') => {
                depth += 1;
                i += 2;
            }
            (b'>', b'>') => {
                depth -= 1;
                if depth == 0 {
                    return Some(i);
                }
                i += 2;
            }
            // A single `<` opens a hex string: skip through its closing `>`.
            (b'<', _) => {
                i += 1;
                while i < data.len() && data[i] != b'>' {
                    i += 1;
                }
                i += 1;
            }
            (b'(', _) => i = skip_literal_string(data, i),
            _ => i += 1,
        }
    }
    None
}

/// Locate the dictionary that follows the last `trailer` keyword.
///
/// Returns `(offset_of_<<, offset_just_past_>>)`, or `None` when there is no
/// `trailer` keyword, no `<<` after it, or the dictionary is not closed.
fn find_trailer_region(data: &[u8]) -> Option<(usize, usize)> {
    let keyword_at = rfind_pattern(data, b"trailer")?;
    let open_at = keyword_at + find_pattern(&data[keyword_at..], b"<<")?;
    // `find_dict_end` points at the first `>` of `>>`; step past both bytes.
    find_dict_end(data, open_at).map(|close_at| (open_at, close_at + 2))
}

/// Search `data[region_start..region_end]` for the byte sequence `key` and
/// return the absolute offset immediately after it (where the value begins).
///
/// `region_end` is clamped to `data.len()`. Returns `None` for an empty or
/// inverted region, an empty `key`, or when `key` does not occur. The match is
/// a plain byte search: it does not check that `key` ends at a name boundary.
fn find_dict_key(data: &[u8], region_start: usize, region_end: usize, key: &[u8]) -> Option<usize> {
    let clamped_end = region_end.min(data.len());
    let region = data.get(region_start..clamped_end)?;
    if key.is_empty() {
        return None;
    }
    region
        .windows(key.len())
        .position(|candidate| candidate == key)
        .map(|rel| region_start + rel + key.len())
}

/// Resolve an indirect reference "N G R" starting at `pos`.
///
/// Only the leading object number `N` is read (via `extract_int_after`, which
/// skips leading whitespace); the generation number and the `R` keyword are
/// not inspected or validated here. Returns `None` when no integer is found.
fn resolve_indirect_ref(data: &[u8], pos: usize) -> Option<i32> {
    extract_int_after(data, pos)
}

/// Find an indirect object's dictionary region: "N 0 obj << ... >>".
///
/// Returns `(offset_of_<<, offset_just_past_>>)` for the *last* definition of
/// object `obj_num` (generation 0) that has a well-formed dictionary, falling
/// back to earlier definitions otherwise.
///
/// A header match is ignored when the byte before it is a digit, so looking
/// up object 2 never lands inside `12 0 obj`. A `<<` that only appears after
/// the object's own `endobj` belongs to a later object and is ignored too.
fn find_object_dict(data: &[u8], obj_num: i32) -> Option<(usize, usize)> {
    let header = format!("{} 0 obj", obj_num);
    find_all_patterns(data, header.as_bytes())
        .into_iter()
        .rev()
        // Reject matches that are merely the tail of a larger object number.
        .filter(|&pos| pos == 0 || !data[pos - 1].is_ascii_digit())
        .find_map(|pos| {
            let body = &data[pos..];
            let dict_rel = find_pattern(body, b"<<")?;
            // The object ended before any dictionary opened: not a dict object.
            if matches!(find_pattern(body, b"endobj"), Some(end_rel) if end_rel < dict_rel) {
                return None;
            }
            let dict_start = pos + dict_rel;
            find_dict_end(data, dict_start).map(|dict_end| (dict_start, dict_end + 2))
        })
}

/// Read the object number that the trailer's `/Root` entry points at
/// (the document catalog).
///
/// Returns `None` when the trailer dictionary or its `/Root` key cannot be
/// located, or when no integer follows the key.
fn find_root_obj_num(data: &[u8]) -> Option<i32> {
    let (trailer_start, trailer_end) = find_trailer_region(data)?;
    find_dict_key(data, trailer_start, trailer_end, b"/Root")
        .and_then(|value_at| resolve_indirect_ref(data, value_at))
}

/// Find where the object holding the Nth page (0-indexed) begins.
///
/// Pages are counted by scanning for the literal bytes `/Type /Page` that are
/// not immediately followed by `s` (which would be `/Type /Pages`). For the
/// requested page, the nearest ` obj` keyword within the preceding 500 bytes
/// is located and the start of the line containing it is returned.
///
/// Returns `None` when fewer than `page_num + 1` pages are found, or when no
/// ` obj` keyword lies within the look-behind window of the requested page.
fn find_page_obj_position(data: &[u8], page_num: i32) -> Option<usize> {
    const PAGE_MARKER: &[u8] = b"/Type /Page";
    const OBJ_KEYWORD: &[u8] = b" obj";
    const LOOKBEHIND: usize = 500;

    let mut pages_seen = 0i32;
    for (at, window) in data.windows(PAGE_MARKER.len()).enumerate() {
        let is_pages_node = data.get(at + PAGE_MARKER.len()) == Some(&b's');
        if window != PAGE_MARKER || is_pages_node {
            continue;
        }

        if pages_seen == page_num {
            let window_start = at.saturating_sub(LOOKBEHIND);
            let keyword_rel = data[window_start..at]
                .windows(OBJ_KEYWORD.len())
                .rposition(|candidate| candidate == OBJ_KEYWORD);
            if let Some(rel) = keyword_rel {
                // Back up to the start of the line that carries "N 0 obj".
                let keyword_at = window_start + rel;
                let line_start = data[..keyword_at]
                    .iter()
                    .rposition(|&b| b == b'\n' || b == b'\r')
                    .map_or(0, |eol| eol + 1);
                return Some(line_start);
            }
        }
        pages_seen += 1;
    }
    None
}

/// Find the byte range of an entire object "N 0 obj ... endobj".
///
/// Returns `(start_of_header, offset_just_past_endobj)` for the *last*
/// definition of object `obj_num` (generation 0) that is followed by an
/// `endobj` keyword, or `None` if there is none.
///
/// A header match is ignored when the byte before it is a digit, so looking
/// up object 2 never returns a range that starts inside `12 0 obj`.
fn find_object_range(data: &[u8], obj_num: i32) -> Option<(usize, usize)> {
    const END_KEYWORD: &[u8] = b"endobj";

    let header = format!("{} 0 obj", obj_num);
    let header = header.as_bytes();

    data.windows(header.len())
        .enumerate()
        .rev()
        .filter(|&(pos, candidate)| {
            candidate == header && (pos == 0 || !data[pos - 1].is_ascii_digit())
        })
        .find_map(|(pos, _)| {
            data[pos..]
                .windows(END_KEYWORD.len())
                .position(|candidate| candidate == END_KEYWORD)
                .map(|end_rel| (pos, pos + end_rel + END_KEYWORD.len()))
        })
}

/// Extract stream data from an object (between "stream\n" and "endstream").
///
/// Uses the *first* definition of object `obj_num` (generation 0). Returns
/// `(payload, payload_start, payload_end)` where the offsets index into
/// `data`. One end-of-line marker (`\r\n`, `\r` or `\n`) after `stream` and one
/// before `endstream` are excluded from the payload. The `/Length` entry is
/// not consulted.
///
/// Returns `None` when the object is not found, has no `stream` keyword before
/// its `endobj`, or has no `endstream`.
///
/// A header match is ignored when the byte before it is a digit (so object 2
/// never resolves to `12 0 obj`), and a `stream` keyword that only appears
/// after this object's `endobj` is treated as belonging to a later object.
fn extract_stream_data(data: &[u8], obj_num: i32) -> Option<(Vec<u8>, usize, usize)> {
    /// Offset of the first occurrence of a non-empty `needle` in `haystack`.
    fn locate(haystack: &[u8], needle: &[u8]) -> Option<usize> {
        haystack
            .windows(needle.len())
            .position(|candidate| candidate == needle)
    }

    let header = format!("{} 0 obj", obj_num);
    let header = header.as_bytes();
    let obj_start = data
        .windows(header.len())
        .enumerate()
        .find(|&(at, candidate)| {
            candidate == header && (at == 0 || !data[at - 1].is_ascii_digit())
        })
        .map(|(at, _)| at)?;
    let body = &data[obj_start..];

    let keyword_rel = locate(body, b"stream")?;
    if matches!(locate(body, b"endobj"), Some(end_rel) if end_rel < keyword_rel) {
        return None;
    }

    // Skip \r\n or \n after "stream".
    let mut stream_start = obj_start + keyword_rel + b"stream".len();
    if data.get(stream_start) == Some(&b'\r') {
        stream_start += 1;
    }
    if data.get(stream_start) == Some(&b'\n') {
        stream_start += 1;
    }

    // Strip the trailing \r\n or \n before "endstream".
    let mut stream_end = stream_start + locate(&data[stream_start..], b"endstream")?;
    if stream_end > stream_start && data[stream_end - 1] == b'\n' {
        stream_end -= 1;
    }
    if stream_end > stream_start && data[stream_end - 1] == b'\r' {
        stream_end -= 1;
    }

    Some((
        data[stream_start..stream_end].to_vec(),
        stream_start,
        stream_end,
    ))
}

/// Collect all object numbers defined in the PDF ("N 0 obj").
///
/// The data is split into lines on `\n` *and* `\r` (PDF permits CR-only line
/// endings, which `str::lines` would not split). A line counts as an object
/// header when, after trimming, everything before its first ` 0 obj` parses as
/// a positive `i32`. Only generation 0 is recognised.
///
/// Returns the numbers sorted ascending without duplicates.
fn collect_all_object_numbers(data: &[u8]) -> Vec<i32> {
    let mut objects: Vec<i32> = data
        .split(|&b| b == b'\n' || b == b'\r')
        .filter_map(|line| {
            // Decode per line so binary stream content elsewhere in the file
            // does not force a lossy copy of the whole document.
            let text = String::from_utf8_lossy(line);
            let trimmed = text.trim();
            let header_at = trimmed.find(" 0 obj")?;
            trimmed[..header_at].trim().parse::<i32>().ok()
        })
        .filter(|&num| num > 0)
        .collect();
    objects.sort_unstable();
    objects.dedup();
    objects
}

/// Collect all object numbers referenced via "N 0 R" in the PDF.
///
/// The set always includes the objects named by the trailer's `/Root`,
/// `/Info` and `/Encrypt` entries when those can be resolved. Every other
/// reference is found by a byte-level scan for `N <ws> 0 <ws> R`, where:
///
/// * `N` is a run of digits that starts a token (it is at the start of the
///   data, or preceded by PDF white-space or a delimiter other than `/`),
/// * the generation is exactly `0`,
/// * `R` is followed by end of data, white-space or a delimiter.
///
/// Scanning bytes rather than whitespace-separated words means references
/// that touch delimiters, such as `[3 0 R 4 0 R]` or `/Parent 2 0 R>>`, are
/// recognised. Missing them would make live objects look unreferenced.
/// Only positive object numbers that fit in an `i32` are recorded.
fn collect_referenced_objects(data: &[u8]) -> HashSet<i32> {
    /// PDF white-space characters: NUL, HT, LF, FF, CR, SP.
    fn is_pdf_space(byte: u8) -> bool {
        matches!(byte, 0x00 | b'\t' | b'\n' | 0x0C | b'\r' | b' ')
    }

    /// PDF delimiter characters.
    fn is_pdf_delimiter(byte: u8) -> bool {
        matches!(
            byte,
            b'(' | b')' | b'<' | b'>' | b'[' | b']' | b'{' | b'}' | b'/' | b'%'
        )
    }

    /// Index just past the run of white-space that starts at `from`.
    fn skip_spaces(data: &[u8], from: usize) -> usize {
        from + data[from..]
            .iter()
            .take_while(|&&b| is_pdf_space(b))
            .count()
    }

    let mut referenced = HashSet::new();

    // Trailer entries are always considered referenced.
    if let Some(root_num) = find_root_obj_num(data) {
        referenced.insert(root_num);
    }
    if let Some((ts, te)) = find_trailer_region(data) {
        let trailer_keys: [&[u8]; 2] = [b"/Info", b"/Encrypt"];
        for key in trailer_keys {
            let target =
                find_dict_key(data, ts, te, key).and_then(|kp| resolve_indirect_ref(data, kp));
            if let Some(obj_num) = target {
                referenced.insert(obj_num);
            }
        }
    }

    // Scan for all "N 0 R" references.
    let mut i = 0;
    while i < data.len() {
        if !data[i].is_ascii_digit() {
            i += 1;
            continue;
        }
        let num_start = i;
        while i < data.len() && data[i].is_ascii_digit() {
            i += 1;
        }
        let num_end = i;

        // A digit run glued to a name or keyword (`/F12`, `x12`) is not an
        // object number.
        let starts_token = match num_start.checked_sub(1).map(|p| data[p]) {
            None => true,
            Some(prev) => is_pdf_space(prev) || (is_pdf_delimiter(prev) && prev != b'/'),
        };
        if !starts_token {
            continue;
        }

        let gen_at = skip_spaces(data, num_end);
        if gen_at == num_end || data.get(gen_at) != Some(&b'0') {
            continue;
        }
        let keyword_at = skip_spaces(data, gen_at + 1);
        if keyword_at == gen_at + 1 || data.get(keyword_at) != Some(&b'R') {
            continue;
        }
        let terminated = data
            .get(keyword_at + 1)
            .map_or(true, |&b| is_pdf_space(b) || is_pdf_delimiter(b));
        if !terminated {
            continue;
        }

        let parsed = std::str::from_utf8(&data[num_start..num_end])
            .ok()
            .and_then(|digits| digits.parse::<i32>().ok());
        if let Some(obj_num) = parsed {
            if obj_num > 0 {
                referenced.insert(obj_num);
            }
        }
    }

    referenced
}

/// Report whether the dictionary of object `obj_num` contains the bytes
/// `/Filter`.
///
/// Returns `false` when the object's dictionary cannot be located.
fn object_has_filter(data: &[u8], obj_num: i32) -> bool {
    match find_object_dict(data, obj_num) {
        Some((dict_start, dict_end)) => {
            find_dict_key(data, dict_start, dict_end, b"/Filter").is_some()
        }
        None => false,
    }
}

/// Check whether an object has a stream.
///
/// Looks at the *first* definition of object `obj_num` (generation 0) and
/// reports whether the bytes `stream` occur between its header and the next
/// `endobj`. Returns `false` when the object or its `endobj` is not found.
///
/// A header match is ignored when the byte before it is a digit, so asking
/// about object 2 never inspects `12 0 obj` instead.
fn object_has_stream(data: &[u8], obj_num: i32) -> bool {
    const END_KEYWORD: &[u8] = b"endobj";
    const STREAM_KEYWORD: &[u8] = b"stream";

    let header = format!("{} 0 obj", obj_num);
    let header = header.as_bytes();

    let obj_start = data
        .windows(header.len())
        .enumerate()
        .find(|&(at, candidate)| {
            candidate == header && (at == 0 || !data[at - 1].is_ascii_digit())
        })
        .map(|(at, _)| at);
    let Some(obj_start) = obj_start else {
        return false;
    };

    let body = &data[obj_start..];
    match body
        .windows(END_KEYWORD.len())
        .position(|candidate| candidate == END_KEYWORD)
    {
        Some(end_rel) => body[..end_rel]
            .windows(STREAM_KEYWORD.len())
            .any(|candidate| candidate == STREAM_KEYWORD),
        None => false,
    }
}

/// Rebuild the xref table and trailer for the given PDF data.
/// Returns a new complete PDF byte vector with updated xref.
///
/// The output consists of:
/// 1. `data` copied verbatim from its start up to the end of the last
///    `N 0 obj ... endobj` range (this carries the header and every object),
/// 2. a classic `xref` table covering object numbers `0..=max`, with an `n`
///    entry for each object found and an `f` entry for each gap,
/// 3. the original trailer dictionary copied verbatim, or a minimal
///    `<< /Size N >>` when none is found,
/// 4. `startxref`, the xref offset, and `%%EOF`.
///
/// Each xref offset is the position of the object's *first* header in `data`.
/// A header match preceded by a digit is skipped, so the entry for object 1
/// can never point into the middle of `11 0 obj`.
///
/// Limitations: a copied trailer keeps its original `/Size` value, and when no
/// objects are found nothing of `data` (not even the header) is copied.
fn rebuild_pdf_xref(data: &[u8]) -> Vec<u8> {
    /// Offset of the first "N 0 obj" header that is not the tail of a larger
    /// object number.
    fn first_definition(data: &[u8], obj_num: i32) -> Option<usize> {
        let header = format!("{} 0 obj", obj_num);
        let header = header.as_bytes();
        data.windows(header.len())
            .enumerate()
            .find(|&(at, candidate)| {
                candidate == header && (at == 0 || !data[at - 1].is_ascii_digit())
            })
            .map(|(at, _)| at)
    }

    let objects = collect_all_object_numbers(data);

    // Collect offsets for each object.
    let obj_offsets: Vec<(i32, usize)> = objects
        .iter()
        .filter_map(|&obj_num| first_definition(data, obj_num).map(|pos| (obj_num, pos)))
        .collect();

    // Find the end of the last object.
    let content_end = objects
        .iter()
        .filter_map(|&obj_num| find_object_range(data, obj_num))
        .map(|(_, end)| end)
        .max()
        .unwrap_or(0);

    // Build the output: header + objects + xref + trailer.
    let mut output = Vec::new();

    // Copy everything up to content_end (header + all objects).
    output.extend_from_slice(&data[..content_end]);

    // Make sure there's a newline before xref.
    if output.last().is_some_and(|&last| last != b'\n') {
        output.push(b'\n');
    }

    let xref_start = output.len();

    // Build xref table. Object numbers are positive, so the cast is lossless.
    let max_obj = obj_offsets.iter().map(|&(n, _)| n).max().unwrap_or(0) as usize;
    output.extend_from_slice(b"xref\n");
    output.extend_from_slice(format!("0 {}\n", max_obj + 1).as_bytes());

    // Entry 0: free object head.
    output.extend_from_slice(b"0000000000 65535 f \n");

    // Create a map of obj_num -> offset.
    let offset_map: HashMap<i32, usize> = obj_offsets.iter().copied().collect();

    for i in 1..=max_obj {
        if let Some(&offset) = offset_map.get(&(i as i32)) {
            output.extend_from_slice(format!("{:010} 00000 n \n", offset).as_bytes());
        } else {
            output.extend_from_slice(b"0000000000 00000 f \n");
        }
    }

    // Copy trailer dictionary (or build a minimal one).
    output.extend_from_slice(b"trailer\n");
    if let Some((ts, te)) = find_trailer_region(data) {
        output.extend_from_slice(&data[ts..te]);
    } else {
        // Build a minimal trailer.
        let size = max_obj + 1;
        output.extend_from_slice(format!("<< /Size {} >>\n", size).as_bytes());
    }
    output.push(b'\n');

    // startxref
    output.extend_from_slice(format!("startxref\n{}\n%%EOF\n", xref_start).as_bytes());

    output
}

// ============================================================================
// Structure Options
// ============================================================================

/// Structure tree handling options.
///
/// Used as the type of `CleanOptions::structure`. Declared `#[repr(C)]` with
/// explicit discriminants so the numeric values are fixed for FFI callers.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
#[repr(C)]
pub enum CleanStructureOption {
    /// Remove the structure tree entirely (default)
    #[default]
    Drop = 0,
    /// Preserve the structure tree
    Keep = 1,
}

/// Vectorize options.
///
/// Used as the type of `CleanOptions::vectorize`. Declared `#[repr(C)]` with
/// explicit discriminants so the numeric values are fixed for FFI callers.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
#[repr(C)]
pub enum CleanVectorizeOption {
    /// Leave pages unchanged (default)
    #[default]
    No = 0,
    /// Vectorize each page (flatten Type 3 fonts)
    Yes = 1,
}

// ============================================================================
// Encryption Methods
// ============================================================================

/// Encryption method for PDF output.
///
/// Declared `#[repr(C)]` with explicit discriminants so the numeric values are
/// fixed for FFI callers.
///
/// NOTE(review): `WriteOptions::do_encrypt` is a plain `i32` documented as
/// "Encryption method"; presumably it carries one of these discriminants —
/// confirm against the code that consumes it.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
#[repr(C)]
pub enum EncryptionMethod {
    /// Keep existing encryption
    #[default]
    Keep = 0,
    /// Remove encryption
    None = 1,
    /// RC4 40-bit encryption
    Rc4_40 = 2,
    /// RC4 128-bit encryption
    Rc4_128 = 3,
    /// AES 128-bit encryption
    Aes128 = 4,
    /// AES 256-bit encryption
    Aes256 = 5,
}

// ============================================================================
// Compression Methods
// ============================================================================

/// Compression method.
///
/// The discriminants match the values documented for
/// `WriteOptions::do_compress` (0 = none, 1 = zlib, 2 = brotli). Note that the
/// derived `Default` here is `None`, whereas `WriteOptions::new()` sets
/// `do_compress` to 1 (zlib).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
#[repr(C)]
pub enum CompressionMethod {
    /// No compression
    #[default]
    None = 0,
    /// Zlib/Deflate compression
    Zlib = 1,
    /// Brotli compression
    Brotli = 2,
}

// ============================================================================
// Write Options
// ============================================================================

/// PDF write options.
///
/// A `#[repr(C)]` plain-data struct passed across the FFI boundary. Flags are
/// `i32` values where 0 means "off"; a few fields carry small enumerations as
/// noted on the field. `WriteOptions::new()` supplies the defaults and
/// `WriteOptions::parse()` sets fields from a mutool-style option string.
#[derive(Debug, Clone)]
#[repr(C)]
pub struct WriteOptions {
    /// Write just the changed objects (incremental save)
    pub do_incremental: i32,
    /// Pretty-print dictionaries and arrays
    pub do_pretty: i32,
    /// ASCII hex encode binary streams
    pub do_ascii: i32,
    /// Compress streams (0=none, 1=zlib, 2=brotli)
    pub do_compress: i32,
    /// Compress (or leave compressed) image streams
    pub do_compress_images: i32,
    /// Compress (or leave compressed) font streams
    pub do_compress_fonts: i32,
    /// Decompress streams (except images/fonts)
    pub do_decompress: i32,
    /// Garbage collect objects (1=gc, 2=renumber, 3=deduplicate)
    pub do_garbage: i32,
    /// Write linearized PDF
    pub do_linear: i32,
    /// Clean content streams
    pub do_clean: i32,
    /// Sanitize content streams
    pub do_sanitize: i32,
    /// (Re)create appearance streams
    pub do_appearance: i32,
    /// Encryption method
    pub do_encrypt: i32,
    /// Don't regenerate ID
    pub dont_regenerate_id: i32,
    /// Document permissions (`new()` uses -1, meaning all permissions)
    pub permissions: i32,
    /// Owner password (UTF-8), stored NUL-terminated in a fixed 128-byte
    /// buffer; see `set_owner_password`
    pub opwd_utf8: [u8; 128],
    /// User password (UTF-8), stored NUL-terminated in a fixed 128-byte
    /// buffer; see `set_user_password`
    pub upwd_utf8: [u8; 128],
    /// Snapshot mode (internal use)
    pub do_snapshot: i32,
    /// Preserve metadata when cleaning
    pub do_preserve_metadata: i32,
    /// Use object streams if possible
    pub do_use_objstms: i32,
    /// Compression effort (0=default, 1=min, 100=max)
    pub compression_effort: i32,
    /// Add labels to objects
    pub do_labels: i32,
}

/// `Default` delegates to [`WriteOptions::new`], so the two always agree
/// (zlib compression on, image/font compression on, all permissions).
impl Default for WriteOptions {
    fn default() -> Self {
        Self::new()
    }
}

impl WriteOptions {
    /// Create write options with the module defaults: zlib stream
    /// compression, image and font streams left compressed, all permissions
    /// granted (`-1`), empty passwords, and every other flag off.
    pub fn new() -> Self {
        Self {
            do_incremental: 0,
            do_pretty: 0,
            do_ascii: 0,
            do_compress: 1, // Default to zlib compression
            do_compress_images: 1,
            do_compress_fonts: 1,
            do_decompress: 0,
            do_garbage: 0,
            do_linear: 0,
            do_clean: 0,
            do_sanitize: 0,
            do_appearance: 0,
            do_encrypt: 0,
            dont_regenerate_id: 0,
            permissions: -1, // All permissions
            opwd_utf8: [0; 128],
            upwd_utf8: [0; 128],
            do_snapshot: 0,
            do_preserve_metadata: 0,
            do_use_objstms: 0,
            compression_effort: 0,
            do_labels: 0,
        }
    }

    /// Copy `password` into a fixed, NUL-terminated 128-byte buffer.
    ///
    /// At most 127 bytes are stored. When the password is longer, it is cut at
    /// the last character boundary that fits so the stored bytes remain valid
    /// UTF-8. Every byte after the password is zeroed, which both terminates
    /// the string and wipes any remainder of a previously stored, longer
    /// password.
    fn store_password(buffer: &mut [u8; 128], password: &str) {
        let mut len = password.len().min(buffer.len() - 1);
        // Offset 0 is always a boundary, so this loop terminates.
        while !password.is_char_boundary(len) {
            len -= 1;
        }
        buffer[..len].copy_from_slice(&password.as_bytes()[..len]);
        buffer[len..].fill(0);
    }

    /// Set owner password.
    ///
    /// The password is truncated to at most 127 bytes (on a character
    /// boundary) and stored NUL-terminated in `opwd_utf8`.
    pub fn set_owner_password(&mut self, password: &str) {
        Self::store_password(&mut self.opwd_utf8, password);
    }

    /// Set user password.
    ///
    /// The password is truncated to at most 127 bytes (on a character
    /// boundary) and stored NUL-terminated in `upwd_utf8`.
    pub fn set_user_password(&mut self, password: &str) {
        Self::store_password(&mut self.upwd_utf8, password);
    }

    /// Parse option string (matches mutool clean options).
    ///
    /// Each character of `args` switches one option on; unknown characters are
    /// ignored. Options are only ever set, never cleared, so parsing is
    /// cumulative on top of the current state. `i` and `f` enable
    /// decompression and additionally turn off image / font compression.
    pub fn parse(&mut self, args: &str) {
        for c in args.chars() {
            match c {
                'g' => self.do_garbage = 1,
                'G' => self.do_garbage = 2,
                'D' => self.do_garbage = 3,
                'd' => self.do_decompress = 1,
                'i' => {
                    self.do_decompress = 1;
                    self.do_compress_images = 0;
                }
                'f' => {
                    self.do_decompress = 1;
                    self.do_compress_fonts = 0;
                }
                'l' => self.do_linear = 1,
                'a' => self.do_ascii = 1,
                'z' => self.do_compress = 1,
                'Z' => self.do_compress = 2, // Brotli
                'c' => self.do_clean = 1,
                's' => self.do_sanitize = 1,
                'p' => self.do_pretty = 1,
                'A' => self.do_appearance = 1,
                'm' => self.do_preserve_metadata = 1,
                'o' => self.do_use_objstms = 1,
                'L' => self.do_labels = 1,
                _ => {}
            }
        }
    }

    /// Format options to string.
    ///
    /// Emits one letter per enabled option, in a fixed order, using the same
    /// letters that [`parse`](Self::parse) accepts. The image/font compression
    /// switches (`i`, `f`) are not emitted; only `d` reflects `do_decompress`.
    pub fn format(&self) -> String {
        let flags = [
            (self.do_garbage == 1, 'g'),
            (self.do_garbage == 2, 'G'),
            (self.do_garbage == 3, 'D'),
            (self.do_decompress != 0, 'd'),
            (self.do_linear != 0, 'l'),
            (self.do_ascii != 0, 'a'),
            (self.do_compress == 1, 'z'),
            (self.do_compress == 2, 'Z'),
            (self.do_clean != 0, 'c'),
            (self.do_sanitize != 0, 's'),
            (self.do_pretty != 0, 'p'),
            (self.do_appearance != 0, 'A'),
            (self.do_preserve_metadata != 0, 'm'),
            (self.do_use_objstms != 0, 'o'),
            (self.do_labels != 0, 'L'),
        ];
        flags
            .iter()
            .filter(|(enabled, _)| *enabled)
            .map(|&(_, letter)| letter)
            .collect()
    }
}

// ============================================================================
// Image Rewriter Options
// ============================================================================

/// Image rewriter options.
///
/// A `#[repr(C)]` plain-data struct embedded in `CleanOptions::image`. The
/// derived `Default` sets every field to 0.
#[derive(Debug, Clone, Default)]
#[repr(C)]
pub struct ImageRewriterOptions {
    /// Target color depth (0 = keep)
    pub color_depth: i32,
    /// Target DPI (0 = keep)
    pub dpi: i32,
    /// JPEG quality (0-100)
    pub jpeg_quality: i32,
    /// Recompress images
    pub recompress: i32,
}

// ============================================================================
// Clean Options
// ============================================================================

/// PDF clean options.
///
/// A `#[repr(C)]` aggregate of the write options, image rewriter options and
/// the clean-specific switches. See `CleanOptions::new`, `optimize` and
/// `linearize` for ready-made configurations.
#[derive(Debug, Clone)]
#[repr(C)]
pub struct CleanOptions {
    /// Write options
    pub write: WriteOptions,
    /// Image rewriter options
    pub image: ImageRewriterOptions,
    /// Subset fonts (0 = off)
    pub subset_fonts: i32,
    /// Structure tree handling
    pub structure: CleanStructureOption,
    /// Vectorize option
    pub vectorize: CleanVectorizeOption,
}

/// `Default` delegates to [`CleanOptions::new`], so the two always agree.
impl Default for CleanOptions {
    fn default() -> Self {
        Self::new()
    }
}

impl CleanOptions {
    pub fn new() -> Self {
        Self {
            write: WriteOptions::new(),
            image: ImageRewriterOptions::default(),
            subset_fonts: 0,
            structure: CleanStructureOption::Drop,
            vectorize: CleanVectorizeOption::No,
        }
    }

    /// Create options for optimization
    pub fn optimize() -> Self {
        let mut opts = Self::new();
        opts.write.do_garbage = 3; // Deduplicate
        opts.write.do_compress = 1;
        opts.write.do_clean = 1;
        opts.write.do_sanitize = 1;
        opts.subset_fonts = 1;
        opts
    }

    /// Create options for linearization
    pub fn linearize() -> Self {
        let mut opts = Self::new();
        opts.write.do_linear = 1;
        opts.write.do_garbage = 1;
        opts.write.do_compress = 1;
        opts
    }
}

// ============================================================================
// FFI Functions - Default Options
// ============================================================================

/// Get default write options.
///
/// Returns `WriteOptions::new()` by value; see that constructor for the
/// individual defaults.
#[unsafe(no_mangle)]
pub extern "C" fn pdf_default_write_options() -> WriteOptions {
    WriteOptions::new()
}

/// Get default clean options.
///
/// Returns `CleanOptions::new()` by value; see that constructor for the
/// individual defaults.
#[unsafe(no_mangle)]
pub extern "C" fn pdf_default_clean_options() -> CleanOptions {
    CleanOptions::new()
}

// ============================================================================
// FFI Functions - Parse Options
// ============================================================================

/// Apply a mutool-style option string to `*opts`.
///
/// `opts` is returned unchanged in every case. Nothing is modified when either
/// pointer is null; an option string that is not valid UTF-8 is treated as
/// empty. The context handle is unused.
#[unsafe(no_mangle)]
pub extern "C" fn pdf_parse_write_options(
    _ctx: ContextHandle,
    opts: *mut WriteOptions,
    args: *const c_char,
) -> *mut WriteOptions {
    if !opts.is_null() && !args.is_null() {
        // SAFETY: `args` is non-null; the caller guarantees it points to a
        // valid NUL-terminated C string.
        let flags = unsafe { CStr::from_ptr(args).to_str().unwrap_or("") };
        // SAFETY: `opts` is non-null; the caller guarantees it points to a
        // valid, exclusively accessible `WriteOptions`.
        unsafe {
            (*opts).parse(flags);
        }
    }
    opts
}

/// Write the option letters of `*opts` into `buffer` as a NUL-terminated
/// C string.
///
/// At most `buffer_len - 1` bytes of text are copied, followed by a
/// terminating NUL. `buffer` is returned in every case; it is left untouched
/// when `buffer` or `opts` is null or `buffer_len` is 0. The context handle is
/// unused.
#[unsafe(no_mangle)]
pub extern "C" fn pdf_format_write_options(
    _ctx: ContextHandle,
    buffer: *mut c_char,
    buffer_len: usize,
    opts: *const WriteOptions,
) -> *mut c_char {
    let usable = !buffer.is_null() && buffer_len != 0 && !opts.is_null();
    if !usable {
        return buffer;
    }

    // SAFETY: `opts` is non-null; the caller guarantees it points to a valid
    // `WriteOptions`.
    let text = unsafe { (*opts).format() };
    let bytes = text.as_bytes();
    let copy_len = bytes.len().min(buffer_len - 1);

    // SAFETY: the caller guarantees `buffer` is writable for `buffer_len`
    // bytes, and `copy_len + 1 <= buffer_len`.
    unsafe {
        let dst = buffer.cast::<u8>();
        ptr::copy_nonoverlapping(bytes.as_ptr(), dst, copy_len);
        dst.add(copy_len).write(0);
    }
    buffer
}

// ============================================================================
// FFI Functions - Document Operations
// ============================================================================

/// Check if document can be saved incrementally.
///
/// Returns 1 when the document handle resolves, its lock can be taken, its
/// data starts with `%PDF-`, and a `startxref` keyword is present (an
/// incremental write needs an existing xref section to chain onto). Returns 0
/// otherwise. No check is made for earlier modifications such as removed
/// objects or rearranged pages. The context handle is unused.
#[unsafe(no_mangle)]
pub extern "C" fn pdf_can_be_saved_incrementally(_ctx: ContextHandle, doc: DocumentHandle) -> i32 {
    let Some(document) = super::DOCUMENTS.get(doc) else {
        return 0;
    };
    let Ok(guard) = document.lock() else {
        return 0;
    };

    let data = guard.data();
    let has_pdf_header = data.starts_with(b"%PDF-");
    let has_startxref = has_pdf_header && rfind_pattern(data, b"startxref").is_some();
    i32::from(has_startxref)
}

/// Check if document has unsaved digital signature fields.
///
/// Heuristic: for every occurrence of the bytes `/FT /Sig`, the surrounding
/// window (up to 500 bytes before and 1000 bytes after the match) is searched
/// for `/ByteRange`. If any signature field has no `/ByteRange` nearby it is
/// considered unsigned and 1 is returned. Returns 0 when every field has one,
/// when there are no signature fields, or when the document handle cannot be
/// resolved or locked. The context handle is unused.
#[unsafe(no_mangle)]
pub extern "C" fn pdf_has_unsaved_sigs(_ctx: ContextHandle, doc: DocumentHandle) -> i32 {
    let Some(document) = super::DOCUMENTS.get(doc) else {
        return 0;
    };
    let Ok(guard) = document.lock() else {
        return 0;
    };

    let data = guard.data();
    let has_unsigned_field = find_all_patterns(data, b"/FT /Sig")
        .into_iter()
        .any(|pos| {
            let window_start = pos.saturating_sub(500);
            let window_end = (pos + 1000).min(data.len());
            find_pattern(&data[window_start..window_end], b"/ByteRange").is_none()
        });
    i32::from(has_unsigned_field)
}

/// Save document to file.
#[unsafe(no_mangle)]
pub extern "C" fn pdf_save_document(
    _ctx: ContextHandle,
    doc: DocumentHandle,
    filename: *const c_char,
    opts: *const WriteOptions,
) {
    if filename.is_null() {
        return;
    }

    if let Some(document) = super::DOCUMENTS.get(doc) {
        if let Ok(mut guard) = document.lock() {
            let mut data = guard.data().to_vec();

            // Apply write options if provided
            if !opts.is_null() {
                let write_opts = unsafe { &*opts };
                data = apply_write_options(&data, write_opts);
            }

            // SAFETY: Caller guarantees filename is a valid null-terminated C string
            let c_str = unsafe { std::ffi::CStr::from_ptr(filename) };
            if let Ok(path) = c_str.to_str() {
                if std::fs::write(path, &data).is_ok() {
                    // Update the in-memory document with the processed data
                    guard.set_data(data);
                }
            }
        }
    }
}

/// Write the document to the output stream `out`, optionally transformed by
/// `opts`.
///
/// When `opts` is non-null the data is first passed through
/// `apply_write_options`. The in-memory document is not modified. Unknown
/// handles, poisoned locks and write errors are ignored silently. The context
/// handle is unused.
#[unsafe(no_mangle)]
pub extern "C" fn pdf_write_document(
    _ctx: ContextHandle,
    doc: DocumentHandle,
    out: OutputHandle,
    opts: *const WriteOptions,
) {
    let Some(document) = super::DOCUMENTS.get(doc) else {
        return;
    };
    let Ok(guard) = document.lock() else {
        return;
    };

    let current = guard.data().to_vec();
    let payload = if opts.is_null() {
        current
    } else {
        // SAFETY: `opts` is non-null; the caller guarantees it points to a
        // valid `WriteOptions`.
        let write_opts = unsafe { &*opts };
        apply_write_options(&current, write_opts)
    };

    let Some(output_arc) = super::output::OUTPUTS.get(out) else {
        return;
    };
    let Ok(mut output_guard) = output_arc.lock() else {
        return;
    };
    let _ = output_guard.write_data(&payload);
}

/// Save document snapshot.
///
/// Writes a complete copy of the current document state to the specified
/// file. Unlike incremental save, this always writes the full document
/// regardless of what has changed.
#[unsafe(no_mangle)]
pub extern "C" fn pdf_save_snapshot(
    _ctx: ContextHandle,
    doc: DocumentHandle,
    filename: *const c_char,
) {
    if filename.is_null() {
        return;
    }

    if let Some(document) = super::DOCUMENTS.get(doc) {
        if let Ok(guard) = document.lock() {
            // SAFETY: Caller guarantees filename is a valid null-terminated C string
            let c_str = unsafe { std::ffi::CStr::from_ptr(filename) };
            if let Ok(path) = c_str.to_str() {
                let _ = std::fs::write(path, guard.data());
            }
        }
    }
}

/// Write the document's current bytes, unmodified, to the output stream `out`.
///
/// Unknown handles, poisoned locks and write errors are ignored silently. The
/// document lock is held while the output is written. The context handle is
/// unused.
#[unsafe(no_mangle)]
pub extern "C" fn pdf_write_snapshot(_ctx: ContextHandle, doc: DocumentHandle, out: OutputHandle) {
    let Some(document) = super::DOCUMENTS.get(doc) else {
        return;
    };
    let Ok(guard) = document.lock() else {
        return;
    };
    let Some(output_arc) = super::output::OUTPUTS.get(out) else {
        return;
    };
    let Ok(mut output_guard) = output_arc.lock() else {
        return;
    };
    let _ = output_guard.write_data(guard.data());
}

/// Save document journal.
///
/// The journal records changes made to the document since it was opened.
/// This implementation serializes the document's current state as JSON
/// metadata (object count, page count, data hash) to the specified file,
/// allowing later comparison to detect what changed.
#[unsafe(no_mangle)]
pub extern "C" fn pdf_save_journal(
    _ctx: ContextHandle,
    doc: DocumentHandle,
    filename: *const c_char,
) {
    if filename.is_null() {
        return;
    }

    if let Some(document) = super::DOCUMENTS.get(doc) {
        if let Ok(guard) = document.lock() {
            let data = guard.data();
            let journal = build_journal_data(data);

            // SAFETY: Caller guarantees filename is a valid null-terminated C string
            let c_str = unsafe { std::ffi::CStr::from_ptr(filename) };
            if let Ok(path) = c_str.to_str() {
                let _ = std::fs::write(path, journal);
            }
        }
    }
}

/// Write a journal record for the document to the output stream `out`.
///
/// Produces the same JSON summary as `pdf_save_journal`, but sends it to an
/// output handle instead of a file. Unknown handles, poisoned locks and write
/// errors are ignored silently. The context handle is unused.
#[unsafe(no_mangle)]
pub extern "C" fn pdf_write_journal(_ctx: ContextHandle, doc: DocumentHandle, out: OutputHandle) {
    let Some(document) = super::DOCUMENTS.get(doc) else {
        return;
    };
    let Ok(guard) = document.lock() else {
        return;
    };

    let journal = build_journal_data(guard.data());

    let Some(output_arc) = super::output::OUTPUTS.get(out) else {
        return;
    };
    let Ok(mut output_guard) = output_arc.lock() else {
        return;
    };
    let _ = output_guard.write_data(&journal);
}

/// Summarise the document's current state as one line of JSON.
///
/// The record contains the number of objects, an estimated page count
/// (occurrences of `/Type /Page` not followed by `s`), the data size in bytes,
/// a 64-bit hash of the data rendered as 16 hex digits, and the sorted list of
/// object numbers. The hash comes from `DefaultHasher`, whose algorithm the
/// standard library does not promise to keep stable across Rust releases.
fn build_journal_data(data: &[u8]) -> Vec<u8> {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};

    const PAGE_MARKER: &[u8] = b"/Type /Page";

    let object_numbers = collect_all_object_numbers(data);

    let page_total = data
        .windows(PAGE_MARKER.len())
        .enumerate()
        .filter(|&(at, candidate)| {
            candidate == PAGE_MARKER && data.get(at + PAGE_MARKER.len()) != Some(&b's')
        })
        .fold(0i32, |seen, _| seen + 1);

    let digest = {
        let mut hasher = DefaultHasher::new();
        data.hash(&mut hasher);
        hasher.finish()
    };

    let object_list = object_numbers
        .iter()
        .map(|num| num.to_string())
        .collect::<Vec<_>>()
        .join(",");

    format!(
        "{{\"type\":\"pdf_journal\",\"version\":1,\"object_count\":{},\"page_count\":{},\"data_size\":{},\"data_hash\":\"{:016x}\",\"objects\":[{}]}}\n",
        object_numbers.len(),
        page_total,
        data.len(),
        digest,
        object_list
    )
    .into_bytes()
}

// ============================================================================
// FFI Functions - Clean Operations
// ============================================================================

/// Clean a PDF file.
///
/// Reads `infile`, runs it through `apply_write_options`, optionally strips
/// the structure tree and writes the result to `outfile`.
///
/// * With `opts` set, the write options embedded in it are used and the
///   structure tree is removed only when `opts.structure` is
///   `CleanStructureOption::Drop`.
/// * With `opts` null, the defaults are garbage collection, compression and
///   sanitization (each at level 1), and the structure tree is removed.
///
/// The password and retain-list arguments are currently unused. The function
/// returns without writing anything when a path is null or not valid UTF-8,
/// when the input cannot be read, or when it does not start with `%PDF-`.
/// A failed output write is ignored.
#[unsafe(no_mangle)]
pub extern "C" fn pdf_clean_file(
    _ctx: ContextHandle,
    infile: *const c_char,
    outfile: *const c_char,
    _password: *const c_char,
    opts: *const CleanOptions,
    _retainlen: i32,
    _retainlist: *const *const c_char,
) {
    if infile.is_null() || outfile.is_null() {
        return;
    }

    // SAFETY: both pointers were checked to be non-null above; the caller
    // guarantees they point to valid null-terminated C strings.
    let Ok(in_path) = (unsafe { CStr::from_ptr(infile) }).to_str() else {
        return;
    };
    let Ok(out_path) = (unsafe { CStr::from_ptr(outfile) }).to_str() else {
        return;
    };

    let Ok(data) = std::fs::read(in_path) else {
        return;
    };
    if !data.starts_with(b"%PDF-") {
        return;
    }

    // SAFETY: when non-null, the caller guarantees `opts` points to a valid
    // `CleanOptions` that stays alive for the duration of this call.
    let clean_opts = unsafe { opts.as_ref() };

    let write_opts = match clean_opts {
        Some(requested) => requested.write.clone(),
        None => {
            // Default clean: gc + compress + sanitize
            let mut defaults = WriteOptions::new();
            defaults.do_garbage = 1;
            defaults.do_compress = 1;
            defaults.do_sanitize = 1;
            defaults
        }
    };

    let processed = apply_write_options(&data, &write_opts);

    // Without explicit options the structure tree is dropped.
    let drop_structure = match clean_opts {
        Some(requested) => requested.structure == CleanStructureOption::Drop,
        None => true,
    };
    let final_data = if drop_structure {
        remove_structure_tree(&processed)
    } else {
        processed
    };

    let _ = std::fs::write(out_path, final_data);
}

/// Remove the `/StructTreeRoot` and `/MarkInfo` entries from the catalog.
///
/// Each entry is deleted together with its value. An inline dictionary value
/// (for example `/MarkInfo << /Marked true >>`) is removed as a whole,
/// including its closing `>>`, so the surrounding catalog dictionary stays
/// balanced. Any other value (typically an indirect reference) is taken to
/// extend up to the next `/` or `>>`.
///
/// Returns a copy of `data`; when the catalog or a key cannot be located, or
/// an inline dictionary value is unbalanced, that entry is left untouched.
/// Objects that were referenced by the removed entries are not deleted here.
fn remove_structure_tree(data: &[u8]) -> Vec<u8> {
    /// Returns the index just past the `>>` that closes the dictionary
    /// opening with `<<` at `open`, or `None` when it is never closed.
    /// Hex strings (`<...>`) are stepped over so their `>` is not mistaken
    /// for half of a `>>`.
    fn nested_dict_end(data: &[u8], open: usize) -> Option<usize> {
        let mut depth = 0usize;
        let mut i = open;
        while i < data.len() {
            let next = data.get(i + 1).copied();
            match (data[i], next) {
                (b'<', Some(b'<')) => {
                    depth += 1;
                    i += 2;
                }
                (b'>', Some(b'>')) => {
                    depth = depth.saturating_sub(1);
                    i += 2;
                    if depth == 0 {
                        return Some(i);
                    }
                }
                (b'<', _) => {
                    let close = data[i..].iter().position(|&b| b == b'>')?;
                    i += close + 1;
                }
                _ => i += 1,
            }
        }
        None
    }

    /// Deletes `key` and its value from the catalog dictionary of `pdf`,
    /// returning `pdf` unchanged when nothing can be removed safely.
    fn without_catalog_entry(mut pdf: Vec<u8>, key: &[u8]) -> Vec<u8> {
        let Some(root_num) = find_root_obj_num(&pdf) else {
            return pdf;
        };
        let Some((ds, de)) = find_object_dict(&pdf, root_num) else {
            return pdf;
        };
        // `find_dict_key` reports the position just past the key name.
        let Some(key_pos) = find_dict_key(&pdf, ds, de, key) else {
            return pdf;
        };
        let Some(entry_start) = key_pos.checked_sub(key.len()) else {
            return pdf;
        };

        let limit = de.min(pdf.len());
        let mut value_start = key_pos;
        while value_start < limit && pdf[value_start].is_ascii_whitespace() {
            value_start += 1;
        }

        let entry_end = if value_start < limit && pdf[value_start..].starts_with(b"<<") {
            // Inline dictionary: remove it as a unit so no stray `>>` is
            // left behind to close the catalog prematurely.
            match nested_dict_end(&pdf, value_start) {
                Some(end) => end,
                None => return pdf,
            }
        } else {
            // Reference or scalar: the value runs up to the next key or to
            // the end of the enclosing dictionary.
            let mut end = key_pos;
            while end < limit && pdf[end] != b'/' && !pdf[end..].starts_with(b">>") {
                end += 1;
            }
            end
        };

        if entry_end > entry_start && entry_start < pdf.len() {
            pdf.drain(entry_start..entry_end.min(pdf.len()));
        }
        pdf
    }

    // The catalog is looked up again for the second key because removing the
    // first entry shifts every later offset.
    let without_struct_tree = without_catalog_entry(data.to_vec(), b"/StructTreeRoot");
    without_catalog_entry(without_struct_tree, b"/MarkInfo")
}

/// Apply write options to PDF data and return the processed bytes.
///
/// The enabled stages run in a fixed order, each one consuming the output of
/// the previous stage:
///
/// 1. garbage collection when `do_garbage >= 1` (the level is passed on),
/// 2. stream decompression when `do_decompress != 0`,
/// 3. stream compression when `do_compress >= 1` (the value is passed on as
///    the compression method),
/// 4. sanitization when `do_sanitize != 0`.
///
/// With every stage disabled the result is a plain copy of `data`.
fn apply_write_options(data: &[u8], opts: &WriteOptions) -> Vec<u8> {
    let collected = if opts.do_garbage >= 1 {
        garbage_collect_data(&data.to_vec(), opts.do_garbage)
    } else {
        data.to_vec()
    };

    let inflated = if opts.do_decompress != 0 {
        decompress_streams_data(&collected)
    } else {
        collected
    };

    let deflated = if opts.do_compress >= 1 {
        compress_streams_data(&inflated, opts.do_compress)
    } else {
        inflated
    };

    if opts.do_sanitize != 0 {
        sanitize_content_streams(&deflated)
    } else {
        deflated
    }
}

/// Rearrange pages in document.
///
/// Reorders the pages of a document according to the supplied page number
/// array. The `pages` array contains `count` 0-based page indices in the
/// desired output order. Pages may be duplicated or omitted.
///
/// The page tree root (the object referenced by the catalog's `/Pages` key)
/// gets its `/Kids` array replaced by references to the selected page objects
/// and its `/Count` set to the number of entries. An existing `/Count` entry
/// is replaced regardless of whether it appears before or after `/Kids`, so
/// the dictionary never ends up with two `/Count` keys.
///
/// The document is left unchanged when:
/// * `count <= 0` or `pages` is null,
/// * the handle cannot be resolved or locked, or the data does not start
///   with `%PDF-`,
/// * no page objects are found, an index is out of range, or a page object
///   cannot be located,
/// * the catalog, its `/Pages` reference, the page tree root or its `/Kids`
///   key cannot be found, or `/Kids` is not a direct `[...]` array.
///
/// Pages are identified by scanning for the literal text `/Type /Page`, so
/// the page numbering follows the helpers used elsewhere in this module.
/// The `_structure` argument is currently unused.
#[unsafe(no_mangle)]
pub extern "C" fn pdf_rearrange_pages(
    _ctx: ContextHandle,
    doc: DocumentHandle,
    count: i32,
    pages: *const i32,
    _structure: CleanStructureOption,
) {
    if count <= 0 || pages.is_null() {
        return;
    }

    // SAFETY: `pages` is non-null and `count` is positive (both checked
    // above); the caller guarantees the array holds `count` readable values.
    let page_order = unsafe { std::slice::from_raw_parts(pages, count as usize) };

    let Some(document) = super::DOCUMENTS.get(doc) else {
        return;
    };
    let Ok(mut guard) = document.lock() else {
        return;
    };

    let data = guard.data().to_vec();
    if !data.starts_with(b"%PDF-") {
        return;
    }

    // Count page objects: every "/Type /Page" that is not "/Type /Pages".
    let marker = b"/Type /Page";
    let marker_hits = data
        .windows(marker.len())
        .enumerate()
        .filter(|&(at, window)| window == marker && data.get(at + marker.len()) != Some(&b's'))
        .count();
    let Ok(existing_pages) = i32::try_from(marker_hits) else {
        return;
    };
    if existing_pages == 0 {
        return;
    }

    // Validate all requested page indices
    if page_order
        .iter()
        .any(|&pg| pg < 0 || pg >= existing_pages)
    {
        return;
    }

    // Resolve the object number of every existing page; give up unless all
    // of them can be located.
    let located: Option<Vec<i32>> = (0..existing_pages)
        .map(|pg_idx| {
            let pos = find_page_obj_position(&data, pg_idx)?;
            // Extract the object number from "N 0 obj"
            let obj_num = extract_int_after(&data, pos)?;
            find_object_range(&data, obj_num).map(|_| obj_num)
        })
        .collect();
    let Some(page_objs) = located else {
        return;
    };

    // Object numbers for the /Kids array in the new order
    let new_kids: Vec<i32> = page_order
        .iter()
        .map(|&idx| page_objs[idx as usize])
        .collect();

    // Follow catalog -> /Pages to the page tree root.
    let Some(pages_num) = find_root_obj_num(&data)
        .and_then(|root_num| find_object_dict(&data, root_num))
        .and_then(|(ds, de)| find_dict_key(&data, ds, de, b"/Pages"))
        .and_then(|pages_key| resolve_indirect_ref(&data, pages_key))
    else {
        return;
    };

    let mut new_data = data;

    if let Some((pds, pde)) = find_object_dict(&new_data, pages_num) {
        // `find_dict_key` reports the position just past the key name.
        if let Some(kids_pos) = find_dict_key(&new_data, pds, pde, b"/Kids") {
            let dict_end = pde.min(new_data.len());
            let kids_key_start = kids_pos - b"/Kids".len();

            // Only a direct array can be rewritten in place. Anything else
            // (for example an indirect reference) is left alone instead of
            // being overwritten up to some unrelated `]`.
            let value_at = (kids_pos..dict_end).find(|&i| !new_data[i].is_ascii_whitespace());
            let array_end = match value_at {
                Some(open) if new_data[open] == b'[' => new_data[open..dict_end]
                    .iter()
                    .position(|&b| b == b']')
                    .map(|rel| open + rel + 1),
                _ => None,
            };

            if let Some(bracket_end) = array_end {
                let kids_str = new_kids
                    .iter()
                    .map(|n| format!("{} 0 R", n))
                    .collect::<Vec<_>>()
                    .join(" ");
                let new_kids_entry = format!("/Kids [{}] /Count {}", kids_str, new_kids.len());

                // Locate the old `/Count N` anywhere in the dictionary.
                let old_count =
                    find_dict_key(&new_data, pds, dict_end, b"/Count").map(|count_pos| {
                        let start = count_pos - b"/Count".len();
                        let mut end = count_pos;
                        while end < dict_end && new_data[end].is_ascii_whitespace() {
                            end += 1;
                        }
                        while end < dict_end && new_data[end].is_ascii_digit() {
                            end += 1;
                        }
                        (start, end)
                    });

                // Always edit the later byte range first so the offsets of
                // the earlier range stay valid.
                match old_count {
                    Some((count_start, count_end)) if count_start >= bracket_end => {
                        new_data.drain(count_start..count_end);
                        new_data.splice(kids_key_start..bracket_end, new_kids_entry.bytes());
                    }
                    Some((count_start, count_end)) if count_end <= kids_key_start => {
                        new_data.splice(kids_key_start..bracket_end, new_kids_entry.bytes());
                        new_data.drain(count_start..count_end);
                    }
                    _ => {
                        new_data.splice(kids_key_start..bracket_end, new_kids_entry.bytes());
                    }
                }
            }
        }
    }

    guard.set_data(new_data);
}

/// Vectorize pages in document.
///
/// Intentional no-op: every argument is ignored and the document is left
/// untouched.
///
/// Vectorization turns Type 3 font glyphs into path operations, which needs a
/// content stream interpreter and a glyph renderer. Neither is available at
/// the raw byte level this module works on.
#[unsafe(no_mangle)]
pub extern "C" fn pdf_vectorize_pages(
    _ctx: ContextHandle,
    _doc: DocumentHandle,
    _count: i32,
    _pages: *const i32,
    _vectorize: CleanVectorizeOption,
) {
    // Nothing to do here by design. Converting Type 3 glyphs into paths is a
    // rendering-level task that is outside the scope of this module; callers
    // that need vectorized output should go through the rendering pipeline.
}

// ============================================================================
// FFI Functions - Object Operations
// ============================================================================

/// Clean a PDF object (remove unused/redundant dictionary entries).
///
/// Works on the object behind the `obj` handle. Only dictionary objects are
/// modified; two kinds of entries are dropped from them:
///
/// * entries whose value is null, and
/// * `Rotate`, `StructParents` and `Tabs` entries whose value is the
///   integer 0, which is treated as the default for these keys.
///
/// The order of the remaining entries is preserved. Unknown handles,
/// non-dictionary objects and a poisoned lock leave everything unchanged.
#[unsafe(no_mangle)]
pub extern "C" fn pdf_clean_object_entries(_ctx: ContextHandle, obj: Handle) {
    use crate::ffi::pdf_object::types::{PDF_OBJECTS, PdfObjType};

    // Keys for which an explicit integer 0 carries no information.
    const ZERO_IS_DEFAULT: &[&str] = &["Rotate", "StructParents", "Tabs"];

    let Some(obj_arc) = PDF_OBJECTS.get(obj) else {
        return;
    };
    let Ok(mut guard) = obj_arc.lock() else {
        return;
    };

    if let PdfObjType::Dict(ref mut entries) = guard.obj_type {
        entries.retain(|(key, value)| match value.obj_type {
            PdfObjType::Null => false,
            PdfObjType::Int(0) => !ZERO_IS_DEFAULT.contains(&key.as_str()),
            _ => true,
        });
    }
}

// ============================================================================
// FFI Functions - Optimization Helpers
// ============================================================================

/// Optimize PDF (convenience function).
///
/// Saves the document to `filename` through `pdf_save_document`, using the
/// write options of the `CleanOptions::optimize()` preset.
#[unsafe(no_mangle)]
pub extern "C" fn pdf_optimize(ctx: ContextHandle, doc: DocumentHandle, filename: *const c_char) {
    let preset = CleanOptions::optimize();
    let write_opts = &preset.write;
    pdf_save_document(ctx, doc, filename, write_opts);
}

/// Linearize PDF (convenience function).
///
/// Saves the document to `filename` through `pdf_save_document`, using the
/// write options of the `CleanOptions::linearize()` preset.
#[unsafe(no_mangle)]
pub extern "C" fn pdf_linearize(ctx: ContextHandle, doc: DocumentHandle, filename: *const c_char) {
    let preset = CleanOptions::linearize();
    let write_opts = &preset.write;
    pdf_save_document(ctx, doc, filename, write_opts);
}

/// Compress all streams in document.
///
/// Runs `compress_streams_data` over the document's bytes and stores the
/// result back into the document. A `method` of 2 selects Brotli; any other
/// positive value selects zlib. Values below 1 mean "no compression" and
/// return immediately.
///
/// The document is left untouched when its handle cannot be resolved or
/// locked, or when its data does not start with `%PDF-`.
#[unsafe(no_mangle)]
pub extern "C" fn pdf_compress_streams(_ctx: ContextHandle, doc: DocumentHandle, method: i32) {
    // 0 (or anything lower) = no compression
    if method < 1 {
        return;
    }

    let Some(document) = super::DOCUMENTS.get(doc) else {
        return;
    };
    let Ok(mut guard) = document.lock() else {
        return;
    };

    let original = guard.data().to_vec();
    if !original.starts_with(b"%PDF-") {
        return;
    }

    guard.set_data(compress_streams_data(&original, method));
}

/// Internal: compress unfiltered streams in raw PDF data.
///
/// Visits every object reported by `collect_all_object_numbers`. An object is
/// processed when it has a stream and no filter yet. Its stream bytes are
/// compressed with Brotli (`method == 2`, quality 6) or zlib (any other
/// value, default level). The compressed form replaces the original only when
/// it is strictly smaller.
///
/// After the stream bytes have been swapped, the dictionary update is handed
/// to `insert_filter_and_update_length` together with the entry text
/// ` /Filter /<Name> /Length <n>`. Should that helper return `None`, the entry
/// text is inserted directly after the dictionary's opening `<<`.
fn compress_streams_data(data: &[u8], method: i32) -> Vec<u8> {
    let objects = collect_all_object_numbers(data);
    let mut pdf = data.to_vec();

    for obj_num in objects.iter().copied() {
        if !object_has_stream(&pdf, obj_num) || object_has_filter(&pdf, obj_num) {
            continue;
        }
        let Some((stream_data, stream_start, stream_end)) = extract_stream_data(&pdf, obj_num)
        else {
            continue;
        };

        let compressed = if method == 2 {
            // Brotli compression
            let mut output = Vec::new();
            let params = brotli::enc::BrotliEncoderParams {
                quality: 6,
                ..Default::default()
            };
            let mut encoder = brotli::CompressorWriter::with_params(&mut output, 4096, &params);
            if encoder.write_all(&stream_data).is_ok() {
                // Dropping the writer finishes the stream and releases `output`.
                drop(encoder);
                Some((output, "/Filter /BrotliDecode"))
            } else {
                None
            }
        } else {
            // Default: zlib compression
            let mut encoder =
                flate2::write::ZlibEncoder::new(Vec::new(), flate2::Compression::default());
            encoder
                .write_all(&stream_data)
                .ok()
                .and_then(|()| encoder.finish().ok())
                .map(|deflated| (deflated, "/Filter /FlateDecode"))
        };

        let Some((compressed_data, filter_name)) = compressed else {
            continue;
        };
        // Only use compression if it actually reduces size
        if compressed_data.len() >= stream_data.len() {
            continue;
        }

        pdf.splice(stream_start..stream_end, compressed_data.iter().copied());

        // The dictionary precedes the stream, but it is looked up again
        // before being edited.
        let Some((dict_start, _)) = find_object_dict(&pdf, obj_num) else {
            continue;
        };
        let filter_entry = format!(" {} /Length {}", filter_name, compressed_data.len());

        let updated = insert_filter_and_update_length(&pdf, obj_num, filter_entry.as_bytes());
        match updated {
            Some(rewritten) => pdf = rewritten,
            None => {
                // Fallback: simple insertion right after "<<"
                let insert_pos = dict_start + 2;
                pdf.splice(insert_pos..insert_pos, filter_entry.bytes());
            }
        }
    }

    pdf
}

/// Insert a /Filter entry and update /Length in an object's dictionary.
///
/// `filter_bytes` is dictionary text such as ` /Filter /FlateDecode`. It may
/// additionally end with a `/Length N` entry (as produced by
/// `compress_streams_data`); in that case `N` is used as the new stream
/// length. Otherwise the length is measured from the bytes between the
/// object's `stream` and `endstream` keywords.
///
/// The returned copy of `data` differs from the input as follows:
/// * the filter part of `filter_bytes` is inserted directly after the
///   dictionary's opening `<<`, unless the dictionary already has a
///   `/Filter` key;
/// * an existing numeric `/Length` value is replaced by the new length (an
///   indirect `N G R` value is replaced by a direct number, since patching
///   only its object number would point at an unrelated object);
/// * when the dictionary has no `/Length` key, ` /Length <n>` is inserted
///   together with the filter entry.
///
/// Returns `None` when the object's dictionary or its stream keywords cannot
/// be located.
fn insert_filter_and_update_length(
    data: &[u8],
    obj_num: i32,
    filter_bytes: &[u8],
) -> Option<Vec<u8>> {
    /// Advances `i` while it is below `limit` and `pred` holds.
    fn advance(data: &[u8], mut i: usize, limit: usize, pred: fn(&u8) -> bool) -> usize {
        while i < limit && pred(&data[i]) {
            i += 1;
        }
        i
    }

    /// Byte span of the numeric `/Length` value found at or after `from`.
    /// The span covers the whole `N G R` when the value is an indirect
    /// reference. `None` when the value does not start with a digit.
    fn length_value_span(data: &[u8], from: usize, limit: usize) -> Option<(usize, usize)> {
        let limit = limit.min(data.len());
        let start = advance(data, from, limit, u8::is_ascii_whitespace);
        let end = advance(data, start, limit, u8::is_ascii_digit);
        if end == start {
            return None;
        }
        let gen_start = advance(data, end, limit, u8::is_ascii_whitespace);
        let gen_end = advance(data, gen_start, limit, u8::is_ascii_digit);
        let r_at = advance(data, gen_end, limit, u8::is_ascii_whitespace);
        let is_reference = gen_start > end
            && gen_end > gen_start
            && r_at > gen_end
            && r_at < limit
            && data[r_at] == b'R';
        Some((start, if is_reference { r_at + 1 } else { end }))
    }

    // Find the object dictionary; `ds` is the position of its opening "<<".
    let (ds, de) = find_object_dict(data, obj_num)?;

    // Find the stream to measure its actual length
    let pattern = format!("{} 0 obj", obj_num);
    let obj_pos = find_pattern(data, pattern.as_bytes())?;
    let stream_rel = find_pattern(&data[obj_pos..], b"stream")?;
    let mut stream_start = obj_pos + stream_rel + b"stream".len();
    // Skip the end-of-line marker that follows the `stream` keyword.
    if data.get(stream_start) == Some(&b'\r') {
        stream_start += 1;
    }
    if data.get(stream_start) == Some(&b'\n') {
        stream_start += 1;
    }
    let endstream_rel = find_pattern(&data[stream_start..], b"endstream")?;
    let mut stream_end = stream_start + endstream_rel;
    // Drop the end-of-line marker that precedes `endstream`.
    if stream_end > stream_start && data[stream_end - 1] == b'\n' {
        stream_end -= 1;
    }
    if stream_end > stream_start && data[stream_end - 1] == b'\r' {
        stream_end -= 1;
    }
    let measured_length = stream_end - stream_start;

    // Separate the filter text from an optional trailing "/Length N". The
    // supplied number is exact, whereas the measurement above can be off by
    // one when the stream data itself ends in a CR or LF byte.
    let (filter_part, supplied_length) = match find_pattern(filter_bytes, b"/Length") {
        Some(at) => {
            let digits: String = filter_bytes[at + b"/Length".len()..]
                .iter()
                .skip_while(|b| b.is_ascii_whitespace())
                .take_while(|b| b.is_ascii_digit())
                .map(|&b| char::from(b))
                .collect();
            (&filter_bytes[..at], digits.parse::<usize>().ok())
        }
        None => (filter_bytes, None),
    };
    let new_length = supplied_length.unwrap_or(measured_length);

    // Text that goes right after "<<".
    let mut insertion: Vec<u8> = Vec::new();
    if find_dict_key(data, ds, de, b"/Filter").is_none() {
        let trimmed_len = filter_part
            .iter()
            .rposition(|b| !b.is_ascii_whitespace())
            .map_or(0, |last| last + 1);
        insertion.extend_from_slice(&filter_part[..trimmed_len]);
    }

    let mut result = data.to_vec();

    // The /Length value sits behind the insertion point, so it is patched
    // first; the insertion position is not affected by that edit.
    match find_dict_key(&result, ds, de, b"/Length") {
        Some(len_pos) => {
            if let Some((val_start, val_end)) = length_value_span(&result, len_pos, de) {
                result.splice(val_start..val_end, new_length.to_string().bytes());
            }
        }
        None => insertion.extend_from_slice(format!(" /Length {}", new_length).as_bytes()),
    }

    let insert_at = (ds + 2).min(result.len());
    result.splice(insert_at..insert_at, insertion);

    Some(result)
}

/// Decompress all streams in document.
///
/// Runs `decompress_streams_data` over the document's bytes, which inflates
/// FlateDecode streams, and stores the result back into the document.
///
/// The document is left untouched when its handle cannot be resolved or
/// locked, or when its data does not start with `%PDF-`.
#[unsafe(no_mangle)]
pub extern "C" fn pdf_decompress_streams(_ctx: ContextHandle, doc: DocumentHandle) {
    let Some(document) = super::DOCUMENTS.get(doc) else {
        return;
    };
    let Ok(mut guard) = document.lock() else {
        return;
    };

    let original = guard.data().to_vec();
    if !original.starts_with(b"%PDF-") {
        return;
    }

    guard.set_data(decompress_streams_data(&original));
}

/// Internal: decompress FlateDecode streams in raw PDF data.
///
/// A stream is rewritten only when all of the following hold:
/// * its `/Filter` value is exactly `/FlateDecode`, either as a bare name or
///   as a single-element array `[/FlateDecode]`;
/// * its dictionary has no `/DecodeParms` entry (inflating such a stream
///   would leave predictor-encoded bytes that no longer carry a filter);
/// * the stream bytes inflate without error.
///
/// For a rewritten stream the data is replaced by the inflated bytes, the
/// whole `/Filter` entry is removed and a numeric `/Length` value is set to
/// the new size (an indirect `N G R` length is replaced by a direct number).
/// Every other object is copied through unchanged.
fn decompress_streams_data(data: &[u8]) -> Vec<u8> {
    /// Advances `i` while it is below `limit` and `pred` holds.
    fn advance(data: &[u8], mut i: usize, limit: usize, pred: fn(&u8) -> bool) -> usize {
        while i < limit && pred(&data[i]) {
            i += 1;
        }
        i
    }

    /// Returns the index just past a `/Filter` value that names exactly
    /// `/FlateDecode` (bare or wrapped in a one-element array). `after_key`
    /// is the position just past the `/Filter` key. Any other value yields
    /// `None`.
    fn flate_filter_end(data: &[u8], after_key: usize, limit: usize) -> Option<usize> {
        const NAME: &[u8] = b"/FlateDecode";
        let limit = limit.min(data.len());

        let mut i = advance(data, after_key, limit, u8::is_ascii_whitespace);
        let in_array = i < limit && data[i] == b'[';
        if in_array {
            i = advance(data, i + 1, limit, u8::is_ascii_whitespace);
        }
        if !data.get(i..limit)?.starts_with(NAME) {
            return None;
        }
        i += NAME.len();
        // Reject longer names that merely start with "FlateDecode".
        if i < limit && data[i].is_ascii_alphanumeric() {
            return None;
        }
        if in_array {
            i = advance(data, i, limit, u8::is_ascii_whitespace);
            if i >= limit || data[i] != b']' {
                return None;
            }
            i += 1;
        }
        Some(i)
    }

    /// Byte span of the numeric `/Length` value found at or after `from`.
    /// The span covers the whole `N G R` when the value is an indirect
    /// reference. `None` when the value does not start with a digit.
    fn length_value_span(data: &[u8], from: usize, limit: usize) -> Option<(usize, usize)> {
        let limit = limit.min(data.len());
        let start = advance(data, from, limit, u8::is_ascii_whitespace);
        let end = advance(data, start, limit, u8::is_ascii_digit);
        if end == start {
            return None;
        }
        let gen_start = advance(data, end, limit, u8::is_ascii_whitespace);
        let gen_end = advance(data, gen_start, limit, u8::is_ascii_digit);
        let r_at = advance(data, gen_end, limit, u8::is_ascii_whitespace);
        let is_reference = gen_start > end
            && gen_end > gen_start
            && r_at > gen_end
            && r_at < limit
            && data[r_at] == b'R';
        Some((start, if is_reference { r_at + 1 } else { end }))
    }

    let objects = collect_all_object_numbers(data);
    let mut result = data.to_vec();

    for &obj_num in &objects {
        let Some((ds, de)) = find_object_dict(&result, obj_num) else {
            continue;
        };

        // Check if this object has /Filter /FlateDecode
        if find_dict_key(&result, ds, de, b"/FlateDecode").is_none() {
            continue;
        }

        // Decide up front whether the dictionary can be rewritten cleanly, so
        // the stream is never replaced while its filter entry stays behind.
        let plain_flate = find_dict_key(&result, ds, de, b"/Filter")
            .and_then(|filter_pos| flate_filter_end(&result, filter_pos, de))
            .is_some();
        if !plain_flate || find_dict_key(&result, ds, de, b"/DecodeParms").is_some() {
            continue;
        }

        let Some((stream_data, stream_start, stream_end)) = extract_stream_data(&result, obj_num)
        else {
            continue;
        };

        // Try to decompress; a stream that fails to inflate stays as it is.
        let mut decoder = flate2::read::ZlibDecoder::new(&stream_data[..]);
        let mut decompressed = Vec::new();
        if decoder.read_to_end(&mut decompressed).is_err() {
            continue;
        }

        // Replace stream data
        result.splice(stream_start..stream_end, decompressed.iter().copied());

        // Remove the /Filter entry; the dictionary is looked up again
        // before each edit.
        let Some((ds2, de2)) = find_object_dict(&result, obj_num) else {
            continue;
        };
        if let Some(filter_pos) = find_dict_key(&result, ds2, de2, b"/Filter") {
            if let Some(filter_end) = flate_filter_end(&result, filter_pos, de2) {
                let filter_start = filter_pos - b"/Filter".len();
                result.drain(filter_start..filter_end);
            }
        }

        // Update /Length (offsets shifted again after the removal above)
        if let Some((ds3, de3)) = find_object_dict(&result, obj_num) {
            if let Some(len_pos) = find_dict_key(&result, ds3, de3, b"/Length") {
                if let Some((val_start, val_end)) = length_value_span(&result, len_pos, de3) {
                    result.splice(val_start..val_end, decompressed.len().to_string().bytes());
                }
            }
        }
    }

    result
}

/// Sanitize PDF data by removing JavaScript and additional-action entries.
///
/// Two passes run over the raw bytes: the first removes `/JS` entries, the
/// second removes `/AA` entries from what is left. An entry is removed only
/// when all of the following hold:
///
/// * the key is preceded by whitespace or `<` (dictionary context),
/// * the key is a complete name, i.e. not a prefix of a longer name such as
///   `/JSON` or `/AAPL:Keywords`,
/// * its value can be recognized: a literal string `(...)`, a hex string
///   `<...>`, an indirect reference `N G R` or an inline dictionary
///   `<< ... >>`.
///
/// The key is always removed together with its value, so no dangling value is
/// left behind in the surrounding dictionary. Entries whose value cannot be
/// recognized (or is unterminated) are left untouched. Whitespace following a
/// removed entry is kept.
///
/// NOTE(review): the scan is purely byte based and is not aware of stream
/// boundaries, so a matching byte sequence inside binary stream data would be
/// treated like a dictionary entry.
fn sanitize_content_streams(data: &[u8]) -> Vec<u8> {
    /// True for bytes that can continue a PDF name token.
    fn is_name_char(b: u8) -> bool {
        !b.is_ascii_whitespace()
            && !matches!(
                b,
                b'(' | b')' | b'<' | b'>' | b'[' | b']' | b'{' | b'}' | b'/' | b'%'
            )
    }

    /// Advances `i` while the byte at `i` satisfies `pred`.
    fn skip_while(data: &[u8], mut i: usize, pred: fn(&u8) -> bool) -> usize {
        while data.get(i).is_some_and(pred) {
            i += 1;
        }
        i
    }

    /// End of the literal string opening with `(` at `open`, honoring nested
    /// parentheses and backslash escapes.
    fn literal_string_end(data: &[u8], open: usize) -> Option<usize> {
        let mut depth = 0usize;
        let mut i = open;
        while i < data.len() {
            match data[i] {
                b'\\' => i += 1, // skip escaped char
                b'(' => depth += 1,
                b')' => {
                    depth = depth.saturating_sub(1);
                    if depth == 0 {
                        return Some(i + 1);
                    }
                }
                _ => {}
            }
            i += 1;
        }
        None
    }

    /// End of the hex string opening with `<` at `open`.
    fn hex_string_end(data: &[u8], open: usize) -> Option<usize> {
        data.get(open..)?
            .iter()
            .position(|&b| b == b'>')
            .map(|rel| open + rel + 1)
    }

    /// End of the dictionary opening with `<<` at `open`, including nested
    /// dictionaries. Strings are stepped over so their content cannot be
    /// mistaken for dictionary delimiters.
    fn dict_end(data: &[u8], open: usize) -> Option<usize> {
        let mut depth = 0usize;
        let mut i = open;
        while i < data.len() {
            let next = data.get(i + 1).copied();
            match (data[i], next) {
                (b'<', Some(b'<')) => {
                    depth += 1;
                    i += 2;
                }
                (b'>', Some(b'>')) => {
                    depth = depth.saturating_sub(1);
                    i += 2;
                    if depth == 0 {
                        return Some(i);
                    }
                }
                (b'(', _) => i = literal_string_end(data, i)?,
                (b'<', _) => i = hex_string_end(data, i)?,
                _ => i += 1,
            }
        }
        None
    }

    /// End of an indirect reference `N G R` starting at `start`.
    fn indirect_ref_end(data: &[u8], start: usize) -> Option<usize> {
        let num_end = skip_while(data, start, u8::is_ascii_digit);
        let gen_start = skip_while(data, num_end, u8::is_ascii_whitespace);
        let gen_end = skip_while(data, gen_start, u8::is_ascii_digit);
        let r_at = skip_while(data, gen_end, u8::is_ascii_whitespace);
        let well_formed = num_end > start
            && gen_start > num_end
            && gen_end > gen_start
            && r_at > gen_end
            && data.get(r_at) == Some(&b'R')
            && !data.get(r_at + 1).is_some_and(|&b| is_name_char(b));
        well_formed.then_some(r_at + 1)
    }

    /// End of the value starting at `start`, for the value kinds that can be
    /// removed safely.
    fn value_end(data: &[u8], start: usize) -> Option<usize> {
        match (*data.get(start)?, data.get(start + 1).copied()) {
            (b'(', _) => literal_string_end(data, start),
            (b'<', Some(b'<')) => dict_end(data, start),
            (b'<', _) => hex_string_end(data, start),
            (b'0'..=b'9', _) => indirect_ref_end(data, start),
            _ => None,
        }
    }

    /// Copies `data`, leaving out every removable `key` entry.
    fn strip_entries(data: &[u8], key: &[u8]) -> Vec<u8> {
        let mut out = Vec::with_capacity(data.len());
        let mut i = 0;
        while i < data.len() {
            let in_dict_context =
                i > 0 && (data[i - 1].is_ascii_whitespace() || data[i - 1] == b'<');
            if in_dict_context
                && data[i..].starts_with(key)
                && !data.get(i + key.len()).is_some_and(|&b| is_name_char(b))
            {
                let value_start = skip_while(data, i + key.len(), u8::is_ascii_whitespace);
                if let Some(end) = value_end(data, value_start) {
                    // Drop key, separating whitespace and value in one go.
                    i = end;
                    continue;
                }
            }
            out.push(data[i]);
            i += 1;
        }
        out
    }

    // Remove /JS (JavaScript) entries first, then /AA (Additional Actions).
    let without_js = strip_entries(data, b"/JS");
    strip_entries(&without_js, b"/AA")
}

/// Locate the byte offset at which a new object can be spliced in ahead of the
/// cross-reference section.
///
/// Returns the position of the last standalone `xref` keyword. The `xref`
/// that forms the tail of `startxref` is not a table keyword and is skipped;
/// when no standalone keyword exists, the position of the last `startxref`
/// is returned instead. `None` means neither keyword is present.
fn find_objstm_insert_pos(data: &[u8]) -> Option<usize> {
    const KEYWORD: &[u8] = b"xref";
    const PREFIX: &[u8] = b"start";

    let mut fallback = None;
    let mut search_end = data.len();
    while let Some(pos) = data[..search_end]
        .windows(KEYWORD.len())
        .rposition(|w| w == KEYWORD)
    {
        if !data[..pos].ends_with(PREFIX) {
            return Some(pos);
        }
        // This hit is the tail of "startxref": remember where that keyword
        // begins and keep looking further towards the start of the file.
        if fallback.is_none() {
            fallback = Some(pos - PREFIX.len());
        }
        search_end = pos;
    }
    fallback
}

/// Create object streams.
///
/// Object streams pack multiple non-stream objects into a single stream
/// object. This function collects every non-stream object smaller than 4 KB,
/// concatenates their values behind an `objnum offset` header, compresses the
/// result with zlib and splices the resulting `/ObjStm` object in front of
/// the cross-reference section.
///
/// Nothing is changed when the handle is unknown, the document lock is
/// poisoned, the data does not start with `%PDF-`, fewer than three objects
/// qualify, compression fails, or no `xref`/`startxref` keyword is found.
///
/// NOTE(review): the packed objects are left in place and the `startxref`
/// offset is not recomputed here - confirm that callers rebuild the
/// cross-reference data afterwards.
#[unsafe(no_mangle)]
pub extern "C" fn pdf_create_object_streams(_ctx: ContextHandle, doc: DocumentHandle) {
    let Some(document) = super::DOCUMENTS.get(doc) else {
        return;
    };
    let Ok(mut guard) = document.lock() else {
        return;
    };

    let mut data = guard.data().to_vec();
    if !data.starts_with(b"%PDF-") {
        return;
    }

    let objects = collect_all_object_numbers(&data);

    // Collect small non-stream objects that can be packed.
    let mut packable: Vec<(i32, Vec<u8>)> = Vec::new();
    for &obj_num in &objects {
        if object_has_stream(&data, obj_num) {
            continue;
        }
        let Some((start, end)) = find_object_range(&data, obj_num) else {
            continue;
        };
        let obj_bytes = &data[start..end];
        // Only pack objects smaller than 4KB.
        if obj_bytes.len() < 4096 {
            packable.push((obj_num, obj_bytes.to_vec()));
        }
    }

    // Only create object streams if there are enough objects to pack.
    if packable.len() < 3 {
        return;
    }

    // Build the header ("objnum offset" pairs) and the concatenated values.
    let mut offsets_header = String::new();
    let mut objects_data: Vec<u8> = Vec::new();
    for (obj_num, obj_bytes) in &packable {
        // Work on raw bytes: object bodies may hold binary strings, and a
        // lossy UTF-8 conversion would silently rewrite them.
        let value_start = obj_bytes
            .windows(3)
            .position(|w| w == b"obj")
            .map_or(0, |p| p + 3);
        // Clamp so a malformed object (no "obj" before "endobj") yields an
        // empty value instead of an out-of-order slice.
        let value_end = obj_bytes
            .windows(6)
            .rposition(|w| w == b"endobj")
            .unwrap_or(obj_bytes.len())
            .max(value_start);
        let value = obj_bytes[value_start..value_end].trim_ascii();

        if !offsets_header.is_empty() {
            offsets_header.push(' ');
        }
        // Offsets are relative to the first object, i.e. to `/First`.
        offsets_header.push_str(&format!("{} {}", obj_num, objects_data.len()));
        objects_data.extend_from_slice(value);
        objects_data.push(b' ');
    }

    let mut combined = offsets_header.as_bytes().to_vec();
    combined.push(b' ');
    combined.extend_from_slice(&objects_data);

    // Compress the combined data.
    let mut encoder = flate2::write::ZlibEncoder::new(Vec::new(), flate2::Compression::default());
    if encoder.write_all(&combined).is_err() {
        return;
    }
    let Ok(compressed) = encoder.finish() else {
        return;
    };

    let Some(insert_at) = find_objstm_insert_pos(&data) else {
        return;
    };

    // Use the next free object number for the new stream.
    let new_obj_num = objects.iter().max().copied().unwrap_or(0) + 1;
    let mut objstm = format!(
        "{} 0 obj\n<< /Type /ObjStm /N {} /First {} /Length {} /Filter /FlateDecode >>\nstream\n",
        new_obj_num,
        packable.len(),
        offsets_header.len() + 1,
        compressed.len(),
    )
    .into_bytes();
    objstm.extend_from_slice(&compressed);
    objstm.extend_from_slice(b"\nendstream\nendobj\n");

    // One splice moves the tail of the buffer once instead of once per byte.
    data.splice(insert_at..insert_at, objstm);
    guard.set_data(data);
}

/// Remove object streams.
///
/// Deletes every object whose dictionary is tagged `/ObjStm`, together with
/// the whitespace that follows it, and then regenerates the cross-reference
/// data through `rebuild_pdf_xref`. The document is left untouched when the
/// handle is unknown, the lock is poisoned, the data does not start with
/// `%PDF-`, or no object stream is found.
///
/// NOTE(review): the objects packed inside a removed stream are not written
/// back as standalone objects here - confirm that this is intended.
#[unsafe(no_mangle)]
pub extern "C" fn pdf_remove_object_streams(_ctx: ContextHandle, doc: DocumentHandle) {
    let Some(document) = super::DOCUMENTS.get(doc) else {
        return;
    };
    let Ok(mut guard) = document.lock() else {
        return;
    };

    let data = guard.data().to_vec();
    if !data.starts_with(b"%PDF-") {
        return;
    }

    let object_numbers = collect_all_object_numbers(&data);
    let mut stripped = data;
    let mut removed_any = false;

    // Ranges are looked up again on the current buffer for every object, so
    // earlier removals never leave stale offsets behind.
    for &obj_num in object_numbers.iter().rev() {
        let Some((dict_start, dict_end)) = find_object_dict(&stripped, obj_num) else {
            continue;
        };
        let is_object_stream = find_dict_key(&stripped, dict_start, dict_end, b"/Type /ObjStm")
            .is_some()
            || find_dict_key(&stripped, dict_start, dict_end, b"/ObjStm").is_some();
        if !is_object_stream {
            continue;
        }
        let Some((start, end)) = find_object_range(&stripped, obj_num) else {
            continue;
        };

        // Swallow the whitespace/newlines that trail the object as well.
        let trailing = stripped.get(end..).map_or(0, |tail| {
            tail.iter().take_while(|b| b.is_ascii_whitespace()).count()
        });
        let cut_end = (end + trailing).min(stripped.len());
        stripped.drain(start..cut_end);
        removed_any = true;
    }

    if removed_any {
        // Offsets changed, so the xref has to be rebuilt.
        let rebuilt = rebuild_pdf_xref(&stripped);
        guard.set_data(rebuilt);
    }
}

/// Garbage collect unused objects.
///
/// Runs `garbage_collect_data` on the document's bytes and stores the result.
/// `level` selects how much work is done:
///   1 = remove unreferenced objects
///   2 = remove unreferenced + renumber
///   3 = remove unreferenced + renumber + deduplicate
///
/// Levels below 1, unknown handles, a poisoned lock, and data that does not
/// start with `%PDF-` all leave the document unchanged.
#[unsafe(no_mangle)]
pub extern "C" fn pdf_garbage_collect(_ctx: ContextHandle, doc: DocumentHandle, level: i32) {
    if level < 1 {
        return;
    }
    let Some(document) = super::DOCUMENTS.get(doc) else {
        return;
    };
    let Ok(mut guard) = document.lock() else {
        return;
    };

    let original = guard.data().to_vec();
    if original.starts_with(b"%PDF-") {
        let collected = garbage_collect_data(&original, level);
        guard.set_data(collected);
    }
}

/// Internal: perform garbage collection on raw PDF data.
///
/// Objects that are defined but never referenced are removed together with
/// the whitespace that follows them. From level 2 the survivors are
/// renumbered, and from level 3 they are also deduplicated. The
/// cross-reference data is rebuilt before returning.
///
/// When nothing is unreferenced and `level` is below 2, an unmodified copy of
/// `data` is returned.
fn garbage_collect_data(data: &[u8], level: i32) -> Vec<u8> {
    let defined: HashSet<i32> = collect_all_object_numbers(data).into_iter().collect();
    let reachable = collect_referenced_objects(data);

    let mut orphans: Vec<i32> = defined.difference(&reachable).copied().collect();
    if orphans.is_empty() && level < 2 {
        return data.to_vec();
    }

    // Level 1+: drop the orphans, highest object number first.
    orphans.sort_unstable_by_key(|&num| std::cmp::Reverse(num));

    let mut output = data.to_vec();
    for obj_num in orphans {
        let Some((start, end)) = find_object_range(&output, obj_num) else {
            continue;
        };
        let trailing = output.get(end..).map_or(0, |tail| {
            tail.iter().take_while(|b| b.is_ascii_whitespace()).count()
        });
        let cut_end = (end + trailing).min(output.len());
        output.drain(start..cut_end);
    }

    // Level 2+: renumber objects.
    if level >= 2 {
        output = renumber_objects_data(&output);
    }

    // Level 3: deduplicate.
    if level >= 3 {
        output = deduplicate_objects_data(&output);
    }

    rebuild_pdf_xref(&output)
}

/// Deduplicate objects.
///
/// Passes the document's bytes through `deduplicate_objects_data` and stores
/// the result. Unknown handles, a poisoned lock, and data that does not start
/// with `%PDF-` leave the document unchanged.
#[unsafe(no_mangle)]
pub extern "C" fn pdf_deduplicate_objects(_ctx: ContextHandle, doc: DocumentHandle) {
    let Some(document) = super::DOCUMENTS.get(doc) else {
        return;
    };
    let Ok(mut guard) = document.lock() else {
        return;
    };

    let original = guard.data().to_vec();
    if original.starts_with(b"%PDF-") {
        let deduplicated = deduplicate_objects_data(&original);
        guard.set_data(deduplicated);
    }
}

/// Internal: deduplicate identical objects in raw PDF data.
///
/// Two objects are duplicates when the bytes following their `N 0 obj`
/// header are identical. The first object seen with a given content is kept
/// as the canonical copy; every `N 0 R` reference to a later duplicate is
/// redirected to it, the duplicates are removed, and the cross-reference
/// data is rebuilt.
///
/// References are only rewritten when the object number is a whole token, so
/// redirecting object 5 never touches `15 0 R`.
///
/// Returns an unmodified copy of `data` when no duplicates exist.
fn deduplicate_objects_data(data: &[u8]) -> Vec<u8> {
    // PDF "regular" characters are everything except whitespace and delimiters.
    fn is_regular(byte: u8) -> bool {
        !matches!(
            byte,
            0 | b'\t'
                | b'\n'
                | 0x0C
                | b'\r'
                | b' '
                | b'('
                | b')'
                | b'<'
                | b'>'
                | b'['
                | b']'
                | b'{'
                | b'}'
                | b'/'
                | b'%'
        )
    }

    // Copy `data`, rewriting `OLD 0 R` to `NEW 0 R` for each entry of
    // `replacements`. A single forward pass never rescans its own output, so
    // it terminates even when the new number contains the old one.
    fn redirect_references(data: &[u8], replacements: &HashMap<i32, i32>) -> Vec<u8> {
        const REF_SUFFIX: &[u8] = b" 0 R";

        let mut out = Vec::with_capacity(data.len());
        let mut i = 0;
        while i < data.len() {
            let starts_number = data[i].is_ascii_digit() && (i == 0 || !is_regular(data[i - 1]));
            if !starts_number {
                out.push(data[i]);
                i += 1;
                continue;
            }

            let digits_end = i + data[i..].iter().take_while(|b| b.is_ascii_digit()).count();
            let rest = &data[digits_end..];
            let is_reference = rest.starts_with(REF_SUFFIX)
                && rest.get(REF_SUFFIX.len()).map_or(true, |&b| !is_regular(b));
            let target = if is_reference {
                std::str::from_utf8(&data[i..digits_end])
                    .ok()
                    .and_then(|digits| digits.parse::<i32>().ok())
                    .and_then(|old| replacements.get(&old))
            } else {
                None
            };
            match target {
                Some(new) => out.extend_from_slice(new.to_string().as_bytes()),
                None => out.extend_from_slice(&data[i..digits_end]),
            }
            i = digits_end;
        }
        out
    }

    let objects = collect_all_object_numbers(data);

    // Key directly on the content bytes: no separate hash-collision handling
    // is needed, and colliding-but-different objects still get registered.
    let mut canonical_by_content: HashMap<&[u8], i32> = HashMap::new();
    let mut replacements: HashMap<i32, i32> = HashMap::new(); // duplicate -> canonical

    for &obj_num in &objects {
        let Some((start, end)) = find_object_range(data, obj_num) else {
            continue;
        };
        let obj_bytes = &data[start..end];
        // Skip the "N 0 obj" prefix so only the content is compared.
        let header_len = format!("{} 0 obj", obj_num).len();
        if header_len >= obj_bytes.len() {
            continue;
        }
        let content = &obj_bytes[header_len..];

        let existing = canonical_by_content.get(content).copied();
        match existing {
            // An object number listed twice must not become its own duplicate.
            Some(canonical) if canonical != obj_num => {
                replacements.insert(obj_num, canonical);
            }
            Some(_) => {}
            None => {
                canonical_by_content.insert(content, obj_num);
            }
        }
    }

    if replacements.is_empty() {
        return data.to_vec();
    }

    // Point every reference to a duplicate at its canonical object.
    let mut result = redirect_references(data, &replacements);

    // Remove the now-unreferenced duplicates, highest object number first.
    let mut doomed: Vec<i32> = replacements.keys().copied().collect();
    doomed.sort_unstable_by(|a, b| b.cmp(a));

    for &dup in &doomed {
        if let Some((start, end)) = find_object_range(&result, dup) {
            let mut actual_end = end;
            while actual_end < result.len() && result[actual_end].is_ascii_whitespace() {
                actual_end += 1;
            }
            result.drain(start..actual_end.min(result.len()));
        }
    }

    rebuild_pdf_xref(&result)
}

/// Renumber objects.
///
/// Passes the document's bytes through `renumber_objects_data`, which assigns
/// sequential object numbers starting from 1, and stores the result. Unknown
/// handles, a poisoned lock, and data that does not start with `%PDF-` leave
/// the document unchanged.
#[unsafe(no_mangle)]
pub extern "C" fn pdf_renumber_objects(_ctx: ContextHandle, doc: DocumentHandle) {
    let Some(document) = super::DOCUMENTS.get(doc) else {
        return;
    };
    let Ok(mut guard) = document.lock() else {
        return;
    };

    let original = guard.data().to_vec();
    if original.starts_with(b"%PDF-") {
        let renumbered = renumber_objects_data(&original);
        guard.set_data(renumbered);
    }
}

/// Internal: renumber objects sequentially in raw PDF data.
///
/// The i-th object number reported by `collect_all_object_numbers` becomes
/// `i + 1`. Both `N 0 obj` headers and `N 0 R` references are rewritten in a
/// single forward pass, and the cross-reference data is rebuilt afterwards.
///
/// A number is only rewritten when it is a whole token followed by ` 0 obj`
/// or ` 0 R`, so renumbering object 5 never alters `15 0 R`. Because the
/// output is never rescanned, no placeholder pass is needed and swapped
/// numbers (e.g. 5 -> 1 together with 1 -> 2) cannot cascade.
///
/// Returns an unmodified copy of `data` when the numbering is already
/// sequential.
fn renumber_objects_data(data: &[u8]) -> Vec<u8> {
    // PDF "regular" characters are everything except whitespace and delimiters.
    fn is_regular(byte: u8) -> bool {
        !matches!(
            byte,
            0 | b'\t'
                | b'\n'
                | 0x0C
                | b'\r'
                | b' '
                | b'('
                | b')'
                | b'<'
                | b'>'
                | b'['
                | b']'
                | b'{'
                | b'}'
                | b'/'
                | b'%'
        )
    }

    // Copy `data`, replacing the object number of every header or reference
    // whose number appears in `mapping`.
    fn rewrite_object_numbers(data: &[u8], mapping: &HashMap<i32, i32>) -> Vec<u8> {
        const OBJ_SUFFIX: &[u8] = b" 0 obj";
        const REF_SUFFIX: &[u8] = b" 0 R";

        let mut out = Vec::with_capacity(data.len());
        let mut i = 0;
        while i < data.len() {
            let starts_number = data[i].is_ascii_digit() && (i == 0 || !is_regular(data[i - 1]));
            if !starts_number {
                out.push(data[i]);
                i += 1;
                continue;
            }

            let digits_end = i + data[i..].iter().take_while(|b| b.is_ascii_digit()).count();
            let rest = &data[digits_end..];
            let names_object = [OBJ_SUFFIX, REF_SUFFIX].iter().any(|suffix| {
                rest.starts_with(suffix)
                    && rest.get(suffix.len()).map_or(true, |&b| !is_regular(b))
            });
            let target = if names_object {
                std::str::from_utf8(&data[i..digits_end])
                    .ok()
                    .and_then(|digits| digits.parse::<i32>().ok())
                    .and_then(|old| mapping.get(&old))
            } else {
                None
            };
            match target {
                Some(new) => out.extend_from_slice(new.to_string().as_bytes()),
                None => out.extend_from_slice(&data[i..digits_end]),
            }
            i = digits_end;
        }
        out
    }

    let objects = collect_all_object_numbers(data);

    // If the numbering is already sequential, no work is needed.
    let already_sequential = objects
        .iter()
        .enumerate()
        .all(|(i, &n)| n == (i as i32) + 1);
    if already_sequential {
        return data.to_vec();
    }

    // Build the old -> new mapping.
    let mapping: HashMap<i32, i32> = objects
        .iter()
        .enumerate()
        .map(|(index, &old)| (old, (index as i32) + 1))
        .collect();

    let renumbered = rewrite_object_numbers(data, &mapping);
    rebuild_pdf_xref(&renumbered)
}

/// Remove unused resources.
///
/// Removes every object that is defined in the document but never referenced
/// (the set difference of `collect_all_object_numbers` and
/// `collect_referenced_objects`), together with the whitespace that follows
/// it, and then rebuilds the cross-reference data.
///
/// The document is left unchanged when the handle is unknown, the lock is
/// poisoned, the data does not start with `%PDF-`, or every object is
/// referenced.
#[unsafe(no_mangle)]
pub extern "C" fn pdf_remove_unused_resources(_ctx: ContextHandle, doc: DocumentHandle) {
    let Some(document) = super::DOCUMENTS.get(doc) else {
        return;
    };
    let Ok(mut guard) = document.lock() else {
        return;
    };

    let data = guard.data().to_vec();
    if !data.starts_with(b"%PDF-") {
        return;
    }

    let defined: HashSet<i32> = collect_all_object_numbers(&data).into_iter().collect();
    let reachable = collect_referenced_objects(&data);
    let mut orphans: Vec<i32> = defined.difference(&reachable).copied().collect();
    if orphans.is_empty() {
        return;
    }

    // Highest object number first.
    orphans.sort_unstable_by_key(|&num| std::cmp::Reverse(num));

    let mut pruned = data;
    for obj_num in orphans {
        let Some((start, end)) = find_object_range(&pruned, obj_num) else {
            continue;
        };
        let trailing = pruned.get(end..).map_or(0, |tail| {
            tail.iter().take_while(|b| b.is_ascii_whitespace()).count()
        });
        let cut_end = (end + trailing).min(pruned.len());
        pruned.drain(start..cut_end);
    }

    let rebuilt = rebuild_pdf_xref(&pruned);
    guard.set_data(rebuilt);
}

// ============================================================================
// FFI Functions - Encryption
// ============================================================================

/// Set document encryption.
///
/// Stores `method` and `permissions` in the write options and, for each
/// non-null password pointer that holds valid UTF-8, forwards the text to
/// `set_owner_password` / `set_user_password`. A null `opts` makes the call a
/// no-op; a null or non-UTF-8 password is skipped.
#[unsafe(no_mangle)]
pub extern "C" fn pdf_set_encryption(
    _ctx: ContextHandle,
    opts: *mut WriteOptions,
    method: i32,
    permissions: i32,
    owner_pwd: *const c_char,
    user_pwd: *const c_char,
) {
    if opts.is_null() {
        return;
    }

    // SAFETY: `opts` was checked for null above; the caller is expected to
    // pass a pointer to a live `WriteOptions` that nothing else is accessing.
    let options = unsafe { &mut *opts };
    options.do_encrypt = method;
    options.permissions = permissions;

    if !owner_pwd.is_null() {
        // SAFETY: non-null; the caller is expected to pass a NUL-terminated
        // string that stays valid for the duration of this call.
        let owner = unsafe { CStr::from_ptr(owner_pwd) };
        match owner.to_str() {
            Ok(pwd) => options.set_owner_password(pwd),
            Err(_) => {}
        }
    }

    if !user_pwd.is_null() {
        // SAFETY: same contract as for `owner_pwd`.
        let user = unsafe { CStr::from_ptr(user_pwd) };
        match user.to_str() {
            Ok(pwd) => options.set_user_password(pwd),
            Err(_) => {}
        }
    }
}

/// Remove document encryption.
///
/// Sets `do_encrypt` in the write options to `EncryptionMethod::None`. A null
/// `opts` makes the call a no-op.
#[unsafe(no_mangle)]
pub extern "C" fn pdf_remove_encryption(_ctx: ContextHandle, opts: *mut WriteOptions) {
    // SAFETY: `as_mut` yields `None` for a null pointer; for a non-null one
    // the caller is expected to pass a pointer to a live `WriteOptions`.
    if let Some(options) = unsafe { opts.as_mut() } {
        options.do_encrypt = EncryptionMethod::None as i32;
    }
}

// ============================================================================
// FFI Functions - Free Strings
// ============================================================================

/// Free a string allocated by clean functions.
///
/// Takes ownership of `s` back as a `CString` and drops it. A null pointer is
/// ignored.
#[unsafe(no_mangle)]
pub extern "C" fn pdf_clean_free_string(_ctx: ContextHandle, s: *mut c_char) {
    if s.is_null() {
        return;
    }
    // SAFETY: non-null; the caller is expected to pass a pointer that came
    // from `CString::into_raw` and has not been freed already.
    let reclaimed = unsafe { CString::from_raw(s) };
    drop(reclaimed);
}

// ============================================================================
// Tests
// ============================================================================

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_write_options_default() {
        let opts = WriteOptions::new();
        assert_eq!(opts.do_incremental, 0);
        assert_eq!(opts.do_compress, 1);
        assert_eq!(opts.do_garbage, 0);
        assert_eq!(opts.do_linear, 0);
    }

    #[test]
    fn test_write_options_parse() {
        let mut opts = WriteOptions::new();
        opts.parse("glzcs");
        assert_eq!(opts.do_garbage, 1);
        assert_eq!(opts.do_linear, 1);
        assert_eq!(opts.do_compress, 1);
        assert_eq!(opts.do_clean, 1);
        assert_eq!(opts.do_sanitize, 1);
    }

    #[test]
    fn test_write_options_format() {
        let mut opts = WriteOptions::new();
        opts.do_garbage = 1;
        opts.do_linear = 1;
        opts.do_compress = 1;
        let formatted = opts.format();
        assert!(formatted.contains('g'));
        assert!(formatted.contains('l'));
        assert!(formatted.contains('z'));
    }

    #[test]
    fn test_write_options_password() {
        let mut opts = WriteOptions::new();
        opts.set_owner_password("owner123");
        opts.set_user_password("user456");

        let owner = std::str::from_utf8(&opts.opwd_utf8[..8]).unwrap();
        assert_eq!(owner, "owner123");

        let user = std::str::from_utf8(&opts.upwd_utf8[..7]).unwrap();
        assert_eq!(user, "user456");
    }

    #[test]
    fn test_clean_options_default() {
        let opts = CleanOptions::new();
        assert_eq!(opts.subset_fonts, 0);
        assert_eq!(opts.structure, CleanStructureOption::Drop);
        assert_eq!(opts.vectorize, CleanVectorizeOption::No);
    }

    #[test]
    fn test_clean_options_optimize() {
        let opts = CleanOptions::optimize();
        assert_eq!(opts.write.do_garbage, 3);
        assert_eq!(opts.write.do_compress, 1);
        assert_eq!(opts.write.do_clean, 1);
        assert_eq!(opts.subset_fonts, 1);
    }

    #[test]
    fn test_clean_options_linearize() {
        let opts = CleanOptions::linearize();
        assert_eq!(opts.write.do_linear, 1);
        assert_eq!(opts.write.do_garbage, 1);
    }

    #[test]
    fn test_structure_option() {
        assert_eq!(CleanStructureOption::Drop as i32, 0);
        assert_eq!(CleanStructureOption::Keep as i32, 1);
    }

    #[test]
    fn test_vectorize_option() {
        assert_eq!(CleanVectorizeOption::No as i32, 0);
        assert_eq!(CleanVectorizeOption::Yes as i32, 1);
    }

    #[test]
    fn test_encryption_method() {
        assert_eq!(EncryptionMethod::Keep as i32, 0);
        assert_eq!(EncryptionMethod::None as i32, 1);
        assert_eq!(EncryptionMethod::Aes256 as i32, 5);
    }

    #[test]
    fn test_ffi_default_options() {
        let write_opts = pdf_default_write_options();
        assert_eq!(write_opts.do_compress, 1);

        let clean_opts = pdf_default_clean_options();
        assert_eq!(clean_opts.structure, CleanStructureOption::Drop);
    }

    #[test]
    fn test_ffi_parse_options() {
        let mut opts = WriteOptions::new();
        let args = CString::new("glzcs").unwrap();
        pdf_parse_write_options(0, &mut opts, args.as_ptr());
        assert_eq!(opts.do_garbage, 1);
        assert_eq!(opts.do_linear, 1);
    }

    #[test]
    fn test_ffi_format_options() {
        let mut opts = WriteOptions::new();
        opts.do_garbage = 1;
        opts.do_linear = 1;

        let mut buffer = [0u8; 64];
        pdf_format_write_options(0, buffer.as_mut_ptr() as *mut c_char, 64, &opts);

        let result = unsafe { CStr::from_ptr(buffer.as_ptr() as *const c_char) };
        let s = result.to_str().unwrap();
        assert!(s.contains('g'));
        assert!(s.contains('l'));
    }

    #[test]
    fn test_ffi_can_save_incrementally() {
        let result = pdf_can_be_saved_incrementally(0, 0);
        assert_eq!(result, 0);
    }

    #[test]
    fn test_ffi_has_unsaved_sigs() {
        let result = pdf_has_unsaved_sigs(0, 0);
        assert_eq!(result, 0);
    }

    #[test]
    fn test_ffi_set_encryption() {
        let mut opts = WriteOptions::new();
        let owner = CString::new("owner").unwrap();
        let user = CString::new("user").unwrap();

        pdf_set_encryption(0, &mut opts, 5, 0xFFFF, owner.as_ptr(), user.as_ptr());

        assert_eq!(opts.do_encrypt, 5); // AES-256
        assert_eq!(opts.permissions, 0xFFFF);
    }

    #[test]
    fn test_ffi_remove_encryption() {
        let mut opts = WriteOptions::new();
        opts.do_encrypt = 5;

        pdf_remove_encryption(0, &mut opts);
        assert_eq!(opts.do_encrypt, 1); // None
    }

    #[test]
    fn test_ffi_parse_options_null() {
        // Keep the CString in a named binding so the pointer handed to the
        // FFI call is backed by a value that clearly outlives the call.
        let args = CString::new("g").unwrap();
        // A null options pointer must be rejected with a null result.
        let result = pdf_parse_write_options(0, std::ptr::null_mut(), args.as_ptr());
        assert!(result.is_null());
    }

    #[test]
    fn test_ffi_format_options_null() {
        let opts = WriteOptions::new();
        assert!(pdf_format_write_options(0, std::ptr::null_mut(), 64, &opts).is_null());
    }

    #[test]
    fn test_write_options_parse_all() {
        let mut opts = WriteOptions::new();
        opts.parse("gGDdifzlazZcspAmoL");
        assert_eq!(opts.do_garbage, 3);
        assert_eq!(opts.do_decompress, 1);
        assert_eq!(opts.do_compress_images, 0);
        assert_eq!(opts.do_compress_fonts, 0);
        assert_eq!(opts.do_linear, 1);
        assert_eq!(opts.do_ascii, 1);
        assert_eq!(opts.do_compress, 2);
        assert_eq!(opts.do_clean, 1);
        assert_eq!(opts.do_sanitize, 1);
        assert_eq!(opts.do_pretty, 1);
        assert_eq!(opts.do_appearance, 1);
        assert_eq!(opts.do_preserve_metadata, 1);
        assert_eq!(opts.do_use_objstms, 1);
        assert_eq!(opts.do_labels, 1);
    }

    #[test]
    fn test_write_options_format_all() {
        let mut opts = WriteOptions::new();
        opts.do_garbage = 1;
        let s = opts.format();
        assert!(s.contains('g'));
        opts.do_garbage = 2;
        let s = opts.format();
        assert!(s.contains('G'));
        opts.do_garbage = 3;
        let s = opts.format();
        assert!(s.contains('D'));
        opts.do_decompress = 1;
        opts.do_linear = 1;
        opts.do_ascii = 1;
        opts.do_compress = 1;
        opts.do_clean = 1;
        opts.do_sanitize = 1;
        opts.do_pretty = 1;
        opts.do_appearance = 1;
        opts.do_preserve_metadata = 1;
        opts.do_use_objstms = 1;
        opts.do_labels = 1;
        let s = opts.format();
        assert!(s.contains('d'));
        assert!(s.contains('l'));
        assert!(s.contains('a'));
        assert!(s.contains('z'));
        assert!(s.contains('c'));
        assert!(s.contains('s'));
        assert!(s.contains('p'));
        assert!(s.contains('A'));
        assert!(s.contains('m'));
        assert!(s.contains('o'));
        assert!(s.contains('L'));
    }

    #[test]
    fn test_ffi_can_save_incrementally_valid() {
        let pdf_data = b"%PDF-1.4
1 0 obj << /Type /Catalog /Pages 2 0 R >> endobj
2 0 obj << /Type /Pages /Kids [3 0 R] /Count 1 >> endobj
3 0 obj << /Type /Page /Parent 2 0 R >> endobj
xref
0 4
0000000000 65535 f
0000000009 00000 n
0000000058 00000 n
0000000115 00000 n
trailer << /Size 4 /Root 1 0 R >>
startxref
200
%%EOF";
        let doc = super::super::document::Document::new(pdf_data.to_vec());
        let doc_handle = super::super::DOCUMENTS.insert(doc);
        let result = pdf_can_be_saved_incrementally(0, doc_handle);
        assert_eq!(result, 1);
        super::super::DOCUMENTS.remove(doc_handle);
    }

    #[test]
    fn test_ffi_can_save_incrementally_no_startxref() {
        let pdf_data = b"%PDF-1.4\n1 0 obj <<>> endobj\n%%EOF";
        let doc = super::super::document::Document::new(pdf_data.to_vec());
        let doc_handle = super::super::DOCUMENTS.insert(doc);
        let result = pdf_can_be_saved_incrementally(0, doc_handle);
        assert_eq!(result, 0);
        super::super::DOCUMENTS.remove(doc_handle);
    }

    #[test]
    fn test_ffi_has_unsaved_sigs_with_sig() {
        let pdf_data = b"%PDF-1.4
1 0 obj << /Type /Catalog >> endobj
2 0 obj << /FT /Sig /T (sig1) >> endobj
xref
0 3
0000000000 65535 f
0000000009 00000 n
0000000050 00000 n
trailer << /Size 3 /Root 1 0 R >>
startxref
120
%%EOF";
        let doc = super::super::document::Document::new(pdf_data.to_vec());
        let doc_handle = super::super::DOCUMENTS.insert(doc);
        let result = pdf_has_unsaved_sigs(0, doc_handle);
        assert_eq!(result, 1);
        super::super::DOCUMENTS.remove(doc_handle);
    }

    #[test]
    fn test_ffi_save_document_null_filename() {
        let pdf_data = b"%PDF-1.4\n1 0 obj <<>> endobj\n%%EOF";
        let doc = super::super::document::Document::new(pdf_data.to_vec());
        let doc_handle = super::super::DOCUMENTS.insert(doc);
        pdf_save_document(0, doc_handle, std::ptr::null(), std::ptr::null());
        super::super::DOCUMENTS.remove(doc_handle);
    }

    #[test]
    fn test_ffi_save_document_valid() {
        let pdf_data = b"%PDF-1.4
1 0 obj << /Type /Catalog /Pages 2 0 R >> endobj
2 0 obj << /Type /Pages /Kids [3 0 R] /Count 1 >> endobj
3 0 obj << /Type /Page /Parent 2 0 R >> endobj
xref
0 4
0000000000 65535 f
0000000009 00000 n
0000000058 00000 n
0000000115 00000 n
trailer << /Size 4 /Root 1 0 R >>
startxref
200
%%EOF";
        let doc = super::super::document::Document::new(pdf_data.to_vec());
        let doc_handle = super::super::DOCUMENTS.insert(doc);
        let tmp = std::env::temp_dir().join("micropdf_clean_test_save.pdf");
        let path = CString::new(tmp.to_str().unwrap()).unwrap();
        pdf_save_document(0, doc_handle, path.as_ptr(), std::ptr::null());
        assert!(std::fs::read(&tmp).unwrap().starts_with(b"%PDF-"));
        let _ = std::fs::remove_file(&tmp);
        super::super::DOCUMENTS.remove(doc_handle);
    }

    #[test]
    fn test_ffi_write_document() {
        use super::super::output;

        let pdf_data = b"%PDF-1.4
1 0 obj << /Type /Catalog /Pages 2 0 R >> endobj
2 0 obj << /Type /Pages /Kids [3 0 R] /Count 1 >> endobj
3 0 obj << /Type /Page /Parent 2 0 R >> endobj
xref
0 4
0000000000 65535 f
0000000009 00000 n
0000000058 00000 n
0000000115 00000 n
trailer << /Size 4 /Root 1 0 R >>
startxref
200
%%EOF";
        let doc = super::super::document::Document::new(pdf_data.to_vec());
        let doc_handle = super::super::DOCUMENTS.insert(doc);
        let tmp = std::env::temp_dir().join("micropdf_write_doc_test.pdf");
        let path = CString::new(tmp.to_str().unwrap()).unwrap();
        let out_handle = super::super::output::fz_new_output_with_path(0, path.as_ptr(), 0);
        pdf_write_document(0, doc_handle, out_handle, std::ptr::null());
        let data = std::fs::read(&tmp).unwrap();
        assert!(data.starts_with(b"%PDF-"));
        super::super::output::fz_drop_output(0, out_handle);
        let _ = std::fs::remove_file(&tmp);
        super::super::DOCUMENTS.remove(doc_handle);
    }

    #[test]
    fn test_ffi_save_snapshot_null() {
        pdf_save_snapshot(0, 0, std::ptr::null());
    }

    #[test]
    fn test_ffi_save_snapshot_valid() {
        let pdf_data = b"%PDF-1.4
1 0 obj << /Type /Catalog >> endobj
xref
0 2
0000000000 65535 f
0000000009 00000 n
trailer << /Size 2 /Root 1 0 R >>
startxref
100
%%EOF";
        let doc = super::super::document::Document::new(pdf_data.to_vec());
        let doc_handle = super::super::DOCUMENTS.insert(doc);
        let tmp = std::env::temp_dir().join("micropdf_snapshot_test.pdf");
        let path = CString::new(tmp.to_str().unwrap()).unwrap();
        pdf_save_snapshot(0, doc_handle, path.as_ptr());
        assert!(std::fs::read(&tmp).unwrap().starts_with(b"%PDF-"));
        let _ = std::fs::remove_file(&tmp);
        super::super::DOCUMENTS.remove(doc_handle);
    }

    #[test]
    fn test_ffi_write_snapshot() {
        let pdf_data = b"%PDF-1.4\n1 0 obj <<>> endobj\n%%EOF";
        let doc = super::super::document::Document::new(pdf_data.to_vec());
        let doc_handle = super::super::DOCUMENTS.insert(doc);
        let tmp = std::env::temp_dir().join("micropdf_snapshot_out_test.pdf");
        let path = CString::new(tmp.to_str().unwrap()).unwrap();
        let out_handle = super::super::output::fz_new_output_with_path(0, path.as_ptr(), 0);
        pdf_write_snapshot(0, doc_handle, out_handle);
        let data = std::fs::read(&tmp).unwrap();
        assert!(data.starts_with(b"%PDF-"));
        super::super::output::fz_drop_output(0, out_handle);
        let _ = std::fs::remove_file(&tmp);
        super::super::DOCUMENTS.remove(doc_handle);
    }

    #[test]
    fn test_ffi_save_journal_null() {
        pdf_save_journal(0, 0, std::ptr::null());
    }

    /// Saving the journal of a registered document must produce a text file
    /// containing the marker `pdf_journal`.
    #[test]
    fn test_ffi_save_journal_valid() {
        let pdf_data = b"%PDF-1.4\n1 0 obj <<>> endobj\n%%EOF";
        let doc = super::super::document::Document::new(pdf_data.to_vec());
        let doc_handle = super::super::DOCUMENTS.insert(doc);
        let tmp = std::env::temp_dir().join("micropdf_journal_test.json");
        let path = CString::new(tmp.to_str().unwrap()).unwrap();
        // Fixed name in a shared temp directory: remove any stale file so the
        // assertion can only be satisfied by what this run writes.
        let _ = std::fs::remove_file(&tmp);
        pdf_save_journal(0, doc_handle, path.as_ptr());
        // Clean up before asserting so a failure does not leak the temp file
        // or the document handle.
        let content = std::fs::read_to_string(&tmp);
        let _ = std::fs::remove_file(&tmp);
        super::super::DOCUMENTS.remove(doc_handle);
        assert!(content.unwrap().contains("pdf_journal"));
    }

    /// Writing the journal through a path-backed output must put text
    /// containing the marker `pdf_journal` into the file behind that output.
    #[test]
    fn test_ffi_write_journal() {
        let pdf_data = b"%PDF-1.4\n1 0 obj <<>> endobj\n%%EOF";
        let doc = super::super::document::Document::new(pdf_data.to_vec());
        let doc_handle = super::super::DOCUMENTS.insert(doc);
        let tmp = std::env::temp_dir().join("micropdf_journal_out_test.json");
        let path = CString::new(tmp.to_str().unwrap()).unwrap();
        // Fixed name in a shared temp directory: drop any leftover from an
        // interrupted run before the output is created.
        let _ = std::fs::remove_file(&tmp);
        let out_handle = super::super::output::fz_new_output_with_path(0, path.as_ptr(), 0);
        pdf_write_journal(0, doc_handle, out_handle);
        // Read while the output is still open, as before; assert only after
        // the output, temp file and document handle have been released so a
        // failure leaks nothing.
        let data = std::fs::read_to_string(&tmp);
        super::super::output::fz_drop_output(0, out_handle);
        let _ = std::fs::remove_file(&tmp);
        super::super::DOCUMENTS.remove(doc_handle);
        assert!(data.unwrap().contains("pdf_journal"));
    }

    /// Smoke test: `pdf_clean_file` is invoked with every pointer argument
    /// null and both integer arguments zero; the only expectation is that the
    /// call returns without panicking.
    #[test]
    fn test_ffi_clean_file_null() {
        let null_input = std::ptr::null();
        let null_output = std::ptr::null();
        let null_fourth = std::ptr::null();
        let null_options = std::ptr::null();
        let null_seventh = std::ptr::null();
        pdf_clean_file(
            0,
            null_input,
            null_output,
            null_fourth,
            null_options,
            0,
            null_seventh,
        );
    }

    /// Cleaning a small single-page PDF from disk with null options must
    /// produce an output file that begins with the `%PDF-` header.
    #[test]
    fn test_ffi_clean_file_valid() {
        let in_tmp = std::env::temp_dir().join("micropdf_clean_in.pdf");
        let out_tmp = std::env::temp_dir().join("micropdf_clean_out.pdf");
        let pdf_data = b"%PDF-1.4
1 0 obj << /Type /Catalog /Pages 2 0 R >> endobj
2 0 obj << /Type /Pages /Kids [3 0 R] /Count 1 >> endobj
3 0 obj << /Type /Page /Parent 2 0 R >> endobj
xref
0 4
0000000000 65535 f
0000000009 00000 n
0000000058 00000 n
0000000115 00000 n
trailer << /Size 4 /Root 1 0 R >>
startxref
200
%%EOF";
        std::fs::write(&in_tmp, pdf_data).unwrap();
        // The output name is fixed and the temp directory is shared: remove a
        // stale output so only this run's result can satisfy the assertion.
        let _ = std::fs::remove_file(&out_tmp);
        let in_path = CString::new(in_tmp.to_str().unwrap()).unwrap();
        let out_path = CString::new(out_tmp.to_str().unwrap()).unwrap();
        pdf_clean_file(
            0,
            in_path.as_ptr(),
            out_path.as_ptr(),
            std::ptr::null(),
            std::ptr::null(),
            0,
            std::ptr::null(),
        );
        // Capture the result, delete both temp files, then assert, so a
        // failing run does not leave files behind.
        let cleaned = std::fs::read(&out_tmp);
        let _ = std::fs::remove_file(&in_tmp);
        let _ = std::fs::remove_file(&out_tmp);
        assert!(cleaned.unwrap().starts_with(b"%PDF-"));
    }

    /// Cleaning an input that is not a PDF must not yield output content: the
    /// output file is either absent or empty afterwards.
    #[test]
    fn test_ffi_clean_file_non_pdf() {
        let in_tmp = std::env::temp_dir().join("micropdf_clean_invalid.txt");
        let out_tmp = std::env::temp_dir().join("micropdf_clean_out_invalid.pdf");
        std::fs::write(&in_tmp, b"not a pdf").unwrap();
        // The output name is fixed and the temp directory is shared: a stale,
        // non-empty file from an interrupted run would otherwise make this
        // test fail even though the call wrote nothing.
        let _ = std::fs::remove_file(&out_tmp);
        let in_path = CString::new(in_tmp.to_str().unwrap()).unwrap();
        let out_path = CString::new(out_tmp.to_str().unwrap()).unwrap();
        pdf_clean_file(
            0,
            in_path.as_ptr(),
            out_path.as_ptr(),
            std::ptr::null(),
            std::ptr::null(),
            0,
            std::ptr::null(),
        );
        // A missing (or unreadable) output reads as empty, which covers the
        // "file absent" case; cleanup happens before the assertion so a
        // failure does not leave temp files behind.
        let produced = std::fs::read(&out_tmp).unwrap_or_default();
        let _ = std::fs::remove_file(&in_tmp);
        let _ = std::fs::remove_file(&out_tmp);
        assert!(produced.is_empty());
    }

    /// Smoke test: `pdf_rearrange_pages` is called with zero handles and a
    /// null page list, once with a count of 0 and once with a count of 1; the
    /// only expectation is that neither call panics.
    #[test]
    fn test_ffi_rearrange_pages_null() {
        for count in [0, 1] {
            pdf_rearrange_pages(0, 0, count, std::ptr::null(), CleanStructureOption::Drop);
        }
    }

    /// Smoke test: `pdf_vectorize_pages` is called with zero handles, a zero
    /// count, a null pointer and `CleanVectorizeOption::No`; the only
    /// expectation is that the call returns without panicking.
    #[test]
    fn test_ffi_vectorize_pages() {
        let null_list = std::ptr::null();
        let mode = CleanVectorizeOption::No;
        pdf_vectorize_pages(0, 0, 0, null_list, mode);
    }

    /// Smoke test: `pdf_compress_streams` is called on document handle 0 with
    /// third arguments 0 and -1; the only expectation is that neither call
    /// panics.
    #[test]
    fn test_ffi_compress_streams_invalid() {
        for third_arg in [0, -1] {
            pdf_compress_streams(0, 0, third_arg);
        }
    }

    /// Smoke test: registers a one-page document that also carries a plain
    /// 10-byte stream object, runs `pdf_compress_streams` on it with `1` as
    /// the third argument, and unregisters the document again. No result is
    /// inspected; the call only has to return.
    #[test]
    fn test_ffi_compress_streams_valid() {
        let fixture = b"%PDF-1.4
1 0 obj << /Type /Catalog /Pages 2 0 R >> endobj
2 0 obj << /Type /Pages /Kids [3 0 R] /Count 1 >> endobj
3 0 obj << /Type /Page /Parent 2 0 R >> endobj
4 0 obj << /Length 10 >>
stream
1234567890
endstream
endobj
xref
0 5
0000000000 65535 f
0000000009 00000 n
0000000058 00000 n
0000000115 00000 n
0000000150 00000 n
trailer << /Size 5 /Root 1 0 R >>
startxref
250
%%EOF";
        let handle = {
            let document = super::super::document::Document::new(fixture.to_vec());
            super::super::DOCUMENTS.insert(document)
        };
        pdf_compress_streams(0, handle, 1);
        super::super::DOCUMENTS.remove(handle);
    }

    /// Smoke test: registers a one-page document without any stream objects,
    /// runs `pdf_decompress_streams` on it, and unregisters the document
    /// again. No result is inspected; the call only has to return.
    #[test]
    fn test_ffi_decompress_streams() {
        let fixture = b"%PDF-1.4
1 0 obj << /Type /Catalog /Pages 2 0 R >> endobj
2 0 obj << /Type /Pages /Kids [3 0 R] /Count 1 >> endobj
3 0 obj << /Type /Page /Parent 2 0 R >> endobj
xref
0 4
0000000000 65535 f
0000000009 00000 n
0000000058 00000 n
0000000115 00000 n
trailer << /Size 4 /Root 1 0 R >>
startxref
200
%%EOF";
        let handle = {
            let document = super::super::document::Document::new(fixture.to_vec());
            super::super::DOCUMENTS.insert(document)
        };
        pdf_decompress_streams(0, handle);
        super::super::DOCUMENTS.remove(handle);
    }

    /// Smoke test: `pdf_garbage_collect` is called on document handle 0 with
    /// third arguments 0 and -1; the only expectation is that neither call
    /// panics.
    #[test]
    fn test_ffi_garbage_collect_invalid() {
        for third_arg in [0, -1] {
            pdf_garbage_collect(0, 0, third_arg);
        }
    }

    /// Smoke test: registers a one-page document with an extra object 4 that
    /// is referenced only from the trailer's `/Info` entry, runs
    /// `pdf_garbage_collect` on it with `1` as the third argument, and
    /// unregisters the document again. No result is inspected.
    #[test]
    fn test_ffi_garbage_collect_valid() {
        let fixture = b"%PDF-1.4
1 0 obj << /Type /Catalog /Pages 2 0 R >> endobj
2 0 obj << /Type /Pages /Kids [3 0 R] /Count 1 >> endobj
3 0 obj << /Type /Page /Parent 2 0 R >> endobj
4 0 obj << /Unused >> endobj
xref
0 5
0000000000 65535 f
0000000009 00000 n
0000000058 00000 n
0000000115 00000 n
0000000145 00000 n
trailer << /Size 5 /Root 1 0 R /Info 4 0 R >>
startxref
220
%%EOF";
        let handle = {
            let document = super::super::document::Document::new(fixture.to_vec());
            super::super::DOCUMENTS.insert(document)
        };
        pdf_garbage_collect(0, handle, 1);
        super::super::DOCUMENTS.remove(handle);
    }

    /// Smoke test: registers a document whose two page objects (3 and 4) have
    /// identical dictionaries, runs `pdf_deduplicate_objects` on it, and
    /// unregisters the document again. No result is inspected.
    #[test]
    fn test_ffi_deduplicate_objects() {
        let fixture = b"%PDF-1.4
1 0 obj << /Type /Catalog /Pages 2 0 R >> endobj
2 0 obj << /Type /Pages /Kids [3 0 R 4 0 R] /Count 2 >> endobj
3 0 obj << /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] >> endobj
4 0 obj << /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] >> endobj
xref
0 5
0000000000 65535 f
0000000009 00000 n
0000000058 00000 n
0000000127 00000 n
0000000196 00000 n
trailer << /Size 5 /Root 1 0 R >>
startxref
280
%%EOF";
        let handle = {
            let document = super::super::document::Document::new(fixture.to_vec());
            super::super::DOCUMENTS.insert(document)
        };
        pdf_deduplicate_objects(0, handle);
        super::super::DOCUMENTS.remove(handle);
    }

    /// Smoke test: registers a one-page document, runs `pdf_renumber_objects`
    /// on it, and unregisters the document again. No result is inspected; the
    /// call only has to return.
    #[test]
    fn test_ffi_renumber_objects() {
        let fixture = b"%PDF-1.4
1 0 obj << /Type /Catalog /Pages 2 0 R >> endobj
2 0 obj << /Type /Pages /Kids [3 0 R] /Count 1 >> endobj
3 0 obj << /Type /Page /Parent 2 0 R >> endobj
xref
0 4
0000000000 65535 f
0000000009 00000 n
0000000058 00000 n
0000000115 00000 n
trailer << /Size 4 /Root 1 0 R >>
startxref
200
%%EOF";
        let handle = {
            let document = super::super::document::Document::new(fixture.to_vec());
            super::super::DOCUMENTS.insert(document)
        };
        pdf_renumber_objects(0, handle);
        super::super::DOCUMENTS.remove(handle);
    }

    /// Smoke test: registers a one-page document that also contains an
    /// object 4 nothing refers to, runs `pdf_remove_unused_resources` on it,
    /// and unregisters the document again. No result is inspected.
    #[test]
    fn test_ffi_remove_unused_resources() {
        let fixture = b"%PDF-1.4
1 0 obj << /Type /Catalog /Pages 2 0 R >> endobj
2 0 obj << /Type /Pages /Kids [3 0 R] /Count 1 >> endobj
3 0 obj << /Type /Page /Parent 2 0 R >> endobj
4 0 obj << /Unused >> endobj
xref
0 5
0000000000 65535 f
0000000009 00000 n
0000000058 00000 n
0000000115 00000 n
0000000145 00000 n
trailer << /Size 5 /Root 1 0 R >>
startxref
220
%%EOF";
        let handle = {
            let document = super::super::document::Document::new(fixture.to_vec());
            super::super::DOCUMENTS.insert(document)
        };
        pdf_remove_unused_resources(0, handle);
        super::super::DOCUMENTS.remove(handle);
    }

    /// With both trailing pointer arguments null, `pdf_set_encryption` must
    /// still record the value passed as its third argument (5) in
    /// `do_encrypt`.
    #[test]
    fn test_ffi_set_encryption_null_passwords() {
        let mut write_opts = WriteOptions::new();
        let first_null = std::ptr::null();
        let second_null = std::ptr::null();
        pdf_set_encryption(0, &mut write_opts, 5, 0, first_null, second_null);
        assert_eq!(write_opts.do_encrypt, 5);
    }

    /// `pdf_clean_free_string` must accept a null pointer as well as a pointer
    /// obtained from `CString::into_raw`; neither call may panic.
    #[test]
    fn test_ffi_clean_free_string() {
        // Null pointer first.
        pdf_clean_free_string(0, std::ptr::null_mut());

        // Then a pointer handed out by `CString::into_raw`.
        let raw = CString::new("test").unwrap().into_raw();
        pdf_clean_free_string(0, raw);
    }

    /// Optimizing a registered one-page document to a path must leave a file
    /// there whose contents begin with the `%PDF-` header.
    #[test]
    fn test_ffi_optimize() {
        let pdf_data = b"%PDF-1.4
1 0 obj << /Type /Catalog /Pages 2 0 R >> endobj
2 0 obj << /Type /Pages /Kids [3 0 R] /Count 1 >> endobj
3 0 obj << /Type /Page /Parent 2 0 R >> endobj
xref
0 4
0000000000 65535 f
0000000009 00000 n
0000000058 00000 n
0000000115 00000 n
trailer << /Size 4 /Root 1 0 R >>
startxref
200
%%EOF";
        let doc = super::super::document::Document::new(pdf_data.to_vec());
        let doc_handle = super::super::DOCUMENTS.insert(doc);
        let tmp = std::env::temp_dir().join("micropdf_optimize_test.pdf");
        let path = CString::new(tmp.to_str().unwrap()).unwrap();
        // Fixed name in a shared temp directory: remove any stale file so the
        // assertion can only be satisfied by what this run writes.
        let _ = std::fs::remove_file(&tmp);
        pdf_optimize(0, doc_handle, path.as_ptr());
        // Clean up before asserting so a failure does not leak the temp file
        // or the document handle.
        let written = std::fs::read(&tmp);
        let _ = std::fs::remove_file(&tmp);
        super::super::DOCUMENTS.remove(doc_handle);
        assert!(written.unwrap().starts_with(b"%PDF-"));
    }

    /// Linearizing a registered one-page document to a path must leave a file
    /// there whose contents begin with the `%PDF-` header.
    #[test]
    fn test_ffi_linearize() {
        let pdf_data = b"%PDF-1.4
1 0 obj << /Type /Catalog /Pages 2 0 R >> endobj
2 0 obj << /Type /Pages /Kids [3 0 R] /Count 1 >> endobj
3 0 obj << /Type /Page /Parent 2 0 R >> endobj
xref
0 4
0000000000 65535 f
0000000009 00000 n
0000000058 00000 n
0000000115 00000 n
trailer << /Size 4 /Root 1 0 R >>
startxref
200
%%EOF";
        let doc = super::super::document::Document::new(pdf_data.to_vec());
        let doc_handle = super::super::DOCUMENTS.insert(doc);
        let tmp = std::env::temp_dir().join("micropdf_linearize_test.pdf");
        let path = CString::new(tmp.to_str().unwrap()).unwrap();
        // Fixed name in a shared temp directory: remove any stale file so the
        // assertion can only be satisfied by what this run writes.
        let _ = std::fs::remove_file(&tmp);
        pdf_linearize(0, doc_handle, path.as_ptr());
        // Clean up before asserting so a failure does not leak the temp file
        // or the document handle.
        let written = std::fs::read(&tmp);
        let _ = std::fs::remove_file(&tmp);
        super::super::DOCUMENTS.remove(doc_handle);
        assert!(written.unwrap().starts_with(b"%PDF-"));
    }

    /// Smoke test: registers a three-page document, runs
    /// `pdf_create_object_streams` on it, and unregisters the document again.
    /// No result is inspected; the call only has to return.
    #[test]
    fn test_ffi_create_object_streams() {
        let fixture = b"%PDF-1.4
1 0 obj << /Type /Catalog /Pages 2 0 R >> endobj
2 0 obj << /Type /Pages /Kids [3 0 R 4 0 R 5 0 R] /Count 3 >> endobj
3 0 obj << /Type /Page /Parent 2 0 R >> endobj
4 0 obj << /Type /Page /Parent 2 0 R >> endobj
5 0 obj << /Type /Page /Parent 2 0 R >> endobj
xref
0 6
0000000000 65535 f
0000000009 00000 n
0000000058 00000 n
0000000088 00000 n
0000000118 00000 n
0000000148 00000 n
trailer << /Size 6 /Root 1 0 R >>
startxref
250
%%EOF";
        let handle = {
            let document = super::super::document::Document::new(fixture.to_vec());
            super::super::DOCUMENTS.insert(document)
        };
        pdf_create_object_streams(0, handle);
        super::super::DOCUMENTS.remove(handle);
    }

    /// Smoke test: registers a one-page document, runs
    /// `pdf_remove_object_streams` on it, and unregisters the document again.
    /// No result is inspected; the call only has to return.
    #[test]
    fn test_ffi_remove_object_streams() {
        let fixture = b"%PDF-1.4
1 0 obj << /Type /Catalog /Pages 2 0 R >> endobj
2 0 obj << /Type /Pages /Kids [3 0 R] /Count 1 >> endobj
3 0 obj << /Type /Page /Parent 2 0 R >> endobj
xref
0 4
0000000000 65535 f
0000000009 00000 n
0000000058 00000 n
0000000115 00000 n
trailer << /Size 4 /Root 1 0 R >>
startxref
200
%%EOF";
        let handle = {
            let document = super::super::document::Document::new(fixture.to_vec());
            super::super::DOCUMENTS.insert(document)
        };
        pdf_remove_object_streams(0, handle);
        super::super::DOCUMENTS.remove(handle);
    }

    /// Smoke test: registers a two-page document whose pages have different
    /// media boxes, calls `pdf_rearrange_pages` with the page list `[1, 0]`
    /// and `CleanStructureOption::Drop`, and unregisters the document again.
    /// No result is inspected; the call only has to return.
    #[test]
    fn test_ffi_rearrange_pages_valid() {
        let fixture = b"%PDF-1.4
1 0 obj << /Type /Catalog /Pages 2 0 R >> endobj
2 0 obj << /Type /Pages /Kids [3 0 R 4 0 R] /Count 2 >> endobj
3 0 obj << /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] >> endobj
4 0 obj << /Type /Page /Parent 2 0 R /MediaBox [0 0 595 842] >> endobj
xref
0 5
0000000000 65535 f
0000000009 00000 n
0000000058 00000 n
0000000127 00000 n
0000000196 00000 n
trailer << /Size 5 /Root 1 0 R >>
startxref
270
%%EOF";
        let handle = {
            let document = super::super::document::Document::new(fixture.to_vec());
            super::super::DOCUMENTS.insert(document)
        };
        let new_order: [i32; 2] = [1, 0];
        pdf_rearrange_pages(0, handle, 2, new_order.as_ptr(), CleanStructureOption::Drop);
        super::super::DOCUMENTS.remove(handle);
    }

    /// Cleaning a one-page PDF from disk with options whose `structure` is set
    /// to `CleanStructureOption::Keep` must produce an output file that begins
    /// with the `%PDF-` header.
    #[test]
    fn test_ffi_clean_with_structure_keep() {
        let in_tmp = std::env::temp_dir().join("micropdf_clean_keep_in.pdf");
        let out_tmp = std::env::temp_dir().join("micropdf_clean_keep_out.pdf");
        let pdf_data = b"%PDF-1.4
1 0 obj << /Type /Catalog /Pages 2 0 R >> endobj
2 0 obj << /Type /Pages /Kids [3 0 R] /Count 1 >> endobj
3 0 obj << /Type /Page /Parent 2 0 R >> endobj
xref
0 4
0000000000 65535 f
0000000009 00000 n
0000000058 00000 n
0000000115 00000 n
trailer << /Size 4 /Root 1 0 R >>
startxref
200
%%EOF";
        std::fs::write(&in_tmp, pdf_data).unwrap();
        // The output name is fixed and the temp directory is shared: remove a
        // stale output so only this run's result can satisfy the assertion.
        let _ = std::fs::remove_file(&out_tmp);
        let in_path = CString::new(in_tmp.to_str().unwrap()).unwrap();
        let out_path = CString::new(out_tmp.to_str().unwrap()).unwrap();
        let mut opts = CleanOptions::new();
        opts.structure = CleanStructureOption::Keep;
        pdf_clean_file(
            0,
            in_path.as_ptr(),
            out_path.as_ptr(),
            std::ptr::null(),
            &opts,
            0,
            std::ptr::null(),
        );
        // Capture the result, delete both temp files, then assert, so a
        // failing run does not leave files behind.
        let cleaned = std::fs::read(&out_tmp);
        let _ = std::fs::remove_file(&in_tmp);
        let _ = std::fs::remove_file(&out_tmp);
        assert!(cleaned.unwrap().starts_with(b"%PDF-"));
    }

    /// `find_pattern` reports the offset of a match and yields `None` for a
    /// pattern that is missing, empty, or longer than the data.
    #[test]
    fn test_find_pattern() {
        let haystack = b"hello world";

        // Matches at the start and further in.
        assert_eq!(find_pattern(haystack, b"hello"), Some(0));
        assert_eq!(find_pattern(haystack, b"world"), Some(6));

        // Inputs that cannot match.
        assert_eq!(find_pattern(haystack, b"xyz"), None);
        assert_eq!(find_pattern(haystack, b""), None);
        assert_eq!(find_pattern(b"ab", b"abc"), None);
    }

    /// `rfind_pattern` reports the offset of the last match and yields `None`
    /// for a pattern that is missing, empty, or longer than the data — the
    /// same edge cases `test_find_pattern` covers for the forward search.
    #[test]
    fn test_rfind_pattern() {
        let data = b"foo bar foo";
        // "foo" occurs at 0 and 8; the last occurrence wins.
        assert_eq!(rfind_pattern(data, b"foo"), Some(8));
        assert_eq!(rfind_pattern(data, b"bar"), Some(4));
        // Inputs that cannot match.
        assert!(rfind_pattern(data, b"xyz").is_none());
        assert!(rfind_pattern(data, b"").is_none());
        assert!(rfind_pattern(b"ab", b"abc").is_none());
    }

    /// `find_dict_end`, started at offset 0, must return `Some` for both a
    /// flat dictionary and a dictionary that nests another one.
    #[test]
    fn test_find_dict_end() {
        let flat = b"<< /Key /Value >>";
        let flat_end = find_dict_end(flat, 0);
        assert!(flat_end.is_some());

        let nested = b"<< /Outer << /Inner >> >>";
        let nested_end = find_dict_end(nested, 0);
        assert!(nested_end.is_some());
    }

    /// `extract_int_after` must skip leading spaces and parse both a positive
    /// and a negative integer when started at offset 0.
    #[test]
    fn test_extract_int_after() {
        assert_eq!(extract_int_after(b"  123 ", 0), Some(123));
        assert_eq!(extract_int_after(b"  -42", 0), Some(-42));
    }

    /// `collect_all_object_numbers` must report object numbers 1 and 2 for
    /// input in which object 1 is declared twice and object 2 once.
    #[test]
    fn test_collect_all_object_numbers() {
        let found = collect_all_object_numbers(b"1 0 obj\n2 0 obj\n1 0 obj\n");
        for expected in [1, 2] {
            assert!(found.contains(&expected));
        }
    }

    /// `object_has_filter` must return true for object 5 when its dictionary
    /// carries a `/Filter` entry.
    #[test]
    fn test_object_has_filter() {
        let fragment = b"5 0 obj << /Filter /FlateDecode >> endobj";
        let filtered = object_has_filter(fragment, 5);
        assert!(filtered);
    }

    /// `object_has_stream` must return true for object 4 when its body
    /// contains a `stream` … `endstream` section.
    #[test]
    fn test_object_has_stream() {
        let fragment = b"4 0 obj << /Length 5 >>\nstream\nxxxxx\nendstream\nendobj";
        let streamed = object_has_stream(fragment, 4);
        assert!(streamed);
    }
}