micropdf 0.16.0

A pure Rust PDF library with fz_/pdf_ API compatibility
//! PDF Object Utility Functions (Geometry, Key Access, etc.)

use super::super::Handle;
use super::refcount::with_obj;
use super::types::{PDF_OBJECTS, PdfObj, PdfObjHandle, PdfObjType};
use crate::ffi::DOCUMENTS;
use crate::ffi::pdf_parse::{
    PARSED_OBJECTS, ParsedObject, ParsedValue, pdf_parse_ind_obj, pdf_parsed_obj_drop,
    pdf_parser_new, pdf_parser_set_pos,
};

// ============================================================================
// PDF Geometry Object Creation
// ============================================================================

/// Build a two-element PDF array `[x, y]` describing a point.
///
/// Each coordinate is widened from `f32` to `f64` and stored as a real
/// object. The context and document handles are accepted but not used.
///
/// # Returns
/// Handle of the array after it has been inserted into `PDF_OBJECTS`.
#[unsafe(no_mangle)]
pub extern "C" fn pdf_new_point(_ctx: Handle, _doc: Handle, x: f32, y: f32) -> PdfObjHandle {
    let mut point = PdfObj::new_array(2);
    if let PdfObjType::Array(items) = &mut point.obj_type {
        for coord in [x, y] {
            items.push(PdfObj::new_real(f64::from(coord)));
        }
    }
    PDF_OBJECTS.insert(point)
}

/// Build a four-element PDF array `[x0, y0, x1, y1]` describing a rectangle.
///
/// Each coordinate is widened from `f32` to `f64` and stored as a real
/// object, in the order the parameters are given. The context and document
/// handles are accepted but not used.
///
/// # Returns
/// Handle of the array after it has been inserted into `PDF_OBJECTS`.
#[unsafe(no_mangle)]
pub extern "C" fn pdf_new_rect(
    _ctx: Handle,
    _doc: Handle,
    x0: f32,
    y0: f32,
    x1: f32,
    y1: f32,
) -> PdfObjHandle {
    let mut rect = PdfObj::new_array(4);
    if let PdfObjType::Array(items) = &mut rect.obj_type {
        for coord in [x0, y0, x1, y1] {
            items.push(PdfObj::new_real(f64::from(coord)));
        }
    }
    PDF_OBJECTS.insert(rect)
}

/// Build a six-element PDF array `[a, b, c, d, e, f]` describing a matrix.
///
/// Each coefficient is widened from `f32` to `f64` and stored as a real
/// object, in the order the parameters are given. The context and document
/// handles are accepted but not used.
///
/// # Returns
/// Handle of the array after it has been inserted into `PDF_OBJECTS`.
#[unsafe(no_mangle)]
pub extern "C" fn pdf_new_matrix(
    _ctx: Handle,
    _doc: Handle,
    a: f32,
    b: f32,
    c: f32,
    d: f32,
    e: f32,
    f: f32,
) -> PdfObjHandle {
    let mut matrix = PdfObj::new_array(6);
    if let PdfObjType::Array(items) = &mut matrix.obj_type {
        for coefficient in [a, b, c, d, e, f] {
            items.push(PdfObj::new_real(f64::from(coefficient)));
        }
    }
    PDF_OBJECTS.insert(matrix)
}

/// Build a PDF date string object from individual calendar components.
///
/// The components are rendered as `D:YYYYMMDDHHmmSS`: the year is
/// zero-padded to at least four digits and every other field to at least
/// two. No range validation is performed here, so out-of-range or negative
/// inputs are formatted as given. The context and document handles are
/// accepted but not used.
///
/// # Returns
/// Handle of the string object after it has been inserted into `PDF_OBJECTS`.
#[unsafe(no_mangle)]
pub extern "C" fn pdf_new_date(
    _ctx: Handle,
    _doc: Handle,
    year: i32,
    month: i32,
    day: i32,
    hour: i32,
    minute: i32,
    second: i32,
) -> PdfObjHandle {
    let stamp = format!(
        "D:{:04}{:02}{:02}{:02}{:02}{:02}",
        year, month, day, hour, minute, second
    );
    let date_obj = PdfObj::new_string(stamp.as_bytes());
    PDF_OBJECTS.insert(date_obj)
}

// ============================================================================
// PDF Array/Dict Key Access
// ============================================================================

/// Fetch the key stored at position `index` of a dictionary object.
///
/// A new name object is created from the key and inserted into
/// `PDF_OBJECTS`, so each successful call yields a fresh handle.
///
/// # Returns
/// Handle of the new name object, or 0 when `dict` is not a dictionary, when
/// `index` is negative or past the last entry, or when `with_obj` falls back
/// to its `None` default (presumably for an unknown handle).
#[unsafe(no_mangle)]
pub extern "C" fn pdf_dict_get_key(_ctx: Handle, dict: PdfObjHandle, index: i32) -> PdfObjHandle {
    let key = with_obj(dict, None, |o| {
        let PdfObjType::Dict(entries) = &o.obj_type else {
            return None;
        };
        // A negative index fails the conversion and is treated as out of range.
        usize::try_from(index)
            .ok()
            .and_then(|slot| entries.get(slot))
            .map(|(name, _)| PdfObj::new_name(name))
    });

    // Insert only after the `with_obj` call has returned.
    key.map_or(0, |name_obj| PDF_OBJECTS.insert(name_obj))
}

/// Fetch the value stored at position `index` of a dictionary object.
///
/// The value is cloned and the clone is inserted into `PDF_OBJECTS`, so each
/// successful call yields a fresh handle.
///
/// # Returns
/// Handle of the cloned value, or 0 when `dict` is not a dictionary, when
/// `index` is negative or past the last entry, or when `with_obj` falls back
/// to its `None` default (presumably for an unknown handle).
#[unsafe(no_mangle)]
pub extern "C" fn pdf_dict_get_val(_ctx: Handle, dict: PdfObjHandle, index: i32) -> PdfObjHandle {
    let val = with_obj(dict, None, |o| {
        let PdfObjType::Dict(entries) = &o.obj_type else {
            return None;
        };
        // A negative index fails the conversion and is treated as out of range.
        usize::try_from(index)
            .ok()
            .and_then(|slot| entries.get(slot))
            .map(|(_, value)| value.clone())
    });

    // Insert only after the `with_obj` call has returned.
    val.map_or(0, |value_obj| PDF_OBJECTS.insert(value_obj))
}

// ============================================================================
// PDF Object Resolution and Loading
// ============================================================================

/// Validate an object handle for callers that expect reference resolution.
///
/// No reference is actually followed here: the handle is returned unchanged
/// when `PDF_OBJECTS` has an entry for it, and 0 otherwise.
///
/// NOTE(review): the stated rationale for this is that indirect references
/// are resolved while parsing, before objects reach the handle store.
/// Other functions in this file do store `Indirect` placeholder objects, so
/// confirm that callers do not rely on this function to follow them.
///
/// # Arguments
/// * `_ctx` - Context handle (unused)
/// * `_doc` - Document handle (unused)
/// * `obj` - Handle to validate
///
/// # Returns
/// `obj` itself when it is non-zero and present in `PDF_OBJECTS`, else 0.
#[unsafe(no_mangle)]
pub extern "C" fn pdf_resolve_indirect(
    _ctx: Handle,
    _doc: Handle,
    obj: PdfObjHandle,
) -> PdfObjHandle {
    // The zero handle short-circuits, so the store is never queried for it.
    let is_live = obj != 0 && PDF_OBJECTS.get(obj).is_some();
    if is_live { obj } else { 0 }
}

/// Find the byte offset of an indirect object header "N G obj" in PDF data.
///
/// The data is scanned front to back for the literal text
/// `"{num} {generation} obj"`. A hit is accepted only when it stands alone
/// as a token sequence:
///
/// * it starts at offset 0 or directly after a PDF white-space byte, so
///   `1 0 obj` is not found inside `11 0 obj`;
/// * it ends at the end of the data or directly before a PDF white-space or
///   delimiter byte, so `1 0 obj` is not found inside `1 0 objects`.
///
/// # Returns
/// Offset of the first accepted hit, or `None` when there is none (including
/// when `data` is shorter than the pattern).
fn find_object_offset(data: &[u8], num: i32, generation: i32) -> Option<usize> {
    /// PDF white-space bytes: NUL, HT, LF, FF, CR and SP.
    fn is_pdf_whitespace(byte: u8) -> bool {
        matches!(byte, 0x00 | b'\t' | b'\n' | 0x0C | b'\r' | b' ')
    }

    /// PDF delimiter bytes, which end a keyword without intervening space.
    fn is_pdf_delimiter(byte: u8) -> bool {
        matches!(
            byte,
            b'(' | b')' | b'<' | b'>' | b'[' | b']' | b'{' | b'}' | b'/' | b'%'
        )
    }

    let pattern = format!("{} {} obj", num, generation);
    let pat_bytes = pattern.as_bytes();

    // `windows` yields nothing when `data` is shorter than the pattern.
    data.windows(pat_bytes.len())
        .enumerate()
        .find_map(|(start, window)| {
            if window != pat_bytes {
                return None;
            }
            let starts_token = start == 0 || is_pdf_whitespace(data[start - 1]);
            // The `obj` keyword must not run on into further regular characters.
            let ends_token = data
                .get(start + pat_bytes.len())
                .map_or(true, |&next| is_pdf_whitespace(next) || is_pdf_delimiter(next));
            (starts_token && ends_token).then_some(start)
        })
}

/// Convert a `ParsedValue` tree into the equivalent `PdfObj` tree.
///
/// Scalars map one-to-one onto the matching `PdfObj` constructor. Arrays and
/// dictionaries are converted recursively, keeping element order and cloning
/// dictionary keys. A `Reference` becomes an indirect object carrying the
/// same object and generation numbers; it is not followed.
fn parsed_value_to_pdf_obj(v: &ParsedValue) -> PdfObj {
    match v {
        ParsedValue::Null => PdfObj::new_null(),
        ParsedValue::Bool(flag) => PdfObj::new_bool(*flag),
        ParsedValue::Int(number) => PdfObj::new_int(*number),
        ParsedValue::Real(number) => PdfObj::new_real(*number as f64),
        ParsedValue::String(bytes) => PdfObj::new_string(bytes),
        ParsedValue::Name(name) => PdfObj::new_name(name),
        ParsedValue::Array(elements) => {
            let mut array_obj = PdfObj::new_array(elements.len());
            if let PdfObjType::Array(items) = &mut array_obj.obj_type {
                items.extend(elements.iter().map(parsed_value_to_pdf_obj));
            }
            array_obj
        }
        ParsedValue::Dict(pairs) => {
            let mut dict_obj = PdfObj::new_dict(pairs.len());
            if let PdfObjType::Dict(slots) = &mut dict_obj.obj_type {
                slots.extend(
                    pairs
                        .iter()
                        .map(|(key, value)| (key.clone(), parsed_value_to_pdf_obj(value))),
                );
            }
            dict_obj
        }
        ParsedValue::Reference { num, generation } => PdfObj::new_indirect(*num, *generation),
    }
}

/// Convert ParsedObject to PdfObj.
fn parsed_object_to_pdf_obj(parsed: &ParsedObject) -> Option<PdfObj> {
    match parsed {
        ParsedObject::Value(v) => Some(parsed_value_to_pdf_obj(v)),
        ParsedObject::Array(arr) => {
            let mut pdf_arr = PdfObj::new_array(arr.len());
            if let PdfObjType::Array(ref mut a) = pdf_arr.obj_type {
                for v in arr {
                    a.push(parsed_value_to_pdf_obj(v));
                }
            }
            Some(pdf_arr)
        }
        ParsedObject::Dict(entries) => {
            let mut pdf_dict = PdfObj::new_dict(entries.len());
            if let PdfObjType::Dict(ref mut d) = pdf_dict.obj_type {
                for (k, v) in entries {
                    d.push((k.clone(), parsed_value_to_pdf_obj(v)));
                }
            }
            Some(pdf_dict)
        }
        ParsedObject::IndirectObject { value, .. } => Some(parsed_value_to_pdf_obj(value)),
    }
}

/// Load an object from a PDF document by object and generation number.
///
/// The document's bytes are copied out from under its lock, scanned for the
/// "N G obj" header, and parsed from that offset. The parsed result is
/// converted to a `PdfObj` and inserted into `PDF_OBJECTS`. The temporary
/// parser and parsed-object entries are removed again before returning.
///
/// # Arguments
/// * `_ctx` - Context handle (only forwarded to the parser functions)
/// * `doc` - Document handle looked up in `DOCUMENTS`
/// * `num` - Object number to load
/// * `generation` - Generation number
///
/// # Returns
/// Handle to the loaded object. This function never returns 0: on any
/// failure it returns a handle to a newly inserted indirect-reference object
/// for `(num, generation)`. Failures are a zero `doc`, a negative `num`, an
/// unknown document, a poisoned lock, a header that is not found, or a
/// parser or conversion failure.
#[unsafe(no_mangle)]
pub extern "C" fn pdf_load_object(
    _ctx: Handle,
    doc: Handle,
    num: i32,
    generation: i32,
) -> PdfObjHandle {
    // Shared fallback: an unresolved reference to the requested object.
    let placeholder = || PDF_OBJECTS.insert(PdfObj::new_indirect(num, generation));

    if doc == 0 || num < 0 {
        return placeholder();
    }

    // Copy the bytes so the document lock is released before parsing starts.
    let data = {
        let Some(doc_arc) = DOCUMENTS.get(doc) else {
            return placeholder();
        };
        let Ok(guard) = doc_arc.lock() else {
            return placeholder();
        };
        guard.data().to_vec()
    };

    let Some(pos) = find_object_offset(&data, num, generation) else {
        return placeholder();
    };

    // Parser over the copied bytes, positioned at the object header.
    let parser = pdf_parser_new(_ctx, data.as_ptr(), data.len());
    if parser == 0 {
        return placeholder();
    }
    pdf_parser_set_pos(_ctx, parser, pos);

    let parsed_handle = pdf_parse_ind_obj(
        _ctx,
        doc,
        parser,
        std::ptr::null_mut(),
        std::ptr::null_mut(),
        std::ptr::null_mut(),
        std::ptr::null_mut(),
    );
    if parsed_handle == 0 {
        let _ = crate::ffi::pdf_parse::PARSERS.remove(parser);
        return placeholder();
    }

    // Convert while the parsed object is locked; `None` on any failure.
    let loaded = PARSED_OBJECTS.get(parsed_handle).and_then(|parsed_arc| {
        let guard = parsed_arc.lock().ok()?;
        parsed_object_to_pdf_obj(&guard).map(|obj| PDF_OBJECTS.insert(obj))
    });

    // Release the temporary parse artifacts whether or not conversion worked.
    pdf_parsed_obj_drop(_ctx, parsed_handle);
    let _ = crate::ffi::pdf_parse::PARSERS.remove(parser);

    loaded.unwrap_or_else(placeholder)
}

/// Report whether a handle refers to a concrete object rather than a bare
/// indirect reference.
///
/// # Arguments
/// * `_ctx` - Context handle (unused)
/// * `_doc` - Document handle (unused)
/// * `obj` - Object to check
///
/// # Returns
/// 1 when the object's type is anything other than `Indirect`, 0 when it is
/// `Indirect`. 0 is also the default handed to `with_obj`, presumably used
/// for an unknown handle.
#[unsafe(no_mangle)]
pub extern "C" fn pdf_obj_is_resolved(_ctx: Handle, _doc: Handle, obj: PdfObjHandle) -> i32 {
    with_obj(obj, 0, |o| {
        let still_reference = matches!(o.obj_type, PdfObjType::Indirect { .. });
        i32::from(!still_reference)
    })
}