micropdf 0.15.15

//! FFI Compatibility Aliases
//!
//! This module provides compatibility aliases for functions that have different
//! names in different APIs (e.g., Go bindings expecting MicroPDF-style names).
//!
//! These are thin wrappers that call the actual implementations.

use super::buffer::Buffer;
use super::colorspace::{ColorspaceHandle, FZ_COLORSPACE_RGB};
use super::cookie::{
    fz_cookie_abort, fz_cookie_get_progress, fz_cookie_reset, fz_cookie_should_abort,
};
use super::document::{Document, PAGES, fz_count_pages, fz_load_page};
use super::geometry::fz_matrix;
use super::pixmap::Pixmap;
use super::stext::{
    Point, Quad, Rect, STEXT_PAGES, StextBlockType, StextPage, fz_stext_page_as_text,
};
use super::{BUFFERS, DOCUMENTS, Handle, PIXMAPS};
use std::os::raw::c_char;

/// Locate the first occurrence of `needle` in `haystack` at or after `start`.
///
/// The returned index is absolute (measured from the beginning of `haystack`).
/// Yields `None` when `needle` is empty, when `start` is at or past the end of
/// `haystack`, or when the remaining bytes do not contain `needle`.
fn find_bytes_from(haystack: &[u8], needle: &[u8], start: usize) -> Option<usize> {
    // `get` rejects a start offset beyond the end instead of panicking.
    let tail = haystack.get(start..)?;
    if needle.is_empty() || tail.is_empty() || tail.len() < needle.len() {
        return None;
    }

    let last_offset = tail.len() - needle.len();
    (0..=last_offset)
        .find(|&offset| tail[offset..].starts_with(needle))
        .map(|offset| start + offset)
}

/// Collect every embedded JPEG (DCTDecode) image found in `data`.
///
/// The raw bytes are scanned first. Only when that scan finds nothing and the
/// file reports object streams is the object-stream extractor consulted; if
/// that extractor fails, the (empty) raw result is returned.
fn extract_jpeg_images_from_pdf(data: &[u8]) -> Vec<Vec<u8>> {
    let raw_images = extract_jpeg_images_from_raw_bytes(data);

    // Nothing more to try when the raw scan succeeded or there are no object streams.
    if !raw_images.is_empty() || !crate::enhanced::page_ops::has_object_streams(data) {
        return raw_images;
    }

    match crate::enhanced::object_stream::extract_jpeg_images_from_objstm(data) {
        Ok(objstm_images) => objstm_images,
        Err(_) => raw_images,
    }
}

/// Scan raw PDF bytes for stream objects that carry JPEG data.
///
/// For every occurrence of the bytes `stream`, the up-to-2048 bytes in front of
/// it are treated as the object's dictionary. The stream is kept when that
/// window mentions both an image subtype and `/DCTDecode`, and the payload
/// (everything up to the next `endstream`) begins with the JPEG SOI marker
/// `FF D8`. Images are returned in the order they are encountered.
fn extract_jpeg_images_from_raw_bytes(data: &[u8]) -> Vec<Vec<u8>> {
    const STREAM_KW: &[u8] = b"stream";
    const DICT_LOOKBACK: usize = 2048;

    let mut found = Vec::new();
    let mut cursor = 0;

    while let Some(kw_at) = find_bytes_from(data, STREAM_KW, cursor) {
        // Always resume one byte past this hit, whatever happens below.
        cursor = kw_at + 1;

        let dict = &data[kw_at.saturating_sub(DICT_LOOKBACK)..kw_at];
        let mentions = |token: &[u8]| find_bytes_from(dict, token, 0).is_some();

        let is_image = mentions(b"/Subtype/Image") || mentions(b"/Subtype /Image");
        // Any spelling of the filter entry contains the bare name `/DCTDecode`.
        if !is_image || !mentions(b"/DCTDecode") {
            continue;
        }

        // Skip every CR/LF byte that follows the keyword.
        let after_kw = kw_at + STREAM_KW.len();
        let body_start = data[after_kw..]
            .iter()
            .position(|&byte| byte != b'\r' && byte != b'\n')
            .map_or(data.len(), |skipped| after_kw + skipped);

        let Some(body_end) = find_bytes_from(data, b"endstream", body_start) else {
            continue;
        };

        // Keep only payloads that start with the JPEG SOI marker.
        let body = &data[body_start..body_end];
        if body.starts_with(&[0xFF, 0xD8]) {
            found.push(body.to_vec());
        }
    }

    found
}

/// Copy a decoded RGB image into `pixmap`, stretching it to the pixmap size
/// with nearest-neighbour sampling.
///
/// * `img_rgb` is read as tightly packed 3-byte pixels, `img_width` pixels per
///   row and `img_height` rows.
/// * Each destination pixel occupies `pixmap.n()` bytes and each row
///   `pixmap.stride()` bytes.
///
/// At most the first three components of a destination pixel are overwritten
/// with the source channels; when the pixmap has more than three components
/// the fourth is set to 255. Pixels whose source or destination bytes fall
/// outside the respective buffers are skipped. Nothing is written when any
/// dimension, or the component count, is zero.
///
/// NOTE(review): for pixmaps with fewer than three components only the leading
/// source channel(s) are copied — confirm whether a luminance conversion is
/// wanted for grayscale targets.
fn blit_image_to_pixmap(pixmap: &mut Pixmap, img_rgb: &[u8], img_width: u32, img_height: u32) {
    let pix_w = pixmap.w() as u32;
    let pix_h = pixmap.h() as u32;
    let n = pixmap.n() as usize;
    let stride = pixmap.stride() as usize;

    if pix_w == 0 || pix_h == 0 || img_width == 0 || img_height == 0 || n == 0 {
        return;
    }

    // Never write more colour bytes than one destination pixel owns; writing
    // three bytes into a 1- or 2-component pixel would spill into its
    // neighbour and could index past the end of the sample buffer.
    let color_channels = n.min(3);
    let samples = pixmap.samples_mut();

    for py in 0..pix_h {
        // Nearest-neighbour row; invariant across the inner loop.
        let src_y = ((py as f64 * img_height as f64 / pix_h as f64) as u32).min(img_height - 1);

        for px in 0..pix_w {
            let src_x = ((px as f64 * img_width as f64 / pix_w as f64) as u32).min(img_width - 1);

            // Widen before multiplying: `src_y * img_width * 3` overflows u32
            // for large images.
            let src_index = u64::from(src_y) * u64::from(img_width) + u64::from(src_x);
            let Some(src_px) = usize::try_from(src_index)
                .ok()
                .and_then(|idx| idx.checked_mul(3))
                .and_then(|off| img_rgb.get(off..)?.get(..3))
            else {
                continue;
            };

            let dst_offset = py as usize * stride + px as usize * n;
            let Some(dst_px) = samples.get_mut(dst_offset..dst_offset + n) else {
                continue;
            };

            dst_px[..color_channels].copy_from_slice(&src_px[..color_channels]);
            if n > 3 {
                // Fourth component is filled with 255, as before.
                dst_px[3] = 255;
            }
        }
    }
}

// ============================================================================
// Cookie Compatibility Aliases
// ============================================================================

/// Alias for fz_cookie_abort (MicroPDF naming convention)
///
/// Forwards `ctx` and `cookie` unchanged to `fz_cookie_abort`. The wrapper adds
/// no logic of its own; it only exports the same operation under the symbol
/// name `fz_abort_cookie`.
#[unsafe(no_mangle)]
pub extern "C" fn fz_abort_cookie(ctx: Handle, cookie: Handle) {
    fz_cookie_abort(ctx, cookie)
}

/// Alias for fz_cookie_should_abort (MicroPDF naming convention)
///
/// Forwards `ctx` and `cookie` unchanged to `fz_cookie_should_abort` and
/// returns its `i32` result as-is. The wrapper only exports the same operation
/// under the symbol name `fz_cookie_is_aborted`.
#[unsafe(no_mangle)]
pub extern "C" fn fz_cookie_is_aborted(ctx: Handle, cookie: Handle) -> i32 {
    fz_cookie_should_abort(ctx, cookie)
}

/// Alias for fz_cookie_get_progress (MicroPDF naming convention)
///
/// Forwards `ctx` and `cookie` unchanged to `fz_cookie_get_progress` and
/// returns its `i32` result as-is. The wrapper only exports the same operation
/// under the symbol name `fz_cookie_progress`.
#[unsafe(no_mangle)]
pub extern "C" fn fz_cookie_progress(ctx: Handle, cookie: Handle) -> i32 {
    fz_cookie_get_progress(ctx, cookie)
}

/// Alias for fz_cookie_reset (MicroPDF naming convention)
///
/// Forwards `ctx` and `cookie` unchanged to `fz_cookie_reset`. The wrapper adds
/// no logic of its own; it only exports the same operation under the symbol
/// name `fz_reset_cookie`.
#[unsafe(no_mangle)]
pub extern "C" fn fz_reset_cookie(ctx: Handle, cookie: Handle) {
    fz_cookie_reset(ctx, cookie)
}

// ============================================================================
// Document Compatibility Functions
// ============================================================================

/// Open a document from an in-memory byte buffer.
///
/// The `len` bytes at `data` are copied into a new `Vec`, wrapped in a
/// `Document`, and registered in `DOCUMENTS`; the resulting handle is returned.
/// Returns `0` when `data` is null or `len` is zero. `_ctx` and `_magic` are
/// accepted for signature compatibility and are not read.
///
/// # Safety
/// When `data` is non-null it must point to at least `len` readable bytes for
/// the duration of the call. `_magic` is never dereferenced.
#[unsafe(no_mangle)]
pub extern "C" fn fz_open_document_with_buffer(
    _ctx: Handle,
    _magic: *const c_char,
    data: *const u8,
    len: usize,
) -> Handle {
    if len == 0 || data.is_null() {
        return 0;
    }

    // SAFETY: `data` is non-null here and the caller guarantees it addresses
    // `len` readable bytes; the bytes are copied before the borrow ends.
    let bytes = unsafe { std::slice::from_raw_parts(data, len) }.to_vec();

    DOCUMENTS.insert(Document::new(bytes))
}

/// Resolve a named destination to a 0-based page index.
///
/// The document bytes are decoded lossily as UTF-8 and searched for the first
/// occurrence of the name in parenthesised form, `(name)`. The token that
/// follows it (optionally after an opening `[`) is parsed as an object number,
/// which is then mapped to a page index via `find_page_index_for_obj`.
///
/// Returns `-1` when `name` is null or not valid UTF-8, the document handle is
/// unknown, its lock cannot be taken, the document is empty, the name is not
/// present, or no object number can be parsed after it.
///
/// # Safety
/// Caller must ensure name is a valid null-terminated C string.
#[unsafe(no_mangle)]
pub extern "C" fn pdf_lookup_named_dest(_ctx: Handle, doc: Handle, name: *const c_char) -> i32 {
    const NOT_FOUND: i32 = -1;

    if name.is_null() {
        return NOT_FOUND;
    }

    // SAFETY: `name` is non-null and the caller guarantees it is a valid
    // null-terminated C string.
    let Ok(name_str) = unsafe { std::ffi::CStr::from_ptr(name) }.to_str() else {
        return NOT_FOUND;
    };
    let Some(doc_arc) = DOCUMENTS.get(doc) else {
        return NOT_FOUND;
    };
    let Ok(guard) = doc_arc.lock() else {
        return NOT_FOUND;
    };

    let data = guard.data();
    if data.is_empty() {
        return NOT_FOUND;
    }
    let data_str = String::from_utf8_lossy(data);

    // Only the literal-string spelling `(name)` is searched for.
    let needle = format!("({})", name_str);
    let Some(hit) = data_str.find(&needle) else {
        return NOT_FOUND;
    };

    // The value is either `[N 0 R /Fit]` or a bare `N 0 R`.
    let value = data_str[hit + needle.len()..].trim_start();
    let value = match value.strip_prefix('[') {
        Some(inner) => inner.trim_start(),
        None => value,
    };

    // Everything up to the first space must be the object number.
    value
        .split_once(' ')
        .and_then(|(obj_num, _)| obj_num.parse::<i32>().ok())
        .map_or(NOT_FOUND, |obj_num| {
            find_page_index_for_obj(&data_str, obj_num)
        })
}

/// Map an object number to its 0-based position in the first `/Kids` array.
///
/// Only the first `/Kids [...]` array found in `data` is inspected. The array
/// text is split on `R`; for each piece, the text before the first space is
/// parsed as an object number. Pieces that do not parse are ignored and do not
/// advance the index. Returns `-1` when there is no such array or the target
/// object is not listed in it.
fn find_page_index_for_obj(data: &str, target_obj: i32) -> i32 {
    let kids_entries = data
        .find("/Kids")
        .map(|kids_at| &data[kids_at + 5..])
        .and_then(|rest| rest.find('[').map(|open| &rest[open + 1..]))
        .and_then(|inner| inner.find(']').map(|close| &inner[..close]));

    let Some(kids_entries) = kids_entries else {
        return -1;
    };

    kids_entries
        .split('R')
        .filter_map(|piece| {
            // Each piece looks like "N 0" (possibly padded with whitespace).
            let (obj_num, _) = piece.trim().split_once(' ')?;
            obj_num.parse::<i32>().ok()
        })
        .position(|obj_num| obj_num == target_obj)
        .map_or(-1, |index| index as i32)
}

// ============================================================================
// Text Extraction Compatibility Functions
// ============================================================================

/// Create stext page from a document page
///
/// This extracts text from a PDF page into a structured text page.
#[unsafe(no_mangle)]
pub extern "C" fn fz_new_stext_page_from_page(
    _ctx: Handle,
    page: Handle,
    _options: *const std::ffi::c_void,
) -> Handle {
    // Get page info to determine bounds
    let (page_width, page_height) = if let Some(page_arc) = PAGES.get(page) {
        if let Ok(guard) = page_arc.lock() {
            // bounds is [x0, y0, x1, y1]
            let w = guard.bounds[2] - guard.bounds[0];
            let h = guard.bounds[3] - guard.bounds[1];
            (w, h)
        } else {
            (612.0, 792.0) // Default to letter size
        }
    } else {
        (612.0, 792.0)
    };

    // Create a new stext page with the page bounds
    let mut stext_page = StextPage {
        refs: 1,
        mediabox: Rect {
            x0: 0.0,
            y0: 0.0,
            x1: page_width,
            y1: page_height,
        },
        blocks: Vec::new(),
    };

    // Extract text from the page's content stream by looking up the document
    // data and parsing BT/ET text blocks with Tj/TJ operators
    if let Some(page_arc) = PAGES.get(page) {
        if let Ok(page_guard) = page_arc.lock() {
            let doc_handle = page_guard.doc_handle;
            if let Some(doc_arc) = DOCUMENTS.get(doc_handle) {
                if let Ok(doc_guard) = doc_arc.lock() {
                    let data = doc_guard.data();
                    if !data.is_empty() {
                        extract_text_from_page_data(
                            data,
                            page_guard.page_num as usize,
                            &mut stext_page,
                        );
                    }
                }
            }
        }
    }

    STEXT_PAGES.insert(stext_page)
}

/// Extract the text of page `page_num` from `data` and append it to `stext`
/// as a single synthetic text block.
///
/// When the file reports object streams, the object-stream path is tried first
/// and the raw path second; otherwise the order is reversed. Nothing is added
/// when no text is found.
///
/// The layout is synthetic, not taken from the PDF: each character gets a
/// 7 x 12 box, consecutive text lines are 14 units apart, and a blank line
/// advances the vertical position by 12 without producing a line.
fn extract_text_from_page_data(data: &[u8], page_num: usize, stext: &mut StextPage) {
    use super::stext::{StextBlock, StextChar, StextLine};

    let extracted = if crate::enhanced::page_ops::has_object_streams(data) {
        extract_text_objstm_path(data, page_num).or_else(|| extract_text_raw_path(data, page_num))
    } else {
        extract_text_raw_path(data, page_num).or_else(|| extract_text_objstm_path(data, page_num))
    };
    let Some(all_text) = extracted.filter(|text| !text.is_empty()) else {
        return;
    };

    let mut lines = Vec::new();
    let mut top = 0.0f32;

    for line_text in all_text.lines() {
        if line_text.trim().is_empty() {
            // Blank line: advance vertically but emit nothing.
            top += 12.0;
            continue;
        }

        let bottom = top + 12.0;
        let mut pen_x = 0.0f32;
        let chars: Vec<StextChar> = line_text
            .chars()
            .map(|ch| {
                let left = pen_x;
                pen_x += 7.0;
                let right = left + 7.0;
                StextChar {
                    c: ch as i32,
                    bidi: 0,
                    flags: 0,
                    argb: 0xFF000000,
                    origin: Point { x: left, y: top },
                    quad: Quad {
                        ul_x: left,
                        ul_y: top,
                        ur_x: right,
                        ur_y: top,
                        ll_x: left,
                        ll_y: bottom,
                        lr_x: right,
                        lr_y: bottom,
                    },
                    size: 12.0,
                    font: None,
                }
            })
            .collect();

        lines.push(StextLine {
            wmode: 0,
            flags: 0,
            dir: Point { x: 1.0, y: 0.0 },
            bbox: Rect {
                x0: 0.0,
                y0: top,
                x1: pen_x,
                y1: bottom,
            },
            chars,
        });
        top += 14.0;
    }

    // The block spans the page width and ends at the bottom of the last line.
    let Some(block_bottom) = lines.last().map(|line| line.bbox.y1) else {
        return;
    };
    stext.blocks.push(StextBlock {
        block_type: StextBlockType::Text,
        id: 0,
        bbox: Rect {
            x0: 0.0,
            y0: 0.0,
            x1: stext.mediabox.x1,
            y1: block_bottom,
        },
        lines,
        image: None,
        struct_down: None,
        struct_index: 0,
        text_flags: 0,
    });
}

/// Try extracting text from raw (uncompressed) page objects in the PDF data.
///
/// The data is decoded lossily as UTF-8 and scanned for `/Type /Page` tags
/// (tags immediately followed by `s`/`S`, i.e. `/Type /Pages`, are skipped).
/// The `page_num`-th such tag (0-based, in file order) selects the page. Its
/// object text is taken from shortly before the preceding ` obj` up to the
/// next `endobj` (or at most 2000 bytes when there is none). The `/Contents`
/// entry of that text, either a single reference or an array of references, is
/// resolved through `extract_stream_content`, and the text of all content
/// streams is joined with single spaces.
///
/// Returns `None` when the page is not found, has no `/Contents`, or yields no
/// text.
fn extract_text_raw_path(data: &[u8], page_num: usize) -> Option<String> {
    const PAGE_TAG: &str = "/Type /Page";

    let data_str = String::from_utf8_lossy(data);

    let mut page_count = 0usize;
    let mut search_start = 0;

    while let Some(pos) = data_str[search_start..].find(PAGE_TAG) {
        let abs_pos = search_start + pos;
        let tag_end = abs_pos + PAGE_TAG.len();
        // Resume directly after the tag. That offset is always within bounds
        // and on a char boundary; skipping one further byte could step past
        // the end of the string or into the middle of a multi-byte character
        // and make the next slice panic.
        search_start = tag_end;

        let after = &data_str[tag_end..];
        if after.starts_with('s') || after.starts_with('S') {
            // `/Type /Pages` is a page-tree node, not a page.
            continue;
        }
        if page_count != page_num {
            page_count += 1;
            continue;
        }

        // Back up a little before " obj" so the object header is included.
        let mut obj_region_start = data_str[..abs_pos]
            .rfind(" obj")
            .map(|p| p.saturating_sub(20))
            .unwrap_or_else(|| abs_pos.saturating_sub(200));
        while obj_region_start > 0 && !data_str.is_char_boundary(obj_region_start) {
            obj_region_start -= 1;
        }

        let mut obj_region_end = data_str[abs_pos..]
            .find("endobj")
            .map(|p| abs_pos + p + 6)
            .unwrap_or_else(|| data_str.len().min(abs_pos + 2000));
        // The 2000-byte fallback may land inside a multi-byte character; back
        // up to a boundary (never below `tag_end`, which is one).
        while !data_str.is_char_boundary(obj_region_end) {
            obj_region_end -= 1;
        }
        let page_obj = &data_str[obj_region_start..obj_region_end];

        let contents_pos = page_obj.find("/Contents")?;
        let trimmed = page_obj[contents_pos + 9..].trim_start();

        // `/Contents` is either `[N 0 R M 0 R ...]` or a single `N 0 R`.
        let obj_nums = if trimmed.starts_with('[') {
            let bracket_end = trimmed.find(']').unwrap_or(trimmed.len());
            parse_obj_refs(&trimmed[1..bracket_end])
        } else if let Some(space) = trimmed.find(' ') {
            trimmed[..space]
                .parse::<usize>()
                .ok()
                .map_or(vec![], |n| vec![n])
        } else {
            vec![]
        };

        let mut all_text = String::new();
        for obj_num in obj_nums {
            if let Some(stream_data) = extract_stream_content(data, obj_num) {
                let text = extract_text_from_content_stream(&stream_data);
                if !text.is_empty() {
                    if !all_text.is_empty() {
                        all_text.push(' ');
                    }
                    all_text.push_str(&text);
                }
            }
        }

        return if all_text.is_empty() {
            None
        } else {
            Some(all_text)
        };
    }
    None
}

/// Extract the text of page `page_num` when page objects live in object streams.
///
/// All object streams are decompressed via `parse_all_object_streams`. The
/// target page object is chosen from the `/Kids` arrays of `/Type /Pages`
/// objects when they list at least `page_num + 1` references; otherwise from
/// the `/Type /Page` objects sorted by object number. The page's `/Contents`
/// references are then resolved against the raw file bytes with
/// `extract_stream_content`, and the text of all streams is joined with single
/// spaces.
///
/// Returns `None` when parsing fails, no objects are found, the page cannot be
/// located, it has no `/Contents`, or no text is produced.
fn extract_text_objstm_path(data: &[u8], page_num: usize) -> Option<String> {
    use crate::enhanced::object_stream::parse_all_object_streams;

    let all_objects = parse_all_object_streams(data).ok()?;
    if all_objects.is_empty() {
        return None;
    }

    let is_pages_node =
        |obj: &str| obj.contains("/Type /Pages") || obj.contains("/Type/Pages");
    let is_page_leaf = |obj: &str| {
        (obj.contains("/Type /Page") || obj.contains("/Type/Page")) && !is_pages_node(obj)
    };

    // Fallback ordering: leaf pages sorted by object number.
    let mut page_objects: Vec<(i32, &str)> = all_objects
        .iter()
        .filter(|(_, obj)| is_page_leaf(obj.as_str()))
        .map(|(num, obj)| (*num, obj.as_str()))
        .collect();
    page_objects.sort_by_key(|&(num, _)| num);

    // Preferred ordering: object numbers listed in `/Kids` arrays.
    let mut kids_order: Vec<i32> = Vec::new();
    for obj_data in all_objects.values() {
        if !is_pages_node(obj_data.as_str()) {
            continue;
        }
        let Some(kids_at) = obj_data.find("/Kids") else {
            continue;
        };
        let after = &obj_data[kids_at + 5..];
        let Some(open) = after.find('[') else {
            continue;
        };
        let Some(close_rel) = after[open..].find(']') else {
            continue;
        };

        let tokens: Vec<&str> = after[open + 1..open + close_rel]
            .split_whitespace()
            .collect();
        let mut cursor = 0;
        while let Some(&marker) = tokens.get(cursor + 2) {
            if marker != "R" {
                cursor += 1;
                continue;
            }
            if let Ok(num) = tokens[cursor].parse::<i32>() {
                kids_order.push(num);
            }
            cursor += 3;
        }
    }

    let target_obj_num = kids_order
        .get(page_num)
        .copied()
        .or_else(|| page_objects.get(page_num).map(|&(num, _)| num))?;

    let page_obj_data = all_objects
        .iter()
        .find_map(|(num, obj)| (*num == target_obj_num).then(|| obj.as_str()))?;

    // `/Contents` is either `[N 0 R M 0 R ...]` or a single `N 0 R`.
    let contents_at = page_obj_data.find("/Contents")?;
    let after_contents = page_obj_data[contents_at + 9..].trim_start();
    let obj_nums: Vec<usize> = match after_contents.strip_prefix('[') {
        Some(inner) => {
            let end = inner.find(']').unwrap_or(inner.len());
            parse_obj_refs(&inner[..end])
        }
        None => after_contents
            .split_once(' ')
            .and_then(|(head, _)| head.parse::<usize>().ok())
            .into_iter()
            .collect(),
    };

    let mut all_text = String::new();
    for obj_num in obj_nums {
        // Content streams are looked up in the raw file bytes.
        let Some(stream_data) = extract_stream_content(data, obj_num) else {
            continue;
        };
        let text = extract_text_from_content_stream(&stream_data);
        if text.is_empty() {
            continue;
        }
        if !all_text.is_empty() {
            all_text.push(' ');
        }
        all_text.push_str(&text);
    }

    (!all_text.is_empty()).then_some(all_text)
}

/// Collect the object numbers of indirect references such as `3 0 R 5 0 R`.
///
/// The input is split on whitespace and scanned for token triples whose third
/// token is `R`. The first token of each triple is parsed as the object number;
/// a triple whose number does not parse is skipped as a whole. When the third
/// token is not `R`, the scan moves forward by one token.
fn parse_obj_refs(s: &str) -> Vec<usize> {
    let tokens: Vec<&str> = s.split_whitespace().collect();
    let mut object_numbers = Vec::new();

    let mut cursor = 0;
    while let Some(&marker) = tokens.get(cursor + 2) {
        if marker != "R" {
            cursor += 1;
            continue;
        }
        object_numbers.extend(tokens[cursor].parse::<usize>().ok());
        cursor += 3;
    }

    object_numbers
}

/// Return the index of the first byte-exact occurrence of `needle` in
/// `haystack`, or `None` when `needle` is empty, longer than `haystack`, or
/// absent.
fn find_bytes_subslice(haystack: &[u8], needle: &[u8]) -> Option<usize> {
    let needle_len = needle.len();
    if needle_len == 0 || needle_len > haystack.len() {
        return None;
    }

    let last_start = haystack.len() - needle_len;
    (0..=last_start).find(|&at| &haystack[at..at + needle_len] == needle)
}

/// Extract and decompress a stream object's content.
///
/// Looks for the header `"<obj_num> 0 obj"` (generation 0 only) in `raw_data`,
/// then takes the bytes between the following `stream` keyword and the next
/// `endstream`. One optional `\r` and one optional `\n` after `stream` are
/// skipped, and trailing `\r`/`\n` bytes before `endstream` are trimmed. If the
/// bytes between the header and `stream` contain `/FlateDecode` (or `/Fl`),
/// the payload is inflated as zlib, falling back to raw deflate.
///
/// Returns `None` when the header, `stream` or `endstream` is missing, the
/// payload is empty, or both decompression attempts fail.
///
/// Uses **raw PDF bytes only**. Previous versions mixed `String::from_utf8_lossy` positions with
/// `raw_data` indices, which desynchronizes when the file contains invalid UTF-8 (common in PDFs)
/// and can panic or return wrong slices.
fn extract_stream_content(raw_data: &[u8], obj_num: usize) -> Option<Vec<u8>> {
    let obj_marker = format!("{} 0 obj", obj_num);
    let marker_bytes = obj_marker.as_bytes();

    // A plain substring search for "3 0 obj" also hits "13 0 obj". Skip any
    // hit whose preceding byte is a digit, i.e. the tail of a longer number.
    let mut scan_from = 0usize;
    let obj_pos = loop {
        let candidate = scan_from + find_bytes_subslice(&raw_data[scan_from..], marker_bytes)?;
        if candidate == 0 || !raw_data[candidate - 1].is_ascii_digit() {
            break candidate;
        }
        scan_from = candidate + 1;
    };
    let after_obj = &raw_data[obj_pos + marker_bytes.len()..];

    // Find "stream" keyword (byte search — aligns with raw_data indices)
    let stream_keyword_pos = find_bytes_subslice(after_obj, b"stream")?;
    let stream_start_rel = stream_keyword_pos + b"stream".len();
    // Skip \r\n or \n after "stream"
    let abs_stream_start = obj_pos + marker_bytes.len() + stream_start_rel;
    let mut content_start = abs_stream_start;
    if content_start < raw_data.len() && raw_data[content_start] == b'\r' {
        content_start += 1;
    }
    if content_start < raw_data.len() && raw_data[content_start] == b'\n' {
        content_start += 1;
    }

    // Find "endstream"
    let after_stream_kw = &after_obj[stream_start_rel..];
    let endstream_rel = find_bytes_subslice(after_stream_kw, b"endstream")?;
    let content_end = obj_pos + marker_bytes.len() + stream_start_rel + endstream_rel;
    // Trim trailing line-ending bytes
    let mut actual_end = content_end;
    while actual_end > content_start
        && (raw_data[actual_end - 1] == b'\n' || raw_data[actual_end - 1] == b'\r')
    {
        actual_end -= 1;
    }

    if content_start >= actual_end || content_start >= raw_data.len() {
        return None;
    }

    let stream_bytes = &raw_data[content_start..actual_end.min(raw_data.len())];

    // Check if FlateDecode filter is used (scan object dict bytes only)
    let obj_dict = &after_obj[..stream_keyword_pos];
    if find_bytes_subslice(obj_dict, b"/FlateDecode").is_some()
        || find_bytes_subslice(obj_dict, b"/Fl").is_some()
    {
        // Decompress with flate2
        use flate2::read::ZlibDecoder;
        use std::io::Read;
        let mut decoder = ZlibDecoder::new(stream_bytes);
        let mut decompressed = Vec::new();
        if decoder.read_to_end(&mut decompressed).is_ok() {
            return Some(decompressed);
        }
        // Try raw deflate as fallback
        use flate2::read::DeflateDecoder;
        let mut decoder = DeflateDecoder::new(stream_bytes);
        let mut fallback = Vec::new();
        if decoder.read_to_end(&mut fallback).is_ok() {
            return Some(fallback);
        }
        None
    } else {
        Some(stream_bytes.to_vec())
    }
}

/// Pull the visible text out of a PDF content stream.
///
/// The stream is decoded lossily as UTF-8 and processed line by line. Only
/// lines between a line that is exactly `BT` and one that is exactly `ET` are
/// considered, and each line is classified by its trailing operator:
///
/// * `Tj` — the literal or hex string is appended, preceded by a space when
///   output already exists.
/// * `TJ` — every string of the array is appended with no separator.
/// * `'`  — the literal string is appended, preceded by a newline when output
///   already exists.
/// * `Td` / `TD` — when the second-to-last operand has a magnitude above 1.0,
///   a newline is appended (unless the output is empty or already ends in one).
fn extract_text_from_content_stream(content: &[u8]) -> String {
    let content_str = String::from_utf8_lossy(content);
    let mut result = String::new();
    let mut inside_text_object = false;

    for raw_line in content_str.lines() {
        let op_line = raw_line.trim();

        match op_line {
            "BT" => {
                inside_text_object = true;
                continue;
            }
            "ET" => {
                inside_text_object = false;
                continue;
            }
            _ => {}
        }
        if !inside_text_object {
            continue;
        }

        // A line can end with at most one of these operators.
        if op_line.ends_with("Tj") {
            let shown = extract_paren_string(op_line).or_else(|| extract_hex_string(op_line));
            if let Some(text) = shown {
                if !result.is_empty() {
                    result.push(' ');
                }
                result.push_str(&text);
            }
        } else if op_line.ends_with("TJ") {
            result.extend(extract_tj_array(op_line));
        } else if op_line.len() > 1 && op_line.ends_with('\'') {
            if let Some(text) = extract_paren_string(op_line) {
                if !result.is_empty() {
                    result.push('\n');
                }
                result.push_str(&text);
            }
        } else if op_line.ends_with("Td") || op_line.ends_with("TD") {
            // Operands are `tx ty Td`; a noticeable vertical move hints at a new line.
            let operands: Vec<&str> = op_line.split_whitespace().collect();
            let moved_vertically = operands.len() >= 3
                && operands[operands.len() - 2]
                    .parse::<f32>()
                    .is_ok_and(|ty| ty.abs() > 1.0);
            if moved_vertically && !result.is_empty() && !result.ends_with('\n') {
                result.push('\n');
            }
        }
    }

    result
}

/// Extract the first literal string `( ... )` from a PDF operator line.
///
/// The string starts at the first `(` in `s` and ends at the matching `)`.
/// Nested, balanced parentheses belong to the string; a character preceded by
/// a backslash never opens or closes a parenthesis. Returns `None` when `s`
/// has no `(` or the parentheses are never balanced.
///
/// The escape sequences `\n`, `\r`, `\t`, `\b`, `\f`, `\(`, `\)` and `\\` are
/// decoded. Any other escape is kept verbatim (backslash plus the following
/// character), and a trailing lone backslash is kept as-is.
fn extract_paren_string(s: &str) -> Option<String> {
    let start = s.find('(')?;

    // Use byte offsets (`char_indices`), not character counts: the line may
    // contain multi-byte characters, and a character count used as a slice
    // index would truncate the string or panic on a non-boundary index.
    let mut depth = 0i32;
    let mut escaped = false;
    let mut end = None;
    for (offset, ch) in s[start..].char_indices() {
        if escaped {
            // The character after a backslash is data, never a delimiter.
            escaped = false;
            continue;
        }
        match ch {
            '\\' => escaped = true,
            '(' => depth += 1,
            ')' => {
                depth -= 1;
                if depth == 0 {
                    end = Some(start + offset);
                    break;
                }
            }
            _ => {}
        }
    }
    let end = end?;

    // Unescape PDF string escapes
    let raw = &s[start + 1..end];
    let mut result = String::with_capacity(raw.len());
    let mut chars = raw.chars();
    while let Some(ch) = chars.next() {
        if ch != '\\' {
            result.push(ch);
            continue;
        }
        match chars.next() {
            Some('n') => result.push('\n'),
            Some('r') => result.push('\r'),
            Some('t') => result.push('\t'),
            Some('b') => result.push('\u{08}'),
            Some('f') => result.push('\u{0C}'),
            Some('(') => result.push('('),
            Some(')') => result.push(')'),
            Some('\\') => result.push('\\'),
            Some(other) => {
                result.push('\\');
                result.push(other);
            }
            None => result.push('\\'),
        }
    }
    Some(result)
}

/// Decode the contents of a PDF hex string (the text between `<` and `>`).
///
/// Whitespace is ignored. Characters are consumed in pairs, each pair parsed
/// as one hexadecimal byte; pairs that fail to parse are dropped. A trailing
/// unpaired digit is completed with `0`.
///
/// When the byte count is even and at least one byte is zero, the bytes are
/// first tried as UTF-16BE; a successful, non-empty decode is returned.
/// Otherwise the bytes are decoded lossily as UTF-8.
fn decode_hex_bytes(hex: &str) -> String {
    let digits: Vec<char> = hex
        .trim()
        .chars()
        .filter(|c| !c.is_whitespace())
        .collect();

    let mut bytes: Vec<u8> = Vec::with_capacity(digits.len() / 2 + 1);
    let mut pair = String::with_capacity(8);
    for chunk in digits.chunks(2) {
        pair.clear();
        pair.push(chunk[0]);
        // An odd final digit is treated as the high nibble of "X0".
        pair.push(chunk.get(1).copied().unwrap_or('0'));
        if let Ok(byte) = u8::from_str_radix(&pair, 16) {
            bytes.push(byte);
        }
    }

    // A zero byte in an even-length payload suggests UTF-16BE (common with CID fonts).
    let looks_like_utf16 = bytes.len() >= 2 && bytes.len() % 2 == 0 && bytes.contains(&0);
    if looks_like_utf16 {
        let units: Vec<u16> = bytes
            .chunks_exact(2)
            .map(|unit| u16::from_be_bytes([unit[0], unit[1]]))
            .collect();
        match String::from_utf16(&units) {
            Ok(text) if !text.is_empty() => return text,
            _ => {}
        }
    }

    String::from_utf8_lossy(&bytes).into_owned()
}

/// Extract and decode the first hex string of a PDF operator line, e.g.
/// `<48656C6C6F> Tj`.
///
/// Returns `None` when the line has no `<`, when the first `<` is immediately
/// followed by another `<` (a dictionary opener), when there is no closing
/// `>`, or when the decoded text is empty.
fn extract_hex_string(s: &str) -> Option<String> {
    let (_, after_open) = s.split_once('<')?;
    // Make sure it's not a dict `<<`
    if after_open.starts_with('<') {
        return None;
    }
    let (hex_content, _) = after_open.split_once('>')?;

    Some(decode_hex_bytes(hex_content)).filter(|decoded| !decoded.is_empty())
}

/// Extract the strings of a `TJ` array such as `[(Hello) -10 (World)] TJ` or
/// `[<hex> -10 <hex>] TJ`.
///
/// The array body runs from the first `[` to the last `]` after it (or to the
/// end of the line when there is none). Literal strings are returned verbatim:
/// nested parentheses are honoured and a backslash shields the byte after it
/// from being read as a delimiter, but escape sequences are not decoded. Hex
/// strings are decoded with `decode_hex_bytes`. Numbers and other tokens are
/// ignored, as are empty strings.
fn extract_tj_array(s: &str) -> Vec<String> {
    let mut texts = Vec::new();
    let bracket_start = match s.find('[') {
        Some(p) => p + 1,
        None => return texts,
    };
    // Search for the closing bracket only after the opening one; a `]` that
    // appears earlier on the line would otherwise give an inverted range and
    // make the slice panic.
    let bracket_end = s[bracket_start..]
        .rfind(']')
        .map_or(s.len(), |rel| bracket_start + rel);
    let bytes = s[bracket_start..bracket_end].as_bytes();

    let mut i = 0;
    while i < bytes.len() {
        match bytes[i] {
            b'(' => {
                let start = i + 1;
                let mut depth = 1i32;
                let mut j = start;
                while j < bytes.len() {
                    match bytes[j] {
                        // Skip the escaped byte. Tracking the escape state
                        // (rather than peeking at the previous byte) keeps
                        // `\\)` working: there the paren is real.
                        b'\\' => j += 1,
                        b'(' => depth += 1,
                        b')' => {
                            depth -= 1;
                            if depth == 0 {
                                break;
                            }
                        }
                        _ => {}
                    }
                    j += 1;
                }
                let end = j.min(bytes.len());
                if end > start {
                    texts.push(String::from_utf8_lossy(&bytes[start..end]).into_owned());
                }
                i = end + 1;
            }
            b'<' if bytes.get(i + 1) != Some(&b'<') => {
                let start = i + 1;
                let end = bytes[start..]
                    .iter()
                    .position(|&b| b == b'>')
                    .map_or(bytes.len(), |rel| start + rel);
                if end > start {
                    let hex = String::from_utf8_lossy(&bytes[start..end]);
                    let decoded = decode_hex_bytes(&hex);
                    if !decoded.is_empty() {
                        texts.push(decoded);
                    }
                }
                i = end + 1;
            }
            _ => i += 1,
        }
    }

    texts
}

/// Convert stext page to buffer
///
/// Asks `fz_stext_page_as_text` for the page text and copies its bytes
/// (without the trailing NUL) into a newly registered buffer. When no text
/// pointer is returned, a buffer created with `Buffer::new(0)` is registered
/// instead, so the caller always receives a handle from `BUFFERS`.
#[unsafe(no_mangle)]
pub extern "C" fn fz_new_buffer_from_stext_page(ctx: Handle, stext: Handle) -> Handle {
    let raw_text = fz_stext_page_as_text(ctx, stext);

    let buffer = if raw_text.is_null() {
        // Nothing to copy: hand back an empty buffer rather than a null handle.
        Buffer::new(0)
    } else {
        // SAFETY: fz_stext_page_as_text returns a valid C string
        let text = unsafe { std::ffi::CStr::from_ptr(raw_text) };
        // NOTE(review): `raw_text` is never released here — confirm whether
        // the callee retains ownership or the string must be freed.
        Buffer::from_data(text.to_bytes())
    };

    BUFFERS.insert(buffer)
}

// ============================================================================
// Pixmap Compatibility Functions
// ============================================================================

/// Create pixmap from page
///
/// Sizes a pixmap from the page bounds as scaled by `ctm` (each dimension is
/// rounded up and clamped to at least 1), then tries to paint the page's
/// embedded JPEG into it. This targets scanned PDFs, where the N-th extracted
/// JPEG is taken to be the image of page N. If the document cannot be locked,
/// has no image at that index, or the image fails to decode, the pixmap is
/// returned as `Pixmap::new` created it.
///
/// `cs == 0` selects `FZ_COLORSPACE_RGB`; any non-zero `alpha` requests an
/// alpha channel. Returns 0 when `page` is unknown or its lock cannot be taken,
/// otherwise the handle of the new pixmap in `PIXMAPS`.
#[unsafe(no_mangle)]
pub extern "C" fn fz_new_pixmap_from_page(
    _ctx: Handle,
    page: Handle,
    ctm: fz_matrix,
    cs: ColorspaceHandle,
    alpha: i32,
) -> Handle {
    // Read everything needed from the page inside a scope so the page lock is
    // released before the document is touched.
    let (width, height, doc_handle, page_num) = {
        let Some(page_arc) = PAGES.get(page) else {
            return 0;
        };
        let Ok(guard) = page_arc.lock() else {
            return 0;
        };

        let page_w = guard.bounds[2] - guard.bounds[0];
        let page_h = guard.bounds[3] - guard.bounds[1];
        // Extent of the transformed page box along each output axis.
        let w = (page_w * ctm.a.abs() + page_h * ctm.c.abs()).ceil() as i32;
        let h = (page_w * ctm.b.abs() + page_h * ctm.d.abs()).ceil() as i32;

        (w.max(1), h.max(1), guard.doc_handle, guard.page_num)
    };

    // A zero colorspace handle means "use RGB".
    let colorspace = if cs == 0 { FZ_COLORSPACE_RGB } else { cs };

    // Per the original note, a new pixmap starts out white.
    let mut pixmap = Pixmap::new(colorspace, width, height, alpha != 0);

    // Best effort: overlay the page's scanned image when one can be found.
    if let Some(doc_arc) = DOCUMENTS.get(doc_handle) {
        if let Ok(doc_guard) = doc_arc.lock() {
            let images = extract_jpeg_images_from_pdf(doc_guard.data());

            if let Some(jpeg) = images.get(page_num as usize) {
                if let Ok(decoded) = image::load_from_memory(jpeg) {
                    let rgb = decoded.to_rgb8();
                    blit_image_to_pixmap(&mut pixmap, rgb.as_raw(), rgb.width(), rgb.height());
                }
            }
        }
    }

    PIXMAPS.insert(pixmap)
}

#[cfg(test)]
mod tests {
    use super::super::cookie::fz_new_cookie;
    use super::super::document::fz_drop_page;
    use super::*;

    /// One-page PDF written with `\n` escapes and a classic xref table.
    const COMPACT_PDF: &[u8] = b"%PDF-1.4\n1 0 obj<</Type/Catalog/Pages 2 0 R>>endobj\n2 0 obj<</Type/Pages/Count 1/Kids[3 0 R]>>endobj\n3 0 obj<</Type/Page/MediaBox[0 0 612 792]/Parent 2 0 R>>endobj\nxref\n0 4\n0000000000 65535 f \n0000000009 00000 n \n0000000052 00000 n \n0000000101 00000 n \ntrailer<</Size 4/Root 1 0 R>>\nstartxref\n178\n%%EOF";

    /// Header and end-of-file marker only; contains no objects.
    const EMPTY_PDF: &[u8] = b"%PDF-1.4\n%%EOF";

    /// One-page PDF with no named destinations.
    const PAGE_ONLY_PDF: &[u8] = b"%PDF-1.4
1 0 obj << /Type /Catalog /Pages 2 0 R >> endobj
2 0 obj << /Type /Pages /Count 1 /Kids [3 0 R] >> endobj
3 0 obj << /Type /Page /MediaBox [0 0 612 792] >> endobj
xref
0 4
0000000000 65535 f
0000000009 00000 n
0000000058 00000 n
0000000115 00000 n
trailer << /Size 4 /Root 1 0 R >>
startxref
188
%%EOF";

    /// One-page PDF whose object 4 declares a `/MyDest` named destination.
    const NAMED_DEST_PDF: &[u8] = b"%PDF-1.4
1 0 obj << /Type /Catalog /Pages 2 0 R >> endobj
2 0 obj << /Type /Pages /Count 1 /Kids [3 0 R] >> endobj
3 0 obj << /Type /Page /MediaBox [0 0 612 792] >> endobj
4 0 obj << /Names << /Dests << /MyDest [3 0 R /XYZ 0 0 null] >> >> >> endobj
xref
0 5
0000000000 65535 f
0000000009 00000 n
0000000058 00000 n
0000000127 00000 n
0000000200 00000 n
trailer << /Size 5 /Root 1 0 R /Names 4 0 R >>
startxref
273
%%EOF";

    // ------------------------------------------------------------------
    // Cookie aliases
    // ------------------------------------------------------------------

    #[test]
    fn test_cookie_aliases() {
        let handle = fz_new_cookie(0);
        assert_ne!(handle, 0);

        // A fresh cookie reports neither an abort nor any progress.
        assert_eq!(fz_cookie_is_aborted(0, handle), 0);
        assert_eq!(fz_cookie_progress(0, handle), 0);

        // Aborting and then resetting must both be visible through the aliases.
        fz_abort_cookie(0, handle);
        assert_eq!(fz_cookie_is_aborted(0, handle), 1);

        fz_reset_cookie(0, handle);
        assert_eq!(fz_cookie_is_aborted(0, handle), 0);
    }

    // ------------------------------------------------------------------
    // Opening documents from memory
    // ------------------------------------------------------------------

    #[test]
    fn test_open_document_with_buffer() {
        let pdf = COMPACT_PDF;
        let doc = fz_open_document_with_buffer(0, std::ptr::null(), pdf.as_ptr(), pdf.len());
        assert_ne!(doc, 0);

        assert!(fz_count_pages(0, doc) >= 1);
    }

    #[test]
    fn test_new_stext_page_from_page() {
        // A real document and page are needed before text can be extracted.
        let pdf = COMPACT_PDF;
        let doc = fz_open_document_with_buffer(0, std::ptr::null(), pdf.as_ptr(), pdf.len());
        let first_page = fz_load_page(0, doc, 0);

        let stext = fz_new_stext_page_from_page(0, first_page, std::ptr::null());
        assert_ne!(stext, 0);
    }

    #[test]
    fn test_new_buffer_from_stext_page() {
        use super::super::stext::fz_new_stext_page;

        let stext = fz_new_stext_page(0, 0.0, 0.0, 612.0, 792.0);
        assert_ne!(stext, 0);

        let buffer = fz_new_buffer_from_stext_page(0, stext);
        assert_ne!(buffer, 0);
    }

    #[test]
    fn test_open_document_with_buffer_null_data() {
        // A null data pointer must be rejected even with a non-zero length.
        let doc = fz_open_document_with_buffer(0, std::ptr::null(), std::ptr::null(), 100);
        assert_eq!(doc, 0);
    }

    #[test]
    fn test_open_document_with_buffer_zero_len() {
        // A valid pointer with zero length must be rejected as well.
        let header = b"%PDF-1.4";
        let doc = fz_open_document_with_buffer(0, std::ptr::null(), header.as_ptr(), 0);
        assert_eq!(doc, 0);
    }

    // ------------------------------------------------------------------
    // Named destination lookup
    // ------------------------------------------------------------------

    #[test]
    fn test_pdf_lookup_named_dest_null_name() {
        let pdf = EMPTY_PDF;
        let doc = fz_open_document_with_buffer(0, std::ptr::null(), pdf.as_ptr(), pdf.len());
        // The fixture may be refused outright; in that case nothing is checked.
        if doc == 0 {
            return;
        }

        let result = pdf_lookup_named_dest(0, doc, std::ptr::null());
        assert_eq!(result, -1);
        crate::ffi::DOCUMENTS.remove(doc);
    }

    #[test]
    fn test_pdf_lookup_named_dest_invalid_doc() {
        let dest = std::ffi::CString::new("dest").unwrap();
        assert_eq!(pdf_lookup_named_dest(0, 99999, dest.as_ptr()), -1);
    }

    #[test]
    fn test_new_stext_page_from_page_invalid() {
        // Page handle 0 is passed; a stext handle is still expected back.
        let stext = fz_new_stext_page_from_page(0, 0, std::ptr::null());
        assert_ne!(stext, 0);
    }

    // ------------------------------------------------------------------
    // Byte search helper
    // ------------------------------------------------------------------

    #[test]
    fn test_find_bytes_from() {
        let haystack = b"hello world";
        assert_eq!(find_bytes_from(haystack, b"world", 0), Some(6));
        // Too few bytes remain after offset 10 for the needle to fit.
        assert_eq!(find_bytes_from(haystack, b"world", 10), None);
        assert!(find_bytes_from(haystack, b"xyz", 0).is_none());
    }

    #[test]
    fn test_find_bytes_from_empty_needle() {
        assert!(find_bytes_from(b"hello", b"", 0).is_none());
    }

    #[test]
    fn test_find_bytes_from_start_past_len() {
        assert!(find_bytes_from(b"ab", b"cd", 5).is_none());
    }

    #[test]
    fn test_new_buffer_from_stext_page_empty() {
        use crate::ffi::stext::fz_new_stext_page;

        let stext = fz_new_stext_page(0, 0.0, 0.0, 612.0, 792.0);
        let buffer = fz_new_buffer_from_stext_page(0, stext);
        assert_ne!(buffer, 0);
        crate::ffi::BUFFERS.remove(buffer);
    }

    #[test]
    fn test_pdf_lookup_named_dest_found() {
        let pdf = NAMED_DEST_PDF;
        let doc = fz_open_document_with_buffer(0, std::ptr::null(), pdf.as_ptr(), pdf.len());
        if doc == 0 {
            return;
        }

        let dest = std::ffi::CString::new("MyDest").unwrap();
        let result = pdf_lookup_named_dest(0, doc, dest.as_ptr());
        crate::ffi::DOCUMENTS.remove(doc);
        // Deliberately loose: either "not resolved" (-1) or a page index passes.
        assert!(result >= -1);
    }

    #[test]
    fn test_pdf_lookup_named_dest_not_found() {
        let pdf = PAGE_ONLY_PDF;
        let doc = fz_open_document_with_buffer(0, std::ptr::null(), pdf.as_ptr(), pdf.len());
        if doc == 0 {
            return;
        }

        let dest = std::ffi::CString::new("NonExistent").unwrap();
        let result = pdf_lookup_named_dest(0, doc, dest.as_ptr());
        crate::ffi::DOCUMENTS.remove(doc);
        assert_eq!(result, -1);
    }

    #[test]
    fn test_pdf_lookup_named_dest_invalid_utf8() {
        let pdf = EMPTY_PDF;
        let doc = fz_open_document_with_buffer(0, std::ptr::null(), pdf.as_ptr(), pdf.len());
        if doc == 0 {
            return;
        }

        // SAFETY: the slice ends with its only NUL byte, which is all that
        // `from_bytes_with_nul_unchecked` requires. 0xC3 0x28 is not valid UTF-8.
        let bad_name = unsafe { std::ffi::CStr::from_bytes_with_nul_unchecked(&[0xc3, 0x28, 0]) };
        let result = pdf_lookup_named_dest(0, doc, bad_name.as_ptr());
        crate::ffi::DOCUMENTS.remove(doc);
        assert_eq!(result, -1);
    }

    #[test]
    fn test_pdf_lookup_named_dest_empty_doc() {
        let pdf = EMPTY_PDF;
        let doc = fz_open_document_with_buffer(0, std::ptr::null(), pdf.as_ptr(), pdf.len());
        if doc == 0 {
            return;
        }

        let dest = std::ffi::CString::new("X").unwrap();
        let result = pdf_lookup_named_dest(0, doc, dest.as_ptr());
        crate::ffi::DOCUMENTS.remove(doc);
        assert_eq!(result, -1);
    }

    #[test]
    fn test_pdf_lookup_named_dest_lock_fail() {
        // NOTE(review): same inputs as `test_pdf_lookup_named_dest_invalid_doc`.
        let dest = std::ffi::CString::new("dest").unwrap();
        assert_eq!(pdf_lookup_named_dest(0, 99999, dest.as_ptr()), -1);
    }

    // ------------------------------------------------------------------
    // Pixmap creation
    // ------------------------------------------------------------------

    #[test]
    fn test_new_pixmap_from_page() {
        let pdf = PAGE_ONLY_PDF;
        let doc = fz_open_document_with_buffer(0, std::ptr::null(), pdf.as_ptr(), pdf.len());
        if doc == 0 {
            return;
        }

        let first_page = fz_load_page(0, doc, 0);
        if first_page != 0 {
            let identity = super::super::geometry::fz_matrix::identity();
            let pixmap = fz_new_pixmap_from_page(0, first_page, identity, 0, 0);
            fz_drop_page(0, first_page);
            // No assertion on the handle: the test only checks the call
            // completes and releases whatever was created.
            if pixmap != 0 {
                super::super::PIXMAPS.remove(pixmap);
            }
        }
        crate::ffi::DOCUMENTS.remove(doc);
    }

    #[test]
    fn test_new_stext_page_from_page_no_page_arc() {
        // NOTE(review): same inputs as `test_new_stext_page_from_page_invalid`.
        let stext = fz_new_stext_page_from_page(0, 0, std::ptr::null());
        assert_ne!(stext, 0);
    }
}