pdfium-render 0.7.27

A high-level idiomatic Rust wrapper around Pdfium, the C++ PDF library used by the Google Chromium project.
Documentation
//! Defines the [PdfPageTextObject] struct, exposing functionality related to a single
//! page object defining a piece of formatted text.

use crate::bindgen::{
    FPDF_ANNOTATION, FPDF_DOCUMENT, FPDF_FONT, FPDF_PAGE, FPDF_PAGEOBJECT, FPDF_TEXT_RENDERMODE,
    FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_CLIP, FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL,
    FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_CLIP,
    FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_STROKE,
    FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_STROKE_CLIP,
    FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_INVISIBLE,
    FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_STROKE,
    FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_STROKE_CLIP,
    FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_UNKNOWN, FPDF_WCHAR,
};
use crate::bindings::PdfiumLibraryBindings;
use crate::document::PdfDocument;
use crate::error::{PdfiumError, PdfiumInternalError};
use crate::font::PdfFont;
use crate::page::PdfPoints;
use crate::page_object::PdfPageObjectCommon;
use crate::page_object_private::internal::PdfPageObjectPrivate;
use crate::page_text::PdfPageText;
use crate::page_text_chars::PdfPageTextChars;
use crate::utils::mem::create_byte_buffer;
use crate::utils::utf16le::get_string_from_pdfium_utf16le_bytes;

/// The text rendering modes supported by the PDF standard, as listed in table 5.3
/// on page 402 in the PDF Reference manual version 1.7.
#[derive(Debug, Copy, Clone, PartialOrd, PartialEq)]
pub enum PdfPageTextRenderMode {
    /// The text render mode is not recognized by Pdfium.
    Unknown = FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_UNKNOWN as isize,

    /// The text will be filled, but not stroked.
    FilledUnstroked = FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL as isize,

    /// The text will be stroked, but not filled.
    StrokedUnfilled = FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_STROKE as isize,

    /// The text will be filled, then stroked.
    FilledThenStroked = FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_STROKE as isize,

    /// The text will be neither filled nor stroked. It will still take up size in the layout, however.
    Invisible = FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_INVISIBLE as isize,

    /// The text will be filled and added to the path for clipping.
    FilledUnstrokedClipping = FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_CLIP as isize,

    /// The text will be stroked and added to the path for clipping.
    StrokedUnfilledClipping = FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_STROKE_CLIP as isize,

    /// The text will be filled, then stroked, and added to the path for clipping.
    FilledThenStrokedClipping = FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_STROKE_CLIP as isize,

    /// The text will be neither filled nor stroked, only added to the path for clipping.
    InvisibleClipping = FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_CLIP as isize,
}

impl PdfPageTextRenderMode {
    #[inline]
    pub(crate) fn from_pdfium(value: u32) -> Result<PdfPageTextRenderMode, PdfiumError> {
        match value as i32 {
            FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_UNKNOWN => Ok(PdfPageTextRenderMode::Unknown),
            FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL => {
                Ok(PdfPageTextRenderMode::FilledUnstroked)
            }
            FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_STROKE => {
                Ok(PdfPageTextRenderMode::StrokedUnfilled)
            }
            FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_STROKE => {
                Ok(PdfPageTextRenderMode::FilledThenStroked)
            }
            FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_INVISIBLE => {
                Ok(PdfPageTextRenderMode::Invisible)
            }
            FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_CLIP => {
                Ok(PdfPageTextRenderMode::FilledUnstrokedClipping)
            }
            FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_STROKE_CLIP => {
                Ok(PdfPageTextRenderMode::StrokedUnfilledClipping)
            }
            FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_STROKE_CLIP => {
                Ok(PdfPageTextRenderMode::FilledThenStrokedClipping)
            }
            FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_CLIP => {
                Ok(PdfPageTextRenderMode::InvisibleClipping)
            }
            _ => Err(PdfiumError::UnknownPdfPageTextRenderMode),
        }
    }

    #[inline]
    #[allow(dead_code)]
    // The as_pdfium() function is not currently used, but we expect it to be in future
    pub(crate) fn as_pdfium(&self) -> FPDF_TEXT_RENDERMODE {
        match self {
            PdfPageTextRenderMode::Unknown => FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_UNKNOWN,
            PdfPageTextRenderMode::FilledUnstroked => FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL,
            PdfPageTextRenderMode::StrokedUnfilled => {
                FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_STROKE
            }
            PdfPageTextRenderMode::FilledThenStroked => {
                FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_STROKE
            }
            PdfPageTextRenderMode::Invisible => FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_INVISIBLE,
            PdfPageTextRenderMode::FilledUnstrokedClipping => {
                FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_CLIP
            }
            PdfPageTextRenderMode::StrokedUnfilledClipping => {
                FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_STROKE_CLIP
            }
            PdfPageTextRenderMode::FilledThenStrokedClipping => {
                FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_STROKE_CLIP
            }
            PdfPageTextRenderMode::InvisibleClipping => {
                FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_CLIP
            }
        }
    }
}

/// A single `PdfPageObject` of type `PdfPageObjectType::Text`. The page object defines a single
/// piece of formatted text.
///
/// Page objects can be created either attached to a `PdfPage` (in which case the page object's
/// memory is owned by the containing page) or detached from any page (in which case the page
/// object's memory is owned by the object). Page objects are not rendered until they are
/// attached to a page; page objects that are never attached to a page will be lost when they
/// fall out of scope.
///
/// The simplest way to create a page text object that is immediately attached to a page
/// is to call the `PdfPageObjects::create_text_object()` function.
///
/// Creating a detached page text object offers more scope for customization, but you must
/// add the object to a containing `PdfPage` manually. To create a detached page text object,
/// use the [PdfPageTextObject::new()] function. The detached page text object can later
/// be attached to a page by using the `PdfPageObjects::add_text_object()` function.
pub struct PdfPageTextObject<'a> {
    object_handle: FPDF_PAGEOBJECT,
    page_handle: Option<FPDF_PAGE>,
    annotation_handle: Option<FPDF_ANNOTATION>,
    bindings: &'a dyn PdfiumLibraryBindings,
}

impl<'a> PdfPageTextObject<'a> {
    #[inline]
    pub(crate) fn from_pdfium(
        object_handle: FPDF_PAGEOBJECT,
        page_handle: Option<FPDF_PAGE>,
        annotation_handle: Option<FPDF_ANNOTATION>,
        bindings: &'a dyn PdfiumLibraryBindings,
    ) -> Self {
        PdfPageTextObject {
            object_handle,
            page_handle,
            annotation_handle,
            bindings,
        }
    }

    /// Creates a new [PdfPageTextObject] from the given arguments. The returned page object
    /// will not be rendered until it is added to a `PdfPage` using the
    /// `PdfPageObjects::add_text_object()` function.
    ///
    /// A single space will be used if the given text is empty, in order to avoid
    /// unexpected behaviour from Pdfium when dealing with empty strings.
    // Specifically, FPDFPageObj_SetText() will crash if we try to apply an empty string to a
    // text object, and FPDFText_LoadPage() will crash if any text object on the page contains
    // an empty string (so it isn't enough to avoid calling FPDFPageObj_SetText() for an empty
    // text object, we _have_ to set a non-empty string to avoid segfaults).
    #[inline]
    pub fn new(
        document: &PdfDocument<'a>,
        text: impl ToString,
        font: &PdfFont,
        font_size: PdfPoints,
    ) -> Result<Self, PdfiumError> {
        Self::new_from_handles(
            *document.handle(),
            text,
            *font.handle(),
            font_size,
            document.bindings(),
        )
    }

    // Take raw FPDF_DOCUMENT and FPDF_FONT handles to avoid cascading lifetime problems
    // associated with borrowing PdfDocument<'a> and/or PdfFont<'a>.
    pub(crate) fn new_from_handles(
        document: FPDF_DOCUMENT,
        text: impl ToString,
        font: FPDF_FONT,
        font_size: PdfPoints,
        bindings: &'a dyn PdfiumLibraryBindings,
    ) -> Result<Self, PdfiumError> {
        let handle = bindings.FPDFPageObj_CreateTextObj(document, font, font_size.value);

        if handle.is_null() {
            if let Some(error) = bindings.get_pdfium_last_error() {
                Err(PdfiumError::PdfiumLibraryInternalError(error))
            } else {
                // This would be an unusual situation; a null handle indicating failure,
                // yet Pdfium's error code indicates success.

                Err(PdfiumError::PdfiumLibraryInternalError(
                    PdfiumInternalError::Unknown,
                ))
            }
        } else {
            let mut result = PdfPageTextObject {
                object_handle: handle,
                page_handle: None,
                annotation_handle: None,
                bindings,
            };

            result.set_text(text)?;

            Ok(result)
        }
    }

    /// Returns the text rendering mode for the text contained within this [PdfPageTextObject].
    pub fn render_mode(&self) -> PdfPageTextRenderMode {
        PdfPageTextRenderMode::from_pdfium(
            self.bindings()
                .FPDFTextObj_GetTextRenderMode(self.object_handle) as u32,
        )
        .unwrap_or(PdfPageTextRenderMode::Unknown)
    }

    /// Returns the effective size of the text when rendered, taking into account both the
    /// font size specified in this text object as well as any vertical scale factor applied
    /// to the text object's transformation matrix.
    ///
    /// To retrieve only the specified font size, ignoring any vertical scaling, use the
    /// [PdfPageTextObject::unscaled_font_size()] function.
    #[inline]
    pub fn scaled_font_size(&self) -> PdfPoints {
        PdfPoints::new(self.unscaled_font_size().value * (self.get_vertical_scale() as f32))
    }

    /// Returns the font size of the text specified in this [PdfPageTextObject].
    ///
    /// Note that the effective size of the text when rendered may differ from the font size
    /// if a scaling factor has been applied to this text object's transformation matrix.
    /// To retrieve the effective font size, taking vertical scaling into account, use the
    /// [PdfPageTextObject::scaled_font_size()] function.
    pub fn unscaled_font_size(&self) -> PdfPoints {
        let mut result = 0.0;

        if self.bindings().is_true(
            self.bindings()
                .FPDFTextObj_GetFontSize(self.object_handle, &mut result),
        ) {
            PdfPoints::new(result)
        } else {
            PdfPoints::ZERO
        }
    }

    /// Returns the [PdfFont] used to render the text contained within this [PdfPageTextObject].
    pub fn font(&self) -> PdfFont {
        PdfFont::from_pdfium(
            self.bindings().FPDFTextObj_GetFont(self.object_handle),
            self.bindings(),
        )
    }

    /// Returns the text contained within this [PdfPageTextObject].
    ///
    /// Text retrieval in Pdfium is handled by the [PdfPageText] object owned by the `PdfPage`
    /// containing this [PdfPageTextObject]. If this text object has not been attached to a page
    /// then text retrieval will be unavailable and an empty string will be returned.
    ///
    /// When retrieving the text from many [PdfPageTextObject] objects (for instance, as part of
    /// a loop or an iterator), it may be faster to open the [PdfPageText] object once and keep
    /// it open while processing the text objects, like so:
    ///
    /// ```
    /// let text_page = page.text()?; // Opens the text page once.
    ///
    /// for object in <some object iterator> {
    ///     let object_text = text_page.for_object(object)?;
    /// }
    /// ```
    ///
    /// The [PdfPageText] object will be closed when the binding to it (`text_page` in the example above)
    /// falls out of scope.
    pub fn text(&self) -> String {
        // Retrieving the text from Pdfium is a two-step operation. First, we call
        // FPDFTextObj_GetText() with a null buffer; this will retrieve the length of
        // the text in bytes. If the length is zero, then there is no text associated
        // with the page object.

        // If the length is non-zero, then we reserve a byte buffer of the given
        // length and call FPDFTextObj_GetText() again with a pointer to the buffer;
        // this will write the text to the buffer in UTF16-LE format.

        if let Some(page_handle) = self.page_handle {
            let text_handle = self.bindings.FPDFText_LoadPage(page_handle);

            if !text_handle.is_null() {
                let buffer_length = self.bindings().FPDFTextObj_GetText(
                    self.object_handle,
                    text_handle,
                    std::ptr::null_mut(),
                    0,
                );

                if buffer_length == 0 {
                    // There is no text.

                    return String::new();
                }

                let mut buffer = create_byte_buffer(buffer_length as usize);

                let result = self.bindings().FPDFTextObj_GetText(
                    self.object_handle,
                    text_handle,
                    buffer.as_mut_ptr() as *mut FPDF_WCHAR,
                    buffer_length,
                );

                assert_eq!(result, buffer_length);

                self.bindings.FPDFText_ClosePage(text_handle);

                get_string_from_pdfium_utf16le_bytes(buffer).unwrap_or_default()
            } else {
                // The PdfPage containing this page object does not have an associated
                // FPDF_TEXTPAGE object.

                String::new()
            }
        } else {
            // This page object is not contained by a PdfPage.

            String::new()
        }
    }

    /// Sets the text contained within this [PdfPageTextObject], replacing any existing text.
    ///
    /// A single space will be used if the given text is empty, in order to avoid
    /// unexpected behaviour from Pdfium when dealing with an empty string.
    pub fn set_text(&mut self, text: impl ToString) -> Result<(), PdfiumError> {
        let text = text.to_string();

        let text = if text.is_empty() { " " } else { text.as_str() };

        if self.bindings().is_true(
            self.bindings()
                .FPDFText_SetText_str(self.object_handle, text),
        ) {
            Ok(())
        } else {
            Err(PdfiumError::PdfiumLibraryInternalError(
                self.bindings()
                    .get_pdfium_last_error()
                    .unwrap_or(PdfiumInternalError::Unknown),
            ))
        }
    }

    /// Sets the text rendering mode for the text contained within this [PdfPageTextObject].
    pub fn set_render_mode(
        &mut self,
        render_mode: PdfPageTextRenderMode,
    ) -> Result<(), PdfiumError> {
        if self.bindings().is_true(
            self.bindings()
                .FPDFTextObj_SetTextRenderMode(self.object_handle, render_mode.as_pdfium()),
        ) {
            Ok(())
        } else {
            Err(PdfiumError::PdfiumLibraryInternalError(
                self.bindings()
                    .get_pdfium_last_error()
                    .unwrap_or(PdfiumInternalError::Unknown),
            ))
        }
    }

    /// Returns a collection of the characters contained within this [PdfPageTextObject],
    /// using character retrieval functionality provided by the given [PdfPageText] object.
    #[inline]
    pub fn chars(&self, text: &'a PdfPageText<'a>) -> Result<PdfPageTextChars<'a>, PdfiumError> {
        text.chars_for_object(self)
    }

    /// Returns `true` if any of the characters contained within this [PdfPageTextObject] have a
    /// glyph shape that descends below the font baseline.
    ///
    /// Character retrieval functionality is provided by the given [PdfPageText] object.
    #[inline]
    pub fn has_descenders(&self, text: &PdfPageText) -> Result<bool, PdfiumError> {
        self.chars(text)
            .map(|chars| chars.iter().any(|char| char.has_descender()))
    }

    /// Returns the descent of this [PdfPageTextObject]. The descent is the maximum distance below
    /// the baseline reached by any glyph in any of the characters contained in this text object,
    /// expressed as a negative points value.
    ///
    /// Character retrieval and bounds measurement is provided by the given [PdfPageText] object.
    pub fn descent(&self, text: &PdfPageText) -> Result<PdfPoints, PdfiumError> {
        let object_bottom = self.get_vertical_translation();

        let mut maximum_descent = object_bottom;

        for char in self.chars(text)?.iter() {
            let char_bottom = char.tight_bounds()?.bottom;

            if char_bottom < maximum_descent {
                maximum_descent = char_bottom;
            }
        }

        Ok(maximum_descent - object_bottom)
    }
}

impl<'a> PdfPageObjectPrivate<'a> for PdfPageTextObject<'a> {
    #[inline]
    fn get_object_handle(&self) -> &FPDF_PAGEOBJECT {
        &self.object_handle
    }

    #[inline]
    fn get_page_handle(&self) -> &Option<FPDF_PAGE> {
        &self.page_handle
    }

    #[inline]
    fn set_page_handle(&mut self, page: FPDF_PAGE) {
        self.page_handle = Some(page);
    }

    #[inline]
    fn clear_page_handle(&mut self) {
        self.page_handle = None;
    }

    #[inline]
    fn get_annotation_handle(&self) -> &Option<FPDF_ANNOTATION> {
        &self.annotation_handle
    }

    #[inline]
    fn set_annotation_handle(&mut self, annotation: FPDF_ANNOTATION) {
        self.annotation_handle = Some(annotation);
    }

    #[inline]
    fn clear_annotation_handle(&mut self) {
        self.annotation_handle = None;
    }

    #[inline]
    fn bindings(&self) -> &dyn PdfiumLibraryBindings {
        self.bindings
    }
}