pdfium_render/pdf/document/page/object/
text.rs

1//! Defines the [PdfPageTextObject] struct, exposing functionality related to a single
2//! page object defining a piece of formatted text.
3
4use crate::bindgen::{
5    FPDF_DOCUMENT, FPDF_FONT, FPDF_PAGEOBJECT, FPDF_TEXT_RENDERMODE,
6    FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_CLIP, FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL,
7    FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_CLIP,
8    FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_STROKE,
9    FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_STROKE_CLIP,
10    FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_INVISIBLE,
11    FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_STROKE,
12    FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_STROKE_CLIP,
13    FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_UNKNOWN, FPDF_WCHAR,
14};
15use crate::bindings::PdfiumLibraryBindings;
16use crate::error::{PdfiumError, PdfiumInternalError};
17use crate::pdf::document::fonts::ToPdfFontToken;
18use crate::pdf::document::page::object::private::internal::PdfPageObjectPrivate;
19use crate::pdf::document::page::object::{
20    PdfPageObject, PdfPageObjectCommon, PdfPageObjectOwnership,
21};
22use crate::pdf::document::page::text::chars::PdfPageTextChars;
23use crate::pdf::document::page::text::PdfPageText;
24use crate::pdf::document::PdfDocument;
25use crate::pdf::font::PdfFont;
26use crate::pdf::matrix::{PdfMatrix, PdfMatrixValue};
27use crate::pdf::points::PdfPoints;
28use crate::utils::mem::create_byte_buffer;
29use crate::utils::utf16le::get_string_from_pdfium_utf16le_bytes;
30use crate::{create_transform_getters, create_transform_setters};
31
32/// The text rendering modes supported by the PDF standard, as listed in table 5.3
33/// on page 402 in the PDF Reference manual version 1.7.
34#[derive(Debug, Copy, Clone, PartialOrd, PartialEq)]
35pub enum PdfPageTextRenderMode {
36    /// The text render mode is not recognized by Pdfium.
37    Unknown = FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_UNKNOWN as isize,
38
39    /// The text will be filled, but not stroked.
40    FilledUnstroked = FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL as isize,
41
42    /// The text will be stroked, but not filled.
43    StrokedUnfilled = FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_STROKE as isize,
44
45    /// The text will be filled, then stroked.
46    FilledThenStroked = FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_STROKE as isize,
47
48    /// The text will be neither filled nor stroked. It will still take up size in the layout, however.
49    Invisible = FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_INVISIBLE as isize,
50
51    /// The text will be filled and added to the path for clipping.
52    FilledUnstrokedClipping = FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_CLIP as isize,
53
54    /// The text will be stroked and added to the path for clipping.
55    StrokedUnfilledClipping = FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_STROKE_CLIP as isize,
56
57    /// The text will be filled, then stroked, and added to the path for clipping.
58    FilledThenStrokedClipping = FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_STROKE_CLIP as isize,
59
60    /// The text will be neither filled nor stroked, only added to the path for clipping.
61    InvisibleClipping = FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_CLIP as isize,
62}
63
64impl PdfPageTextRenderMode {
65    #[inline]
66    pub(crate) fn from_pdfium(value: u32) -> Result<PdfPageTextRenderMode, PdfiumError> {
67        match value as i32 {
68            FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_UNKNOWN => Ok(PdfPageTextRenderMode::Unknown),
69            FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL => {
70                Ok(PdfPageTextRenderMode::FilledUnstroked)
71            }
72            FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_STROKE => {
73                Ok(PdfPageTextRenderMode::StrokedUnfilled)
74            }
75            FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_STROKE => {
76                Ok(PdfPageTextRenderMode::FilledThenStroked)
77            }
78            FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_INVISIBLE => {
79                Ok(PdfPageTextRenderMode::Invisible)
80            }
81            FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_CLIP => {
82                Ok(PdfPageTextRenderMode::FilledUnstrokedClipping)
83            }
84            FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_STROKE_CLIP => {
85                Ok(PdfPageTextRenderMode::StrokedUnfilledClipping)
86            }
87            FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_STROKE_CLIP => {
88                Ok(PdfPageTextRenderMode::FilledThenStrokedClipping)
89            }
90            FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_CLIP => {
91                Ok(PdfPageTextRenderMode::InvisibleClipping)
92            }
93            _ => Err(PdfiumError::UnknownPdfPageTextRenderMode),
94        }
95    }
96
97    #[inline]
98    #[allow(dead_code)]
99    // The as_pdfium() function is not currently used, but we expect it to be in future
100    pub(crate) fn as_pdfium(&self) -> FPDF_TEXT_RENDERMODE {
101        match self {
102            PdfPageTextRenderMode::Unknown => FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_UNKNOWN,
103            PdfPageTextRenderMode::FilledUnstroked => FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL,
104            PdfPageTextRenderMode::StrokedUnfilled => {
105                FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_STROKE
106            }
107            PdfPageTextRenderMode::FilledThenStroked => {
108                FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_STROKE
109            }
110            PdfPageTextRenderMode::Invisible => FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_INVISIBLE,
111            PdfPageTextRenderMode::FilledUnstrokedClipping => {
112                FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_CLIP
113            }
114            PdfPageTextRenderMode::StrokedUnfilledClipping => {
115                FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_STROKE_CLIP
116            }
117            PdfPageTextRenderMode::FilledThenStrokedClipping => {
118                FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_STROKE_CLIP
119            }
120            PdfPageTextRenderMode::InvisibleClipping => {
121                FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_CLIP
122            }
123        }
124    }
125}
126
/// A single `PdfPageObject` of type `PdfPageObjectType::Text`. The page object defines a single
/// piece of formatted text.
///
/// Page objects can be created either attached to a `PdfPage` (in which case the page object's
/// memory is owned by the containing page) or detached from any page (in which case the page
/// object's memory is owned by the object). Page objects are not rendered until they are
/// attached to a page; page objects that are never attached to a page will be lost when they
/// fall out of scope.
///
/// The simplest way to create a page text object that is immediately attached to a page
/// is to call the `PdfPageObjects::create_text_object()` function.
///
/// Creating a detached page text object offers more scope for customization, but you must
/// add the object to a containing `PdfPage` manually. To create a detached page text object,
/// use the [PdfPageTextObject::new()] function. The detached page text object can later
/// be attached to a page by using the `PdfPageObjects::add_text_object()` function.
pub struct PdfPageTextObject<'a> {
    // Raw Pdfium handle identifying the underlying text page object.
    object_handle: FPDF_PAGEOBJECT,
    // Records what, if anything, currently contains this object. It is consulted when
    // retrieving text, which is only possible for objects attached to a page.
    ownership: PdfPageObjectOwnership,
    // The Pdfium bindings through which every library call for this object is made.
    bindings: &'a dyn PdfiumLibraryBindings,
}
148
149impl<'a> PdfPageTextObject<'a> {
150    #[inline]
151    pub(crate) fn from_pdfium(
152        object_handle: FPDF_PAGEOBJECT,
153        ownership: PdfPageObjectOwnership,
154        bindings: &'a dyn PdfiumLibraryBindings,
155    ) -> Self {
156        PdfPageTextObject {
157            object_handle,
158            ownership,
159            bindings,
160        }
161    }
162
163    /// Creates a new [PdfPageTextObject] from the given arguments. The returned page object
164    /// will not be rendered until it is added to a `PdfPage` using the
165    /// `PdfPageObjects::add_text_object()` function.
166    ///
167    /// A single space will be used if the given text is empty, in order to avoid
168    /// unexpected behaviour from Pdfium when dealing with empty strings.
169    // Specifically, `FPDFPageObj_SetText()` will crash if we try to apply an empty string to a
170    // text object, and `FPDFText_LoadPage()` will crash if any text object on the page contains
171    // an empty string (so it isn't enough to avoid calling `FPDFPageObj_SetText()` for an empty
172    // text object, we _have_ to set a non-empty string to avoid segfaults).
173    #[inline]
174    pub fn new(
175        document: &PdfDocument<'a>,
176        text: impl ToString,
177        font: impl ToPdfFontToken,
178        font_size: PdfPoints,
179    ) -> Result<Self, PdfiumError> {
180        Self::new_from_handles(
181            document.handle(),
182            text,
183            font.token().handle(),
184            font_size,
185            document.bindings(),
186        )
187    }
188
189    // Take raw FPDF_DOCUMENT and FPDF_FONT handles to avoid cascading lifetime problems
190    // associated with borrowing PdfDocument<'a> and/or PdfFont<'a>.
191    pub(crate) fn new_from_handles(
192        document: FPDF_DOCUMENT,
193        text: impl ToString,
194        font: FPDF_FONT,
195        font_size: PdfPoints,
196        bindings: &'a dyn PdfiumLibraryBindings,
197    ) -> Result<Self, PdfiumError> {
198        let handle = bindings.FPDFPageObj_CreateTextObj(document, font, font_size.value);
199
200        if handle.is_null() {
201            Err(PdfiumError::PdfiumLibraryInternalError(
202                PdfiumInternalError::Unknown,
203            ))
204        } else {
205            let mut result = PdfPageTextObject {
206                object_handle: handle,
207                ownership: PdfPageObjectOwnership::unowned(),
208                bindings,
209            };
210
211            result.set_text(text)?;
212
213            Ok(result)
214        }
215    }
216
217    /// Returns the text rendering mode for the text contained within this [PdfPageTextObject].
218    pub fn render_mode(&self) -> PdfPageTextRenderMode {
219        PdfPageTextRenderMode::from_pdfium(
220            self.bindings()
221                .FPDFTextObj_GetTextRenderMode(self.object_handle) as u32,
222        )
223        .unwrap_or(PdfPageTextRenderMode::Unknown)
224    }
225
226    /// Returns the effective size of the text when rendered, taking into account both the
227    /// font size specified in this text object as well as any vertical scale factor applied
228    /// to the text object's transformation matrix.
229    ///
230    /// To retrieve only the specified font size, ignoring any vertical scaling, use the
231    /// [PdfPageTextObject::unscaled_font_size()] function.
232    #[inline]
233    pub fn scaled_font_size(&self) -> PdfPoints {
234        PdfPoints::new(self.unscaled_font_size().value * self.get_vertical_scale())
235    }
236
237    /// Returns the font size of the text specified in this [PdfPageTextObject].
238    ///
239    /// Note that the effective size of the text when rendered may differ from the font size
240    /// if a scaling factor has been applied to this text object's transformation matrix.
241    /// To retrieve the effective font size, taking vertical scaling into account, use the
242    /// [PdfPageTextObject::scaled_font_size()] function.
243    pub fn unscaled_font_size(&self) -> PdfPoints {
244        let mut result = 0.0;
245
246        if self.bindings().is_true(
247            self.bindings()
248                .FPDFTextObj_GetFontSize(self.object_handle, &mut result),
249        ) {
250            PdfPoints::new(result)
251        } else {
252            PdfPoints::ZERO
253        }
254    }
255
256    /// Returns the [PdfFont] used to render the text contained within this [PdfPageTextObject].
257    pub fn font(&self) -> PdfFont {
258        PdfFont::from_pdfium(
259            self.bindings().FPDFTextObj_GetFont(self.object_handle),
260            self.bindings(),
261            None,
262            false,
263        )
264    }
265
266    /// Returns the text contained within this [PdfPageTextObject].
267    ///
268    /// Text retrieval in Pdfium is handled by the [PdfPageText] object owned by the `PdfPage`
269    /// containing this [PdfPageTextObject]. If this text object has not been attached to a page
270    /// then text retrieval will be unavailable and an empty string will be returned.
271    ///
272    /// When retrieving the text from many [PdfPageTextObject] objects (for instance, as part of
273    /// a loop or an iterator), it may be faster to open the [PdfPageText] object once and keep
274    /// it open while processing the text objects, like so:
275    ///
276    /// ```
277    /// let text_page = page.text()?; // Opens the text page once.
278    ///
279    /// for object in <some object iterator> {
280    ///     let object_text = text_page.for_object(object)?;
281    /// }
282    /// ```
283    ///
284    /// The [PdfPageText] object will be closed when the binding to it (`text_page` in the example above)
285    /// falls out of scope.
286    pub fn text(&self) -> String {
287        // Retrieving the text from Pdfium is a two-step operation. First, we call
288        // FPDFTextObj_GetText() with a null buffer; this will retrieve the length of
289        // the text in bytes. If the length is zero, then there is no text associated
290        // with the page object.
291
292        // If the length is non-zero, then we reserve a byte buffer of the given
293        // length and call FPDFTextObj_GetText() again with a pointer to the buffer;
294        // this will write the text to the buffer in UTF16-LE format.
295
296        let page_handle = match self.ownership() {
297            PdfPageObjectOwnership::Page(ownership) => Some(ownership.page_handle()),
298            PdfPageObjectOwnership::AttachedAnnotation(ownership) => Some(ownership.page_handle()),
299            _ => None,
300        };
301
302        if let Some(page_handle) = page_handle {
303            let text_handle = self.bindings().FPDFText_LoadPage(page_handle);
304
305            if !text_handle.is_null() {
306                let buffer_length = self.bindings().FPDFTextObj_GetText(
307                    self.object_handle(),
308                    text_handle,
309                    std::ptr::null_mut(),
310                    0,
311                );
312
313                if buffer_length == 0 {
314                    // There is no text.
315
316                    return String::new();
317                }
318
319                let mut buffer = create_byte_buffer(buffer_length as usize);
320
321                let result = self.bindings().FPDFTextObj_GetText(
322                    self.object_handle(),
323                    text_handle,
324                    buffer.as_mut_ptr() as *mut FPDF_WCHAR,
325                    buffer_length,
326                );
327
328                assert_eq!(result, buffer_length);
329
330                self.bindings.FPDFText_ClosePage(text_handle);
331
332                get_string_from_pdfium_utf16le_bytes(buffer).unwrap_or_default()
333            } else {
334                // The PdfPage containing this page object does not have an associated
335                // FPDF_TEXTPAGE object.
336
337                String::new()
338            }
339        } else {
340            // This page object is not contained by a PdfPage.
341
342            String::new()
343        }
344    }
345
346    /// Sets the text contained within this [PdfPageTextObject], replacing any existing text.
347    ///
348    /// A single space will be used if the given text is empty, in order to avoid
349    /// unexpected behaviour from Pdfium when dealing with an empty string.
350    pub fn set_text(&mut self, text: impl ToString) -> Result<(), PdfiumError> {
351        let text = text.to_string();
352
353        let text = if text.is_empty() { " " } else { text.as_str() };
354
355        if self.bindings().is_true(
356            self.bindings()
357                .FPDFText_SetText_str(self.object_handle(), text),
358        ) {
359            Ok(())
360        } else {
361            Err(PdfiumError::PdfiumLibraryInternalError(
362                PdfiumInternalError::Unknown,
363            ))
364        }
365    }
366
367    /// Sets the text rendering mode for the text contained within this [PdfPageTextObject].
368    pub fn set_render_mode(
369        &mut self,
370        render_mode: PdfPageTextRenderMode,
371    ) -> Result<(), PdfiumError> {
372        if self.bindings().is_true(
373            self.bindings()
374                .FPDFTextObj_SetTextRenderMode(self.object_handle(), render_mode.as_pdfium()),
375        ) {
376            Ok(())
377        } else {
378            Err(PdfiumError::PdfiumLibraryInternalError(
379                PdfiumInternalError::Unknown,
380            ))
381        }
382    }
383
    /// Returns a collection of the characters contained within this [PdfPageTextObject],
    /// using character retrieval functionality provided by the given [PdfPageText] object.
    ///
    /// This is a thin wrapper that delegates to the `chars_for_object()` function of the
    /// given [PdfPageText]; any error reported by that call is returned unchanged.
    #[inline]
    pub fn chars(&self, text: &'a PdfPageText<'a>) -> Result<PdfPageTextChars<'a>, PdfiumError> {
        text.chars_for_object(self)
    }
390
391    /// Returns `true` if any of the characters contained within this [PdfPageTextObject] have a
392    /// glyph shape that descends below the font baseline.
393    ///
394    /// Character retrieval functionality is provided by the given [PdfPageText] object.
395    #[inline]
396    pub fn has_descenders(&self, text: &PdfPageText) -> Result<bool, PdfiumError> {
397        self.chars(text)
398            .map(|chars| chars.iter().any(|char| char.has_descender()))
399    }
400
401    /// Returns the descent of this [PdfPageTextObject]. The descent is the maximum distance below
402    /// the baseline reached by any glyph in any of the characters contained in this text object,
403    /// expressed as a negative points value.
404    ///
405    /// Character retrieval and bounds measurement is provided by the given [PdfPageText] object.
406    pub fn descent(&self, text: &PdfPageText) -> Result<PdfPoints, PdfiumError> {
407        let object_bottom = self.get_vertical_translation();
408
409        let mut maximum_descent = object_bottom;
410
411        for char in self.chars(text)?.iter() {
412            let char_bottom = char.tight_bounds()?.bottom();
413
414            if char_bottom < maximum_descent {
415                maximum_descent = char_bottom;
416            }
417        }
418
419        Ok(maximum_descent - object_bottom)
420    }
421
    // Generates the transform setter functions for this object.
    // NOTE(review): the first two arguments look like the receiver and return types of the
    // generated functions, and the three strings look like documentation fragments spliced
    // into the generated doc comments -- confirm against the macro definition.
    create_transform_setters!(
        &mut Self,
        Result<(), PdfiumError>,
        "this [PdfPageTextObject]",
        "this [PdfPageTextObject].",
        "this [PdfPageTextObject],"
    );

    // The transform_impl() function required by the create_transform_setters!() macro
    // is provided by the PdfPageObjectPrivate trait.

    // Generates the transform getter functions for this object; presumably these include
    // the get_vertical_scale() and get_vertical_translation() functions called elsewhere
    // in this impl block -- confirm against the macro definition.
    create_transform_getters!(
        "this [PdfPageTextObject]",
        "this [PdfPageTextObject].",
        "this [PdfPageTextObject],"
    );

    // The get_matrix_impl() function required by the create_transform_getters!() macro
    // is provided by the PdfPageObjectPrivate trait.
441}
442
443impl<'a> PdfPageObjectPrivate<'a> for PdfPageTextObject<'a> {
444    #[inline]
445    fn object_handle(&self) -> FPDF_PAGEOBJECT {
446        self.object_handle
447    }
448
449    #[inline]
450    fn ownership(&self) -> &PdfPageObjectOwnership {
451        &self.ownership
452    }
453
454    #[inline]
455    fn set_ownership(&mut self, ownership: PdfPageObjectOwnership) {
456        self.ownership = ownership;
457    }
458
459    #[inline]
460    fn bindings(&self) -> &dyn PdfiumLibraryBindings {
461        self.bindings
462    }
463
464    #[inline]
465    fn is_copyable_impl(&self) -> bool {
466        true
467    }
468
469    #[inline]
470    fn try_copy_impl<'b>(
471        &self,
472        document: FPDF_DOCUMENT,
473        bindings: &'b dyn PdfiumLibraryBindings,
474    ) -> Result<PdfPageObject<'b>, PdfiumError> {
475        let mut copy = PdfPageTextObject::new_from_handles(
476            document,
477            self.text(),
478            self.font().handle(),
479            self.unscaled_font_size(),
480            bindings,
481        )?;
482
483        copy.set_fill_color(self.fill_color()?)?;
484        copy.set_stroke_color(self.stroke_color()?)?;
485        copy.set_stroke_width(self.stroke_width()?)?;
486        copy.set_line_join(self.line_join()?)?;
487        copy.set_line_cap(self.line_cap()?)?;
488        copy.reset_matrix(self.matrix()?)?;
489
490        Ok(PdfPageObject::Text(copy))
491    }
492}