// pdfium_render/pdf/document/page/object/text.rs

//! Defines the [PdfPageTextObject] struct, exposing functionality related to a single
//! page object defining a piece of formatted text.
3
4use crate::bindgen::{
5    FPDF_DOCUMENT, FPDF_FONT, FPDF_PAGEOBJECT, FPDF_TEXT_RENDERMODE,
6    FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_CLIP, FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL,
7    FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_CLIP,
8    FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_STROKE,
9    FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_STROKE_CLIP,
10    FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_INVISIBLE,
11    FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_STROKE,
12    FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_STROKE_CLIP,
13    FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_UNKNOWN, FPDF_WCHAR,
14};
15use crate::bindings::PdfiumLibraryBindings;
16use crate::error::{PdfiumError, PdfiumInternalError};
17use crate::pdf::document::fonts::ToPdfFontToken;
18use crate::pdf::document::page::object::private::internal::PdfPageObjectPrivate;
19use crate::pdf::document::page::object::{
20    PdfPageObject, PdfPageObjectCommon, PdfPageObjectOwnership,
21};
22
23use crate::pdf::document::PdfDocument;
24use crate::pdf::font::PdfFont;
25use crate::pdf::matrix::{PdfMatrix, PdfMatrixValue};
26use crate::pdf::points::PdfPoints;
27use crate::utils::mem::create_byte_buffer;
28use crate::utils::utf16le::get_string_from_pdfium_utf16le_bytes;
29use crate::{create_transform_getters, create_transform_setters};
30
31#[cfg(any(
32    feature = "pdfium_future",
33    feature = "pdfium_7215",
34    feature = "pdfium_7123",
35    feature = "pdfium_6996",
36    feature = "pdfium_6721",
37    feature = "pdfium_6666",
38    feature = "pdfium_6611",
39))]
40use {
41    crate::pdf::document::page::text::chars::PdfPageTextChars,
42    crate::pdf::document::page::text::PdfPageText,
43};
44
45#[cfg(doc)]
46use {
47    crate::pdf::document::page::object::PdfPageObjectType,
48    crate::pdf::document::page::objects::common::PdfPageObjectsCommon,
49    crate::pdf::document::page::PdfPage,
50};
51
52/// The text rendering modes supported by the PDF standard, as listed in table 5.3
53/// on page 402 in the PDF Reference manual version 1.7.
54#[derive(Debug, Copy, Clone, PartialOrd, PartialEq)]
55pub enum PdfPageTextRenderMode {
56    /// The text render mode is not recognized by Pdfium.
57    Unknown = FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_UNKNOWN as isize,
58
59    /// The text will be filled, but not stroked.
60    FilledUnstroked = FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL as isize,
61
62    /// The text will be stroked, but not filled.
63    StrokedUnfilled = FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_STROKE as isize,
64
65    /// The text will be filled, then stroked.
66    FilledThenStroked = FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_STROKE as isize,
67
68    /// The text will be neither filled nor stroked. It will still take up size in the layout, however.
69    Invisible = FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_INVISIBLE as isize,
70
71    /// The text will be filled and added to the path for clipping.
72    FilledUnstrokedClipping = FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_CLIP as isize,
73
74    /// The text will be stroked and added to the path for clipping.
75    StrokedUnfilledClipping = FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_STROKE_CLIP as isize,
76
77    /// The text will be filled, then stroked, and added to the path for clipping.
78    FilledThenStrokedClipping = FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_STROKE_CLIP as isize,
79
80    /// The text will be neither filled nor stroked, only added to the path for clipping.
81    InvisibleClipping = FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_CLIP as isize,
82}
83
84impl PdfPageTextRenderMode {
85    #[inline]
86    pub(crate) fn from_pdfium(value: i32) -> Result<PdfPageTextRenderMode, PdfiumError> {
87        match value {
88            FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_UNKNOWN => Ok(PdfPageTextRenderMode::Unknown),
89            FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL => {
90                Ok(PdfPageTextRenderMode::FilledUnstroked)
91            }
92            FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_STROKE => {
93                Ok(PdfPageTextRenderMode::StrokedUnfilled)
94            }
95            FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_STROKE => {
96                Ok(PdfPageTextRenderMode::FilledThenStroked)
97            }
98            FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_INVISIBLE => {
99                Ok(PdfPageTextRenderMode::Invisible)
100            }
101            FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_CLIP => {
102                Ok(PdfPageTextRenderMode::FilledUnstrokedClipping)
103            }
104            FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_STROKE_CLIP => {
105                Ok(PdfPageTextRenderMode::StrokedUnfilledClipping)
106            }
107            FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_STROKE_CLIP => {
108                Ok(PdfPageTextRenderMode::FilledThenStrokedClipping)
109            }
110            FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_CLIP => {
111                Ok(PdfPageTextRenderMode::InvisibleClipping)
112            }
113            _ => Err(PdfiumError::UnknownPdfPageTextRenderMode),
114        }
115    }
116
117    #[inline]
118    #[allow(dead_code)]
119    // The as_pdfium() function is not currently used, but we expect it to be in future
120    pub(crate) fn as_pdfium(&self) -> FPDF_TEXT_RENDERMODE {
121        match self {
122            PdfPageTextRenderMode::Unknown => FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_UNKNOWN,
123            PdfPageTextRenderMode::FilledUnstroked => FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL,
124            PdfPageTextRenderMode::StrokedUnfilled => {
125                FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_STROKE
126            }
127            PdfPageTextRenderMode::FilledThenStroked => {
128                FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_STROKE
129            }
130            PdfPageTextRenderMode::Invisible => FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_INVISIBLE,
131            PdfPageTextRenderMode::FilledUnstrokedClipping => {
132                FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_CLIP
133            }
134            PdfPageTextRenderMode::StrokedUnfilledClipping => {
135                FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_STROKE_CLIP
136            }
137            PdfPageTextRenderMode::FilledThenStrokedClipping => {
138                FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_STROKE_CLIP
139            }
140            PdfPageTextRenderMode::InvisibleClipping => {
141                FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_CLIP
142            }
143        }
144    }
145}
146
/// A single [PdfPageObject] of type [PdfPageObjectType::Text]. The page object defines a single
/// piece of formatted text.
///
/// Page objects can be created either attached to a [PdfPage] (in which case the page object's
/// memory is owned by the containing page) or detached from any page (in which case the page
/// object's memory is owned by the object). Page objects are not rendered until they are
/// attached to a page; page objects that are never attached to a page will be lost when they
/// fall out of scope.
///
/// The simplest way to create a page text object that is immediately attached to a page
/// is to call the [PdfPageObjectsCommon::create_text_object()] function.
///
/// Creating a detached page text object offers more scope for customization, but you must
/// add the object to a containing [PdfPage] manually. To create a detached page text object,
/// use the [PdfPageTextObject::new()] function. The detached page text object can later
/// be attached to a page by using the [PdfPageObjectsCommon::add_text_object()] function.
pub struct PdfPageTextObject<'a> {
    // The raw Pdfium handle of the underlying text page object; passed to every
    // `FPDFTextObj_*` / `FPDFText_*` call made on behalf of this object.
    object_handle: FPDF_PAGEOBJECT,
    // Records what (if anything) currently contains this object. Text retrieval uses it
    // to locate the handle of the containing page.
    ownership: PdfPageObjectOwnership,
    // The Pdfium bindings through which all library calls for this object are made.
    bindings: &'a dyn PdfiumLibraryBindings,
}
168
169impl<'a> PdfPageTextObject<'a> {
170    #[inline]
171    pub(crate) fn from_pdfium(
172        object_handle: FPDF_PAGEOBJECT,
173        ownership: PdfPageObjectOwnership,
174        bindings: &'a dyn PdfiumLibraryBindings,
175    ) -> Self {
176        PdfPageTextObject {
177            object_handle,
178            ownership,
179            bindings,
180        }
181    }
182
183    /// Creates a new [PdfPageTextObject] from the given arguments. The returned page object
184    /// will not be rendered until it is added to a [PdfPage] using the
185    /// [PdfPageObjectsCommon::add_text_object()] function.
186    ///
187    /// A single space will be used if the given text is empty, in order to avoid
188    /// unexpected behaviour from Pdfium when dealing with empty strings.
189    // Specifically, `FPDFPageObj_SetText()` will crash if we try to apply an empty string to a
190    // text object, and `FPDFText_LoadPage()` will crash if any text object on the page contains
191    // an empty string (so it isn't enough to avoid calling `FPDFPageObj_SetText()` for an empty
192    // text object, we _have_ to set a non-empty string to avoid segfaults).
193    #[inline]
194    pub fn new(
195        document: &PdfDocument<'a>,
196        text: impl ToString,
197        font: impl ToPdfFontToken,
198        font_size: PdfPoints,
199    ) -> Result<Self, PdfiumError> {
200        Self::new_from_handles(
201            document.handle(),
202            text,
203            font.token().handle(),
204            font_size,
205            document.bindings(),
206        )
207    }
208
209    // Take raw `FPDF_DOCUMENT` and `FPDF_FONT` handles to avoid cascading lifetime problems
210    // associated with borrowing `PdfDocument<'a>` and/or `PdfFont<'a>`.
211    pub(crate) fn new_from_handles(
212        document: FPDF_DOCUMENT,
213        text: impl ToString,
214        font: FPDF_FONT,
215        font_size: PdfPoints,
216        bindings: &'a dyn PdfiumLibraryBindings,
217    ) -> Result<Self, PdfiumError> {
218        let handle = bindings.FPDFPageObj_CreateTextObj(document, font, font_size.value);
219
220        if handle.is_null() {
221            Err(PdfiumError::PdfiumLibraryInternalError(
222                PdfiumInternalError::Unknown,
223            ))
224        } else {
225            let mut result = PdfPageTextObject {
226                object_handle: handle,
227                ownership: PdfPageObjectOwnership::unowned(),
228                bindings,
229            };
230
231            result.set_text(text)?;
232
233            Ok(result)
234        }
235    }
236
237    /// Returns the text rendering mode for the text contained within this [PdfPageTextObject].
238    pub fn render_mode(&self) -> PdfPageTextRenderMode {
239        PdfPageTextRenderMode::from_pdfium(
240            self.bindings()
241                .FPDFTextObj_GetTextRenderMode(self.object_handle),
242        )
243        .unwrap_or(PdfPageTextRenderMode::Unknown)
244    }
245
246    /// Returns the effective size of the text when rendered, taking into account both the
247    /// font size specified in this text object as well as any vertical scale factor applied
248    /// to the text object's transformation matrix.
249    ///
250    /// To retrieve only the specified font size, ignoring any vertical scaling, use the
251    /// [PdfPageTextObject::unscaled_font_size()] function.
252    #[inline]
253    pub fn scaled_font_size(&self) -> PdfPoints {
254        PdfPoints::new(self.unscaled_font_size().value * self.get_vertical_scale())
255    }
256
257    /// Returns the font size of the text specified in this [PdfPageTextObject].
258    ///
259    /// Note that the effective size of the text when rendered may differ from the font size
260    /// if a scaling factor has been applied to this text object's transformation matrix.
261    /// To retrieve the effective font size, taking vertical scaling into account, use the
262    /// [PdfPageTextObject::scaled_font_size()] function.
263    pub fn unscaled_font_size(&self) -> PdfPoints {
264        let mut result = 0.0;
265
266        if self.bindings().is_true(
267            self.bindings()
268                .FPDFTextObj_GetFontSize(self.object_handle, &mut result),
269        ) {
270            PdfPoints::new(result)
271        } else {
272            PdfPoints::ZERO
273        }
274    }
275
276    /// Returns the [PdfFont] used to render the text contained within this [PdfPageTextObject].
277    pub fn font(&self) -> PdfFont {
278        PdfFont::from_pdfium(
279            self.bindings().FPDFTextObj_GetFont(self.object_handle),
280            self.bindings(),
281            None,
282            false,
283        )
284    }
285
286    /// Returns the text contained within this [PdfPageTextObject].
287    ///
288    /// Text retrieval in Pdfium is handled by the [PdfPageText] object owned by the [PdfPage]
289    /// containing this [PdfPageTextObject]. If this text object has not been attached to a page
290    /// then text retrieval will be unavailable and an empty string will be returned.
291    ///
292    /// When retrieving the text from many [PdfPageTextObject] objects (for instance, as part of
293    /// a loop or an iterator), it may be faster to open the [PdfPageText] object once and keep
294    /// it open while processing the text objects, like so:
295    ///
296    /// ```
297    /// let text_page = page.text()?; // Opens the text page once.
298    ///
299    /// for object in <some object iterator> {
300    ///     let object_text = text_page.for_object(object)?;
301    /// }
302    /// ```
303    ///
304    /// The [PdfPageText] object will be closed when the binding to it (`text_page` in the example above)
305    /// falls out of scope.
306    pub fn text(&self) -> String {
307        // Retrieving the text from Pdfium is a two-step operation. First, we call
308        // FPDFTextObj_GetText() with a null buffer; this will retrieve the length of
309        // the text in bytes. If the length is zero, then there is no text associated
310        // with the page object.
311
312        // If the length is non-zero, then we reserve a byte buffer of the given
313        // length and call FPDFTextObj_GetText() again with a pointer to the buffer;
314        // this will write the text to the buffer in UTF16-LE format.
315
316        let page_handle = match self.ownership() {
317            PdfPageObjectOwnership::Page(ownership) => Some(ownership.page_handle()),
318            PdfPageObjectOwnership::AttachedAnnotation(ownership) => Some(ownership.page_handle()),
319            _ => None,
320        };
321
322        if let Some(page_handle) = page_handle {
323            let text_handle = self.bindings().FPDFText_LoadPage(page_handle);
324
325            if !text_handle.is_null() {
326                let buffer_length = self.bindings().FPDFTextObj_GetText(
327                    self.object_handle(),
328                    text_handle,
329                    std::ptr::null_mut(),
330                    0,
331                );
332
333                if buffer_length == 0 {
334                    // There is no text.
335
336                    return String::new();
337                }
338
339                let mut buffer = create_byte_buffer(buffer_length as usize);
340
341                let result = self.bindings().FPDFTextObj_GetText(
342                    self.object_handle(),
343                    text_handle,
344                    buffer.as_mut_ptr() as *mut FPDF_WCHAR,
345                    buffer_length,
346                );
347
348                assert_eq!(result, buffer_length);
349
350                self.bindings.FPDFText_ClosePage(text_handle);
351
352                get_string_from_pdfium_utf16le_bytes(buffer).unwrap_or_default()
353            } else {
354                // The PdfPage containing this page object does not have an associated
355                // FPDF_TEXTPAGE object.
356
357                String::new()
358            }
359        } else {
360            // This page object is not contained by a PdfPage.
361
362            String::new()
363        }
364    }
365
366    /// Sets the text contained within this [PdfPageTextObject], replacing any existing text.
367    ///
368    /// A single space will be used if the given text is empty, in order to avoid
369    /// unexpected behaviour from Pdfium when dealing with an empty string.
370    pub fn set_text(&mut self, text: impl ToString) -> Result<(), PdfiumError> {
371        let text = text.to_string();
372
373        let text = if text.is_empty() { " " } else { text.as_str() };
374
375        if self.bindings().is_true(
376            self.bindings()
377                .FPDFText_SetText_str(self.object_handle(), text),
378        ) {
379            Ok(())
380        } else {
381            Err(PdfiumError::PdfiumLibraryInternalError(
382                PdfiumInternalError::Unknown,
383            ))
384        }
385    }
386
387    /// Sets the text rendering mode for the text contained within this [PdfPageTextObject].
388    pub fn set_render_mode(
389        &mut self,
390        render_mode: PdfPageTextRenderMode,
391    ) -> Result<(), PdfiumError> {
392        if self.bindings().is_true(
393            self.bindings()
394                .FPDFTextObj_SetTextRenderMode(self.object_handle(), render_mode.as_pdfium()),
395        ) {
396            Ok(())
397        } else {
398            Err(PdfiumError::PdfiumLibraryInternalError(
399                PdfiumInternalError::Unknown,
400            ))
401        }
402    }
403
    #[cfg(any(
        feature = "pdfium_future",
        feature = "pdfium_7215",
        feature = "pdfium_7123",
        feature = "pdfium_6996",
        feature = "pdfium_6721",
        feature = "pdfium_6666",
        feature = "pdfium_6611",
    ))]
    /// Returns a collection of the characters contained within this [PdfPageTextObject],
    /// using character retrieval functionality provided by the given [PdfPageText] object.
    ///
    /// This function is only compiled when one of the Pdfium API version features listed
    /// in its `cfg` attribute is enabled.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by the given [PdfPageText] object while looking up
    /// the characters for this text object.
    #[inline]
    pub fn chars(&self, text: &'a PdfPageText<'a>) -> Result<PdfPageTextChars<'a>, PdfiumError> {
        // All of the work is delegated to the text page.
        text.chars_for_object(self)
    }
419
420    #[cfg(any(
421        feature = "pdfium_future",
422        feature = "pdfium_7215",
423        feature = "pdfium_7123",
424        feature = "pdfium_6996",
425        feature = "pdfium_6721",
426        feature = "pdfium_6666",
427        feature = "pdfium_6611",
428    ))]
429    /// Returns `true` if any of the characters contained within this [PdfPageTextObject] have a
430    /// glyph shape that descends below the font baseline.
431    ///
432    /// Character retrieval functionality is provided by the given [PdfPageText] object.
433    #[inline]
434    pub fn has_descenders(&self, text: &PdfPageText) -> Result<bool, PdfiumError> {
435        self.chars(text)
436            .map(|chars| chars.iter().any(|char| char.has_descender()))
437    }
438
439    #[cfg(any(
440        feature = "pdfium_future",
441        feature = "pdfium_7215",
442        feature = "pdfium_7123",
443        feature = "pdfium_6996",
444        feature = "pdfium_6721",
445        feature = "pdfium_6666",
446        feature = "pdfium_6611",
447    ))]
448    /// Returns the descent of this [PdfPageTextObject]. The descent is the maximum distance below
449    /// the baseline reached by any glyph in any of the characters contained in this text object,
450    /// expressed as a negative points value.
451    ///
452    /// Character retrieval and bounds measurement is provided by the given [PdfPageText] object.
453    pub fn descent(&self, text: &PdfPageText) -> Result<PdfPoints, PdfiumError> {
454        let object_bottom = self.get_vertical_translation();
455
456        let mut maximum_descent = object_bottom;
457
458        for char in self.chars(text)?.iter() {
459            let char_bottom = char.tight_bounds()?.bottom();
460
461            if char_bottom < maximum_descent {
462                maximum_descent = char_bottom;
463            }
464        }
465
466        Ok(maximum_descent - object_bottom)
467    }
468
    // Expands the crate's create_transform_setters!() macro for this type. The first two
    // arguments name the receiver type and the return type; the three string arguments
    // are variations of the phrase naming this object.
    // NOTE(review): presumably the strings are spliced into the generated doc comments and
    // the receiver/return types into the generated setter signatures; confirm against the
    // macro definition.
    create_transform_setters!(
        &mut Self,
        Result<(), PdfiumError>,
        "this [PdfPageTextObject]",
        "this [PdfPageTextObject].",
        "this [PdfPageTextObject],"
    );

    // The transform_impl() function required by the create_transform_setters!() macro
    // is provided by the PdfPageObjectPrivate trait.

    // Expands the crate's create_transform_getters!() macro for this type.
    // NOTE(review): presumably this is what provides get_vertical_scale() and
    // get_vertical_translation(), which other functions in this impl call; confirm
    // against the macro definition.
    create_transform_getters!(
        "this [PdfPageTextObject]",
        "this [PdfPageTextObject].",
        "this [PdfPageTextObject],"
    );

    // The get_matrix_impl() function required by the create_transform_getters!() macro
    // is provided by the PdfPageObjectPrivate trait.
488}
489
490impl<'a> PdfPageObjectPrivate<'a> for PdfPageTextObject<'a> {
491    #[inline]
492    fn object_handle(&self) -> FPDF_PAGEOBJECT {
493        self.object_handle
494    }
495
496    #[inline]
497    fn ownership(&self) -> &PdfPageObjectOwnership {
498        &self.ownership
499    }
500
501    #[inline]
502    fn set_ownership(&mut self, ownership: PdfPageObjectOwnership) {
503        self.ownership = ownership;
504    }
505
506    #[inline]
507    fn bindings(&self) -> &dyn PdfiumLibraryBindings {
508        self.bindings
509    }
510
511    #[inline]
512    fn is_copyable_impl(&self) -> bool {
513        true
514    }
515
516    #[inline]
517    fn try_copy_impl<'b>(
518        &self,
519        document: FPDF_DOCUMENT,
520        bindings: &'b dyn PdfiumLibraryBindings,
521    ) -> Result<PdfPageObject<'b>, PdfiumError> {
522        let mut copy = PdfPageTextObject::new_from_handles(
523            document,
524            self.text(),
525            self.font().handle(),
526            self.unscaled_font_size(),
527            bindings,
528        )?;
529
530        copy.set_fill_color(self.fill_color()?)?;
531        copy.set_stroke_color(self.stroke_color()?)?;
532        copy.set_stroke_width(self.stroke_width()?)?;
533        copy.set_line_join(self.line_join()?)?;
534        copy.set_line_cap(self.line_cap()?)?;
535        copy.reset_matrix(self.matrix()?)?;
536
537        Ok(PdfPageObject::Text(copy))
538    }
539}