Skip to main content

pdfium_render/pdf/document/page/object/
text.rs

1//! Defines the [PdfPageTextObject] struct, exposing functionality related to a single
2//! page object defining a piece of formatted text.
3
4use crate::bindgen::{
5    FPDF_DOCUMENT, FPDF_FONT, FPDF_PAGEOBJECT, FPDF_TEXT_RENDERMODE,
6    FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_CLIP, FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL,
7    FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_CLIP,
8    FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_STROKE,
9    FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_STROKE_CLIP,
10    FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_INVISIBLE,
11    FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_STROKE,
12    FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_STROKE_CLIP,
13    FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_UNKNOWN, FPDF_WCHAR,
14};
15use crate::bindings::PdfiumLibraryBindings;
16use crate::error::{PdfiumError, PdfiumInternalError};
17use crate::pdf::document::fonts::ToPdfFontToken;
18use crate::pdf::document::page::object::private::internal::PdfPageObjectPrivate;
19use crate::pdf::document::page::object::PdfPageObjectOwnership;
20use crate::pdf::document::PdfDocument;
21use crate::pdf::font::PdfFont;
22use crate::pdf::matrix::{PdfMatrix, PdfMatrixValue};
23use crate::pdf::points::PdfPoints;
24use crate::pdfium::PdfiumLibraryBindingsAccessor;
25use crate::utils::mem::create_byte_buffer;
26use crate::utils::utf16le::get_string_from_pdfium_utf16le_bytes;
27use crate::{create_transform_getters, create_transform_setters};
28use std::marker::PhantomData;
29
30#[cfg(any(
31    feature = "pdfium_future",
32    feature = "pdfium_7763",
33    feature = "pdfium_7543",
34    feature = "pdfium_7350",
35    feature = "pdfium_7215",
36    feature = "pdfium_7123",
37    feature = "pdfium_6996",
38    feature = "pdfium_6721",
39    feature = "pdfium_6666",
40    feature = "pdfium_6611",
41))]
42use {
43    crate::pdf::document::page::text::chars::PdfPageTextChars,
44    crate::pdf::document::page::text::PdfPageText,
45};
46
47#[cfg(doc)]
48use {
49    crate::pdf::document::page::object::PdfPageObject,
50    crate::pdf::document::page::object::PdfPageObjectType,
51    crate::pdf::document::page::objects::common::PdfPageObjectsCommon,
52    crate::pdf::document::page::PdfPage,
53};
54
55/// The text rendering modes supported by the PDF standard, as listed in table 5.3
56/// on page 402 in the PDF Reference manual version 1.7.
57#[derive(Debug, Copy, Clone, PartialOrd, PartialEq)]
58pub enum PdfPageTextRenderMode {
59    /// The text render mode is not recognized by Pdfium.
60    Unknown = FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_UNKNOWN as isize,
61
62    /// The text will be filled, but not stroked.
63    FilledUnstroked = FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL as isize,
64
65    /// The text will be stroked, but not filled.
66    StrokedUnfilled = FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_STROKE as isize,
67
68    /// The text will be filled, then stroked.
69    FilledThenStroked = FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_STROKE as isize,
70
71    /// The text will be neither filled nor stroked. It will still take up size in the layout, however.
72    Invisible = FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_INVISIBLE as isize,
73
74    /// The text will be filled and added to the path for clipping.
75    FilledUnstrokedClipping = FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_CLIP as isize,
76
77    /// The text will be stroked and added to the path for clipping.
78    StrokedUnfilledClipping = FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_STROKE_CLIP as isize,
79
80    /// The text will be filled, then stroked, and added to the path for clipping.
81    FilledThenStrokedClipping = FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_STROKE_CLIP as isize,
82
83    /// The text will be neither filled nor stroked, only added to the path for clipping.
84    InvisibleClipping = FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_CLIP as isize,
85}
86
87impl PdfPageTextRenderMode {
88    #[inline]
89    pub(crate) fn from_pdfium(value: i32) -> Result<PdfPageTextRenderMode, PdfiumError> {
90        match value {
91            FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_UNKNOWN => Ok(PdfPageTextRenderMode::Unknown),
92            FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL => {
93                Ok(PdfPageTextRenderMode::FilledUnstroked)
94            }
95            FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_STROKE => {
96                Ok(PdfPageTextRenderMode::StrokedUnfilled)
97            }
98            FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_STROKE => {
99                Ok(PdfPageTextRenderMode::FilledThenStroked)
100            }
101            FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_INVISIBLE => {
102                Ok(PdfPageTextRenderMode::Invisible)
103            }
104            FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_CLIP => {
105                Ok(PdfPageTextRenderMode::FilledUnstrokedClipping)
106            }
107            FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_STROKE_CLIP => {
108                Ok(PdfPageTextRenderMode::StrokedUnfilledClipping)
109            }
110            FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_STROKE_CLIP => {
111                Ok(PdfPageTextRenderMode::FilledThenStrokedClipping)
112            }
113            FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_CLIP => {
114                Ok(PdfPageTextRenderMode::InvisibleClipping)
115            }
116            _ => Err(PdfiumError::UnknownPdfPageTextRenderMode),
117        }
118    }
119
120    #[inline]
121    #[allow(dead_code)]
122    // The as_pdfium() function is not currently used, but we expect it to be in future
123    pub(crate) fn as_pdfium(&self) -> FPDF_TEXT_RENDERMODE {
124        match self {
125            PdfPageTextRenderMode::Unknown => FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_UNKNOWN,
126            PdfPageTextRenderMode::FilledUnstroked => FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL,
127            PdfPageTextRenderMode::StrokedUnfilled => {
128                FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_STROKE
129            }
130            PdfPageTextRenderMode::FilledThenStroked => {
131                FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_STROKE
132            }
133            PdfPageTextRenderMode::Invisible => FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_INVISIBLE,
134            PdfPageTextRenderMode::FilledUnstrokedClipping => {
135                FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_CLIP
136            }
137            PdfPageTextRenderMode::StrokedUnfilledClipping => {
138                FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_STROKE_CLIP
139            }
140            PdfPageTextRenderMode::FilledThenStrokedClipping => {
141                FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_STROKE_CLIP
142            }
143            PdfPageTextRenderMode::InvisibleClipping => {
144                FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_CLIP
145            }
146        }
147    }
148}
149
150/// A single [PdfPageObject] of type [PdfPageObjectType::Text]. The page object defines a single
151/// piece of formatted text.
152///
153/// Page objects can be created either attached to a [PdfPage] (in which case the page object's
154/// memory is owned by the containing page) or detached from any page (in which case the page
155/// object's memory is owned by the object). Page objects are not rendered until they are
156/// attached to a page; page objects that are never attached to a page will be lost when they
157/// fall out of scope.
158///
159/// The simplest way to create a page text object that is immediately attached to a page
160/// is to call the [PdfPageObjectsCommon::create_text_object()] function.
161///
162/// Creating a detached page text object offers more scope for customization, but you must
163/// add the object to a containing [PdfPage] manually. To create a detached page text object,
164/// use the [PdfPageTextObject::new()] function. The detached page text object can later
165/// be attached to a page by using the [PdfPageObjectsCommon::add_text_object()] function.
166pub struct PdfPageTextObject<'a> {
167    object_handle: FPDF_PAGEOBJECT,
168    ownership: PdfPageObjectOwnership,
169    lifetime: PhantomData<&'a FPDF_PAGEOBJECT>,
170}
171
172impl<'a> PdfPageTextObject<'a> {
173    #[inline]
174    pub(crate) fn from_pdfium(
175        object_handle: FPDF_PAGEOBJECT,
176        ownership: PdfPageObjectOwnership,
177    ) -> Self {
178        PdfPageTextObject {
179            object_handle,
180            ownership,
181            lifetime: PhantomData,
182        }
183    }
184
185    /// Creates a new [PdfPageTextObject] from the given arguments. The returned page object
186    /// will not be rendered until it is added to a [PdfPage] using the
187    /// [PdfPageObjectsCommon::add_text_object()] function.
188    ///
189    /// A single space will be used if the given text is empty, in order to avoid
190    /// unexpected behaviour from Pdfium when dealing with empty strings.
191    // Specifically, `FPDFPageObj_SetText()` will crash if we try to apply an empty string to a
192    // text object, and `FPDFText_LoadPage()` will crash if any text object on the page contains
193    // an empty string (so it isn't enough to avoid calling `FPDFPageObj_SetText()` for an empty
194    // text object, we _have_ to set a non-empty string to avoid segfaults).
195    #[inline]
196    pub fn new(
197        document: &PdfDocument<'a>,
198        text: impl ToString,
199        font: impl ToPdfFontToken,
200        font_size: PdfPoints,
201    ) -> Result<Self, PdfiumError> {
202        Self::new_from_handles(
203            document.handle(),
204            text,
205            font.token().handle(),
206            font_size,
207            document.bindings(),
208        )
209    }
210
211    // Take raw `FPDF_DOCUMENT` and `FPDF_FONT` handles to avoid cascading lifetime problems
212    // associated with borrowing `PdfDocument<'a>` and/or `PdfFont<'a>`.
213    pub(crate) fn new_from_handles(
214        document: FPDF_DOCUMENT,
215        text: impl ToString,
216        font: FPDF_FONT,
217        font_size: PdfPoints,
218        bindings: &'a dyn PdfiumLibraryBindings,
219    ) -> Result<Self, PdfiumError> {
220        let handle = unsafe { bindings.FPDFPageObj_CreateTextObj(document, font, font_size.value) };
221
222        if handle.is_null() {
223            Err(PdfiumError::PdfiumLibraryInternalError(
224                PdfiumInternalError::Unknown,
225            ))
226        } else {
227            let mut result = PdfPageTextObject {
228                object_handle: handle,
229                ownership: PdfPageObjectOwnership::unowned(),
230                lifetime: PhantomData,
231            };
232
233            result.set_text(text)?;
234
235            Ok(result)
236        }
237    }
238
239    /// Returns the text rendering mode for the text contained within this [PdfPageTextObject].
240    pub fn render_mode(&self) -> PdfPageTextRenderMode {
241        PdfPageTextRenderMode::from_pdfium(unsafe {
242            self.bindings()
243                .FPDFTextObj_GetTextRenderMode(self.object_handle)
244        })
245        .unwrap_or(PdfPageTextRenderMode::Unknown)
246    }
247
248    /// Returns `true` if the text rendering mode for the text contained within this
249    /// [PdfPageTextObject] is set to any value other than [PdfPageTextRenderMode::Invisible]
250    /// or [PdfPageTextRenderMode::InvisibleClipping].
251    #[inline]
252    pub fn is_visible(&self) -> bool {
253        match self.render_mode() {
254            PdfPageTextRenderMode::Invisible | PdfPageTextRenderMode::InvisibleClipping => false,
255            _ => true,
256        }
257    }
258
259    /// Returns the effective size of the text when rendered, taking into account both the
260    /// font size specified in this text object as well as any vertical scale factor applied
261    /// to the text object's transformation matrix.
262    ///
263    /// To retrieve only the specified font size, ignoring any vertical scaling, use the
264    /// [PdfPageTextObject::unscaled_font_size()] function.
265    #[inline]
266    pub fn scaled_font_size(&self) -> PdfPoints {
267        PdfPoints::new(self.unscaled_font_size().value * self.get_vertical_scale())
268    }
269
270    /// Returns the font size of the text specified in this [PdfPageTextObject].
271    ///
272    /// Note that the effective size of the text when rendered may differ from the font size
273    /// if a scaling factor has been applied to this text object's transformation matrix.
274    /// To retrieve the effective font size, taking vertical scaling into account, use the
275    /// [PdfPageTextObject::scaled_font_size()] function.
276    pub fn unscaled_font_size(&self) -> PdfPoints {
277        let mut result = 0.0;
278
279        if self.bindings().is_true(unsafe {
280            self.bindings()
281                .FPDFTextObj_GetFontSize(self.object_handle, &mut result)
282        }) {
283            PdfPoints::new(result)
284        } else {
285            PdfPoints::ZERO
286        }
287    }
288
289    /// Returns the [PdfFont] used to render the text contained within this [PdfPageTextObject].
290    pub fn font(&self) -> PdfFont<'_> {
291        PdfFont::from_pdfium(
292            unsafe { self.bindings().FPDFTextObj_GetFont(self.object_handle) },
293            None,
294            false,
295        )
296    }
297
298    /// Returns the text contained within this [PdfPageTextObject].
299    ///
300    /// Text retrieval in Pdfium is handled by the [PdfPageText] object owned by the [PdfPage]
301    /// containing this [PdfPageTextObject]. If this text object has not been attached to a page
302    /// then text retrieval will be unavailable and an empty string will be returned.
303    ///
304    /// When retrieving the text from many [PdfPageTextObject] objects (for instance, as part of
305    /// a loop or an iterator), it may be faster to open the [PdfPageText] object once and keep
306    /// it open while processing the text objects, like so:
307    ///
308    /// ```
309    /// let text_page = page.text()?; // Opens the text page once.
310    ///
311    /// for object in <some object iterator> {
312    ///     let object_text = text_page.for_object(object)?;
313    /// }
314    /// ```
315    ///
316    /// The [PdfPageText] object will be closed when the binding to it (`text_page` in the example above)
317    /// falls out of scope.
318    pub fn text(&self) -> String {
319        // Retrieving the text from Pdfium is a two-step operation. First, we call
320        // FPDFTextObj_GetText() with a null buffer; this will retrieve the length of
321        // the text in bytes. If the length is zero, then there is no text associated
322        // with the page object.
323
324        // If the length is non-zero, then we reserve a byte buffer of the given
325        // length and call FPDFTextObj_GetText() again with a pointer to the buffer;
326        // this will write the text to the buffer in UTF16-LE format.
327
328        let page_handle = match self.ownership() {
329            PdfPageObjectOwnership::Page(ownership) => Some(ownership.page_handle()),
330            PdfPageObjectOwnership::AttachedAnnotation(ownership) => Some(ownership.page_handle()),
331            _ => None,
332        };
333
334        if let Some(page_handle) = page_handle {
335            let text_handle = unsafe { self.bindings().FPDFText_LoadPage(page_handle) };
336
337            if !text_handle.is_null() {
338                let buffer_length = unsafe {
339                    self.bindings().FPDFTextObj_GetText(
340                        self.object_handle(),
341                        text_handle,
342                        std::ptr::null_mut(),
343                        0,
344                    )
345                };
346
347                if buffer_length == 0 {
348                    // There is no text.
349
350                    return String::new();
351                }
352
353                let mut buffer = create_byte_buffer(buffer_length as usize);
354
355                let result = unsafe {
356                    self.bindings().FPDFTextObj_GetText(
357                        self.object_handle(),
358                        text_handle,
359                        buffer.as_mut_ptr() as *mut FPDF_WCHAR,
360                        buffer_length,
361                    )
362                };
363
364                assert_eq!(result, buffer_length);
365
366                unsafe {
367                    self.bindings().FPDFText_ClosePage(text_handle);
368                }
369
370                get_string_from_pdfium_utf16le_bytes(buffer).unwrap_or_default()
371            } else {
372                // The PdfPage containing this page object does not have an associated
373                // FPDF_TEXTPAGE object.
374
375                String::new()
376            }
377        } else {
378            // This page object is not contained by a PdfPage.
379
380            String::new()
381        }
382    }
383
384    /// Sets the text contained within this [PdfPageTextObject], replacing any existing text.
385    ///
386    /// A single space will be used if the given text is empty, in order to avoid
387    /// unexpected behaviour from Pdfium when dealing with an empty string.
388    pub fn set_text(&mut self, text: impl ToString) -> Result<(), PdfiumError> {
389        let text = text.to_string();
390
391        let text = if text.is_empty() { " " } else { text.as_str() };
392
393        if self.bindings().is_true(unsafe {
394            self.bindings()
395                .FPDFText_SetText_str(self.object_handle(), text)
396        }) {
397            Ok(())
398        } else {
399            Err(PdfiumError::PdfiumLibraryInternalError(
400                PdfiumInternalError::Unknown,
401            ))
402        }
403    }
404
405    /// Sets the text rendering mode for the text contained within this [PdfPageTextObject].
406    pub fn set_render_mode(
407        &mut self,
408        render_mode: PdfPageTextRenderMode,
409    ) -> Result<(), PdfiumError> {
410        if self.bindings().is_true(unsafe {
411            self.bindings()
412                .FPDFTextObj_SetTextRenderMode(self.object_handle(), render_mode.as_pdfium())
413        }) {
414            Ok(())
415        } else {
416            Err(PdfiumError::PdfiumLibraryInternalError(
417                PdfiumInternalError::Unknown,
418            ))
419        }
420    }
421
422    #[cfg(any(
423        feature = "pdfium_future",
424        feature = "pdfium_7763",
425        feature = "pdfium_7543",
426        feature = "pdfium_7350",
427        feature = "pdfium_7215",
428        feature = "pdfium_7123",
429        feature = "pdfium_6996",
430        feature = "pdfium_6721",
431        feature = "pdfium_6666",
432        feature = "pdfium_6611",
433    ))]
434    /// Returns a collection of the characters contained within this [PdfPageTextObject],
435    /// using character retrieval functionality provided by the given [PdfPageText] object.
436    #[inline]
437    pub fn chars(&self, text: &'a PdfPageText<'a>) -> Result<PdfPageTextChars<'a>, PdfiumError> {
438        text.chars_for_object(self)
439    }
440
441    #[cfg(any(
442        feature = "pdfium_future",
443        feature = "pdfium_7763",
444        feature = "pdfium_7543",
445        feature = "pdfium_7350",
446        feature = "pdfium_7215",
447        feature = "pdfium_7123",
448        feature = "pdfium_6996",
449        feature = "pdfium_6721",
450        feature = "pdfium_6666",
451        feature = "pdfium_6611",
452    ))]
453    /// Returns `true` if any of the characters contained within this [PdfPageTextObject] have a
454    /// glyph shape that descends below the font baseline.
455    ///
456    /// Character retrieval functionality is provided by the given [PdfPageText] object.
457    #[inline]
458    pub fn has_descenders(&self, text: &PdfPageText) -> Result<bool, PdfiumError> {
459        self.chars(text)
460            .map(|chars| chars.iter().any(|char| char.has_descender()))
461    }
462
463    #[cfg(any(
464        feature = "pdfium_future",
465        feature = "pdfium_7763",
466        feature = "pdfium_7543",
467        feature = "pdfium_7350",
468        feature = "pdfium_7215",
469        feature = "pdfium_7123",
470        feature = "pdfium_6996",
471        feature = "pdfium_6721",
472        feature = "pdfium_6666",
473        feature = "pdfium_6611",
474    ))]
475    /// Returns the descent of this [PdfPageTextObject]. The descent is the maximum distance below
476    /// the baseline reached by any glyph in any of the characters contained in this text object,
477    /// expressed as a negative points value.
478    ///
479    /// Character retrieval and bounds measurement is provided by the given [PdfPageText] object.
480    pub fn descent(&self, text: &PdfPageText) -> Result<PdfPoints, PdfiumError> {
481        let object_bottom = self.get_vertical_translation();
482
483        let mut maximum_descent = object_bottom;
484
485        for char in self.chars(text)?.iter() {
486            let char_bottom = char.tight_bounds()?.bottom();
487
488            if char_bottom < maximum_descent {
489                maximum_descent = char_bottom;
490            }
491        }
492
493        Ok(maximum_descent - object_bottom)
494    }
495
496    create_transform_setters!(
497        &mut Self,
498        Result<(), PdfiumError>,
499        "this [PdfPageTextObject]",
500        "this [PdfPageTextObject].",
501        "this [PdfPageTextObject],"
502    );
503
504    // The transform_impl() function required by the create_transform_setters!() macro
505    // is provided by the PdfPageObjectPrivate trait.
506
507    create_transform_getters!(
508        "this [PdfPageTextObject]",
509        "this [PdfPageTextObject].",
510        "this [PdfPageTextObject],"
511    );
512
513    // The get_matrix_impl() function required by the create_transform_getters!() macro
514    // is provided by the PdfPageObjectPrivate trait.
515}
516
517impl<'a> PdfPageObjectPrivate<'a> for PdfPageTextObject<'a> {
518    #[inline]
519    fn object_handle(&self) -> FPDF_PAGEOBJECT {
520        self.object_handle
521    }
522
523    #[inline]
524    fn ownership(&self) -> &PdfPageObjectOwnership {
525        &self.ownership
526    }
527
528    #[inline]
529    fn set_ownership(&mut self, ownership: PdfPageObjectOwnership) {
530        self.ownership = ownership;
531    }
532}
533
534impl<'a> Drop for PdfPageTextObject<'a> {
535    /// Closes this [PdfPageTextObject], releasing held memory.
536    fn drop(&mut self) {
537        self.drop_impl();
538    }
539}
540
541impl<'a> PdfiumLibraryBindingsAccessor<'a> for PdfPageTextObject<'a> {}
542
// NOTE(review): this wrapper holds a raw Pdfium handle; soundness presumably relies on the
// `thread_safe` feature serializing access to Pdfium - confirm against the crate's
// thread-safety design.
#[cfg(feature = "thread_safe")]
unsafe impl Send for PdfPageTextObject<'_> {}
545
// NOTE(review): shared access from several threads presumably relies on the `thread_safe`
// feature serializing calls into Pdfium - confirm against the crate's thread-safety design.
#[cfg(feature = "thread_safe")]
unsafe impl Sync for PdfPageTextObject<'_> {}