Skip to main content

pdfium_render/pdf/document/page/object/
text.rs

1//! Defines the [PdfPageTextObject] struct, exposing functionality related to a single
2//! page object defining a piece of formatted text.
3
4use crate::bindgen::{
5    FPDF_DOCUMENT, FPDF_FONT, FPDF_PAGEOBJECT, FPDF_TEXT_RENDERMODE,
6    FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_CLIP, FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL,
7    FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_CLIP,
8    FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_STROKE,
9    FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_STROKE_CLIP,
10    FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_INVISIBLE,
11    FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_STROKE,
12    FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_STROKE_CLIP,
13    FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_UNKNOWN, FPDF_WCHAR,
14};
15use crate::bindings::PdfiumLibraryBindings;
16use crate::error::{PdfiumError, PdfiumInternalError};
17use crate::pdf::document::fonts::ToPdfFontToken;
18use crate::pdf::document::page::object::private::internal::PdfPageObjectPrivate;
19use crate::pdf::document::page::object::PdfPageObjectOwnership;
20use crate::pdf::document::PdfDocument;
21use crate::pdf::font::PdfFont;
22use crate::pdf::matrix::{PdfMatrix, PdfMatrixValue};
23use crate::pdf::points::PdfPoints;
24use crate::pdfium::PdfiumLibraryBindingsAccessor;
25use crate::utils::mem::create_byte_buffer;
26use crate::utils::utf16le::get_string_from_pdfium_utf16le_bytes;
27use crate::{create_transform_getters, create_transform_setters};
28use std::marker::PhantomData;
29
30#[cfg(any(
31    feature = "pdfium_future",
32    feature = "pdfium_7543",
33    feature = "pdfium_7350",
34    feature = "pdfium_7215",
35    feature = "pdfium_7123",
36    feature = "pdfium_6996",
37    feature = "pdfium_6721",
38    feature = "pdfium_6666",
39    feature = "pdfium_6611",
40))]
41use {
42    crate::pdf::document::page::text::chars::PdfPageTextChars,
43    crate::pdf::document::page::text::PdfPageText,
44};
45
46#[cfg(doc)]
47use {
48    crate::pdf::document::page::object::PdfPageObject,
49    crate::pdf::document::page::object::PdfPageObjectType,
50    crate::pdf::document::page::objects::common::PdfPageObjectsCommon,
51    crate::pdf::document::page::PdfPage,
52};
53
54/// The text rendering modes supported by the PDF standard, as listed in table 5.3
55/// on page 402 in the PDF Reference manual version 1.7.
56#[derive(Debug, Copy, Clone, PartialOrd, PartialEq)]
57pub enum PdfPageTextRenderMode {
58    /// The text render mode is not recognized by Pdfium.
59    Unknown = FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_UNKNOWN as isize,
60
61    /// The text will be filled, but not stroked.
62    FilledUnstroked = FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL as isize,
63
64    /// The text will be stroked, but not filled.
65    StrokedUnfilled = FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_STROKE as isize,
66
67    /// The text will be filled, then stroked.
68    FilledThenStroked = FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_STROKE as isize,
69
70    /// The text will be neither filled nor stroked. It will still take up size in the layout, however.
71    Invisible = FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_INVISIBLE as isize,
72
73    /// The text will be filled and added to the path for clipping.
74    FilledUnstrokedClipping = FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_CLIP as isize,
75
76    /// The text will be stroked and added to the path for clipping.
77    StrokedUnfilledClipping = FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_STROKE_CLIP as isize,
78
79    /// The text will be filled, then stroked, and added to the path for clipping.
80    FilledThenStrokedClipping = FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_STROKE_CLIP as isize,
81
82    /// The text will be neither filled nor stroked, only added to the path for clipping.
83    InvisibleClipping = FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_CLIP as isize,
84}
85
86impl PdfPageTextRenderMode {
87    #[inline]
88    pub(crate) fn from_pdfium(value: i32) -> Result<PdfPageTextRenderMode, PdfiumError> {
89        match value {
90            FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_UNKNOWN => Ok(PdfPageTextRenderMode::Unknown),
91            FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL => {
92                Ok(PdfPageTextRenderMode::FilledUnstroked)
93            }
94            FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_STROKE => {
95                Ok(PdfPageTextRenderMode::StrokedUnfilled)
96            }
97            FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_STROKE => {
98                Ok(PdfPageTextRenderMode::FilledThenStroked)
99            }
100            FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_INVISIBLE => {
101                Ok(PdfPageTextRenderMode::Invisible)
102            }
103            FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_CLIP => {
104                Ok(PdfPageTextRenderMode::FilledUnstrokedClipping)
105            }
106            FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_STROKE_CLIP => {
107                Ok(PdfPageTextRenderMode::StrokedUnfilledClipping)
108            }
109            FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_STROKE_CLIP => {
110                Ok(PdfPageTextRenderMode::FilledThenStrokedClipping)
111            }
112            FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_CLIP => {
113                Ok(PdfPageTextRenderMode::InvisibleClipping)
114            }
115            _ => Err(PdfiumError::UnknownPdfPageTextRenderMode),
116        }
117    }
118
119    #[inline]
120    #[allow(dead_code)]
121    // The as_pdfium() function is not currently used, but we expect it to be in future
122    pub(crate) fn as_pdfium(&self) -> FPDF_TEXT_RENDERMODE {
123        match self {
124            PdfPageTextRenderMode::Unknown => FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_UNKNOWN,
125            PdfPageTextRenderMode::FilledUnstroked => FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL,
126            PdfPageTextRenderMode::StrokedUnfilled => {
127                FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_STROKE
128            }
129            PdfPageTextRenderMode::FilledThenStroked => {
130                FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_STROKE
131            }
132            PdfPageTextRenderMode::Invisible => FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_INVISIBLE,
133            PdfPageTextRenderMode::FilledUnstrokedClipping => {
134                FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_CLIP
135            }
136            PdfPageTextRenderMode::StrokedUnfilledClipping => {
137                FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_STROKE_CLIP
138            }
139            PdfPageTextRenderMode::FilledThenStrokedClipping => {
140                FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_STROKE_CLIP
141            }
142            PdfPageTextRenderMode::InvisibleClipping => {
143                FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_CLIP
144            }
145        }
146    }
147}
148
149/// A single [PdfPageObject] of type [PdfPageObjectType::Text]. The page object defines a single
150/// piece of formatted text.
151///
152/// Page objects can be created either attached to a [PdfPage] (in which case the page object's
153/// memory is owned by the containing page) or detached from any page (in which case the page
154/// object's memory is owned by the object). Page objects are not rendered until they are
155/// attached to a page; page objects that are never attached to a page will be lost when they
156/// fall out of scope.
157///
158/// The simplest way to create a page text object that is immediately attached to a page
159/// is to call the [PdfPageObjectsCommon::create_text_object()] function.
160///
161/// Creating a detached page text object offers more scope for customization, but you must
162/// add the object to a containing [PdfPage] manually. To create a detached page text object,
163/// use the [PdfPageTextObject::new()] function. The detached page text object can later
164/// be attached to a page by using the [PdfPageObjectsCommon::add_text_object()] function.
165pub struct PdfPageTextObject<'a> {
166    object_handle: FPDF_PAGEOBJECT,
167    ownership: PdfPageObjectOwnership,
168    lifetime: PhantomData<&'a FPDF_PAGEOBJECT>,
169}
170
171impl<'a> PdfPageTextObject<'a> {
172    #[inline]
173    pub(crate) fn from_pdfium(
174        object_handle: FPDF_PAGEOBJECT,
175        ownership: PdfPageObjectOwnership,
176    ) -> Self {
177        PdfPageTextObject {
178            object_handle,
179            ownership,
180            lifetime: PhantomData,
181        }
182    }
183
184    /// Creates a new [PdfPageTextObject] from the given arguments. The returned page object
185    /// will not be rendered until it is added to a [PdfPage] using the
186    /// [PdfPageObjectsCommon::add_text_object()] function.
187    ///
188    /// A single space will be used if the given text is empty, in order to avoid
189    /// unexpected behaviour from Pdfium when dealing with empty strings.
190    // Specifically, `FPDFPageObj_SetText()` will crash if we try to apply an empty string to a
191    // text object, and `FPDFText_LoadPage()` will crash if any text object on the page contains
192    // an empty string (so it isn't enough to avoid calling `FPDFPageObj_SetText()` for an empty
193    // text object, we _have_ to set a non-empty string to avoid segfaults).
194    #[inline]
195    pub fn new(
196        document: &PdfDocument<'a>,
197        text: impl ToString,
198        font: impl ToPdfFontToken,
199        font_size: PdfPoints,
200    ) -> Result<Self, PdfiumError> {
201        Self::new_from_handles(
202            document.handle(),
203            text,
204            font.token().handle(),
205            font_size,
206            document.bindings(),
207        )
208    }
209
210    // Take raw `FPDF_DOCUMENT` and `FPDF_FONT` handles to avoid cascading lifetime problems
211    // associated with borrowing `PdfDocument<'a>` and/or `PdfFont<'a>`.
212    pub(crate) fn new_from_handles(
213        document: FPDF_DOCUMENT,
214        text: impl ToString,
215        font: FPDF_FONT,
216        font_size: PdfPoints,
217        bindings: &'a dyn PdfiumLibraryBindings,
218    ) -> Result<Self, PdfiumError> {
219        let handle = unsafe { bindings.FPDFPageObj_CreateTextObj(document, font, font_size.value) };
220
221        if handle.is_null() {
222            Err(PdfiumError::PdfiumLibraryInternalError(
223                PdfiumInternalError::Unknown,
224            ))
225        } else {
226            let mut result = PdfPageTextObject {
227                object_handle: handle,
228                ownership: PdfPageObjectOwnership::unowned(),
229                lifetime: PhantomData,
230            };
231
232            result.set_text(text)?;
233
234            Ok(result)
235        }
236    }
237
238    /// Returns the text rendering mode for the text contained within this [PdfPageTextObject].
239    pub fn render_mode(&self) -> PdfPageTextRenderMode {
240        PdfPageTextRenderMode::from_pdfium(unsafe {
241            self.bindings()
242                .FPDFTextObj_GetTextRenderMode(self.object_handle)
243        })
244        .unwrap_or(PdfPageTextRenderMode::Unknown)
245    }
246
247    /// Returns `true` if the text rendering mode for the text contained within this
248    /// [PdfPageTextObject] is set to any value other than [PdfPageTextRenderMode::Invisible]
249    /// or [PdfPageTextRenderMode::InvisibleClipping].
250    #[inline]
251    pub fn is_visible(&self) -> bool {
252        match self.render_mode() {
253            PdfPageTextRenderMode::Invisible | PdfPageTextRenderMode::InvisibleClipping => false,
254            _ => true,
255        }
256    }
257
258    /// Returns the effective size of the text when rendered, taking into account both the
259    /// font size specified in this text object as well as any vertical scale factor applied
260    /// to the text object's transformation matrix.
261    ///
262    /// To retrieve only the specified font size, ignoring any vertical scaling, use the
263    /// [PdfPageTextObject::unscaled_font_size()] function.
264    #[inline]
265    pub fn scaled_font_size(&self) -> PdfPoints {
266        PdfPoints::new(self.unscaled_font_size().value * self.get_vertical_scale())
267    }
268
269    /// Returns the font size of the text specified in this [PdfPageTextObject].
270    ///
271    /// Note that the effective size of the text when rendered may differ from the font size
272    /// if a scaling factor has been applied to this text object's transformation matrix.
273    /// To retrieve the effective font size, taking vertical scaling into account, use the
274    /// [PdfPageTextObject::scaled_font_size()] function.
275    pub fn unscaled_font_size(&self) -> PdfPoints {
276        let mut result = 0.0;
277
278        if self.bindings().is_true(unsafe {
279            self.bindings()
280                .FPDFTextObj_GetFontSize(self.object_handle, &mut result)
281        }) {
282            PdfPoints::new(result)
283        } else {
284            PdfPoints::ZERO
285        }
286    }
287
288    /// Returns the [PdfFont] used to render the text contained within this [PdfPageTextObject].
289    pub fn font(&self) -> PdfFont<'_> {
290        PdfFont::from_pdfium(
291            unsafe { self.bindings().FPDFTextObj_GetFont(self.object_handle) },
292            None,
293            false,
294        )
295    }
296
297    /// Returns the text contained within this [PdfPageTextObject].
298    ///
299    /// Text retrieval in Pdfium is handled by the [PdfPageText] object owned by the [PdfPage]
300    /// containing this [PdfPageTextObject]. If this text object has not been attached to a page
301    /// then text retrieval will be unavailable and an empty string will be returned.
302    ///
303    /// When retrieving the text from many [PdfPageTextObject] objects (for instance, as part of
304    /// a loop or an iterator), it may be faster to open the [PdfPageText] object once and keep
305    /// it open while processing the text objects, like so:
306    ///
307    /// ```
308    /// let text_page = page.text()?; // Opens the text page once.
309    ///
310    /// for object in <some object iterator> {
311    ///     let object_text = text_page.for_object(object)?;
312    /// }
313    /// ```
314    ///
315    /// The [PdfPageText] object will be closed when the binding to it (`text_page` in the example above)
316    /// falls out of scope.
317    pub fn text(&self) -> String {
318        // Retrieving the text from Pdfium is a two-step operation. First, we call
319        // FPDFTextObj_GetText() with a null buffer; this will retrieve the length of
320        // the text in bytes. If the length is zero, then there is no text associated
321        // with the page object.
322
323        // If the length is non-zero, then we reserve a byte buffer of the given
324        // length and call FPDFTextObj_GetText() again with a pointer to the buffer;
325        // this will write the text to the buffer in UTF16-LE format.
326
327        let page_handle = match self.ownership() {
328            PdfPageObjectOwnership::Page(ownership) => Some(ownership.page_handle()),
329            PdfPageObjectOwnership::AttachedAnnotation(ownership) => Some(ownership.page_handle()),
330            _ => None,
331        };
332
333        if let Some(page_handle) = page_handle {
334            let text_handle = unsafe { self.bindings().FPDFText_LoadPage(page_handle) };
335
336            if !text_handle.is_null() {
337                let buffer_length = unsafe {
338                    self.bindings().FPDFTextObj_GetText(
339                        self.object_handle(),
340                        text_handle,
341                        std::ptr::null_mut(),
342                        0,
343                    )
344                };
345
346                if buffer_length == 0 {
347                    // There is no text.
348
349                    return String::new();
350                }
351
352                let mut buffer = create_byte_buffer(buffer_length as usize);
353
354                let result = unsafe {
355                    self.bindings().FPDFTextObj_GetText(
356                        self.object_handle(),
357                        text_handle,
358                        buffer.as_mut_ptr() as *mut FPDF_WCHAR,
359                        buffer_length,
360                    )
361                };
362
363                assert_eq!(result, buffer_length);
364
365                unsafe {
366                    self.bindings().FPDFText_ClosePage(text_handle);
367                }
368
369                get_string_from_pdfium_utf16le_bytes(buffer).unwrap_or_default()
370            } else {
371                // The PdfPage containing this page object does not have an associated
372                // FPDF_TEXTPAGE object.
373
374                String::new()
375            }
376        } else {
377            // This page object is not contained by a PdfPage.
378
379            String::new()
380        }
381    }
382
383    /// Sets the text contained within this [PdfPageTextObject], replacing any existing text.
384    ///
385    /// A single space will be used if the given text is empty, in order to avoid
386    /// unexpected behaviour from Pdfium when dealing with an empty string.
387    pub fn set_text(&mut self, text: impl ToString) -> Result<(), PdfiumError> {
388        let text = text.to_string();
389
390        let text = if text.is_empty() { " " } else { text.as_str() };
391
392        if self.bindings().is_true(unsafe {
393            self.bindings()
394                .FPDFText_SetText_str(self.object_handle(), text)
395        }) {
396            Ok(())
397        } else {
398            Err(PdfiumError::PdfiumLibraryInternalError(
399                PdfiumInternalError::Unknown,
400            ))
401        }
402    }
403
404    /// Sets the text rendering mode for the text contained within this [PdfPageTextObject].
405    pub fn set_render_mode(
406        &mut self,
407        render_mode: PdfPageTextRenderMode,
408    ) -> Result<(), PdfiumError> {
409        if self.bindings().is_true(unsafe {
410            self.bindings()
411                .FPDFTextObj_SetTextRenderMode(self.object_handle(), render_mode.as_pdfium())
412        }) {
413            Ok(())
414        } else {
415            Err(PdfiumError::PdfiumLibraryInternalError(
416                PdfiumInternalError::Unknown,
417            ))
418        }
419    }
420
421    #[cfg(any(
422        feature = "pdfium_future",
423        feature = "pdfium_7543",
424        feature = "pdfium_7350",
425        feature = "pdfium_7215",
426        feature = "pdfium_7123",
427        feature = "pdfium_6996",
428        feature = "pdfium_6721",
429        feature = "pdfium_6666",
430        feature = "pdfium_6611",
431    ))]
432    /// Returns a collection of the characters contained within this [PdfPageTextObject],
433    /// using character retrieval functionality provided by the given [PdfPageText] object.
434    #[inline]
435    pub fn chars(&self, text: &'a PdfPageText<'a>) -> Result<PdfPageTextChars<'a>, PdfiumError> {
436        text.chars_for_object(self)
437    }
438
439    #[cfg(any(
440        feature = "pdfium_future",
441        feature = "pdfium_7543",
442        feature = "pdfium_7350",
443        feature = "pdfium_7215",
444        feature = "pdfium_7123",
445        feature = "pdfium_6996",
446        feature = "pdfium_6721",
447        feature = "pdfium_6666",
448        feature = "pdfium_6611",
449    ))]
450    /// Returns `true` if any of the characters contained within this [PdfPageTextObject] have a
451    /// glyph shape that descends below the font baseline.
452    ///
453    /// Character retrieval functionality is provided by the given [PdfPageText] object.
454    #[inline]
455    pub fn has_descenders(&self, text: &PdfPageText) -> Result<bool, PdfiumError> {
456        self.chars(text)
457            .map(|chars| chars.iter().any(|char| char.has_descender()))
458    }
459
460    #[cfg(any(
461        feature = "pdfium_future",
462        feature = "pdfium_7543",
463        feature = "pdfium_7350",
464        feature = "pdfium_7215",
465        feature = "pdfium_7123",
466        feature = "pdfium_6996",
467        feature = "pdfium_6721",
468        feature = "pdfium_6666",
469        feature = "pdfium_6611",
470    ))]
471    /// Returns the descent of this [PdfPageTextObject]. The descent is the maximum distance below
472    /// the baseline reached by any glyph in any of the characters contained in this text object,
473    /// expressed as a negative points value.
474    ///
475    /// Character retrieval and bounds measurement is provided by the given [PdfPageText] object.
476    pub fn descent(&self, text: &PdfPageText) -> Result<PdfPoints, PdfiumError> {
477        let object_bottom = self.get_vertical_translation();
478
479        let mut maximum_descent = object_bottom;
480
481        for char in self.chars(text)?.iter() {
482            let char_bottom = char.tight_bounds()?.bottom();
483
484            if char_bottom < maximum_descent {
485                maximum_descent = char_bottom;
486            }
487        }
488
489        Ok(maximum_descent - object_bottom)
490    }
491
492    create_transform_setters!(
493        &mut Self,
494        Result<(), PdfiumError>,
495        "this [PdfPageTextObject]",
496        "this [PdfPageTextObject].",
497        "this [PdfPageTextObject],"
498    );
499
500    // The transform_impl() function required by the create_transform_setters!() macro
501    // is provided by the PdfPageObjectPrivate trait.
502
503    create_transform_getters!(
504        "this [PdfPageTextObject]",
505        "this [PdfPageTextObject].",
506        "this [PdfPageTextObject],"
507    );
508
509    // The get_matrix_impl() function required by the create_transform_getters!() macro
510    // is provided by the PdfPageObjectPrivate trait.
511}
512
513impl<'a> PdfPageObjectPrivate<'a> for PdfPageTextObject<'a> {
514    #[inline]
515    fn object_handle(&self) -> FPDF_PAGEOBJECT {
516        self.object_handle
517    }
518
519    #[inline]
520    fn ownership(&self) -> &PdfPageObjectOwnership {
521        &self.ownership
522    }
523
524    #[inline]
525    fn set_ownership(&mut self, ownership: PdfPageObjectOwnership) {
526        self.ownership = ownership;
527    }
528}
529
530impl<'a> Drop for PdfPageTextObject<'a> {
531    /// Closes this [PdfPageTextObject], releasing held memory.
532    fn drop(&mut self) {
533        self.drop_impl();
534    }
535}
536
537impl<'a> PdfiumLibraryBindingsAccessor<'a> for PdfPageTextObject<'a> {}
538
539#[cfg(feature = "thread_safe")]
540unsafe impl<'a> Send for PdfPageTextObject<'a> {}
541
542#[cfg(feature = "thread_safe")]
543unsafe impl<'a> Sync for PdfPageTextObject<'a> {}