pdfium_render/pdf/document/page/object/
text.rs

//! Defines the [PdfPageTextObject] struct, exposing functionality related to a single
//! page object defining a piece of formatted text.
3
4use crate::bindgen::{
5    FPDF_DOCUMENT, FPDF_FONT, FPDF_PAGEOBJECT, FPDF_TEXT_RENDERMODE,
6    FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_CLIP, FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL,
7    FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_CLIP,
8    FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_STROKE,
9    FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_STROKE_CLIP,
10    FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_INVISIBLE,
11    FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_STROKE,
12    FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_STROKE_CLIP,
13    FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_UNKNOWN, FPDF_WCHAR,
14};
15use crate::bindings::PdfiumLibraryBindings;
16use crate::error::{PdfiumError, PdfiumInternalError};
17use crate::pdf::document::fonts::ToPdfFontToken;
18use crate::pdf::document::page::object::private::internal::PdfPageObjectPrivate;
19use crate::pdf::document::page::object::{
20    PdfPageObject, PdfPageObjectCommon, PdfPageObjectOwnership,
21};
22
23use crate::pdf::document::PdfDocument;
24use crate::pdf::font::PdfFont;
25use crate::pdf::matrix::{PdfMatrix, PdfMatrixValue};
26use crate::pdf::points::PdfPoints;
27use crate::utils::mem::create_byte_buffer;
28use crate::utils::utf16le::get_string_from_pdfium_utf16le_bytes;
29use crate::{create_transform_getters, create_transform_setters};
30
31#[cfg(any(
32    feature = "pdfium_future",
33    feature = "pdfium_7350",
34    feature = "pdfium_7215",
35    feature = "pdfium_7123",
36    feature = "pdfium_6996",
37    feature = "pdfium_6721",
38    feature = "pdfium_6666",
39    feature = "pdfium_6611",
40))]
41use {
42    crate::pdf::document::page::text::chars::PdfPageTextChars,
43    crate::pdf::document::page::text::PdfPageText,
44};
45
46#[cfg(doc)]
47use {
48    crate::pdf::document::page::object::PdfPageObjectType,
49    crate::pdf::document::page::objects::common::PdfPageObjectsCommon,
50    crate::pdf::document::page::PdfPage,
51};
52
/// The text rendering modes supported by the PDF standard, as listed in table 5.3
/// on page 402 in the PDF Reference manual version 1.7.
///
/// The discriminant of each variant is the value of the corresponding Pdfium
/// `FPDF_TEXT_RENDERMODE` constant, cast to `isize`.
#[derive(Debug, Copy, Clone, PartialOrd, PartialEq)]
pub enum PdfPageTextRenderMode {
    /// The text render mode is not recognized by Pdfium.
    Unknown = FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_UNKNOWN as isize,

    /// The text will be filled, but not stroked.
    FilledUnstroked = FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL as isize,

    /// The text will be stroked, but not filled.
    StrokedUnfilled = FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_STROKE as isize,

    /// The text will be filled, then stroked.
    FilledThenStroked = FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_STROKE as isize,

    /// The text will be neither filled nor stroked. It will still take up space in the
    /// layout, however.
    Invisible = FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_INVISIBLE as isize,

    /// The text will be filled and added to the path for clipping.
    FilledUnstrokedClipping = FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_CLIP as isize,

    /// The text will be stroked and added to the path for clipping.
    StrokedUnfilledClipping = FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_STROKE_CLIP as isize,

    /// The text will be filled, then stroked, and added to the path for clipping.
    FilledThenStrokedClipping = FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_STROKE_CLIP as isize,

    /// The text will be neither filled nor stroked, only added to the path for clipping.
    InvisibleClipping = FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_CLIP as isize,
}
84
85impl PdfPageTextRenderMode {
86    #[inline]
87    pub(crate) fn from_pdfium(value: i32) -> Result<PdfPageTextRenderMode, PdfiumError> {
88        match value {
89            FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_UNKNOWN => Ok(PdfPageTextRenderMode::Unknown),
90            FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL => {
91                Ok(PdfPageTextRenderMode::FilledUnstroked)
92            }
93            FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_STROKE => {
94                Ok(PdfPageTextRenderMode::StrokedUnfilled)
95            }
96            FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_STROKE => {
97                Ok(PdfPageTextRenderMode::FilledThenStroked)
98            }
99            FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_INVISIBLE => {
100                Ok(PdfPageTextRenderMode::Invisible)
101            }
102            FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_CLIP => {
103                Ok(PdfPageTextRenderMode::FilledUnstrokedClipping)
104            }
105            FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_STROKE_CLIP => {
106                Ok(PdfPageTextRenderMode::StrokedUnfilledClipping)
107            }
108            FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_STROKE_CLIP => {
109                Ok(PdfPageTextRenderMode::FilledThenStrokedClipping)
110            }
111            FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_CLIP => {
112                Ok(PdfPageTextRenderMode::InvisibleClipping)
113            }
114            _ => Err(PdfiumError::UnknownPdfPageTextRenderMode),
115        }
116    }
117
118    #[inline]
119    #[allow(dead_code)]
120    // The as_pdfium() function is not currently used, but we expect it to be in future
121    pub(crate) fn as_pdfium(&self) -> FPDF_TEXT_RENDERMODE {
122        match self {
123            PdfPageTextRenderMode::Unknown => FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_UNKNOWN,
124            PdfPageTextRenderMode::FilledUnstroked => FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL,
125            PdfPageTextRenderMode::StrokedUnfilled => {
126                FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_STROKE
127            }
128            PdfPageTextRenderMode::FilledThenStroked => {
129                FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_STROKE
130            }
131            PdfPageTextRenderMode::Invisible => FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_INVISIBLE,
132            PdfPageTextRenderMode::FilledUnstrokedClipping => {
133                FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_CLIP
134            }
135            PdfPageTextRenderMode::StrokedUnfilledClipping => {
136                FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_STROKE_CLIP
137            }
138            PdfPageTextRenderMode::FilledThenStrokedClipping => {
139                FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_STROKE_CLIP
140            }
141            PdfPageTextRenderMode::InvisibleClipping => {
142                FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_CLIP
143            }
144        }
145    }
146}
147
/// A single [PdfPageObject] of type [PdfPageObjectType::Text]. The page object defines a single
/// piece of formatted text.
///
/// Page objects can be created either attached to a [PdfPage] (in which case the page object's
/// memory is owned by the containing page) or detached from any page (in which case the page
/// object's memory is owned by the object). Page objects are not rendered until they are
/// attached to a page; page objects that are never attached to a page will be lost when they
/// fall out of scope.
///
/// The simplest way to create a page text object that is immediately attached to a page
/// is to call the [PdfPageObjectsCommon::create_text_object()] function.
///
/// Creating a detached page text object offers more scope for customization, but you must
/// add the object to a containing [PdfPage] manually. To create a detached page text object,
/// use the [PdfPageTextObject::new()] function. The detached page text object can later
/// be attached to a page by using the [PdfPageObjectsCommon::add_text_object()] function.
pub struct PdfPageTextObject<'a> {
    // The Pdfium handle of the underlying page object; passed to every
    // FPDFTextObj_*() / FPDFText_*() call made on behalf of this object.
    object_handle: FPDF_PAGEOBJECT,

    // Records what, if anything, currently contains this object. Newly created objects
    // start out unowned; text retrieval consults this value to locate the containing page.
    ownership: PdfPageObjectOwnership,

    // The Pdfium bindings through which all calls for this object are made.
    bindings: &'a dyn PdfiumLibraryBindings,
}
169
170impl<'a> PdfPageTextObject<'a> {
171    #[inline]
172    pub(crate) fn from_pdfium(
173        object_handle: FPDF_PAGEOBJECT,
174        ownership: PdfPageObjectOwnership,
175        bindings: &'a dyn PdfiumLibraryBindings,
176    ) -> Self {
177        PdfPageTextObject {
178            object_handle,
179            ownership,
180            bindings,
181        }
182    }
183
184    /// Creates a new [PdfPageTextObject] from the given arguments. The returned page object
185    /// will not be rendered until it is added to a [PdfPage] using the
186    /// [PdfPageObjectsCommon::add_text_object()] function.
187    ///
188    /// A single space will be used if the given text is empty, in order to avoid
189    /// unexpected behaviour from Pdfium when dealing with empty strings.
190    // Specifically, `FPDFPageObj_SetText()` will crash if we try to apply an empty string to a
191    // text object, and `FPDFText_LoadPage()` will crash if any text object on the page contains
192    // an empty string (so it isn't enough to avoid calling `FPDFPageObj_SetText()` for an empty
193    // text object, we _have_ to set a non-empty string to avoid segfaults).
194    #[inline]
195    pub fn new(
196        document: &PdfDocument<'a>,
197        text: impl ToString,
198        font: impl ToPdfFontToken,
199        font_size: PdfPoints,
200    ) -> Result<Self, PdfiumError> {
201        Self::new_from_handles(
202            document.handle(),
203            text,
204            font.token().handle(),
205            font_size,
206            document.bindings(),
207        )
208    }
209
210    // Take raw `FPDF_DOCUMENT` and `FPDF_FONT` handles to avoid cascading lifetime problems
211    // associated with borrowing `PdfDocument<'a>` and/or `PdfFont<'a>`.
212    pub(crate) fn new_from_handles(
213        document: FPDF_DOCUMENT,
214        text: impl ToString,
215        font: FPDF_FONT,
216        font_size: PdfPoints,
217        bindings: &'a dyn PdfiumLibraryBindings,
218    ) -> Result<Self, PdfiumError> {
219        let handle = bindings.FPDFPageObj_CreateTextObj(document, font, font_size.value);
220
221        if handle.is_null() {
222            Err(PdfiumError::PdfiumLibraryInternalError(
223                PdfiumInternalError::Unknown,
224            ))
225        } else {
226            let mut result = PdfPageTextObject {
227                object_handle: handle,
228                ownership: PdfPageObjectOwnership::unowned(),
229                bindings,
230            };
231
232            result.set_text(text)?;
233
234            Ok(result)
235        }
236    }
237
238    /// Returns the text rendering mode for the text contained within this [PdfPageTextObject].
239    pub fn render_mode(&self) -> PdfPageTextRenderMode {
240        PdfPageTextRenderMode::from_pdfium(
241            self.bindings()
242                .FPDFTextObj_GetTextRenderMode(self.object_handle),
243        )
244        .unwrap_or(PdfPageTextRenderMode::Unknown)
245    }
246
247    /// Returns the effective size of the text when rendered, taking into account both the
248    /// font size specified in this text object as well as any vertical scale factor applied
249    /// to the text object's transformation matrix.
250    ///
251    /// To retrieve only the specified font size, ignoring any vertical scaling, use the
252    /// [PdfPageTextObject::unscaled_font_size()] function.
253    #[inline]
254    pub fn scaled_font_size(&self) -> PdfPoints {
255        PdfPoints::new(self.unscaled_font_size().value * self.get_vertical_scale())
256    }
257
258    /// Returns the font size of the text specified in this [PdfPageTextObject].
259    ///
260    /// Note that the effective size of the text when rendered may differ from the font size
261    /// if a scaling factor has been applied to this text object's transformation matrix.
262    /// To retrieve the effective font size, taking vertical scaling into account, use the
263    /// [PdfPageTextObject::scaled_font_size()] function.
264    pub fn unscaled_font_size(&self) -> PdfPoints {
265        let mut result = 0.0;
266
267        if self.bindings().is_true(
268            self.bindings()
269                .FPDFTextObj_GetFontSize(self.object_handle, &mut result),
270        ) {
271            PdfPoints::new(result)
272        } else {
273            PdfPoints::ZERO
274        }
275    }
276
277    /// Returns the [PdfFont] used to render the text contained within this [PdfPageTextObject].
278    pub fn font(&self) -> PdfFont {
279        PdfFont::from_pdfium(
280            self.bindings().FPDFTextObj_GetFont(self.object_handle),
281            self.bindings(),
282            None,
283            false,
284        )
285    }
286
287    /// Returns the text contained within this [PdfPageTextObject].
288    ///
289    /// Text retrieval in Pdfium is handled by the [PdfPageText] object owned by the [PdfPage]
290    /// containing this [PdfPageTextObject]. If this text object has not been attached to a page
291    /// then text retrieval will be unavailable and an empty string will be returned.
292    ///
293    /// When retrieving the text from many [PdfPageTextObject] objects (for instance, as part of
294    /// a loop or an iterator), it may be faster to open the [PdfPageText] object once and keep
295    /// it open while processing the text objects, like so:
296    ///
297    /// ```
298    /// let text_page = page.text()?; // Opens the text page once.
299    ///
300    /// for object in <some object iterator> {
301    ///     let object_text = text_page.for_object(object)?;
302    /// }
303    /// ```
304    ///
305    /// The [PdfPageText] object will be closed when the binding to it (`text_page` in the example above)
306    /// falls out of scope.
307    pub fn text(&self) -> String {
308        // Retrieving the text from Pdfium is a two-step operation. First, we call
309        // FPDFTextObj_GetText() with a null buffer; this will retrieve the length of
310        // the text in bytes. If the length is zero, then there is no text associated
311        // with the page object.
312
313        // If the length is non-zero, then we reserve a byte buffer of the given
314        // length and call FPDFTextObj_GetText() again with a pointer to the buffer;
315        // this will write the text to the buffer in UTF16-LE format.
316
317        let page_handle = match self.ownership() {
318            PdfPageObjectOwnership::Page(ownership) => Some(ownership.page_handle()),
319            PdfPageObjectOwnership::AttachedAnnotation(ownership) => Some(ownership.page_handle()),
320            _ => None,
321        };
322
323        if let Some(page_handle) = page_handle {
324            let text_handle = self.bindings().FPDFText_LoadPage(page_handle);
325
326            if !text_handle.is_null() {
327                let buffer_length = self.bindings().FPDFTextObj_GetText(
328                    self.object_handle(),
329                    text_handle,
330                    std::ptr::null_mut(),
331                    0,
332                );
333
334                if buffer_length == 0 {
335                    // There is no text.
336
337                    return String::new();
338                }
339
340                let mut buffer = create_byte_buffer(buffer_length as usize);
341
342                let result = self.bindings().FPDFTextObj_GetText(
343                    self.object_handle(),
344                    text_handle,
345                    buffer.as_mut_ptr() as *mut FPDF_WCHAR,
346                    buffer_length,
347                );
348
349                assert_eq!(result, buffer_length);
350
351                self.bindings.FPDFText_ClosePage(text_handle);
352
353                get_string_from_pdfium_utf16le_bytes(buffer).unwrap_or_default()
354            } else {
355                // The PdfPage containing this page object does not have an associated
356                // FPDF_TEXTPAGE object.
357
358                String::new()
359            }
360        } else {
361            // This page object is not contained by a PdfPage.
362
363            String::new()
364        }
365    }
366
367    /// Sets the text contained within this [PdfPageTextObject], replacing any existing text.
368    ///
369    /// A single space will be used if the given text is empty, in order to avoid
370    /// unexpected behaviour from Pdfium when dealing with an empty string.
371    pub fn set_text(&mut self, text: impl ToString) -> Result<(), PdfiumError> {
372        let text = text.to_string();
373
374        let text = if text.is_empty() { " " } else { text.as_str() };
375
376        if self.bindings().is_true(
377            self.bindings()
378                .FPDFText_SetText_str(self.object_handle(), text),
379        ) {
380            Ok(())
381        } else {
382            Err(PdfiumError::PdfiumLibraryInternalError(
383                PdfiumInternalError::Unknown,
384            ))
385        }
386    }
387
388    /// Sets the text rendering mode for the text contained within this [PdfPageTextObject].
389    pub fn set_render_mode(
390        &mut self,
391        render_mode: PdfPageTextRenderMode,
392    ) -> Result<(), PdfiumError> {
393        if self.bindings().is_true(
394            self.bindings()
395                .FPDFTextObj_SetTextRenderMode(self.object_handle(), render_mode.as_pdfium()),
396        ) {
397            Ok(())
398        } else {
399            Err(PdfiumError::PdfiumLibraryInternalError(
400                PdfiumInternalError::Unknown,
401            ))
402        }
403    }
404
405    #[cfg(any(
406        feature = "pdfium_future",
407        feature = "pdfium_7350",
408        feature = "pdfium_7215",
409        feature = "pdfium_7123",
410        feature = "pdfium_6996",
411        feature = "pdfium_6721",
412        feature = "pdfium_6666",
413        feature = "pdfium_6611",
414    ))]
415    /// Returns a collection of the characters contained within this [PdfPageTextObject],
416    /// using character retrieval functionality provided by the given [PdfPageText] object.
417    #[inline]
418    pub fn chars(&self, text: &'a PdfPageText<'a>) -> Result<PdfPageTextChars<'a>, PdfiumError> {
419        text.chars_for_object(self)
420    }
421
422    #[cfg(any(
423        feature = "pdfium_future",
424        feature = "pdfium_7350",
425        feature = "pdfium_7215",
426        feature = "pdfium_7123",
427        feature = "pdfium_6996",
428        feature = "pdfium_6721",
429        feature = "pdfium_6666",
430        feature = "pdfium_6611",
431    ))]
432    /// Returns `true` if any of the characters contained within this [PdfPageTextObject] have a
433    /// glyph shape that descends below the font baseline.
434    ///
435    /// Character retrieval functionality is provided by the given [PdfPageText] object.
436    #[inline]
437    pub fn has_descenders(&self, text: &PdfPageText) -> Result<bool, PdfiumError> {
438        self.chars(text)
439            .map(|chars| chars.iter().any(|char| char.has_descender()))
440    }
441
442    #[cfg(any(
443        feature = "pdfium_future",
444        feature = "pdfium_7350",
445        feature = "pdfium_7215",
446        feature = "pdfium_7123",
447        feature = "pdfium_6996",
448        feature = "pdfium_6721",
449        feature = "pdfium_6666",
450        feature = "pdfium_6611",
451    ))]
452    /// Returns the descent of this [PdfPageTextObject]. The descent is the maximum distance below
453    /// the baseline reached by any glyph in any of the characters contained in this text object,
454    /// expressed as a negative points value.
455    ///
456    /// Character retrieval and bounds measurement is provided by the given [PdfPageText] object.
457    pub fn descent(&self, text: &PdfPageText) -> Result<PdfPoints, PdfiumError> {
458        let object_bottom = self.get_vertical_translation();
459
460        let mut maximum_descent = object_bottom;
461
462        for char in self.chars(text)?.iter() {
463            let char_bottom = char.tight_bounds()?.bottom();
464
465            if char_bottom < maximum_descent {
466                maximum_descent = char_bottom;
467            }
468        }
469
470        Ok(maximum_descent - object_bottom)
471    }
472
    // Expands to the transformation setter functions for this object type. The three
    // string arguments appear to be the phrases substituted into the generated
    // documentation -- NOTE(review): confirm against the macro definition.
    create_transform_setters!(
        &mut Self,
        Result<(), PdfiumError>,
        "this [PdfPageTextObject]",
        "this [PdfPageTextObject].",
        "this [PdfPageTextObject],"
    );

    // The transform_impl() function required by the create_transform_setters!() macro
    // is provided by the PdfPageObjectPrivate trait.

    // Expands to the transformation getter functions for this object type; presumably
    // the source of the get_vertical_scale() and get_vertical_translation() functions
    // called elsewhere in this impl block -- NOTE(review): confirm against the macro
    // definition.
    create_transform_getters!(
        "this [PdfPageTextObject]",
        "this [PdfPageTextObject].",
        "this [PdfPageTextObject],"
    );

    // The get_matrix_impl() function required by the create_transform_getters!() macro
    // is provided by the PdfPageObjectPrivate trait.
492}
493
494impl<'a> PdfPageObjectPrivate<'a> for PdfPageTextObject<'a> {
495    #[inline]
496    fn object_handle(&self) -> FPDF_PAGEOBJECT {
497        self.object_handle
498    }
499
500    #[inline]
501    fn ownership(&self) -> &PdfPageObjectOwnership {
502        &self.ownership
503    }
504
505    #[inline]
506    fn set_ownership(&mut self, ownership: PdfPageObjectOwnership) {
507        self.ownership = ownership;
508    }
509
510    #[inline]
511    fn bindings(&self) -> &dyn PdfiumLibraryBindings {
512        self.bindings
513    }
514
515    #[inline]
516    fn is_copyable_impl(&self) -> bool {
517        true
518    }
519
520    #[inline]
521    fn try_copy_impl<'b>(
522        &self,
523        document: FPDF_DOCUMENT,
524        bindings: &'b dyn PdfiumLibraryBindings,
525    ) -> Result<PdfPageObject<'b>, PdfiumError> {
526        let mut copy = PdfPageTextObject::new_from_handles(
527            document,
528            self.text(),
529            self.font().handle(),
530            self.unscaled_font_size(),
531            bindings,
532        )?;
533
534        copy.set_fill_color(self.fill_color()?)?;
535        copy.set_stroke_color(self.stroke_color()?)?;
536        copy.set_stroke_width(self.stroke_width()?)?;
537        copy.set_line_join(self.line_join()?)?;
538        copy.set_line_cap(self.line_cap()?)?;
539        copy.reset_matrix(self.matrix()?)?;
540
541        Ok(PdfPageObject::Text(copy))
542    }
543}