pdfium_render/pdf/document/page/object/
text.rs

1//! Defines the [PdfPageTextObject] struct, exposing functionality related to a single
2//! page object defining a piece of formatted text.
3
4use crate::bindgen::{
5    FPDF_DOCUMENT, FPDF_FONT, FPDF_PAGEOBJECT, FPDF_TEXT_RENDERMODE,
6    FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_CLIP, FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL,
7    FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_CLIP,
8    FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_STROKE,
9    FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_STROKE_CLIP,
10    FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_INVISIBLE,
11    FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_STROKE,
12    FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_STROKE_CLIP,
13    FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_UNKNOWN, FPDF_WCHAR,
14};
15use crate::bindings::PdfiumLibraryBindings;
16use crate::error::{PdfiumError, PdfiumInternalError};
17use crate::pdf::document::fonts::ToPdfFontToken;
18use crate::pdf::document::page::object::private::internal::PdfPageObjectPrivate;
19use crate::pdf::document::page::object::{
20    PdfPageObject, PdfPageObjectCommon, PdfPageObjectOwnership,
21};
22
23use crate::pdf::document::PdfDocument;
24use crate::pdf::font::PdfFont;
25use crate::pdf::matrix::{PdfMatrix, PdfMatrixValue};
26use crate::pdf::points::PdfPoints;
27use crate::utils::mem::create_byte_buffer;
28use crate::utils::utf16le::get_string_from_pdfium_utf16le_bytes;
29use crate::{create_transform_getters, create_transform_setters};
30
31#[cfg(any(
32    feature = "pdfium_future",
33    feature = "pdfium_7543",
34    feature = "pdfium_7350",
35    feature = "pdfium_7215",
36    feature = "pdfium_7123",
37    feature = "pdfium_6996",
38    feature = "pdfium_6721",
39    feature = "pdfium_6666",
40    feature = "pdfium_6611",
41))]
42use {
43    crate::pdf::document::page::text::chars::PdfPageTextChars,
44    crate::pdf::document::page::text::PdfPageText,
45};
46
47#[cfg(doc)]
48use {
49    crate::pdf::document::page::object::PdfPageObjectType,
50    crate::pdf::document::page::objects::common::PdfPageObjectsCommon,
51    crate::pdf::document::page::PdfPage,
52};
53
54/// The text rendering modes supported by the PDF standard, as listed in table 5.3
55/// on page 402 in the PDF Reference manual version 1.7.
56#[derive(Debug, Copy, Clone, PartialOrd, PartialEq)]
57pub enum PdfPageTextRenderMode {
58    /// The text render mode is not recognized by Pdfium.
59    Unknown = FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_UNKNOWN as isize,
60
61    /// The text will be filled, but not stroked.
62    FilledUnstroked = FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL as isize,
63
64    /// The text will be stroked, but not filled.
65    StrokedUnfilled = FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_STROKE as isize,
66
67    /// The text will be filled, then stroked.
68    FilledThenStroked = FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_STROKE as isize,
69
70    /// The text will be neither filled nor stroked. It will still take up size in the layout, however.
71    Invisible = FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_INVISIBLE as isize,
72
73    /// The text will be filled and added to the path for clipping.
74    FilledUnstrokedClipping = FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_CLIP as isize,
75
76    /// The text will be stroked and added to the path for clipping.
77    StrokedUnfilledClipping = FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_STROKE_CLIP as isize,
78
79    /// The text will be filled, then stroked, and added to the path for clipping.
80    FilledThenStrokedClipping = FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_STROKE_CLIP as isize,
81
82    /// The text will be neither filled nor stroked, only added to the path for clipping.
83    InvisibleClipping = FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_CLIP as isize,
84}
85
86impl PdfPageTextRenderMode {
87    #[inline]
88    pub(crate) fn from_pdfium(value: i32) -> Result<PdfPageTextRenderMode, PdfiumError> {
89        match value {
90            FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_UNKNOWN => Ok(PdfPageTextRenderMode::Unknown),
91            FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL => {
92                Ok(PdfPageTextRenderMode::FilledUnstroked)
93            }
94            FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_STROKE => {
95                Ok(PdfPageTextRenderMode::StrokedUnfilled)
96            }
97            FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_STROKE => {
98                Ok(PdfPageTextRenderMode::FilledThenStroked)
99            }
100            FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_INVISIBLE => {
101                Ok(PdfPageTextRenderMode::Invisible)
102            }
103            FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_CLIP => {
104                Ok(PdfPageTextRenderMode::FilledUnstrokedClipping)
105            }
106            FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_STROKE_CLIP => {
107                Ok(PdfPageTextRenderMode::StrokedUnfilledClipping)
108            }
109            FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_STROKE_CLIP => {
110                Ok(PdfPageTextRenderMode::FilledThenStrokedClipping)
111            }
112            FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_CLIP => {
113                Ok(PdfPageTextRenderMode::InvisibleClipping)
114            }
115            _ => Err(PdfiumError::UnknownPdfPageTextRenderMode),
116        }
117    }
118
119    #[inline]
120    #[allow(dead_code)]
121    // The as_pdfium() function is not currently used, but we expect it to be in future
122    pub(crate) fn as_pdfium(&self) -> FPDF_TEXT_RENDERMODE {
123        match self {
124            PdfPageTextRenderMode::Unknown => FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_UNKNOWN,
125            PdfPageTextRenderMode::FilledUnstroked => FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL,
126            PdfPageTextRenderMode::StrokedUnfilled => {
127                FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_STROKE
128            }
129            PdfPageTextRenderMode::FilledThenStroked => {
130                FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_STROKE
131            }
132            PdfPageTextRenderMode::Invisible => FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_INVISIBLE,
133            PdfPageTextRenderMode::FilledUnstrokedClipping => {
134                FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_CLIP
135            }
136            PdfPageTextRenderMode::StrokedUnfilledClipping => {
137                FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_STROKE_CLIP
138            }
139            PdfPageTextRenderMode::FilledThenStrokedClipping => {
140                FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_STROKE_CLIP
141            }
142            PdfPageTextRenderMode::InvisibleClipping => {
143                FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_CLIP
144            }
145        }
146    }
147}
148
/// A single [PdfPageObject] of type [PdfPageObjectType::Text]. The page object defines a single
/// piece of formatted text.
///
/// Page objects can be created either attached to a [PdfPage] (in which case the page object's
/// memory is owned by the containing page) or detached from any page (in which case the page
/// object's memory is owned by the object). Page objects are not rendered until they are
/// attached to a page; page objects that are never attached to a page will be lost when they
/// fall out of scope.
///
/// The simplest way to create a page text object that is immediately attached to a page
/// is to call the [PdfPageObjectsCommon::create_text_object()] function.
///
/// Creating a detached page text object offers more scope for customization, but you must
/// add the object to a containing [PdfPage] manually. To create a detached page text object,
/// use the [PdfPageTextObject::new()] function. The detached page text object can later
/// be attached to a page by using the [PdfPageObjectsCommon::add_text_object()] function.
pub struct PdfPageTextObject<'a> {
    // The raw Pdfium handle of the underlying page object; passed to every Pdfium call
    // made on behalf of this text object.
    object_handle: FPDF_PAGEOBJECT,

    // Records what currently owns this page object. Newly created objects start out
    // unowned; text retrieval reads the containing page handle from this value.
    ownership: PdfPageObjectOwnership,

    // The Pdfium bindings through which all library calls for this object are made.
    bindings: &'a dyn PdfiumLibraryBindings,
}
170
171impl<'a> PdfPageTextObject<'a> {
172    #[inline]
173    pub(crate) fn from_pdfium(
174        object_handle: FPDF_PAGEOBJECT,
175        ownership: PdfPageObjectOwnership,
176        bindings: &'a dyn PdfiumLibraryBindings,
177    ) -> Self {
178        PdfPageTextObject {
179            object_handle,
180            ownership,
181            bindings,
182        }
183    }
184
185    /// Creates a new [PdfPageTextObject] from the given arguments. The returned page object
186    /// will not be rendered until it is added to a [PdfPage] using the
187    /// [PdfPageObjectsCommon::add_text_object()] function.
188    ///
189    /// A single space will be used if the given text is empty, in order to avoid
190    /// unexpected behaviour from Pdfium when dealing with empty strings.
191    // Specifically, `FPDFPageObj_SetText()` will crash if we try to apply an empty string to a
192    // text object, and `FPDFText_LoadPage()` will crash if any text object on the page contains
193    // an empty string (so it isn't enough to avoid calling `FPDFPageObj_SetText()` for an empty
194    // text object, we _have_ to set a non-empty string to avoid segfaults).
195    #[inline]
196    pub fn new(
197        document: &PdfDocument<'a>,
198        text: impl ToString,
199        font: impl ToPdfFontToken,
200        font_size: PdfPoints,
201    ) -> Result<Self, PdfiumError> {
202        Self::new_from_handles(
203            document.handle(),
204            text,
205            font.token().handle(),
206            font_size,
207            document.bindings(),
208        )
209    }
210
211    // Take raw `FPDF_DOCUMENT` and `FPDF_FONT` handles to avoid cascading lifetime problems
212    // associated with borrowing `PdfDocument<'a>` and/or `PdfFont<'a>`.
213    pub(crate) fn new_from_handles(
214        document: FPDF_DOCUMENT,
215        text: impl ToString,
216        font: FPDF_FONT,
217        font_size: PdfPoints,
218        bindings: &'a dyn PdfiumLibraryBindings,
219    ) -> Result<Self, PdfiumError> {
220        let handle = bindings.FPDFPageObj_CreateTextObj(document, font, font_size.value);
221
222        if handle.is_null() {
223            Err(PdfiumError::PdfiumLibraryInternalError(
224                PdfiumInternalError::Unknown,
225            ))
226        } else {
227            let mut result = PdfPageTextObject {
228                object_handle: handle,
229                ownership: PdfPageObjectOwnership::unowned(),
230                bindings,
231            };
232
233            result.set_text(text)?;
234
235            Ok(result)
236        }
237    }
238
239    /// Returns the text rendering mode for the text contained within this [PdfPageTextObject].
240    pub fn render_mode(&self) -> PdfPageTextRenderMode {
241        PdfPageTextRenderMode::from_pdfium(
242            self.bindings()
243                .FPDFTextObj_GetTextRenderMode(self.object_handle),
244        )
245        .unwrap_or(PdfPageTextRenderMode::Unknown)
246    }
247
248    /// Returns the effective size of the text when rendered, taking into account both the
249    /// font size specified in this text object as well as any vertical scale factor applied
250    /// to the text object's transformation matrix.
251    ///
252    /// To retrieve only the specified font size, ignoring any vertical scaling, use the
253    /// [PdfPageTextObject::unscaled_font_size()] function.
254    #[inline]
255    pub fn scaled_font_size(&self) -> PdfPoints {
256        PdfPoints::new(self.unscaled_font_size().value * self.get_vertical_scale())
257    }
258
259    /// Returns the font size of the text specified in this [PdfPageTextObject].
260    ///
261    /// Note that the effective size of the text when rendered may differ from the font size
262    /// if a scaling factor has been applied to this text object's transformation matrix.
263    /// To retrieve the effective font size, taking vertical scaling into account, use the
264    /// [PdfPageTextObject::scaled_font_size()] function.
265    pub fn unscaled_font_size(&self) -> PdfPoints {
266        let mut result = 0.0;
267
268        if self.bindings().is_true(
269            self.bindings()
270                .FPDFTextObj_GetFontSize(self.object_handle, &mut result),
271        ) {
272            PdfPoints::new(result)
273        } else {
274            PdfPoints::ZERO
275        }
276    }
277
278    /// Returns the [PdfFont] used to render the text contained within this [PdfPageTextObject].
279    pub fn font(&self) -> PdfFont<'_> {
280        PdfFont::from_pdfium(
281            self.bindings().FPDFTextObj_GetFont(self.object_handle),
282            self.bindings(),
283            None,
284            false,
285        )
286    }
287
288    /// Returns the text contained within this [PdfPageTextObject].
289    ///
290    /// Text retrieval in Pdfium is handled by the [PdfPageText] object owned by the [PdfPage]
291    /// containing this [PdfPageTextObject]. If this text object has not been attached to a page
292    /// then text retrieval will be unavailable and an empty string will be returned.
293    ///
294    /// When retrieving the text from many [PdfPageTextObject] objects (for instance, as part of
295    /// a loop or an iterator), it may be faster to open the [PdfPageText] object once and keep
296    /// it open while processing the text objects, like so:
297    ///
298    /// ```
299    /// let text_page = page.text()?; // Opens the text page once.
300    ///
301    /// for object in <some object iterator> {
302    ///     let object_text = text_page.for_object(object)?;
303    /// }
304    /// ```
305    ///
306    /// The [PdfPageText] object will be closed when the binding to it (`text_page` in the example above)
307    /// falls out of scope.
308    pub fn text(&self) -> String {
309        // Retrieving the text from Pdfium is a two-step operation. First, we call
310        // FPDFTextObj_GetText() with a null buffer; this will retrieve the length of
311        // the text in bytes. If the length is zero, then there is no text associated
312        // with the page object.
313
314        // If the length is non-zero, then we reserve a byte buffer of the given
315        // length and call FPDFTextObj_GetText() again with a pointer to the buffer;
316        // this will write the text to the buffer in UTF16-LE format.
317
318        let page_handle = match self.ownership() {
319            PdfPageObjectOwnership::Page(ownership) => Some(ownership.page_handle()),
320            PdfPageObjectOwnership::AttachedAnnotation(ownership) => Some(ownership.page_handle()),
321            _ => None,
322        };
323
324        if let Some(page_handle) = page_handle {
325            let text_handle = self.bindings().FPDFText_LoadPage(page_handle);
326
327            if !text_handle.is_null() {
328                let buffer_length = self.bindings().FPDFTextObj_GetText(
329                    self.object_handle(),
330                    text_handle,
331                    std::ptr::null_mut(),
332                    0,
333                );
334
335                if buffer_length == 0 {
336                    // There is no text.
337
338                    return String::new();
339                }
340
341                let mut buffer = create_byte_buffer(buffer_length as usize);
342
343                let result = self.bindings().FPDFTextObj_GetText(
344                    self.object_handle(),
345                    text_handle,
346                    buffer.as_mut_ptr() as *mut FPDF_WCHAR,
347                    buffer_length,
348                );
349
350                assert_eq!(result, buffer_length);
351
352                self.bindings.FPDFText_ClosePage(text_handle);
353
354                get_string_from_pdfium_utf16le_bytes(buffer).unwrap_or_default()
355            } else {
356                // The PdfPage containing this page object does not have an associated
357                // FPDF_TEXTPAGE object.
358
359                String::new()
360            }
361        } else {
362            // This page object is not contained by a PdfPage.
363
364            String::new()
365        }
366    }
367
368    /// Sets the text contained within this [PdfPageTextObject], replacing any existing text.
369    ///
370    /// A single space will be used if the given text is empty, in order to avoid
371    /// unexpected behaviour from Pdfium when dealing with an empty string.
372    pub fn set_text(&mut self, text: impl ToString) -> Result<(), PdfiumError> {
373        let text = text.to_string();
374
375        let text = if text.is_empty() { " " } else { text.as_str() };
376
377        if self.bindings().is_true(
378            self.bindings()
379                .FPDFText_SetText_str(self.object_handle(), text),
380        ) {
381            Ok(())
382        } else {
383            Err(PdfiumError::PdfiumLibraryInternalError(
384                PdfiumInternalError::Unknown,
385            ))
386        }
387    }
388
389    /// Sets the text rendering mode for the text contained within this [PdfPageTextObject].
390    pub fn set_render_mode(
391        &mut self,
392        render_mode: PdfPageTextRenderMode,
393    ) -> Result<(), PdfiumError> {
394        if self.bindings().is_true(
395            self.bindings()
396                .FPDFTextObj_SetTextRenderMode(self.object_handle(), render_mode.as_pdfium()),
397        ) {
398            Ok(())
399        } else {
400            Err(PdfiumError::PdfiumLibraryInternalError(
401                PdfiumInternalError::Unknown,
402            ))
403        }
404    }
405
406    #[cfg(any(
407        feature = "pdfium_future",
408        feature = "pdfium_7543",
409        feature = "pdfium_7350",
410        feature = "pdfium_7215",
411        feature = "pdfium_7123",
412        feature = "pdfium_6996",
413        feature = "pdfium_6721",
414        feature = "pdfium_6666",
415        feature = "pdfium_6611",
416    ))]
417    /// Returns a collection of the characters contained within this [PdfPageTextObject],
418    /// using character retrieval functionality provided by the given [PdfPageText] object.
419    #[inline]
420    pub fn chars(&self, text: &'a PdfPageText<'a>) -> Result<PdfPageTextChars<'a>, PdfiumError> {
421        text.chars_for_object(self)
422    }
423
424    #[cfg(any(
425        feature = "pdfium_future",
426        feature = "pdfium_7543",
427        feature = "pdfium_7350",
428        feature = "pdfium_7215",
429        feature = "pdfium_7123",
430        feature = "pdfium_6996",
431        feature = "pdfium_6721",
432        feature = "pdfium_6666",
433        feature = "pdfium_6611",
434    ))]
435    /// Returns `true` if any of the characters contained within this [PdfPageTextObject] have a
436    /// glyph shape that descends below the font baseline.
437    ///
438    /// Character retrieval functionality is provided by the given [PdfPageText] object.
439    #[inline]
440    pub fn has_descenders(&self, text: &PdfPageText) -> Result<bool, PdfiumError> {
441        self.chars(text)
442            .map(|chars| chars.iter().any(|char| char.has_descender()))
443    }
444
445    #[cfg(any(
446        feature = "pdfium_future",
447        feature = "pdfium_7543",
448        feature = "pdfium_7350",
449        feature = "pdfium_7215",
450        feature = "pdfium_7123",
451        feature = "pdfium_6996",
452        feature = "pdfium_6721",
453        feature = "pdfium_6666",
454        feature = "pdfium_6611",
455    ))]
456    /// Returns the descent of this [PdfPageTextObject]. The descent is the maximum distance below
457    /// the baseline reached by any glyph in any of the characters contained in this text object,
458    /// expressed as a negative points value.
459    ///
460    /// Character retrieval and bounds measurement is provided by the given [PdfPageText] object.
461    pub fn descent(&self, text: &PdfPageText) -> Result<PdfPoints, PdfiumError> {
462        let object_bottom = self.get_vertical_translation();
463
464        let mut maximum_descent = object_bottom;
465
466        for char in self.chars(text)?.iter() {
467            let char_bottom = char.tight_bounds()?.bottom();
468
469            if char_bottom < maximum_descent {
470                maximum_descent = char_bottom;
471            }
472        }
473
474        Ok(maximum_descent - object_bottom)
475    }
476
    // Generates the transformation setter functions for this object type. The string
    // arguments are the phrases used to refer to this type in the generated documentation.
    create_transform_setters!(
        &mut Self,
        Result<(), PdfiumError>,
        "this [PdfPageTextObject]",
        "this [PdfPageTextObject].",
        "this [PdfPageTextObject],"
    );

    // The transform_impl() function required by the create_transform_setters!() macro
    // is provided by the PdfPageObjectPrivate trait.

    // Generates the transformation getter functions for this object type.
    // NOTE(review): scaled_font_size() and descent() call get_vertical_scale() and
    // get_vertical_translation(), which are presumably generated here; confirm against
    // the macro definition.
    create_transform_getters!(
        "this [PdfPageTextObject]",
        "this [PdfPageTextObject].",
        "this [PdfPageTextObject],"
    );

    // The get_matrix_impl() function required by the create_transform_getters!() macro
    // is provided by the PdfPageObjectPrivate trait.
496}
497
498impl<'a> PdfPageObjectPrivate<'a> for PdfPageTextObject<'a> {
499    #[inline]
500    fn object_handle(&self) -> FPDF_PAGEOBJECT {
501        self.object_handle
502    }
503
504    #[inline]
505    fn ownership(&self) -> &PdfPageObjectOwnership {
506        &self.ownership
507    }
508
509    #[inline]
510    fn set_ownership(&mut self, ownership: PdfPageObjectOwnership) {
511        self.ownership = ownership;
512    }
513
514    #[inline]
515    fn bindings(&self) -> &dyn PdfiumLibraryBindings {
516        self.bindings
517    }
518
519    #[inline]
520    fn is_copyable_impl(&self) -> bool {
521        true
522    }
523
524    #[inline]
525    fn try_copy_impl<'b>(
526        &self,
527        document: FPDF_DOCUMENT,
528        bindings: &'b dyn PdfiumLibraryBindings,
529    ) -> Result<PdfPageObject<'b>, PdfiumError> {
530        let mut copy = PdfPageTextObject::new_from_handles(
531            document,
532            self.text(),
533            self.font().handle(),
534            self.unscaled_font_size(),
535            bindings,
536        )?;
537
538        copy.set_fill_color(self.fill_color()?)?;
539        copy.set_stroke_color(self.stroke_color()?)?;
540        copy.set_stroke_width(self.stroke_width()?)?;
541        copy.set_line_join(self.line_join()?)?;
542        copy.set_line_cap(self.line_cap()?)?;
543        copy.reset_matrix(self.matrix()?)?;
544
545        Ok(PdfPageObject::Text(copy))
546    }
547}
548
549impl<'a> Drop for PdfPageTextObject<'a> {
550    /// Closes this [PdfPageTextObject], releasing held memory.
551    fn drop(&mut self) {
552        self.drop_impl();
553    }
554}