pdfium_render/pdf/document/page/object/
text.rs

1//! Defines the [PdfPageTextObject] struct, exposing functionality related to a single
2//! page object defining a piece of formatted text.
3
4use crate::bindgen::{
5    FPDF_DOCUMENT, FPDF_FONT, FPDF_PAGEOBJECT, FPDF_TEXT_RENDERMODE,
6    FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_CLIP, FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL,
7    FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_CLIP,
8    FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_STROKE,
9    FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_STROKE_CLIP,
10    FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_INVISIBLE,
11    FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_STROKE,
12    FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_STROKE_CLIP,
13    FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_UNKNOWN, FPDF_WCHAR,
14};
15use crate::bindings::PdfiumLibraryBindings;
16use crate::error::{PdfiumError, PdfiumInternalError};
17use crate::pdf::document::fonts::ToPdfFontToken;
18use crate::pdf::document::page::object::private::internal::PdfPageObjectPrivate;
19use crate::pdf::document::page::object::{
20    PdfPageObject, PdfPageObjectCommon, PdfPageObjectOwnership,
21};
22
23use crate::pdf::document::PdfDocument;
24use crate::pdf::font::PdfFont;
25use crate::pdf::matrix::{PdfMatrix, PdfMatrixValue};
26use crate::pdf::points::PdfPoints;
27use crate::utils::mem::create_byte_buffer;
28use crate::utils::utf16le::get_string_from_pdfium_utf16le_bytes;
29use crate::{create_transform_getters, create_transform_setters};
30
31#[cfg(any(
32    feature = "pdfium_future",
33    feature = "pdfium_7123",
34    feature = "pdfium_6996",
35    feature = "pdfium_6721",
36    feature = "pdfium_6666",
37    feature = "pdfium_6611",
38))]
39use {
40    crate::pdf::document::page::text::chars::PdfPageTextChars,
41    crate::pdf::document::page::text::PdfPageText,
42};
43
44#[cfg(doc)]
45use {
46    crate::pdf::document::page::object::PdfPageObjectType,
47    crate::pdf::document::page::objects::common::PdfPageObjectsCommon,
48    crate::pdf::document::page::PdfPage,
49};
50
51/// The text rendering modes supported by the PDF standard, as listed in table 5.3
52/// on page 402 in the PDF Reference manual version 1.7.
53#[derive(Debug, Copy, Clone, PartialOrd, PartialEq)]
54pub enum PdfPageTextRenderMode {
55    /// The text render mode is not recognized by Pdfium.
56    Unknown = FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_UNKNOWN as isize,
57
58    /// The text will be filled, but not stroked.
59    FilledUnstroked = FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL as isize,
60
61    /// The text will be stroked, but not filled.
62    StrokedUnfilled = FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_STROKE as isize,
63
64    /// The text will be filled, then stroked.
65    FilledThenStroked = FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_STROKE as isize,
66
67    /// The text will be neither filled nor stroked. It will still take up size in the layout, however.
68    Invisible = FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_INVISIBLE as isize,
69
70    /// The text will be filled and added to the path for clipping.
71    FilledUnstrokedClipping = FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_CLIP as isize,
72
73    /// The text will be stroked and added to the path for clipping.
74    StrokedUnfilledClipping = FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_STROKE_CLIP as isize,
75
76    /// The text will be filled, then stroked, and added to the path for clipping.
77    FilledThenStrokedClipping = FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_STROKE_CLIP as isize,
78
79    /// The text will be neither filled nor stroked, only added to the path for clipping.
80    InvisibleClipping = FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_CLIP as isize,
81}
82
83impl PdfPageTextRenderMode {
84    #[inline]
85    pub(crate) fn from_pdfium(value: i32) -> Result<PdfPageTextRenderMode, PdfiumError> {
86        match value {
87            FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_UNKNOWN => Ok(PdfPageTextRenderMode::Unknown),
88            FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL => {
89                Ok(PdfPageTextRenderMode::FilledUnstroked)
90            }
91            FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_STROKE => {
92                Ok(PdfPageTextRenderMode::StrokedUnfilled)
93            }
94            FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_STROKE => {
95                Ok(PdfPageTextRenderMode::FilledThenStroked)
96            }
97            FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_INVISIBLE => {
98                Ok(PdfPageTextRenderMode::Invisible)
99            }
100            FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_CLIP => {
101                Ok(PdfPageTextRenderMode::FilledUnstrokedClipping)
102            }
103            FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_STROKE_CLIP => {
104                Ok(PdfPageTextRenderMode::StrokedUnfilledClipping)
105            }
106            FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_STROKE_CLIP => {
107                Ok(PdfPageTextRenderMode::FilledThenStrokedClipping)
108            }
109            FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_CLIP => {
110                Ok(PdfPageTextRenderMode::InvisibleClipping)
111            }
112            _ => Err(PdfiumError::UnknownPdfPageTextRenderMode),
113        }
114    }
115
116    #[inline]
117    #[allow(dead_code)]
118    // The as_pdfium() function is not currently used, but we expect it to be in future
119    pub(crate) fn as_pdfium(&self) -> FPDF_TEXT_RENDERMODE {
120        match self {
121            PdfPageTextRenderMode::Unknown => FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_UNKNOWN,
122            PdfPageTextRenderMode::FilledUnstroked => FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL,
123            PdfPageTextRenderMode::StrokedUnfilled => {
124                FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_STROKE
125            }
126            PdfPageTextRenderMode::FilledThenStroked => {
127                FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_STROKE
128            }
129            PdfPageTextRenderMode::Invisible => FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_INVISIBLE,
130            PdfPageTextRenderMode::FilledUnstrokedClipping => {
131                FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_CLIP
132            }
133            PdfPageTextRenderMode::StrokedUnfilledClipping => {
134                FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_STROKE_CLIP
135            }
136            PdfPageTextRenderMode::FilledThenStrokedClipping => {
137                FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_FILL_STROKE_CLIP
138            }
139            PdfPageTextRenderMode::InvisibleClipping => {
140                FPDF_TEXT_RENDERMODE_FPDF_TEXTRENDERMODE_CLIP
141            }
142        }
143    }
144}
145
/// A single [PdfPageObject] of type [PdfPageObjectType::Text]. The page object defines a single
/// piece of formatted text.
///
/// Page objects can be created either attached to a [PdfPage] (in which case the page object's
/// memory is owned by the containing page) or detached from any page (in which case the page
/// object's memory is owned by the object). Page objects are not rendered until they are
/// attached to a page; page objects that are never attached to a page will be lost when they
/// fall out of scope.
///
/// The simplest way to create a page text object that is immediately attached to a page
/// is to call the [PdfPageObjectsCommon::create_text_object()] function.
///
/// Creating a detached page text object offers more scope for customization, but you must
/// add the object to a containing [PdfPage] manually. To create a detached page text object,
/// use the [PdfPageTextObject::new()] function. The detached page text object can later
/// be attached to a page by using the [PdfPageObjectsCommon::add_text_object()] function.
pub struct PdfPageTextObject<'a> {
    // The raw Pdfium handle of the wrapped text page object.
    object_handle: FPDF_PAGEOBJECT,
    // Records what, if anything, currently contains this page object; newly created
    // objects start out as `PdfPageObjectOwnership::unowned()`.
    ownership: PdfPageObjectOwnership,
    // The Pdfium bindings through which every FFI call on this object is made.
    bindings: &'a dyn PdfiumLibraryBindings,
}
167
168impl<'a> PdfPageTextObject<'a> {
169    #[inline]
170    pub(crate) fn from_pdfium(
171        object_handle: FPDF_PAGEOBJECT,
172        ownership: PdfPageObjectOwnership,
173        bindings: &'a dyn PdfiumLibraryBindings,
174    ) -> Self {
175        PdfPageTextObject {
176            object_handle,
177            ownership,
178            bindings,
179        }
180    }
181
182    /// Creates a new [PdfPageTextObject] from the given arguments. The returned page object
183    /// will not be rendered until it is added to a [PdfPage] using the
184    /// [PdfPageObjectsCommon::add_text_object()] function.
185    ///
186    /// A single space will be used if the given text is empty, in order to avoid
187    /// unexpected behaviour from Pdfium when dealing with empty strings.
188    // Specifically, `FPDFPageObj_SetText()` will crash if we try to apply an empty string to a
189    // text object, and `FPDFText_LoadPage()` will crash if any text object on the page contains
190    // an empty string (so it isn't enough to avoid calling `FPDFPageObj_SetText()` for an empty
191    // text object, we _have_ to set a non-empty string to avoid segfaults).
192    #[inline]
193    pub fn new(
194        document: &PdfDocument<'a>,
195        text: impl ToString,
196        font: impl ToPdfFontToken,
197        font_size: PdfPoints,
198    ) -> Result<Self, PdfiumError> {
199        Self::new_from_handles(
200            document.handle(),
201            text,
202            font.token().handle(),
203            font_size,
204            document.bindings(),
205        )
206    }
207
208    // Take raw `FPDF_DOCUMENT` and `FPDF_FONT` handles to avoid cascading lifetime problems
209    // associated with borrowing `PdfDocument<'a>` and/or `PdfFont<'a>`.
210    pub(crate) fn new_from_handles(
211        document: FPDF_DOCUMENT,
212        text: impl ToString,
213        font: FPDF_FONT,
214        font_size: PdfPoints,
215        bindings: &'a dyn PdfiumLibraryBindings,
216    ) -> Result<Self, PdfiumError> {
217        let handle = bindings.FPDFPageObj_CreateTextObj(document, font, font_size.value);
218
219        if handle.is_null() {
220            Err(PdfiumError::PdfiumLibraryInternalError(
221                PdfiumInternalError::Unknown,
222            ))
223        } else {
224            let mut result = PdfPageTextObject {
225                object_handle: handle,
226                ownership: PdfPageObjectOwnership::unowned(),
227                bindings,
228            };
229
230            result.set_text(text)?;
231
232            Ok(result)
233        }
234    }
235
236    /// Returns the text rendering mode for the text contained within this [PdfPageTextObject].
237    pub fn render_mode(&self) -> PdfPageTextRenderMode {
238        PdfPageTextRenderMode::from_pdfium(
239            self.bindings()
240                .FPDFTextObj_GetTextRenderMode(self.object_handle),
241        )
242        .unwrap_or(PdfPageTextRenderMode::Unknown)
243    }
244
245    /// Returns the effective size of the text when rendered, taking into account both the
246    /// font size specified in this text object as well as any vertical scale factor applied
247    /// to the text object's transformation matrix.
248    ///
249    /// To retrieve only the specified font size, ignoring any vertical scaling, use the
250    /// [PdfPageTextObject::unscaled_font_size()] function.
251    #[inline]
252    pub fn scaled_font_size(&self) -> PdfPoints {
253        PdfPoints::new(self.unscaled_font_size().value * self.get_vertical_scale())
254    }
255
256    /// Returns the font size of the text specified in this [PdfPageTextObject].
257    ///
258    /// Note that the effective size of the text when rendered may differ from the font size
259    /// if a scaling factor has been applied to this text object's transformation matrix.
260    /// To retrieve the effective font size, taking vertical scaling into account, use the
261    /// [PdfPageTextObject::scaled_font_size()] function.
262    pub fn unscaled_font_size(&self) -> PdfPoints {
263        let mut result = 0.0;
264
265        if self.bindings().is_true(
266            self.bindings()
267                .FPDFTextObj_GetFontSize(self.object_handle, &mut result),
268        ) {
269            PdfPoints::new(result)
270        } else {
271            PdfPoints::ZERO
272        }
273    }
274
275    /// Returns the [PdfFont] used to render the text contained within this [PdfPageTextObject].
276    pub fn font(&self) -> PdfFont {
277        PdfFont::from_pdfium(
278            self.bindings().FPDFTextObj_GetFont(self.object_handle),
279            self.bindings(),
280            None,
281            false,
282        )
283    }
284
285    /// Returns the text contained within this [PdfPageTextObject].
286    ///
287    /// Text retrieval in Pdfium is handled by the [PdfPageText] object owned by the [PdfPage]
288    /// containing this [PdfPageTextObject]. If this text object has not been attached to a page
289    /// then text retrieval will be unavailable and an empty string will be returned.
290    ///
291    /// When retrieving the text from many [PdfPageTextObject] objects (for instance, as part of
292    /// a loop or an iterator), it may be faster to open the [PdfPageText] object once and keep
293    /// it open while processing the text objects, like so:
294    ///
295    /// ```
296    /// let text_page = page.text()?; // Opens the text page once.
297    ///
298    /// for object in <some object iterator> {
299    ///     let object_text = text_page.for_object(object)?;
300    /// }
301    /// ```
302    ///
303    /// The [PdfPageText] object will be closed when the binding to it (`text_page` in the example above)
304    /// falls out of scope.
305    pub fn text(&self) -> String {
306        // Retrieving the text from Pdfium is a two-step operation. First, we call
307        // FPDFTextObj_GetText() with a null buffer; this will retrieve the length of
308        // the text in bytes. If the length is zero, then there is no text associated
309        // with the page object.
310
311        // If the length is non-zero, then we reserve a byte buffer of the given
312        // length and call FPDFTextObj_GetText() again with a pointer to the buffer;
313        // this will write the text to the buffer in UTF16-LE format.
314
315        let page_handle = match self.ownership() {
316            PdfPageObjectOwnership::Page(ownership) => Some(ownership.page_handle()),
317            PdfPageObjectOwnership::AttachedAnnotation(ownership) => Some(ownership.page_handle()),
318            _ => None,
319        };
320
321        if let Some(page_handle) = page_handle {
322            let text_handle = self.bindings().FPDFText_LoadPage(page_handle);
323
324            if !text_handle.is_null() {
325                let buffer_length = self.bindings().FPDFTextObj_GetText(
326                    self.object_handle(),
327                    text_handle,
328                    std::ptr::null_mut(),
329                    0,
330                );
331
332                if buffer_length == 0 {
333                    // There is no text.
334
335                    return String::new();
336                }
337
338                let mut buffer = create_byte_buffer(buffer_length as usize);
339
340                let result = self.bindings().FPDFTextObj_GetText(
341                    self.object_handle(),
342                    text_handle,
343                    buffer.as_mut_ptr() as *mut FPDF_WCHAR,
344                    buffer_length,
345                );
346
347                assert_eq!(result, buffer_length);
348
349                self.bindings.FPDFText_ClosePage(text_handle);
350
351                get_string_from_pdfium_utf16le_bytes(buffer).unwrap_or_default()
352            } else {
353                // The PdfPage containing this page object does not have an associated
354                // FPDF_TEXTPAGE object.
355
356                String::new()
357            }
358        } else {
359            // This page object is not contained by a PdfPage.
360
361            String::new()
362        }
363    }
364
365    /// Sets the text contained within this [PdfPageTextObject], replacing any existing text.
366    ///
367    /// A single space will be used if the given text is empty, in order to avoid
368    /// unexpected behaviour from Pdfium when dealing with an empty string.
369    pub fn set_text(&mut self, text: impl ToString) -> Result<(), PdfiumError> {
370        let text = text.to_string();
371
372        let text = if text.is_empty() { " " } else { text.as_str() };
373
374        if self.bindings().is_true(
375            self.bindings()
376                .FPDFText_SetText_str(self.object_handle(), text),
377        ) {
378            Ok(())
379        } else {
380            Err(PdfiumError::PdfiumLibraryInternalError(
381                PdfiumInternalError::Unknown,
382            ))
383        }
384    }
385
386    /// Sets the text rendering mode for the text contained within this [PdfPageTextObject].
387    pub fn set_render_mode(
388        &mut self,
389        render_mode: PdfPageTextRenderMode,
390    ) -> Result<(), PdfiumError> {
391        if self.bindings().is_true(
392            self.bindings()
393                .FPDFTextObj_SetTextRenderMode(self.object_handle(), render_mode.as_pdfium()),
394        ) {
395            Ok(())
396        } else {
397            Err(PdfiumError::PdfiumLibraryInternalError(
398                PdfiumInternalError::Unknown,
399            ))
400        }
401    }
402
    #[cfg(any(
        feature = "pdfium_future",
        feature = "pdfium_7123",
        feature = "pdfium_6996",
        feature = "pdfium_6721",
        feature = "pdfium_6666",
        feature = "pdfium_6611",
    ))]
    /// Returns a collection of the characters contained within this [PdfPageTextObject],
    /// using character retrieval functionality provided by the given [PdfPageText] object.
    ///
    /// This function is only available when one of the Pdfium API version features
    /// listed in its `cfg` attribute is enabled. It delegates directly to the given
    /// [PdfPageText] object and returns whatever result that object produces.
    #[inline]
    pub fn chars(&self, text: &'a PdfPageText<'a>) -> Result<PdfPageTextChars<'a>, PdfiumError> {
        text.chars_for_object(self)
    }
417
418    #[cfg(any(
419        feature = "pdfium_future",
420        feature = "pdfium_7123",
421        feature = "pdfium_6996",
422        feature = "pdfium_6721",
423        feature = "pdfium_6666",
424        feature = "pdfium_6611",
425    ))]
426    /// Returns `true` if any of the characters contained within this [PdfPageTextObject] have a
427    /// glyph shape that descends below the font baseline.
428    ///
429    /// Character retrieval functionality is provided by the given [PdfPageText] object.
430    #[inline]
431    pub fn has_descenders(&self, text: &PdfPageText) -> Result<bool, PdfiumError> {
432        self.chars(text)
433            .map(|chars| chars.iter().any(|char| char.has_descender()))
434    }
435
436    #[cfg(any(
437        feature = "pdfium_future",
438        feature = "pdfium_7123",
439        feature = "pdfium_6996",
440        feature = "pdfium_6721",
441        feature = "pdfium_6666",
442        feature = "pdfium_6611",
443    ))]
444    /// Returns the descent of this [PdfPageTextObject]. The descent is the maximum distance below
445    /// the baseline reached by any glyph in any of the characters contained in this text object,
446    /// expressed as a negative points value.
447    ///
448    /// Character retrieval and bounds measurement is provided by the given [PdfPageText] object.
449    pub fn descent(&self, text: &PdfPageText) -> Result<PdfPoints, PdfiumError> {
450        let object_bottom = self.get_vertical_translation();
451
452        let mut maximum_descent = object_bottom;
453
454        for char in self.chars(text)?.iter() {
455            let char_bottom = char.tight_bounds()?.bottom();
456
457            if char_bottom < maximum_descent {
458                maximum_descent = char_bottom;
459            }
460        }
461
462        Ok(maximum_descent - object_bottom)
463    }
464
    // NOTE(review): this macro presumably expands to the transformation setter functions
    // for this page object, taking `&mut Self` and returning `Result<(), PdfiumError>`;
    // the string arguments look like documentation fragments. Confirm against the
    // macro definition.
    create_transform_setters!(
        &mut Self,
        Result<(), PdfiumError>,
        "this [PdfPageTextObject]",
        "this [PdfPageTextObject].",
        "this [PdfPageTextObject],"
    );

    // The transform_impl() function required by the create_transform_setters!() macro
    // is provided by the PdfPageObjectPrivate trait.

    // NOTE(review): this macro presumably expands to the matching transformation getter
    // functions (such as the get_vertical_scale() and get_vertical_translation() calls
    // used elsewhere in this impl). Confirm against the macro definition.
    create_transform_getters!(
        "this [PdfPageTextObject]",
        "this [PdfPageTextObject].",
        "this [PdfPageTextObject],"
    );

    // The get_matrix_impl() function required by the create_transform_getters!() macro
    // is provided by the PdfPageObjectPrivate trait.
484}
485
486impl<'a> PdfPageObjectPrivate<'a> for PdfPageTextObject<'a> {
487    #[inline]
488    fn object_handle(&self) -> FPDF_PAGEOBJECT {
489        self.object_handle
490    }
491
492    #[inline]
493    fn ownership(&self) -> &PdfPageObjectOwnership {
494        &self.ownership
495    }
496
497    #[inline]
498    fn set_ownership(&mut self, ownership: PdfPageObjectOwnership) {
499        self.ownership = ownership;
500    }
501
502    #[inline]
503    fn bindings(&self) -> &dyn PdfiumLibraryBindings {
504        self.bindings
505    }
506
507    #[inline]
508    fn is_copyable_impl(&self) -> bool {
509        true
510    }
511
512    #[inline]
513    fn try_copy_impl<'b>(
514        &self,
515        document: FPDF_DOCUMENT,
516        bindings: &'b dyn PdfiumLibraryBindings,
517    ) -> Result<PdfPageObject<'b>, PdfiumError> {
518        let mut copy = PdfPageTextObject::new_from_handles(
519            document,
520            self.text(),
521            self.font().handle(),
522            self.unscaled_font_size(),
523            bindings,
524        )?;
525
526        copy.set_fill_color(self.fill_color()?)?;
527        copy.set_stroke_color(self.stroke_color()?)?;
528        copy.set_stroke_width(self.stroke_width()?)?;
529        copy.set_line_join(self.line_join()?)?;
530        copy.set_line_cap(self.line_cap()?)?;
531        copy.reset_matrix(self.matrix()?)?;
532
533        Ok(PdfPageObject::Text(copy))
534    }
535}