pdfium_render/pdf/document/page/text/
segment.rs

1//! Defines the [PdfPageTextSegment] struct, exposing functionality related to a single rectangular
2//! text segment in a `PdfPageTextSegments` collection.
3
4use crate::error::PdfiumError;
5use crate::pdf::document::page::text::chars::PdfPageTextChars;
6use crate::pdf::document::page::text::PdfPageText;
7use crate::pdf::points::PdfPoints;
8use crate::pdf::rect::PdfRect;
9
10#[cfg(doc)]
11use {
12    crate::pdf::document::page::text::char::PdfPageTextChar, crate::pdf::document::page::PdfPage,
13};
14
15/// A single rectangular text segment in a `PdfPageTextSegments` collection.
16///
17/// Pdfium automatically merges smaller text boxes into larger text segments if all
18/// enclosed characters share the same baseline and the same font settings. The number of
19/// individual `PdfPageTextObject` objects on the page may be much larger than the number of
20/// text segments.
21pub struct PdfPageTextSegment<'a> {
22    text: &'a PdfPageText<'a>,
23    bounds: PdfRect,
24}
25
26impl<'a> PdfPageTextSegment<'a> {
27    pub(crate) fn from_pdfium(text: &'a PdfPageText<'a>, bounds: PdfRect) -> Self {
28        PdfPageTextSegment { text, bounds }
29    }
30
31    /// Returns the bounding box of this [PdfPageTextSegment].
32    #[inline]
33    pub fn bounds(&self) -> PdfRect {
34        self.bounds
35    }
36
37    /// Returns the width of this [PdfPageTextSegment].
38    #[inline]
39    pub fn width(&self) -> PdfPoints {
40        self.bounds.width()
41    }
42
43    /// Returns the height of this [PdfPageTextSegment].
44    #[inline]
45    pub fn height(&self) -> PdfPoints {
46        self.bounds.height()
47    }
48
49    /// Returns `true` if the bounds of this [PdfPageTextSegment] lie entirely within the given rectangle.
50    #[inline]
51    pub fn is_inside_rect(&self, rect: &PdfRect) -> bool {
52        self.bounds.is_inside(rect)
53    }
54
55    /// Returns `true` if the bounds of this [PdfPageTextSegment] lie at least partially within
56    /// the given rectangle.
57    #[inline]
58    pub fn does_overlap_rect(&self, rect: &PdfRect) -> bool {
59        self.bounds.does_overlap(rect)
60    }
61
62    /// Returns all characters that lie within the bounds of this [PdfPageTextSegment] in the
63    /// containing [PdfPage], in the order in which they are defined in the document.
64    ///
65    /// In complex custom layouts, the order in which characters are defined in the document
66    /// and the order in which they appear visually during rendering (and thus the order in
67    /// which they are read by a user) may not necessarily match.
68    #[inline]
69    pub fn text(&self) -> String {
70        self.text.inside_rect(self.bounds)
71    }
72
73    /// Returns a collection of all the [PdfPageTextChar] characters that lie within the bounds of
74    /// this [PdfPageTextSegment] in the containing [PdfPage], in the order in which they are
75    /// defined in the document.
76    ///
77    /// In complex custom layouts, the order in which characters are defined in the document
78    /// and the order in which they appear visually during rendering (and thus the order in
79    /// which they are read by a user) may not necessarily match.
80    #[inline]
81    pub fn chars(&self) -> Result<PdfPageTextChars, PdfiumError> {
82        self.text.chars_inside_rect(self.bounds)
83    }
84}