pdfium_render/pdf/document/page/text/
segments.rs

1//! Defines the [PdfPageTextSegments] struct, a collection of all the distinct rectangular
2//! areas of a single [PdfPage] occupied by spans of text that share a common text style.
3
4use crate::bindgen::FS_RECTF;
5use crate::bindings::PdfiumLibraryBindings;
6use crate::error::PdfiumError;
7use crate::pdf::document::page::text::segment::PdfPageTextSegment;
8use crate::pdf::document::page::text::PdfPageText;
9use crate::pdf::rect::PdfRect;
10use std::ops::{Range, RangeInclusive};
11use std::os::raw::c_int;
12
13#[cfg(doc)]
14use {
15    crate::pdf::document::page::object::text::PdfPageTextObject,
16    crate::pdf::document::page::PdfPage,
17};
18
/// The zero-based index of a single [PdfPageTextSegment] inside its containing
/// [PdfPageTextSegments] collection.
///
/// Valid indices for a given collection run from `0` up to, but not including,
/// the value returned by [PdfPageTextSegments::len()].
pub type PdfPageTextSegmentIndex = usize;
22
/// A collection of all the distinct rectangular areas of a single [PdfPage] occupied by
/// spans of text that share a common text style.
///
/// Pdfium automatically merges smaller text boxes into larger ones if all enclosed characters
/// are on the same line and share the same font settings.
///
/// The collection does not cache anything: the segment count and each segment's bounds are
/// requested from Pdfium every time they are needed.
pub struct PdfPageTextSegments<'a> {
    // The page text these segments belong to; supplies the Pdfium text page handle.
    text: &'a PdfPageText<'a>,
    // Passed to `FPDFText_CountRects()` as the index of the first character to consider.
    start: i32,
    // Passed to `FPDFText_CountRects()` as the number of characters to consider.
    // NOTE(review): Pdfium presumably treats a negative value as "all remaining
    // characters" -- confirm against the Pdfium API documentation.
    characters: i32,
    // The Pdfium bindings through which all FFI calls for this collection are made.
    bindings: &'a dyn PdfiumLibraryBindings,
}
34
35impl<'a> PdfPageTextSegments<'a> {
36    #[inline]
37    pub(crate) fn new(
38        text: &'a PdfPageText<'a>,
39        start: i32,
40        characters: i32,
41        bindings: &'a dyn PdfiumLibraryBindings,
42    ) -> Self {
43        PdfPageTextSegments {
44            text,
45            start,
46            characters,
47            bindings,
48        }
49    }
50
51    /// Returns the number of distinct rectangular areas occupied by text in the containing [PdfPage].
52    ///
53    /// Pdfium automatically merges smaller text boxes into larger ones if all enclosed characters
54    /// are on the same line and share the same font settings. The number of rectangular text segments
55    /// returned by this function therefore indicates the minimum number of spans of text that
56    /// share text styles on the page. The number of individual [PdfPageTextObject] objects on
57    /// the page may be much larger than the number of text segments.
58    #[inline]
59    pub fn len(&self) -> PdfPageTextSegmentIndex {
60        self.bindings
61            .FPDFText_CountRects(self.text.text_page_handle(), self.start, self.characters)
62            as PdfPageTextSegmentIndex
63    }
64
65    /// Returns `true` if this [PdfPageTextSegments] collection is empty.
66    #[inline]
67    pub fn is_empty(&self) -> bool {
68        self.len() == 0
69    }
70
71    /// Returns a Range from `0..(number of segments)` for this [PdfPageTextSegments] collection.
72    #[inline]
73    pub fn as_range(&self) -> Range<PdfPageTextSegmentIndex> {
74        0..self.len()
75    }
76
77    /// Returns an inclusive Range from `0..=(number of segments - 1)` for this
78    /// [PdfPageTextSegments] collection.
79    #[inline]
80    pub fn as_range_inclusive(&self) -> RangeInclusive<PdfPageTextSegmentIndex> {
81        if self.is_empty() {
82            0..=0
83        } else {
84            0..=(self.len() - 1)
85        }
86    }
87
88    /// Returns a single [PdfPageTextSegment] from this [PdfPageTextSegments] collection.
89    #[inline]
90    pub fn get(
91        &self,
92        index: PdfPageTextSegmentIndex,
93    ) -> Result<PdfPageTextSegment<'_>, PdfiumError> {
94        if index >= self.len() {
95            return Err(PdfiumError::TextSegmentIndexOutOfBounds);
96        }
97
98        let mut left = 0.0;
99
100        let mut bottom = 0.0;
101
102        let mut right = 0.0;
103
104        let mut top = 0.0;
105
106        let result = self.bindings.FPDFText_GetRect(
107            self.text.text_page_handle(),
108            index as c_int,
109            &mut left,
110            &mut top,
111            &mut right,
112            &mut bottom,
113        );
114
115        PdfRect::from_pdfium_as_result(
116            result,
117            FS_RECTF {
118                left: left as f32,
119                top: top as f32,
120                right: right as f32,
121                bottom: bottom as f32,
122            },
123            self.bindings,
124        )
125        .map(|rect| PdfPageTextSegment::from_pdfium(self.text, rect))
126    }
127
128    /// Returns an iterator over all the text segments in this [PdfPageTextSegments] collection.
129    ///
130    /// Pdfium automatically merges smaller text boxes into larger text segments if all
131    /// enclosed characters are on the same line and share the same font settings. The number of
132    /// individual [PdfPageTextObject] objects on the page may be much larger than the number of
133    /// text segments.
134    #[inline]
135    pub fn iter(&self) -> PdfPageTextSegmentsIterator<'_> {
136        PdfPageTextSegmentsIterator::new(self)
137    }
138}
139
/// An iterator over all the [PdfPageTextSegment] objects in a [PdfPageTextSegments] collection.
///
/// Segments are produced in index order, starting from index zero.
pub struct PdfPageTextSegmentsIterator<'a> {
    // The collection being iterated over.
    segments: &'a PdfPageTextSegments<'a>,
    // The index of the segment that the next call to `next()` will request.
    next_index: PdfPageTextSegmentIndex,
}
145
146impl<'a> PdfPageTextSegmentsIterator<'a> {
147    #[inline]
148    pub(crate) fn new(segments: &'a PdfPageTextSegments<'a>) -> Self {
149        PdfPageTextSegmentsIterator {
150            segments,
151            next_index: 0,
152        }
153    }
154}
155
156impl<'a> Iterator for PdfPageTextSegmentsIterator<'a> {
157    type Item = PdfPageTextSegment<'a>;
158
159    fn next(&mut self) -> Option<Self::Item> {
160        let next = self.segments.get(self.next_index);
161
162        self.next_index += 1;
163
164        next.ok()
165    }
166}