pdfium_render/pdf/document/page/text/
segments.rs

1//! Defines the [PdfPageTextSegments] struct, a collection of all the distinct rectangular
2//! areas of a single [PdfPage] occupied by spans of text that share a common text style.
3
4use crate::bindgen::FS_RECTF;
5use crate::bindings::PdfiumLibraryBindings;
6use crate::error::PdfiumError;
7use crate::pdf::document::page::text::segment::PdfPageTextSegment;
8use crate::pdf::document::page::text::PdfPageText;
9use crate::pdf::rect::PdfRect;
10use std::ops::{Range, RangeInclusive};
11use std::os::raw::c_int;
12
13#[cfg(doc)]
14use {
15    crate::pdf::document::page::object::text::PdfPageTextObject,
16    crate::pdf::document::page::PdfPage,
17};
18
/// The zero-based position of one [PdfPageTextSegment] within the
/// [PdfPageTextSegments] collection that contains it.
pub type PdfPageTextSegmentIndex = usize;
22
23/// A collection of all the distinct rectangular areas of a single [PdfPage] occupied by
24/// spans of text that share a common text style.
25///
26/// Pdfium automatically merges smaller text boxes into larger ones if all enclosed characters
27/// are on the same line and share the same font settings.
28pub struct PdfPageTextSegments<'a> {
29    text: &'a PdfPageText<'a>,
30    start: i32,
31    characters: i32,
32    bindings: &'a dyn PdfiumLibraryBindings,
33}
34
35impl<'a> PdfPageTextSegments<'a> {
36    #[inline]
37    pub(crate) fn new(
38        text: &'a PdfPageText<'a>,
39        start: i32,
40        characters: i32,
41        bindings: &'a dyn PdfiumLibraryBindings,
42    ) -> Self {
43        PdfPageTextSegments {
44            text,
45            start,
46            characters,
47            bindings,
48        }
49    }
50
51    /// Returns the number of distinct rectangular areas occupied by text in the containing [PdfPage].
52    ///
53    /// Pdfium automatically merges smaller text boxes into larger ones if all enclosed characters
54    /// are on the same line and share the same font settings. The number of rectangular text segments
55    /// returned by this function therefore indicates the minimum number of spans of text that
56    /// share text styles on the page. The number of individual [PdfPageTextObject] objects on
57    /// the page may be much larger than the number of text segments.
58    #[inline]
59    pub fn len(&self) -> PdfPageTextSegmentIndex {
60        self.bindings
61            .FPDFText_CountRects(self.text.text_page_handle(), self.start, self.characters)
62            as PdfPageTextSegmentIndex
63    }
64
65    /// Returns `true` if this [PdfPageTextSegments] collection is empty.
66    #[inline]
67    pub fn is_empty(&self) -> bool {
68        self.len() == 0
69    }
70
71    /// Returns a Range from `0..(number of segments)` for this [PdfPageTextSegments] collection.
72    #[inline]
73    pub fn as_range(&self) -> Range<PdfPageTextSegmentIndex> {
74        0..self.len()
75    }
76
77    /// Returns an inclusive Range from `0..=(number of segments - 1)` for this
78    /// [PdfPageTextSegments] collection.
79    #[inline]
80    pub fn as_range_inclusive(&self) -> RangeInclusive<PdfPageTextSegmentIndex> {
81        if self.is_empty() {
82            0..=0
83        } else {
84            0..=(self.len() - 1)
85        }
86    }
87
88    /// Returns a single [PdfPageTextSegment] from this [PdfPageTextSegments] collection.
89    #[inline]
90    pub fn get(&self, index: PdfPageTextSegmentIndex) -> Result<PdfPageTextSegment, PdfiumError> {
91        if index >= self.len() {
92            return Err(PdfiumError::TextSegmentIndexOutOfBounds);
93        }
94
95        let mut left = 0.0;
96
97        let mut bottom = 0.0;
98
99        let mut right = 0.0;
100
101        let mut top = 0.0;
102
103        let result = self.bindings.FPDFText_GetRect(
104            self.text.text_page_handle(),
105            index as c_int,
106            &mut left,
107            &mut top,
108            &mut right,
109            &mut bottom,
110        );
111
112        PdfRect::from_pdfium_as_result(
113            result,
114            FS_RECTF {
115                left: left as f32,
116                top: top as f32,
117                right: right as f32,
118                bottom: bottom as f32,
119            },
120            self.bindings,
121        )
122        .map(|rect| PdfPageTextSegment::from_pdfium(self.text, rect))
123    }
124
125    /// Returns an iterator over all the text segments in this [PdfPageTextSegments] collection.
126    ///
127    /// Pdfium automatically merges smaller text boxes into larger text segments if all
128    /// enclosed characters are on the same line and share the same font settings. The number of
129    /// individual [PdfPageTextObject] objects on the page may be much larger than the number of
130    /// text segments.
131    #[inline]
132    pub fn iter(&self) -> PdfPageTextSegmentsIterator {
133        PdfPageTextSegmentsIterator::new(self)
134    }
135}
136
137/// An iterator over all the [PdfPageTextSegment] objects in a [PdfPageTextSegments] collection.
138pub struct PdfPageTextSegmentsIterator<'a> {
139    segments: &'a PdfPageTextSegments<'a>,
140    next_index: PdfPageTextSegmentIndex,
141}
142
143impl<'a> PdfPageTextSegmentsIterator<'a> {
144    #[inline]
145    pub(crate) fn new(segments: &'a PdfPageTextSegments<'a>) -> Self {
146        PdfPageTextSegmentsIterator {
147            segments,
148            next_index: 0,
149        }
150    }
151}
152
153impl<'a> Iterator for PdfPageTextSegmentsIterator<'a> {
154    type Item = PdfPageTextSegment<'a>;
155
156    fn next(&mut self) -> Option<Self::Item> {
157        let next = self.segments.get(self.next_index);
158
159        self.next_index += 1;
160
161        next.ok()
162    }
163}