// pdfium_render/pdf/document/page/text/segments.rs

//! Defines the [PdfPageTextSegments] struct, a collection of all the distinct rectangular
//! areas of a single [PdfPage] occupied by spans of text that share a common text style.
3
4use crate::bindgen::FS_RECTF;
5use crate::bindings::PdfiumLibraryBindings;
6use crate::error::PdfiumError;
7use crate::pdf::document::page::text::segment::PdfPageTextSegment;
8use crate::pdf::document::page::text::PdfPageText;
9use crate::pdf::rect::PdfRect;
10use std::ops::{Range, RangeInclusive};
11use std::os::raw::c_int;
12
13#[cfg(doc)]
14use {
15    crate::pdf::document::page::object::text::PdfPageTextObject,
16    crate::pdf::document::page::PdfPage,
17};
18
/// The zero-based position of one [PdfPageTextSegment] within the
/// [PdfPageTextSegments] collection that contains it.
pub type PdfPageTextSegmentIndex = usize;
22
23/// A collection of all the distinct rectangular areas of a single [PdfPage] occupied by
24/// spans of text that share a common text style.
25///
26/// Pdfium automatically merges smaller text boxes into larger ones if all enclosed characters
27/// are on the same line and share the same font settings.
28pub struct PdfPageTextSegments<'a> {
29    text: &'a PdfPageText<'a>,
30    start: i32,
31    characters: i32,
32    bindings: &'a dyn PdfiumLibraryBindings,
33}
34
35impl<'a> PdfPageTextSegments<'a> {
36    #[inline]
37    pub(crate) fn new(
38        text: &'a PdfPageText<'a>,
39        start: i32,
40        characters: i32,
41        bindings: &'a dyn PdfiumLibraryBindings,
42    ) -> Self {
43        PdfPageTextSegments {
44            text,
45            start,
46            characters,
47            bindings,
48        }
49    }
50
51    /// Returns the number of distinct rectangular areas occupied by text in the containing [PdfPage].
52    ///
53    /// Pdfium automatically merges smaller text boxes into larger ones if all enclosed characters
54    /// are on the same line and share the same font settings. The number of rectangular text segments
55    /// returned by this function therefore indicates the minimum number of spans of text that
56    /// share text styles on the page. The number of individual [PdfPageTextObject] objects on
57    /// the page may be much larger than the number of text segments.
58    #[inline]
59    pub fn len(&self) -> PdfPageTextSegmentIndex {
60        (unsafe {
61            self.bindings.FPDFText_CountRects(
62                self.text.text_page_handle(),
63                self.start,
64                self.characters,
65            )
66        }) as PdfPageTextSegmentIndex
67    }
68
69    /// Returns `true` if this [PdfPageTextSegments] collection is empty.
70    #[inline]
71    pub fn is_empty(&self) -> bool {
72        self.len() == 0
73    }
74
75    /// Returns a Range from `0..(number of segments)` for this [PdfPageTextSegments] collection.
76    #[inline]
77    pub fn as_range(&self) -> Range<PdfPageTextSegmentIndex> {
78        0..self.len()
79    }
80
81    /// Returns an inclusive Range from `0..=(number of segments - 1)` for this
82    /// [PdfPageTextSegments] collection.
83    #[inline]
84    pub fn as_range_inclusive(&self) -> RangeInclusive<PdfPageTextSegmentIndex> {
85        if self.is_empty() {
86            0..=0
87        } else {
88            0..=(self.len() - 1)
89        }
90    }
91
92    /// Returns a single [PdfPageTextSegment] from this [PdfPageTextSegments] collection.
93    #[inline]
94    pub fn get(
95        &self,
96        index: PdfPageTextSegmentIndex,
97    ) -> Result<PdfPageTextSegment<'_>, PdfiumError> {
98        if index >= self.len() {
99            return Err(PdfiumError::TextSegmentIndexOutOfBounds);
100        }
101
102        let mut left = 0.0;
103        let mut bottom = 0.0;
104        let mut right = 0.0;
105        let mut top = 0.0;
106
107        let result = unsafe {
108            self.bindings.FPDFText_GetRect(
109                self.text.text_page_handle(),
110                index as c_int,
111                &mut left,
112                &mut top,
113                &mut right,
114                &mut bottom,
115            )
116        };
117
118        PdfRect::from_pdfium_as_result(
119            result,
120            FS_RECTF {
121                left: left as f32,
122                top: top as f32,
123                right: right as f32,
124                bottom: bottom as f32,
125            },
126            self.bindings,
127        )
128        .map(|rect| PdfPageTextSegment::from_pdfium(self.text, rect))
129    }
130
131    /// Returns an iterator over all the text segments in this [PdfPageTextSegments] collection.
132    ///
133    /// Pdfium automatically merges smaller text boxes into larger text segments if all
134    /// enclosed characters are on the same line and share the same font settings. The number of
135    /// individual [PdfPageTextObject] objects on the page may be much larger than the number of
136    /// text segments.
137    #[inline]
138    pub fn iter(&self) -> PdfPageTextSegmentsIterator<'_> {
139        PdfPageTextSegmentsIterator::new(self)
140    }
141}
142
143/// An iterator over all the [PdfPageTextSegment] objects in a [PdfPageTextSegments] collection.
144pub struct PdfPageTextSegmentsIterator<'a> {
145    segments: &'a PdfPageTextSegments<'a>,
146    next_index: PdfPageTextSegmentIndex,
147}
148
149impl<'a> PdfPageTextSegmentsIterator<'a> {
150    #[inline]
151    pub(crate) fn new(segments: &'a PdfPageTextSegments<'a>) -> Self {
152        PdfPageTextSegmentsIterator {
153            segments,
154            next_index: 0,
155        }
156    }
157}
158
159impl<'a> Iterator for PdfPageTextSegmentsIterator<'a> {
160    type Item = PdfPageTextSegment<'a>;
161
162    fn next(&mut self) -> Option<Self::Item> {
163        let next = self.segments.get(self.next_index);
164
165        self.next_index += 1;
166
167        next.ok()
168    }
169}