1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
//! Defines the [PdfPageTextSegments] struct, a collection of all the distinct rectangular
//! areas of a single [PdfPage] occupied by spans of text that share a common text style.
use crate::bindgen::FS_RECTF;
use crate::bindings::PdfiumLibraryBindings;
use crate::error::PdfiumError;
use crate::page_text::PdfPageText;
use crate::page_text_segment::PdfPageTextSegment;
use crate::rect::PdfRect;
use std::ops::{Range, RangeInclusive};
use std::os::raw::c_int;
#[cfg(doc)]
use crate::page::PdfPage;
#[cfg(doc)]
use crate::page_object_text::PdfPageTextObject;
/// The zero-based index of a single [PdfPageTextSegment] inside its containing
/// [PdfPageTextSegments] collection. The same type is used to report the number of
/// segments in the collection.
pub type PdfPageTextSegmentIndex = usize;
/// A collection of all the distinct rectangular areas of a single [PdfPage] occupied by
/// spans of text that share a common text style.
///
/// Pdfium automatically merges smaller text boxes into larger ones if all enclosed characters
/// are on the same line and share the same font settings.
///
/// The collection stores no rectangles itself: the segment count and every individual
/// segment are requested from Pdfium each time they are asked for.
pub struct PdfPageTextSegments<'a> {
// The page text whose handle is passed to Pdfium when counting and retrieving segments.
text: &'a PdfPageText<'a>,
// Start character index handed to `FPDFText_CountRects()`.
start: i32,
// Character count handed to `FPDFText_CountRects()`.
characters: i32,
// The Pdfium bindings through which every library call in this collection is made.
bindings: &'a dyn PdfiumLibraryBindings,
}
impl<'a> PdfPageTextSegments<'a> {
    /// Creates a new [PdfPageTextSegments] collection covering a span of characters in the
    /// given [PdfPageText].
    ///
    /// `start` and `characters` are stored unchanged and forwarded to
    /// `FPDFText_CountRects()` as the start character index and character count every time
    /// the collection is measured.
    #[inline]
    pub(crate) fn new(
        text: &'a PdfPageText<'a>,
        start: i32,
        characters: i32,
        bindings: &'a dyn PdfiumLibraryBindings,
    ) -> Self {
        PdfPageTextSegments {
            text,
            start,
            characters,
            bindings,
        }
    }

    /// Returns the number of distinct rectangular areas occupied by text in the containing [PdfPage].
    ///
    /// Pdfium automatically merges smaller text boxes into larger ones if all enclosed characters
    /// are on the same line and share the same font settings. The number of rectangular text segments
    /// returned by this function therefore indicates the minimum number of spans of text that
    /// share text styles on the page. The number of individual [PdfPageTextObject] objects on
    /// the page may be much larger than the number of text segments.
    ///
    /// If Pdfium reports a failure by returning a negative count, this function returns `0`.
    #[inline]
    pub fn len(&self) -> PdfPageTextSegmentIndex {
        // Pdfium documents `FPDFText_CountRects()` as returning -1 for a bad start index.
        // Casting a negative value straight to an unsigned index would wrap around to an
        // enormous length, defeating `is_empty()`, `as_range()` and the bounds check in
        // `get()`, so negative results are clamped to zero before the conversion.
        self.bindings
            .FPDFText_CountRects(*self.text.handle(), self.start, self.characters)
            .max(0) as PdfPageTextSegmentIndex
    }

    /// Returns `true` if this [PdfPageTextSegments] collection is empty.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Returns a Range from `0..(number of segments)` for this [PdfPageTextSegments] collection.
    ///
    /// The range is empty when the collection is empty.
    #[inline]
    pub fn as_range(&self) -> Range<PdfPageTextSegmentIndex> {
        0..self.len()
    }

    /// Returns an inclusive Range from `0..=(number of segments - 1)` for this
    /// [PdfPageTextSegments] collection.
    ///
    /// If the collection is empty, the range `0..=0` is returned. That range still contains
    /// the index `0`, for which [PdfPageTextSegments::get()] returns an error, so callers
    /// that may encounter an empty collection should check [PdfPageTextSegments::is_empty()]
    /// first or use [PdfPageTextSegments::as_range()] instead.
    #[inline]
    pub fn as_range_inclusive(&self) -> RangeInclusive<PdfPageTextSegmentIndex> {
        if self.is_empty() {
            // Avoids the underflow of `len() - 1` on an empty collection.
            0..=0
        } else {
            0..=(self.len() - 1)
        }
    }

    /// Returns a single [PdfPageTextSegment] from this [PdfPageTextSegments] collection.
    ///
    /// # Errors
    ///
    /// Returns [PdfiumError::TextSegmentIndexOutOfBounds] if `index` is not smaller than
    /// the current number of segments. Any error produced while converting Pdfium's result
    /// into a [PdfRect] is passed through unchanged.
    #[inline]
    pub fn get(&self, index: PdfPageTextSegmentIndex) -> Result<PdfPageTextSegment, PdfiumError> {
        // Besides validating the index, this `len()` call issues the
        // `FPDFText_CountRects()` request whose results Pdfium documents as being cached
        // for subsequent `FPDFText_GetRect()` calls.
        if index >= self.len() {
            return Err(PdfiumError::TextSegmentIndexOutOfBounds);
        }

        // Output parameters filled in by Pdfium.
        let mut left = 0.0;
        let mut bottom = 0.0;
        let mut right = 0.0;
        let mut top = 0.0;

        // `index` is known to be below a count that originated from a non-negative
        // `c_int`, so the narrowing cast cannot truncate.
        let result = self.bindings.FPDFText_GetRect(
            *self.text.handle(),
            index as c_int,
            &mut left,
            &mut top,
            &mut right,
            &mut bottom,
        );

        PdfRect::from_pdfium_as_result(
            result,
            FS_RECTF {
                left: left as f32,
                top: top as f32,
                right: right as f32,
                bottom: bottom as f32,
            },
            self.bindings,
        )
        .map(|rect| PdfPageTextSegment::from_pdfium(self.text, rect))
    }

    /// Returns an iterator over all the text segments in this [PdfPageTextSegments] collection.
    ///
    /// Pdfium automatically merges smaller text boxes into larger text segments if all
    /// enclosed characters are on the same line and share the same font settings. The number of
    /// individual [PdfPageTextObject] objects on the page may be much larger than the number of
    /// text segments.
    #[inline]
    pub fn iter(&self) -> PdfPageTextSegmentsIterator {
        PdfPageTextSegmentsIterator::new(self)
    }
}
/// An iterator over all the [PdfPageTextSegment] objects in a [PdfPageTextSegments] collection.
///
/// Segments are yielded in ascending index order, starting from index zero.
pub struct PdfPageTextSegmentsIterator<'a> {
// The collection each segment is fetched from.
segments: &'a PdfPageTextSegments<'a>,
// Index of the segment the next call to `next()` will request.
next_index: PdfPageTextSegmentIndex,
}
impl<'a> PdfPageTextSegmentsIterator<'a> {
#[inline]
pub(crate) fn new(segments: &'a PdfPageTextSegments<'a>) -> Self {
PdfPageTextSegmentsIterator {
segments,
next_index: 0,
}
}
}
impl<'a> Iterator for PdfPageTextSegmentsIterator<'a> {
    type Item = PdfPageTextSegment<'a>;

    /// Yields the segment at the current position and then moves the position forward by one.
    ///
    /// Any error from the underlying collection lookup, including an index past the end
    /// of the collection, is reported as `None`. The position advances on every call,
    /// whether or not a segment was produced.
    fn next(&mut self) -> Option<Self::Item> {
        let current = self.next_index;

        // Look the segment up first, then advance, discarding the error detail.
        let segment = self.segments.get(current).ok();

        self.next_index = current + 1;

        segment
    }
}