pdfium_render/pdf/document/page/text/segments.rs
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161
//! Defines the [PdfPageTextSegments] struct, a collection of all the distinct rectangular
//! areas of a single [PdfPage] occupied by spans of text that share a common text style.
use crate::bindgen::FS_RECTF;
use crate::bindings::PdfiumLibraryBindings;
use crate::error::PdfiumError;
use crate::pdf::document::page::text::segment::PdfPageTextSegment;
use crate::pdf::document::page::text::PdfPageText;
use crate::pdf::rect::PdfRect;
use std::ops::{Range, RangeInclusive};
use std::os::raw::c_int;
#[cfg(doc)]
use {
crate::pdf::document::page::object::text::PdfPageTextObject,
crate::pdf::document::page::PdfPage,
};
/// The zero-based index of a single [PdfPageTextSegment] inside its containing
/// [PdfPageTextSegments] collection. Also the type used to report the size of the collection.
pub type PdfPageTextSegmentIndex = usize;
/// A collection of all the distinct rectangular areas of a single [PdfPage] occupied by
/// spans of text that share a common text style.
///
/// Pdfium automatically merges smaller text boxes into larger ones if all enclosed characters
/// are on the same line and share the same font settings.
///
/// The collection does not cache anything itself: every query is forwarded to Pdfium through
/// the stored bindings, using the handle of the borrowed [PdfPageText].
pub struct PdfPageTextSegments<'a> {
// The page text whose handle is passed to every Pdfium call made by this collection, and
// which is handed to each PdfPageTextSegment created by it.
text: &'a PdfPageText<'a>,
// Forwarded unchanged as the start-index argument of FPDFText_CountRects().
// NOTE(review): per Pdfium's documentation this is the index of the first character of
// the span of text to examine -- confirm against the caller.
start: i32,
// Forwarded unchanged as the count argument of FPDFText_CountRects().
// NOTE(review): per Pdfium's documentation this is the number of characters to examine,
// with -1 meaning "all remaining characters" -- confirm against the caller.
characters: i32,
// The Pdfium bindings through which all FFI calls are made.
bindings: &'a dyn PdfiumLibraryBindings,
}
impl<'a> PdfPageTextSegments<'a> {
    /// Creates a new [PdfPageTextSegments] collection over the given [PdfPageText].
    ///
    /// `start` and `characters` are stored and later forwarded unchanged to Pdfium's
    /// `FPDFText_CountRects()` function as its start-index and count arguments.
    #[inline]
    pub(crate) fn new(
        text: &'a PdfPageText<'a>,
        start: i32,
        characters: i32,
        bindings: &'a dyn PdfiumLibraryBindings,
    ) -> Self {
        Self {
            text,
            start,
            characters,
            bindings,
        }
    }

    /// Returns the number of distinct rectangular areas occupied by text in the containing [PdfPage].
    ///
    /// Pdfium automatically merges smaller text boxes into larger ones if all enclosed characters
    /// are on the same line and share the same font settings. The number of rectangular text segments
    /// returned by this function therefore indicates the minimum number of spans of text that
    /// share text styles on the page. The number of individual [PdfPageTextObject] objects on
    /// the page may be much larger than the number of text segments.
    ///
    /// If Pdfium reports a failure by returning a negative count, this function returns zero.
    #[inline]
    pub fn len(&self) -> PdfPageTextSegmentIndex {
        let count =
            self.bindings
                .FPDFText_CountRects(*self.text.handle(), self.start, self.characters);

        // Pdfium documents FPDFText_CountRects() as returning -1 when the start index is
        // not valid. Casting a negative value directly to an unsigned index would wrap
        // around to an enormous length, so failures are clamped to an empty collection.
        count.max(0) as PdfPageTextSegmentIndex
    }

    /// Returns `true` if this [PdfPageTextSegments] collection is empty.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Returns a Range from `0..(number of segments)` for this [PdfPageTextSegments] collection.
    #[inline]
    pub fn as_range(&self) -> Range<PdfPageTextSegmentIndex> {
        0..self.len()
    }

    /// Returns an inclusive Range from `0..=(number of segments - 1)` for this
    /// [PdfPageTextSegments] collection.
    ///
    /// An empty collection yields `0..=0`. That range still contains the index `0`, for
    /// which [PdfPageTextSegments::get] will return an error, so callers that need to
    /// distinguish the empty case should check [PdfPageTextSegments::is_empty] first.
    #[inline]
    pub fn as_range_inclusive(&self) -> RangeInclusive<PdfPageTextSegmentIndex> {
        // Saturating at zero gives `0..=0` for an empty collection while querying
        // Pdfium only once.
        0..=self.len().saturating_sub(1)
    }

    /// Returns a single [PdfPageTextSegment] from this [PdfPageTextSegments] collection.
    ///
    /// # Errors
    ///
    /// Returns [PdfiumError::TextSegmentIndexOutOfBounds] if `index` is not less than
    /// [PdfPageTextSegments::len]. Any error produced while converting the result of
    /// Pdfium's `FPDFText_GetRect()` call into a [PdfRect] is returned unchanged.
    #[inline]
    pub fn get(&self, index: PdfPageTextSegmentIndex) -> Result<PdfPageTextSegment, PdfiumError> {
        // Besides validating the index, calling len() performs the FPDFText_CountRects()
        // call that Pdfium's documentation requires before FPDFText_GetRect() is used.
        // Because len() is derived from a non-negative c_int, any index that passes this
        // check also fits in a c_int.
        if index >= self.len() {
            return Err(PdfiumError::TextSegmentIndexOutOfBounds);
        }

        // Out-parameters filled in by Pdfium with the edges of the requested rectangle.
        let mut left = 0.0;
        let mut top = 0.0;
        let mut right = 0.0;
        let mut bottom = 0.0;

        let result = self.bindings.FPDFText_GetRect(
            *self.text.handle(),
            index as c_int,
            &mut left,
            &mut top,
            &mut right,
            &mut bottom,
        );

        let bounds = FS_RECTF {
            left: left as f32,
            top: top as f32,
            right: right as f32,
            bottom: bottom as f32,
        };

        PdfRect::from_pdfium_as_result(result, bounds, self.bindings)
            .map(|rect| PdfPageTextSegment::from_pdfium(self.text, rect))
    }

    /// Returns an iterator over all the text segments in this [PdfPageTextSegments] collection.
    ///
    /// Pdfium automatically merges smaller text boxes into larger text segments if all
    /// enclosed characters are on the same line and share the same font settings. The number of
    /// individual [PdfPageTextObject] objects on the page may be much larger than the number of
    /// text segments.
    #[inline]
    pub fn iter(&self) -> PdfPageTextSegmentsIterator {
        PdfPageTextSegmentsIterator::new(self)
    }
}
/// An iterator over all the [PdfPageTextSegment] objects in a [PdfPageTextSegments] collection.
///
/// Segments are produced in ascending index order, starting from index zero.
pub struct PdfPageTextSegmentsIterator<'a> {
// The collection whose segments are being iterated.
segments: &'a PdfPageTextSegments<'a>,
// The index that will be requested from the collection on the next call to next().
next_index: PdfPageTextSegmentIndex,
}
impl<'a> PdfPageTextSegmentsIterator<'a> {
#[inline]
pub(crate) fn new(segments: &'a PdfPageTextSegments<'a>) -> Self {
PdfPageTextSegmentsIterator {
segments,
next_index: 0,
}
}
}
impl<'a> Iterator for PdfPageTextSegmentsIterator<'a> {
    type Item = PdfPageTextSegment<'a>;

    /// Returns the segment at the current index and advances the index by one.
    ///
    /// Any error returned by [PdfPageTextSegments::get], including the out-of-bounds error
    /// raised once the index passes the end of the collection, is reported as `None`.
    /// The index is advanced on every call, whether or not a segment was returned.
    fn next(&mut self) -> Option<Self::Item> {
        let current = self.next_index;
        let segment = self.segments.get(current).ok();

        self.next_index = current + 1;

        segment
    }
}