1pub mod char;
5pub mod chars;
6pub mod search;
7pub mod segment;
8pub mod segments;
9
10use crate::bindgen::{FPDF_TEXTPAGE, FPDF_WCHAR, FPDF_WIDESTRING};
11use crate::bindings::PdfiumLibraryBindings;
12use crate::error::PdfiumError;
13use crate::pdf::document::page::annotation::PdfPageAnnotation;
14use crate::pdf::document::page::annotation::PdfPageAnnotationCommon;
15use crate::pdf::document::page::object::private::internal::PdfPageObjectPrivate;
16use crate::pdf::document::page::object::text::PdfPageTextObject;
17use crate::pdf::document::page::text::chars::{PdfPageTextCharIndex, PdfPageTextChars};
18use crate::pdf::document::page::text::search::{PdfPageTextSearch, PdfSearchOptions};
19use crate::pdf::document::page::text::segments::PdfPageTextSegments;
20use crate::pdf::document::page::PdfPage;
21use crate::pdf::points::PdfPoints;
22use crate::pdf::rect::PdfRect;
23use crate::utils::mem::{create_byte_buffer, create_sized_buffer};
24use crate::utils::utf16le::{
25 get_pdfium_utf16le_bytes_from_str, get_string_from_pdfium_utf16le_bytes,
26};
27use bytemuck::cast_slice;
28use std::fmt::{Display, Formatter};
29use std::os::raw::{c_double, c_int};
30use std::ptr::null_mut;
31
32#[cfg(any(
33 feature = "pdfium_future",
34 feature = "pdfium_7350",
35 feature = "pdfium_7215",
36 feature = "pdfium_7123",
37 feature = "pdfium_6996",
38 feature = "pdfium_6721",
39 feature = "pdfium_6666",
40 feature = "pdfium_6611",
41))]
42use crate::pdf::document::page::object::PdfPageObjectCommon;
43
/// The text content of a single [PdfPage], addressed through a Pdfium text page handle.
///
/// The handle is released with `FPDFText_ClosePage` when this value is dropped.
pub struct PdfPageText<'a> {
    /// Pdfium handle identifying the text page that every query in this type is issued against.
    text_page_handle: FPDF_TEXTPAGE,
    /// The page whose text this object exposes; also supplies the document and page handles.
    page: &'a PdfPage<'a>,
    /// The Pdfium bindings used for all FFI calls made by this object.
    bindings: &'a dyn PdfiumLibraryBindings,
}
62
63impl<'a> PdfPageText<'a> {
64 pub(crate) fn from_pdfium(
65 text_page_handle: FPDF_TEXTPAGE,
66 page: &'a PdfPage<'a>,
67 bindings: &'a dyn PdfiumLibraryBindings,
68 ) -> Self {
69 PdfPageText {
70 text_page_handle,
71 page,
72 bindings,
73 }
74 }
75
76 #[inline]
78 pub(crate) fn text_page_handle(&self) -> FPDF_TEXTPAGE {
79 self.text_page_handle
80 }
81
82 #[inline]
84 pub fn bindings(&self) -> &'a dyn PdfiumLibraryBindings {
85 self.bindings
86 }
87
88 #[inline]
93 pub fn len(&self) -> i32 {
94 self.bindings.FPDFText_CountChars(self.text_page_handle())
95 }
96
97 #[inline]
99 pub fn is_empty(&self) -> bool {
100 self.len() == 0
101 }
102
103 #[inline]
105 pub fn segments(&self) -> PdfPageTextSegments {
106 PdfPageTextSegments::new(self, 0, self.len(), self.bindings())
107 }
108
109 #[inline]
112 pub fn segments_subset(
113 &self,
114 start: PdfPageTextCharIndex,
115 count: PdfPageTextCharIndex,
116 ) -> PdfPageTextSegments {
117 PdfPageTextSegments::new(self, start as i32, count as i32, self.bindings())
118 }
119
120 #[inline]
122 pub fn chars(&self) -> PdfPageTextChars {
123 PdfPageTextChars::new(
124 self.page.document_handle(),
125 self.page.page_handle(),
126 self.text_page_handle(),
127 (0..self.len()).collect(),
128 self.bindings(),
129 )
130 }
131
#[cfg(any(
    feature = "pdfium_future",
    feature = "pdfium_7350",
    feature = "pdfium_7215",
    feature = "pdfium_7123",
    feature = "pdfium_6996",
    feature = "pdfium_6721",
    feature = "pdfium_6666",
    feature = "pdfium_6611",
))]
/// Returns the characters on this text page that belong to the given [PdfPageTextObject].
///
/// # Errors
///
/// Propagates any error from retrieving the object's bounds, and returns
/// [PdfiumError::NoCharsInPageObject] if no characters are found inside those bounds.
#[inline]
pub fn chars_for_object(
    &self,
    object: &PdfPageTextObject,
) -> Result<PdfPageTextChars, PdfiumError> {
    let object_bounds = object.bounds()?.to_rect();

    let candidates = match self.chars_inside_rect(object_bounds) {
        Ok(chars) => chars,
        Err(_) => return Err(PdfiumError::NoCharsInPageObject),
    };

    let document_handle = self.page.document_handle();
    let page_handle = self.page.page_handle();
    let text_page_handle = self.text_page_handle();

    // The bounds lookup can also pick up characters from other objects occupying
    // the same area, so keep only the characters Pdfium attributes to this object.
    let owned_indices = candidates
        .iter()
        .filter_map(|candidate| {
            let index = candidate.index() as i32;
            let owner = self
                .bindings
                .FPDFText_GetTextObject(self.text_page_handle(), index);

            if owner == object.object_handle() {
                Some(index)
            } else {
                None
            }
        })
        .collect();

    Ok(PdfPageTextChars::new(
        document_handle,
        page_handle,
        text_page_handle,
        owned_indices,
        self.bindings(),
    ))
}
176
177 #[inline]
182 pub fn chars_for_annotation(
183 &self,
184 annotation: &PdfPageAnnotation,
185 ) -> Result<PdfPageTextChars, PdfiumError> {
186 self.chars_inside_rect(annotation.bounds()?)
187 .map_err(|_| PdfiumError::NoCharsInAnnotation)
188 }
189
190 #[inline]
193 pub fn chars_inside_rect(&self, rect: PdfRect) -> Result<PdfPageTextChars, PdfiumError> {
194 let tolerance_x = rect.width() / 2.0;
195 let tolerance_y = rect.height() / 2.0;
196 let center_height = rect.bottom() + tolerance_y;
197
198 let chars = self.chars();
199
200 match (
201 chars.get_char_near_point(rect.left(), tolerance_x, center_height, tolerance_y),
202 chars.get_char_near_point(rect.right(), tolerance_x, center_height, tolerance_y),
203 ) {
204 (Some(start), Some(end)) => Ok(PdfPageTextChars::new(
205 self.page.document_handle(),
206 self.page.page_handle(),
207 self.text_page_handle(),
208 (start.index() as i32..end.index().saturating_sub(start.index()) as i32 + 1)
209 .collect(),
210 self.bindings,
211 )),
212 _ => Err(PdfiumError::NoCharsInRect),
213 }
214 }
215
216 pub(crate) fn get_char_index_near_point(
220 text_page_handle: FPDF_TEXTPAGE,
221 x: PdfPoints,
222 tolerance_x: PdfPoints,
223 y: PdfPoints,
224 tolerance_y: PdfPoints,
225 bindings: &dyn PdfiumLibraryBindings,
226 ) -> Option<PdfPageTextCharIndex> {
227 match bindings.FPDFText_GetCharIndexAtPos(
228 text_page_handle,
229 x.value as c_double,
230 y.value as c_double,
231 tolerance_x.value as c_double,
232 tolerance_y.value as c_double,
233 ) {
234 -1 => None, -3 => None, index => Some(index as PdfPageTextCharIndex),
237 }
238 }
239
240 pub fn all(&self) -> String {
247 self.inside_rect(self.page.page_size())
248 }
249
250 pub fn inside_rect(&self, rect: PdfRect) -> String {
258 let left = rect.left().value as f64;
268
269 let top = rect.top().value as f64;
270
271 let right = rect.right().value as f64;
272
273 let bottom = rect.bottom().value as f64;
274
275 let chars_count = self.bindings().FPDFText_GetBoundedText(
276 self.text_page_handle(),
277 left,
278 top,
279 right,
280 bottom,
281 null_mut(),
282 0,
283 );
284
285 if chars_count == 0 {
286 return String::new();
289 }
290
291 let mut buffer = create_sized_buffer(chars_count as usize);
292
293 let result = self.bindings().FPDFText_GetBoundedText(
294 self.text_page_handle(),
295 left,
296 top,
297 right,
298 bottom,
299 buffer.as_mut_ptr(),
300 chars_count,
301 );
302
303 assert_eq!(result, chars_count);
304
305 get_string_from_pdfium_utf16le_bytes(cast_slice(buffer.as_slice()).to_vec())
306 .unwrap_or_default()
307 }
308
309 pub fn for_object(&self, object: &PdfPageTextObject) -> String {
312 let buffer_length = self.bindings().FPDFTextObj_GetText(
322 object.object_handle(),
323 self.text_page_handle(),
324 null_mut(),
325 0,
326 );
327
328 if buffer_length == 0 {
329 return String::new();
332 }
333
334 let mut buffer = create_byte_buffer(buffer_length as usize);
335
336 let result = self.bindings().FPDFTextObj_GetText(
337 object.object_handle(),
338 self.text_page_handle(),
339 buffer.as_mut_ptr() as *mut FPDF_WCHAR,
340 buffer_length,
341 );
342
343 assert_eq!(result, buffer_length);
344
345 get_string_from_pdfium_utf16le_bytes(buffer).unwrap_or_default()
346 }
347
348 #[inline]
356 pub fn for_annotation(&self, annotation: &PdfPageAnnotation) -> Result<String, PdfiumError> {
357 let bounds = annotation.bounds()?;
358
359 Ok(self.inside_rect(bounds))
360 }
361
362 #[inline]
365 pub fn search(
366 &self,
367 text: &str,
368 options: &PdfSearchOptions,
369 ) -> Result<PdfPageTextSearch, PdfiumError> {
370 self.search_from(text, options, 0)
371 }
372
373 pub fn search_from(
377 &self,
378 text: &str,
379 options: &PdfSearchOptions,
380 index: PdfPageTextCharIndex,
381 ) -> Result<PdfPageTextSearch, PdfiumError> {
382 if text.is_empty() {
383 Err(PdfiumError::TextSearchTargetIsEmpty)
384 } else {
385 Ok(PdfPageTextSearch::from_pdfium(
386 self.bindings().FPDFText_FindStart(
387 self.text_page_handle(),
388 get_pdfium_utf16le_bytes_from_str(text).as_ptr() as FPDF_WIDESTRING,
389 options.as_pdfium(),
390 index as c_int,
391 ),
392 self,
393 self.bindings(),
394 ))
395 }
396 }
397}
398
399impl<'a> Display for PdfPageText<'a> {
400 #[inline]
401 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
402 f.write_str(self.all().as_str())
403 }
404}
405
406impl<'a> Drop for PdfPageText<'a> {
407 #[inline]
409 fn drop(&mut self) {
410 self.bindings().FPDFText_ClosePage(self.text_page_handle());
411 }
412}
413
414#[cfg(test)]
415mod tests {
416 use crate::prelude::*;
417 use crate::utils::test::test_bind_to_pdfium;
418
#[test]
fn test_overlapping_chars_results() -> Result<(), PdfiumError> {
    // Creates three text objects at the same position, with the same font and size,
    // then checks that the text and characters retrieved for each object match only
    // that object's own content.

    let pdfium = test_bind_to_pdfium();

    let mut document = pdfium.create_new_pdf()?;

    let mut page = document
        .pages_mut()
        .create_page_at_start(PdfPagePaperSize::a4())?;

    let font = document.fonts_mut().courier();

    let font_size = PdfPoints::new(10.0);

    let txt1 = page.objects_mut().create_text_object(
        PdfPoints::ZERO,
        PdfPoints::ZERO,
        "AAAAAA",
        font,
        font_size,
    )?;

    let txt2 = page.objects_mut().create_text_object(
        PdfPoints::ZERO,
        PdfPoints::ZERO,
        "BBBBBB",
        font,
        font_size,
    )?;

    let txt3 = page.objects_mut().create_text_object(
        PdfPoints::ZERO,
        PdfPoints::ZERO,
        "CDCDCDE",
        font,
        font_size,
    )?;

    let page_text = page.text()?;

    let matches_expected = |object: &PdfPageObject, expected: &str| {
        test_one_overlapping_text_object_results(object, &page_text, expected)
    };

    assert!(matches_expected(&txt1, "AAAAAA")?);
    assert!(matches_expected(&txt2, "BBBBBB")?);
    assert!(matches_expected(&txt3, "CDCDCDE")?);

    Ok(())
}
477
478 fn test_one_overlapping_text_object_results(
479 object: &PdfPageObject,
480 page_text: &PdfPageText,
481 expected: &str,
482 ) -> Result<bool, PdfiumError> {
483 if let Some(txt) = object.as_text_object() {
484 assert_eq!(txt.text().trim(), expected);
485 assert_eq!(page_text.for_object(txt).trim(), expected);
486
487 for (index, char) in txt.chars(&page_text)?.iter().enumerate() {
488 assert_eq!(txt.text().chars().nth(index), char.unicode_char());
489 assert_eq!(expected.chars().nth(index), char.unicode_char());
490 }
491
492 Ok(true)
493 } else {
494 Ok(false)
495 }
496 }
497}