Skip to main content

pdfkit/
async_api.rs

1//! Executor-agnostic async wrappers for PDFKit document finding.
2//!
3//! Enabled with the `async` Cargo feature.
4//!
5//! [`PdfDocumentFindStream`] runs `PDFDocument.findString(_:withOptions:)` on a
6//! worker thread, converts every match into an owned Rust snapshot, and emits a
7//! bounded async stream backed by [`doom_fish_utils::stream::BoundedAsyncStream`].
8//!
//! The stream emits synthetic `DidBeginFind` / `DidEndFind` notifications around
//! the match sequence. If the search fails, a `Failed` event is emitted and
//! `DidEndFind` is not sent. Dropping it waits for the worker thread to finish
//! and then closes the stream.
12//!
13//! # Example
14//!
15//! ```no_run
16//! use pdfkit::async_api::{
17//!     PdfDocumentFindEvent, PdfDocumentFindOptions, PdfDocumentFindStream,
18//! };
19//! use pdfkit::PdfDocument;
20//!
21//! # async fn run() -> pdfkit::Result<()> {
22//! let document = PdfDocument::from_url("examples/assets/hello.pdf")?;
23//! let stream = PdfDocumentFindStream::find_string(
24//!     &document,
25//!     "Hello",
26//!     PdfDocumentFindOptions::NONE,
27//!     8,
28//! )?;
29//!
30//! while let Some(event) = stream.next().await {
31//!     match event {
32//!         PdfDocumentFindEvent::Notification(notification) => {
33//!             println!("notification={}", notification.name());
34//!         }
35//!         PdfDocumentFindEvent::Match(found) => {
36//!             println!("match={:?} pages={:?}", found.text, found.pages);
37//!         }
38//!         PdfDocumentFindEvent::Failed(error) => {
39//!             eprintln!("search failed: {error}");
40//!         }
41//!     }
42//! }
43//! # Ok(())
44//! # }
45//! ```
46
47#![cfg(feature = "async")]
48
49use core::ffi::c_void;
50use core::fmt;
51use std::ops::BitOr;
52use std::ptr;
53use std::thread::{self, JoinHandle};
54
55use doom_fish_utils::stream::{AsyncStreamSender, BoundedAsyncStream, NextItem};
56use serde::Deserialize;
57
58use crate::error::{PdfKitError, Result};
59use crate::ffi;
60use crate::handle::ObjectHandle;
61use crate::util;
62use crate::{PdfDocument, PdfDocumentNotification, PdfTextRange};
63
/// Comparison options forwarded to `PDFDocument.findString(_:withOptions:)`.
///
/// Individual options combine with `|`. The derived default carries no bits and
/// therefore equals [`PdfDocumentFindOptions::NONE`].
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub struct PdfDocumentFindOptions(u64);

impl PdfDocumentFindOptions {
    /// Default PDFKit string-find behaviour (no option bits set).
    pub const NONE: Self = Self(0);
    /// Case-insensitive matching.
    pub const CASE_INSENSITIVE: Self = Self(0b001);
    /// Literal matching without locale-aware folding.
    pub const LITERAL: Self = Self(0b010);
    /// Search backwards from the end of the document.
    pub const BACKWARDS: Self = Self(0b100);

    /// Return the raw `NSString.CompareOptions` bit pattern forwarded to PDFKit.
    #[must_use]
    pub const fn bits(self) -> u64 {
        let Self(raw) = self;
        raw
    }
}

impl BitOr for PdfDocumentFindOptions {
    type Output = Self;

    /// Union of the option bits set in `self` and `rhs`.
    fn bitor(self, rhs: Self) -> Self::Output {
        Self(self.bits() | rhs.bits())
    }
}
92
93/// One page of a match snapshot emitted by [`PdfDocumentFindStream`].
94#[derive(Debug, Clone, PartialEq, Eq, Deserialize)]
95pub struct PdfDocumentFindPageMatch {
96    /// Zero-based page index in the searched document.
97    pub page_index: usize,
98    /// Text ranges within that page that belong to the match.
99    pub ranges: Vec<PdfTextRange>,
100}
101
102/// Owned snapshot of one `PDFDocument` string match.
103#[derive(Debug, Clone, PartialEq, Eq, Deserialize)]
104pub struct PdfDocumentFindMatch {
105    /// Plain-text representation of the match, if PDFKit can provide one.
106    pub text: Option<String>,
107    /// Per-page ranges that make up the match.
108    pub pages: Vec<PdfDocumentFindPageMatch>,
109}
110
111/// Events emitted while a PDFKit string search is running.
112#[derive(Debug, Clone, PartialEq, Eq)]
113pub enum PdfDocumentFindEvent {
114    /// Synthetic lifecycle notifications emitted by the worker thread.
115    Notification(PdfDocumentNotification),
116    /// One owned match snapshot.
117    Match(PdfDocumentFindMatch),
118    /// Search failure reported by the worker thread.
119    Failed(PdfKitError),
120}
121
/// Owns the join handle of the search worker thread.
///
/// Dropping the value blocks until the worker thread has exited.
struct SearchThreadHandle {
    /// Join handle of the worker; `None` once it has been taken for joining.
    join: Option<JoinHandle<()>>,
}

impl Drop for SearchThreadHandle {
    fn drop(&mut self) {
        if let Some(join) = self.join.take() {
            // `join()` returns `Err` if the worker panicked; the result is
            // discarded so a worker panic is not propagated out of `drop`.
            let _ = join.join();
        }
    }
}

impl fmt::Debug for SearchThreadHandle {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // A stored handle only means the thread has not been joined yet, so ask
        // the handle itself whether the worker is still executing instead of
        // reporting `true` for a thread that already ran to completion.
        let thread_running = matches!(&self.join, Some(join) if !join.is_finished());
        f.debug_struct("SearchThreadHandle")
            .field("thread_running", &thread_running)
            .finish_non_exhaustive()
    }
}
141
142fn push_error(sender: &AsyncStreamSender<PdfDocumentFindEvent>, error: PdfKitError) {
143    sender.push(PdfDocumentFindEvent::Failed(error));
144}
145
146/// Async stream of `PDFDocument` find notifications and match snapshots.
147#[derive(Debug)]
148pub struct PdfDocumentFindStream {
149    inner: BoundedAsyncStream<PdfDocumentFindEvent>,
150    _handle: SearchThreadHandle,
151}
152
153impl PdfDocumentFindStream {
154    /// Start an async document search.
155    pub fn find_string(
156        document: &PdfDocument,
157        needle: &str,
158        options: PdfDocumentFindOptions,
159        capacity: usize,
160    ) -> Result<Self> {
161        if capacity == 0 {
162            return Err(PdfKitError::new(
163                ffi::status::INVALID_ARGUMENT,
164                "async stream capacity must be > 0",
165            ));
166        }
167
168        let needle = util::c_string(needle)?;
169        let document_ptr = unsafe { ffi::pdf_object_retain(document.as_handle_ptr()) };
170        if document_ptr.is_null() {
171            return Err(PdfKitError::new(
172                ffi::status::NULL_RESULT,
173                "PDFDocument retain returned null",
174            ));
175        }
176
177        let document_addr = document_ptr as usize;
178        let (stream, sender) = BoundedAsyncStream::new(capacity);
179        let join = thread::spawn(move || {
180            let Some(handle) =
181                (unsafe { ObjectHandle::from_retained_ptr(document_addr as *mut c_void) })
182            else {
183                push_error(
184                    &sender,
185                    PdfKitError::new(ffi::status::NULL_RESULT, "PDFDocument retain returned null"),
186                );
187                return;
188            };
189            let document = PdfDocument::from_handle(handle);
190
191            sender.push(PdfDocumentFindEvent::Notification(
192                PdfDocumentNotification::DidBeginFind,
193            ));
194
195            let mut out_error = ptr::null_mut();
196            let json_ptr = unsafe {
197                ffi::pdf_document_find_string_json(
198                    document.as_handle_ptr(),
199                    needle.as_ptr(),
200                    options.bits(),
201                    &mut out_error,
202                )
203            };
204
205            let Some(json) = util::take_string(json_ptr) else {
206                let message = util::take_string(out_error)
207                    .unwrap_or_else(|| "PDFDocument.findString returned null".to_string());
208                push_error(&sender, PdfKitError::new(ffi::status::FRAMEWORK, message));
209                return;
210            };
211
212            match serde_json::from_str::<Vec<PdfDocumentFindMatch>>(&json) {
213                Ok(matches) => {
214                    for found in matches {
215                        sender.push(PdfDocumentFindEvent::Match(found));
216                    }
217                    sender.push(PdfDocumentFindEvent::Notification(
218                        PdfDocumentNotification::DidEndFind,
219                    ));
220                }
221                Err(error) => push_error(
222                    &sender,
223                    PdfKitError::new(
224                        ffi::status::FRAMEWORK,
225                        format!("failed to parse PDFDocument find results: {error}"),
226                    ),
227                ),
228            }
229        });
230
231        Ok(Self {
232            inner: stream,
233            _handle: SearchThreadHandle { join: Some(join) },
234        })
235    }
236
237    /// Await the next find event.
238    #[must_use]
239    pub const fn next(&self) -> NextItem<'_, PdfDocumentFindEvent> {
240        self.inner.next()
241    }
242
243    /// Return the next buffered event without waiting.
244    #[must_use]
245    pub fn try_next(&self) -> Option<PdfDocumentFindEvent> {
246        self.inner.try_next()
247    }
248
249    /// Return the number of buffered events.
250    #[must_use]
251    pub fn buffered_count(&self) -> usize {
252        self.inner.buffered_count()
253    }
254
255    /// Return `true` once the worker thread has finished and the stream has closed.
256    #[must_use]
257    pub fn is_closed(&self) -> bool {
258        self.inner.is_closed()
259    }
260}