pub struct PdfLoader { /* private fields */ }
Expand description
Loads documents from a PDF file.
Uses pdf_extract to extract text content from PDF files. Supports two
modes of operation:
- Single document (default): All pages are combined into one
  Document.
- Split pages: Each page becomes a separate
  Document, split on form feed characters (\x0c) that pdf_extract inserts between pages.
§Examples
use synaptic_pdf::{PdfLoader, Loader};
// Load entire PDF as one document
let loader = PdfLoader::new("document.pdf");
let docs = loader.load().await?;
assert_eq!(docs.len(), 1);
// Load with one document per page
let loader = PdfLoader::with_split_pages("document.pdf");
let docs = loader.load().await?;
// docs.len() == number of pages
Implementations§
Source§impl PdfLoader
impl PdfLoader
Source pub fn new(path: impl Into<PathBuf>) -> Self
pub fn new(path: impl Into<PathBuf>) -> Self
Create a new PdfLoader that extracts all text as a single document.
Source pub fn with_split_pages(path: impl Into<PathBuf>) -> Self
pub fn with_split_pages(path: impl Into<PathBuf>) -> Self
Create a new PdfLoader that splits text into one document per page.
Page boundaries are detected by form feed characters (\x0c) inserted
by the PDF extraction library.
Trait Implementations§
Auto Trait Implementations§
impl Freeze for PdfLoader
impl RefUnwindSafe for PdfLoader
impl Send for PdfLoader
impl Sync for PdfLoader
impl Unpin for PdfLoader
impl UnsafeUnpin for PdfLoader
impl UnwindSafe for PdfLoader
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T
where
T: ?Sized,
impl<T> BorrowMut<T> for T
where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more