pub struct PdfFileLoader<'a, T> { /* private fields */ }
Available on crate feature pdf only.
PdfFileLoader is a utility for loading PDF files from the filesystem using glob patterns or directory paths. It provides methods to read file contents and handle errors gracefully.
§Errors
This module defines a custom error type PdfLoaderError which can represent various errors that might occur during file loading operations, such as any FileLoaderError alongside specific PDF-related errors.
§Example Usage
use rig::loaders::PdfFileLoader;
fn main() -> Result<(), Box<dyn std::error::Error>> {
// Create a PdfFileLoader using a glob pattern
let loader = PdfFileLoader::with_glob("tests/data/*.pdf")?;
// Load pdf file contents by page, ignoring any errors
let contents: Vec<String> = loader
.load_with_path()
.ignore_errors()
.by_page()
for content in contents {
println!("{}", content);
}
Ok(())
}
PdfFileLoader uses strict typing between the iterator methods so that transitions between the different loader states and their methods are checked by the compiler.
Implementations§
Source§impl<'a> PdfFileLoader<'a, Result<PathBuf, PdfLoaderError>>
impl<'a> PdfFileLoader<'a, Result<PathBuf, PdfLoaderError>>
Sourcepub fn load(self) -> PdfFileLoader<'a, Result<Document, PdfLoaderError>>
pub fn load(self) -> PdfFileLoader<'a, Result<Document, PdfLoaderError>>
Loads the contents of the pdfs within the iterator returned by PdfFileLoader::with_glob or PdfFileLoader::with_dir. Loaded PDF documents are raw PDF instances that can be further processed (by page, etc).
§Example
Load pdfs in directory “tests/data/*.pdf” and return the loaded documents
let content = PdfFileLoader::with_glob("tests/data/*.pdf")?.load().into_iter();
for result in content {
match result {
Ok(doc) => println!("{:?}", doc),
Err(e) => eprintln!("Error reading pdf: {}", e),
}
}
Sourcepub fn load_with_path(
self,
) -> PdfFileLoader<'a, Result<(PathBuf, Document), PdfLoaderError>>
pub fn load_with_path( self, ) -> PdfFileLoader<'a, Result<(PathBuf, Document), PdfLoaderError>>
Loads the contents of the pdfs within the iterator returned by PdfFileLoader::with_glob or PdfFileLoader::with_dir. Loaded PDF documents are raw PDF instances with their path that can be further processed.
§Example
Load pdfs in directory “tests/data/*.pdf” and return the loaded documents
let content = PdfFileLoader::with_glob("tests/data/*.pdf")?.load_with_path().into_iter();
for result in content {
match result {
Ok((path, doc)) => println!("{:?} {}", path, doc),
Err(e) => eprintln!("Error reading pdf: {}", e),
}
}
Source§impl<'a> PdfFileLoader<'a, Result<PathBuf, PdfLoaderError>>
impl<'a> PdfFileLoader<'a, Result<PathBuf, PdfLoaderError>>
Sourcepub fn read(self) -> PdfFileLoader<'a, Result<String, PdfLoaderError>>
pub fn read(self) -> PdfFileLoader<'a, Result<String, PdfLoaderError>>
Directly reads the contents of the pdfs within the iterator returned by PdfFileLoader::with_glob or PdfFileLoader::with_dir.
§Example
Read pdfs in directory “tests/data/*.pdf” and return the contents of the documents.
let content = PdfFileLoader::with_glob("tests/data/*.pdf")?.read().into_iter();
for result in content {
match result {
Ok(content) => println!("{}", content),
Err(e) => eprintln!("Error reading pdf: {}", e),
}
}
Sourcepub fn read_with_path(
self,
) -> PdfFileLoader<'a, Result<(PathBuf, String), PdfLoaderError>>
pub fn read_with_path( self, ) -> PdfFileLoader<'a, Result<(PathBuf, String), PdfLoaderError>>
Directly reads the contents of the pdfs within the iterator returned by PdfFileLoader::with_glob or PdfFileLoader::with_dir and returns the path along with the content.
§Example
Read pdfs in directory “tests/data/*.pdf” and return the content and paths of the documents.
let content = PdfFileLoader::with_glob("tests/data/*.pdf")?.read_with_path().into_iter();
for result in content {
match result {
Ok((path, content)) => println!("{:?} {}", path, content),
Err(e) => eprintln!("Error reading pdf: {}", e),
}
}
Source§impl<'a> PdfFileLoader<'a, Document>
impl<'a> PdfFileLoader<'a, Document>
Sourcepub fn by_page(self) -> PdfFileLoader<'a, Result<String, PdfLoaderError>>
pub fn by_page(self) -> PdfFileLoader<'a, Result<String, PdfLoaderError>>
Chunks the pages of a loaded document by page, flattened as a single vector.
§Example
Load pdfs in directory “tests/data/*.pdf” and chunk all documents into their pages.
let content = PdfFileLoader::with_glob("tests/data/*.pdf")?.load().ignore_errors().by_page().into_iter();
for result in content {
match result {
Ok(page) => println!("{}", page),
Err(e) => eprintln!("Error reading pdf: {}", e),
}
}
Source§impl<'a> PdfFileLoader<'a, (PathBuf, Document)>
impl<'a> PdfFileLoader<'a, (PathBuf, Document)>
Sourcepub fn by_page(
self,
) -> PdfFileLoader<'a, (PathBuf, Vec<(usize, Result<String, PdfLoaderError>)>)>
pub fn by_page( self, ) -> PdfFileLoader<'a, (PathBuf, Vec<(usize, Result<String, PdfLoaderError>)>)>
Chunks the pages of a loaded document by page, processed as a vector of documents by path, with each document containing an inner vector of pages by page number.
§Example
Read pdfs in directory “tests/data/*.pdf” and chunk each document, keyed by path, into its pages.
let content = PdfFileLoader::with_glob("tests/data/*.pdf")?
.load_with_path()
.by_page()
.into_iter();
for result in content {
match result {
Ok(documents) => {
for doc in documents {
match doc {
Ok((pageno, content)) => println!("Page {}: {}", pageno, content),
Err(e) => eprintln!("Error reading page: {}", e),
}
}
},
Err(e) => eprintln!("Error reading pdf: {}", e),
}
}
Source§impl<'a> PdfFileLoader<'a, (PathBuf, Vec<(usize, Result<String, PdfLoaderError>)>)>
impl<'a> PdfFileLoader<'a, (PathBuf, Vec<(usize, Result<String, PdfLoaderError>)>)>
Sourcepub fn ignore_errors(self) -> PdfFileLoader<'a, (PathBuf, Vec<(usize, String)>)>
pub fn ignore_errors(self) -> PdfFileLoader<'a, (PathBuf, Vec<(usize, String)>)>
Ignores errors in the iterator, returning only successful results. This can be used on any PdfFileLoader state of iterator whose items are results.
§Example
Read files in directory “tests/data/*.pdf” and ignore errors from unreadable files.
let content = PdfFileLoader::with_glob("tests/data/*.pdf")?.read().ignore_errors().into_iter();
for result in content {
println!("{}", result)
}
Source§impl<'a, T> PdfFileLoader<'a, Result<T, PdfLoaderError>>where
T: 'a,
impl<'a, T> PdfFileLoader<'a, Result<T, PdfLoaderError>>where
T: 'a,
Sourcepub fn ignore_errors(self) -> PdfFileLoader<'a, T>
pub fn ignore_errors(self) -> PdfFileLoader<'a, T>
Ignores errors in the iterator, returning only successful results. This can be used on any PdfFileLoader state of iterator whose items are results.
§Example
Read files in directory “tests/data/*.pdf” and ignore errors from unreadable files.
let content = PdfFileLoader::with_glob("tests/data/*.pdf")?.read().ignore_errors().into_iter();
for result in content {
println!("{}", result)
}
Source§impl PdfFileLoader<'_, Result<PathBuf, FileLoaderError>>
impl PdfFileLoader<'_, Result<PathBuf, FileLoaderError>>
Sourcepub fn with_glob(
pattern: &str,
) -> Result<PdfFileLoader<'_, Result<PathBuf, PdfLoaderError>>, PdfLoaderError>
pub fn with_glob( pattern: &str, ) -> Result<PdfFileLoader<'_, Result<PathBuf, PdfLoaderError>>, PdfLoaderError>
Creates a new PdfFileLoader using a glob pattern to match files.
§Example
Create a PdfFileLoader for all .pdf
files that match the glob “tests/data/*.pdf”.
let loader = PdfFileLoader::with_glob("tests/data/*.pdf")?;
Sourcepub fn with_dir(
directory: &str,
) -> Result<PdfFileLoader<'_, Result<PathBuf, PdfLoaderError>>, PdfLoaderError>
pub fn with_dir( directory: &str, ) -> Result<PdfFileLoader<'_, Result<PathBuf, PdfLoaderError>>, PdfLoaderError>
Creates a new PdfFileLoader on all files within a directory.
§Example
Create a PdfFileLoader for all files that are in the directory “files”.
let loader = PdfFileLoader::with_dir("files")?;
Source§impl<'a> PdfFileLoader<'a, Vec<u8>>
impl<'a> PdfFileLoader<'a, Vec<u8>>
Sourcepub fn from_bytes(bytes: Vec<u8>) -> PdfFileLoader<'a, Vec<u8>>
pub fn from_bytes(bytes: Vec<u8>) -> PdfFileLoader<'a, Vec<u8>>
Ingest a PDF as a byte array.
Sourcepub fn from_bytes_multi(bytes_vec: Vec<Vec<u8>>) -> PdfFileLoader<'a, Vec<u8>>
pub fn from_bytes_multi(bytes_vec: Vec<Vec<u8>>) -> PdfFileLoader<'a, Vec<u8>>
Ingest multiple byte arrays.
Sourcepub fn load(self) -> PdfFileLoader<'a, Result<Document, PdfLoaderError>>
pub fn load(self) -> PdfFileLoader<'a, Result<Document, PdfLoaderError>>
Use this once you’ve created the loader to load the document in.
Sourcepub fn load_with_path(
self,
) -> PdfFileLoader<'a, Result<(PathBuf, Document), PdfLoaderError>>
pub fn load_with_path( self, ) -> PdfFileLoader<'a, Result<(PathBuf, Document), PdfLoaderError>>
Use this once you’ve created the loader to load the document in (and get the path).
Trait Implementations§
Auto Trait Implementations§
impl<'a, T> Freeze for PdfFileLoader<'a, T>
impl<'a, T> !RefUnwindSafe for PdfFileLoader<'a, T>
impl<'a, T> !Send for PdfFileLoader<'a, T>
impl<'a, T> !Sync for PdfFileLoader<'a, T>
impl<'a, T> Unpin for PdfFileLoader<'a, T>
impl<'a, T> !UnwindSafe for PdfFileLoader<'a, T>
Blanket Implementations§
Source§impl<T> BorrowMut<T> for Twhere
T: ?Sized,
impl<T> BorrowMut<T> for Twhere
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Source§impl<T> Instrument for T
impl<T> Instrument for T
Source§fn instrument(self, span: Span) -> Instrumented<Self>
fn instrument(self, span: Span) -> Instrumented<Self>
Source§fn in_current_span(self) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
Source§impl<T> Instrument for T
impl<T> Instrument for T
Source§fn instrument(self, span: Span) -> Instrumented<Self>
fn instrument(self, span: Span) -> Instrumented<Self>
Source§fn in_current_span(self) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left is true.
Converts self
into a Right
variant of Either<Self, Self>
otherwise. Read moreSource§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true.
Converts self
into a Right
variant of Either<Self, Self>
otherwise. Read more