Skip to main content

Document

Struct Document 

Source
pub struct Document { /* private fields */ }
Expand description

A high-level PDF document handle.

Provides ergonomic access to pages, text, rendering, metadata, annotations, forms, signatures, and modification.

Implementations§

Source§

impl Document

Source

pub fn open<P: AsRef<Path>>(path: P) -> Result<Self>

Open a PDF file from a path.

Examples found in repository?
examples/render_page.rs (line 14)
7fn main() -> Result<(), Box<dyn std::error::Error>> {
8    let args: Vec<String> = std::env::args().collect();
9    if args.len() < 2 {
10        eprintln!("Usage: render_page <pdf-file> [page] [dpi] [output.png]");
11        std::process::exit(1);
12    }
13
14    let doc = Document::open(&args[1])?;
15    let page_idx: usize = args.get(2).and_then(|s| s.parse().ok()).unwrap_or(0);
16    let dpi: f64 = args.get(3).and_then(|s| s.parse().ok()).unwrap_or(150.0);
17    let output = args.get(4).map(|s| s.as_str()).unwrap_or("output.png");
18
19    let page = doc.page(page_idx)?;
20    println!("Page {}: {:.0}x{:.0} pt, rotation {}", page_idx + 1, page.width(), page.height(), page.rotation());
21
22    let png = page.render_png(dpi)?;
23    std::fs::write(output, &png)?;
24    println!("Rendered to {output} ({} bytes, {dpi} DPI)", png.len());
25
26    Ok(())
27}
More examples
Hide additional examples
examples/basic_read.rs (line 14)
7fn main() -> Result<(), Box<dyn std::error::Error>> {
8    let args: Vec<String> = std::env::args().collect();
9    if args.len() < 2 {
10        eprintln!("Usage: basic_read <pdf-file>");
11        std::process::exit(1);
12    }
13
14    let doc = Document::open(&args[1])?;
15
16    println!("Version: {}", doc.version_string());
17    println!("Pages: {}", doc.page_count());
18    println!("Encrypted: {}", doc.is_encrypted());
19
20    if let Some(title) = doc.title() {
21        println!("Title: {title}");
22    }
23    if let Some(author) = doc.author() {
24        println!("Author: {author}");
25    }
26
27    // Extract text from first page
28    if let Ok(page) = doc.page(0) {
29        println!("\n--- Page 1 text ---");
30        match page.text() {
31            Ok(text) => println!("{}", &text[..text.len().min(500)]),
32            Err(e) => println!("(text extraction failed: {e})"),
33        }
34    }
35
36    Ok(())
37}
Source

pub fn from_bytes(data: Vec<u8>) -> Result<Self>

Parse a PDF from in-memory bytes.

Source

pub fn authenticate(&mut self, password: &[u8]) -> Result<()>

Authenticate an encrypted document with a password.

Source

pub fn page_count(&self) -> usize

Number of pages in the document.

Examples found in repository?
examples/basic_read.rs (line 17)
7fn main() -> Result<(), Box<dyn std::error::Error>> {
8    let args: Vec<String> = std::env::args().collect();
9    if args.len() < 2 {
10        eprintln!("Usage: basic_read <pdf-file>");
11        std::process::exit(1);
12    }
13
14    let doc = Document::open(&args[1])?;
15
16    println!("Version: {}", doc.version_string());
17    println!("Pages: {}", doc.page_count());
18    println!("Encrypted: {}", doc.is_encrypted());
19
20    if let Some(title) = doc.title() {
21        println!("Title: {title}");
22    }
23    if let Some(author) = doc.author() {
24        println!("Author: {author}");
25    }
26
27    // Extract text from first page
28    if let Ok(page) = doc.page(0) {
29        println!("\n--- Page 1 text ---");
30        match page.text() {
31            Ok(text) => println!("{}", &text[..text.len().min(500)]),
32            Err(e) => println!("(text extraction failed: {e})"),
33        }
34    }
35
36    Ok(())
37}
Source

pub fn page(&self, index: usize) -> Result<Page<'_>>

Get a page by 0-based index.

Examples found in repository?
examples/render_page.rs (line 19)
7fn main() -> Result<(), Box<dyn std::error::Error>> {
8    let args: Vec<String> = std::env::args().collect();
9    if args.len() < 2 {
10        eprintln!("Usage: render_page <pdf-file> [page] [dpi] [output.png]");
11        std::process::exit(1);
12    }
13
14    let doc = Document::open(&args[1])?;
15    let page_idx: usize = args.get(2).and_then(|s| s.parse().ok()).unwrap_or(0);
16    let dpi: f64 = args.get(3).and_then(|s| s.parse().ok()).unwrap_or(150.0);
17    let output = args.get(4).map(|s| s.as_str()).unwrap_or("output.png");
18
19    let page = doc.page(page_idx)?;
20    println!("Page {}: {:.0}x{:.0} pt, rotation {}", page_idx + 1, page.width(), page.height(), page.rotation());
21
22    let png = page.render_png(dpi)?;
23    std::fs::write(output, &png)?;
24    println!("Rendered to {output} ({} bytes, {dpi} DPI)", png.len());
25
26    Ok(())
27}
More examples
Hide additional examples
examples/basic_read.rs (line 28)
7fn main() -> Result<(), Box<dyn std::error::Error>> {
8    let args: Vec<String> = std::env::args().collect();
9    if args.len() < 2 {
10        eprintln!("Usage: basic_read <pdf-file>");
11        std::process::exit(1);
12    }
13
14    let doc = Document::open(&args[1])?;
15
16    println!("Version: {}", doc.version_string());
17    println!("Pages: {}", doc.page_count());
18    println!("Encrypted: {}", doc.is_encrypted());
19
20    if let Some(title) = doc.title() {
21        println!("Title: {title}");
22    }
23    if let Some(author) = doc.author() {
24        println!("Author: {author}");
25    }
26
27    // Extract text from first page
28    if let Ok(page) = doc.page(0) {
29        println!("\n--- Page 1 text ---");
30        match page.text() {
31            Ok(text) => println!("{}", &text[..text.len().min(500)]),
32            Err(e) => println!("(text extraction failed: {e})"),
33        }
34    }
35
36    Ok(())
37}
Source

pub fn pages(&self) -> PageIter<'_>

Iterate over all pages.

Source

pub fn version(&self) -> (u8, u8)

PDF version as a (major, minor) pair, e.g. (1, 7) for PDF 1.7.

Source

pub fn version_string(&self) -> String

PDF version as a string (e.g. “1.7”).

Examples found in repository?
examples/basic_read.rs (line 16)
7fn main() -> Result<(), Box<dyn std::error::Error>> {
8    let args: Vec<String> = std::env::args().collect();
9    if args.len() < 2 {
10        eprintln!("Usage: basic_read <pdf-file>");
11        std::process::exit(1);
12    }
13
14    let doc = Document::open(&args[1])?;
15
16    println!("Version: {}", doc.version_string());
17    println!("Pages: {}", doc.page_count());
18    println!("Encrypted: {}", doc.is_encrypted());
19
20    if let Some(title) = doc.title() {
21        println!("Title: {title}");
22    }
23    if let Some(author) = doc.author() {
24        println!("Author: {author}");
25    }
26
27    // Extract text from first page
28    if let Ok(page) = doc.page(0) {
29        println!("\n--- Page 1 text ---");
30        match page.text() {
31            Ok(text) => println!("{}", &text[..text.len().min(500)]),
32            Err(e) => println!("(text extraction failed: {e})"),
33        }
34    }
35
36    Ok(())
37}
Source

pub fn is_encrypted(&self) -> bool

Whether the document is encrypted.

Examples found in repository?
examples/basic_read.rs (line 18)
7fn main() -> Result<(), Box<dyn std::error::Error>> {
8    let args: Vec<String> = std::env::args().collect();
9    if args.len() < 2 {
10        eprintln!("Usage: basic_read <pdf-file>");
11        std::process::exit(1);
12    }
13
14    let doc = Document::open(&args[1])?;
15
16    println!("Version: {}", doc.version_string());
17    println!("Pages: {}", doc.page_count());
18    println!("Encrypted: {}", doc.is_encrypted());
19
20    if let Some(title) = doc.title() {
21        println!("Title: {title}");
22    }
23    if let Some(author) = doc.author() {
24        println!("Author: {author}");
25    }
26
27    // Extract text from first page
28    if let Ok(page) = doc.page(0) {
29        println!("\n--- Page 1 text ---");
30        match page.text() {
31            Ok(text) => println!("{}", &text[..text.len().min(500)]),
32            Err(e) => println!("(text extraction failed: {e})"),
33        }
34    }
35
36    Ok(())
37}
Source

pub fn is_authenticated(&self) -> bool

Whether the document has been authenticated or is not encrypted.

Source

pub fn is_linearized(&self) -> bool

Whether the document is linearized (web-optimized).

Source

pub fn title(&self) -> Option<String>

Get document title from metadata.

Examples found in repository?
examples/basic_read.rs (line 20)
7fn main() -> Result<(), Box<dyn std::error::Error>> {
8    let args: Vec<String> = std::env::args().collect();
9    if args.len() < 2 {
10        eprintln!("Usage: basic_read <pdf-file>");
11        std::process::exit(1);
12    }
13
14    let doc = Document::open(&args[1])?;
15
16    println!("Version: {}", doc.version_string());
17    println!("Pages: {}", doc.page_count());
18    println!("Encrypted: {}", doc.is_encrypted());
19
20    if let Some(title) = doc.title() {
21        println!("Title: {title}");
22    }
23    if let Some(author) = doc.author() {
24        println!("Author: {author}");
25    }
26
27    // Extract text from first page
28    if let Ok(page) = doc.page(0) {
29        println!("\n--- Page 1 text ---");
30        match page.text() {
31            Ok(text) => println!("{}", &text[..text.len().min(500)]),
32            Err(e) => println!("(text extraction failed: {e})"),
33        }
34    }
35
36    Ok(())
37}
Source

pub fn author(&self) -> Option<String>

Get document author from metadata.

Examples found in repository?
examples/basic_read.rs (line 23)
7fn main() -> Result<(), Box<dyn std::error::Error>> {
8    let args: Vec<String> = std::env::args().collect();
9    if args.len() < 2 {
10        eprintln!("Usage: basic_read <pdf-file>");
11        std::process::exit(1);
12    }
13
14    let doc = Document::open(&args[1])?;
15
16    println!("Version: {}", doc.version_string());
17    println!("Pages: {}", doc.page_count());
18    println!("Encrypted: {}", doc.is_encrypted());
19
20    if let Some(title) = doc.title() {
21        println!("Title: {title}");
22    }
23    if let Some(author) = doc.author() {
24        println!("Author: {author}");
25    }
26
27    // Extract text from first page
28    if let Ok(page) = doc.page(0) {
29        println!("\n--- Page 1 text ---");
30        match page.text() {
31            Ok(text) => println!("{}", &text[..text.len().min(500)]),
32            Err(e) => println!("(text extraction failed: {e})"),
33        }
34    }
35
36    Ok(())
37}
Source

pub fn subject(&self) -> Option<String>

Get document subject from metadata.

Source

pub fn keywords(&self) -> Option<String>

Get document keywords from metadata.

Source

pub fn creator(&self) -> Option<String>

Get document creator from metadata.

Source

pub fn producer(&self) -> Option<String>

Get document producer from metadata.

Source

pub fn creation_date(&self) -> Option<String>

Get creation date string from metadata.

Source

pub fn modification_date(&self) -> Option<String>

Get modification date string from metadata.

Source

pub fn metadata(&self) -> Vec<(String, String)>

Get all document metadata as key-value pairs.

Source

pub fn text(&self) -> Result<String>

Extract text from all pages, concatenated.

Source

pub fn search(&self, query: &str) -> Result<Vec<(usize, Vec<SearchResult>)>>

Search for text across all pages. Returns (page_index, matches) pairs.

Source

pub fn outlines(&self) -> Result<Vec<OutlineItem>>

Get bookmarks/outlines.

Source

pub fn page_labels(&self) -> Result<Vec<PageLabelRange>>

Get page labels.

Source

pub fn annotations(&self, page_index: usize) -> Result<Vec<Annotation>>

Get annotations for a specific page.

Source

pub fn form_fields(&self) -> Result<Option<AcroForm>>

Get form fields (if any).

Source

pub fn embedded_files(&self) -> Result<Vec<FileSpec>>

Get embedded files.

Source

pub fn signatures(&self) -> Result<Vec<SignatureInfo>>

Get digital signature information.

Source

pub fn modify(&self) -> Result<Modifier>

Create a modifier for editing this document.

The modifier works on a copy of the raw PDF bytes, so the original Document is not affected.

Source

pub fn inner(&self) -> &PdfDocument

Get the underlying PdfDocument for low-level access.

Source

pub fn inner_mut(&mut self) -> &mut PdfDocument

Get a mutable reference to the underlying PdfDocument.

Source

pub fn into_inner(self) -> PdfDocument

Consume this Document and return the underlying PdfDocument.

Auto Trait Implementations§

Blanket Implementations§

Source§

impl<T> Any for T
where T: 'static + ?Sized,

Source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
Source§

impl<T> Borrow<T> for T
where T: ?Sized,

Source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
Source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
Source§

impl<T> From<T> for T

Source§

fn from(t: T) -> T

Returns the argument unchanged.

Source§

impl<T, U> Into<U> for T
where U: From<T>,

Source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source§

impl<T> IntoEither for T

Source§

fn into_either(self, into_left: bool) -> Either<Self, Self>

Converts self into a Left variant of Either<Self, Self> if into_left is true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
where F: FnOnce(&Self) -> bool,

Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§

impl<T> Pointable for T

Source§

const ALIGN: usize

The alignment of the pointer.
Source§

type Init = T

The type for initializers.
Source§

unsafe fn init(init: <T as Pointable>::Init) -> usize

Initializes a value with the given initializer. Read more
Source§

unsafe fn deref<'a>(ptr: usize) -> &'a T

Dereferences the given pointer. Read more
Source§

unsafe fn deref_mut<'a>(ptr: usize) -> &'a mut T

Mutably dereferences the given pointer. Read more
Source§

unsafe fn drop(ptr: usize)

Drops the object pointed to by the given pointer. Read more
Source§

impl<T> Same for T

Source§

type Output = T

Should always be Self.
Source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source§

type Error = Infallible

The type returned in the event of a conversion error.
Source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
Source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
Source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.
Source§

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

Source§

fn vzip(self) -> V