Struct PdfiumDocument

Source

pub struct PdfiumDocument { /* private fields */ }

Expand description

Rust interface to FPDF_DOCUMENT

Implementations§

Source §

impl PdfiumDocument

Source

pub fn new() -> PdfiumResult<Self>

Creates a new empty PdfiumDocument

Examples found in repository ?

examples/import_pages.rs (line 43)

40pub fn example_import_pages() -> PdfiumResult<()> {
41    // Create a new, empty PDF document that will serve as our destination
42    // This document starts with 0 pages and we'll add pages to it
43    let document = PdfiumDocument::new()?;
44
45    // Load the source PDF document from which we'll extract pages
46    // The second parameter (None) means we're not providing a password
47    let src_doc = PdfiumDocument::new_from_path("resources/pg1342-images-3.pdf", None)?;
48
49    // Import specific pages from the source document into our destination document
50    // Parameters breakdown:
51    // - &src_doc: Reference to the source document to import from
52    // - "12,14,30-34": String specifying which pages to import
53    //   * Page 12 (individual page)
54    //   * Page 14 (individual page)
55    //   * Pages 30-34 (range of 5 pages: 30, 31, 32, 33, 34)
56    //   * Total: 7 pages will be imported
57    // - 0: Index position where imported pages should be inserted
58    //   * 0 means insert at the beginning of the destination document
59    //   * If the destination had existing pages, imported pages would be inserted before them
60    document.pages().import(&src_doc, "12,14,30-34", 0)?;
61
62    // Save the destination document with imported pages to a new file
63    // The second parameter (None) indicates we're not specifying a version
64    document.save_to_path("pride-1.pdf", None)?;
65
66    // Verification step: reload the saved document to confirm the operation
67    let document = PdfiumDocument::new_from_path("pride-1.pdf", None)?;
68
69    // Get the total number of pages in the saved document
70    let page_count = document.page_count();
71
72    // Assert that we have exactly 7 pages as expected
73    // This confirms that all specified pages were imported correctly:
74    // 1 page (12) + 1 page (14) + 5 pages (30-34) = 7 pages total
75    assert_eq!(page_count, 7);
76
77    Ok(())
78}
79
80/// Demonstrates importing PDF pages using index-based page specification.
81///
82/// This function shows an alternative approach to page importing using
83/// explicit page indices rather than string specifications. This method
84/// provides more programmatic control and is useful when:
85/// - You have a dynamically generated list of page numbers
86/// - You need to import non-contiguous pages with complex patterns
87/// - You're working with 0-based indexing in your application logic
88///
89/// Key differences from string-based approach:
90/// - Uses 0-based indexing (first page is index 0)
91/// - Requires explicit specification of each page index
92/// - More verbose but offers precise control
93/// - Better for programmatic generation of page lists
94pub fn example_import_pages_by_index() -> PdfiumResult<()> {
95    // Create a new, empty PDF document to serve as our destination
96    let document = PdfiumDocument::new()?;
97
98    // Load the same source PDF document as in the previous example
99    // We're extracting the same pages but using a different method
100    let src_doc = PdfiumDocument::new_from_path("resources/pg1342-images-3.pdf", None)?;
101
102    // Import pages using explicit 0-based indices
103    // Parameters breakdown:
104    // - &src_doc: Reference to the source document
105    // - Some(&[11, 13, 29, 30, 31, 32, 33]): Slice of 0-based page indices
106    //   * Index 11 = Page 12 in human numbering (11 + 1 = 12)
107    //   * Index 13 = Page 14 in human numbering (13 + 1 = 14)
108    //   * Indices 29-33 = Pages 30-34 in human numbering
109    //   * Note: This matches exactly the same pages as "12,14,30-34" from the previous example
110    //   * The Some() wrapper indicates we're providing a specific list of indices
111    //   * Using None instead would import all pages from the source document
112    // - 0: Insertion position (beginning of destination document)
113    document
114        .pages()
115        .import_by_index(&src_doc, Some(&[11, 13, 29, 30, 31, 32, 33]), 0)?;
116
117    // Save the document with a different filename to distinguish from the first example
118    // Even though the content should be identical, using different names helps with testing
119    document.save_to_path("pride-2.pdf", None)?;
120
121    // Verification: reload the saved document to ensure it was created correctly
122    let document = PdfiumDocument::new_from_path("pride-2.pdf", None)?;
123
124    // Count the pages in the saved document
125    let page_count = document.page_count();
126
127    // Verify that we have the same 7 pages as the string-based method
128    // This confirms both methods produce identical results:
129    // - 1 page at index 11 (page 12)
130    // - 1 page at index 13 (page 14)
131    // - 5 pages at indices 29-33 (pages 30-34)
132    // Total: 7 pages
133    assert_eq!(page_count, 7);
134
135    Ok(())
136}

Source

pub fn new_from_path<P: AsRef<Path>>( path: P, password: Option<&str>, ) -> PdfiumResult<Self>

Loads a PdfiumDocument from the file at the given path, using the optional password.

Examples found in repository ?

examples/import_pages.rs (line 47)

40pub fn example_import_pages() -> PdfiumResult<()> {
41    // Create a new, empty PDF document that will serve as our destination
42    // This document starts with 0 pages and we'll add pages to it
43    let document = PdfiumDocument::new()?;
44
45    // Load the source PDF document from which we'll extract pages
46    // The second parameter (None) means we're not providing a password
47    let src_doc = PdfiumDocument::new_from_path("resources/pg1342-images-3.pdf", None)?;
48
49    // Import specific pages from the source document into our destination document
50    // Parameters breakdown:
51    // - &src_doc: Reference to the source document to import from
52    // - "12,14,30-34": String specifying which pages to import
53    //   * Page 12 (individual page)
54    //   * Page 14 (individual page)
55    //   * Pages 30-34 (range of 5 pages: 30, 31, 32, 33, 34)
56    //   * Total: 7 pages will be imported
57    // - 0: Index position where imported pages should be inserted
58    //   * 0 means insert at the beginning of the destination document
59    //   * If the destination had existing pages, imported pages would be inserted before them
60    document.pages().import(&src_doc, "12,14,30-34", 0)?;
61
62    // Save the destination document with imported pages to a new file
63    // The second parameter (None) indicates we're not specifying a version
64    document.save_to_path("pride-1.pdf", None)?;
65
66    // Verification step: reload the saved document to confirm the operation
67    let document = PdfiumDocument::new_from_path("pride-1.pdf", None)?;
68
69    // Get the total number of pages in the saved document
70    let page_count = document.page_count();
71
72    // Assert that we have exactly 7 pages as expected
73    // This confirms that all specified pages were imported correctly:
74    // 1 page (12) + 1 page (14) + 5 pages (30-34) = 7 pages total
75    assert_eq!(page_count, 7);
76
77    Ok(())
78}
79
80/// Demonstrates importing PDF pages using index-based page specification.
81///
82/// This function shows an alternative approach to page importing using
83/// explicit page indices rather than string specifications. This method
84/// provides more programmatic control and is useful when:
85/// - You have a dynamically generated list of page numbers
86/// - You need to import non-contiguous pages with complex patterns
87/// - You're working with 0-based indexing in your application logic
88///
89/// Key differences from string-based approach:
90/// - Uses 0-based indexing (first page is index 0)
91/// - Requires explicit specification of each page index
92/// - More verbose but offers precise control
93/// - Better for programmatic generation of page lists
94pub fn example_import_pages_by_index() -> PdfiumResult<()> {
95    // Create a new, empty PDF document to serve as our destination
96    let document = PdfiumDocument::new()?;
97
98    // Load the same source PDF document as in the previous example
99    // We're extracting the same pages but using a different method
100    let src_doc = PdfiumDocument::new_from_path("resources/pg1342-images-3.pdf", None)?;
101
102    // Import pages using explicit 0-based indices
103    // Parameters breakdown:
104    // - &src_doc: Reference to the source document
105    // - Some(&[11, 13, 29, 30, 31, 32, 33]): Slice of 0-based page indices
106    //   * Index 11 = Page 12 in human numbering (11 + 1 = 12)
107    //   * Index 13 = Page 14 in human numbering (13 + 1 = 14)
108    //   * Indices 29-33 = Pages 30-34 in human numbering
109    //   * Note: This matches exactly the same pages as "12,14,30-34" from the previous example
110    //   * The Some() wrapper indicates we're providing a specific list of indices
111    //   * Using None instead would import all pages from the source document
112    // - 0: Insertion position (beginning of destination document)
113    document
114        .pages()
115        .import_by_index(&src_doc, Some(&[11, 13, 29, 30, 31, 32, 33]), 0)?;
116
117    // Save the document with a different filename to distinguish from the first example
118    // Even though the content should be identical, using different names helps with testing
119    document.save_to_path("pride-2.pdf", None)?;
120
121    // Verification: reload the saved document to ensure it was created correctly
122    let document = PdfiumDocument::new_from_path("pride-2.pdf", None)?;
123
124    // Count the pages in the saved document
125    let page_count = document.page_count();
126
127    // Verify that we have the same 7 pages as the string-based method
128    // This confirms both methods produce identical results:
129    // - 1 page at index 11 (page 12)
130    // - 1 page at index 13 (page 14)
131    // - 5 pages at indices 29-33 (pages 30-34)
132    // Total: 7 pages
133    assert_eq!(page_count, 7);
134
135    Ok(())
136}

More examples

Hide additional examples

examples/text_extract_search.rs (line 26)

23pub fn example_extract_text() -> PdfiumResult<()> {
24    // Load the PDF document from the specified file path
25    // The second parameter (None) indicates no password is required
26    let document = PdfiumDocument::new_from_path("resources/chapter1.pdf", None)?;
27
28    // Iterate through all pages in the document
29    // enumerate() provides both the index and the page object
30    for (index, page) in document.pages().enumerate() {
31        // Extract the full text content from the current page
32        // The ?. operators handle potential errors at each step:
33        // - page? ensures the page loaded successfully
34        // - .text()? extracts text objects from the page
35        // - .full() gets the complete text content as a string
36        let text = page?.text()?.full();
37
38        // Print formatted output for each page
39        println!("Page {}", index + 1); // Pages are 1-indexed for user display
40        println!("------");
41        println!("{text}");
42        println!() // Empty line for separation between pages
43    }
44
45    // Expected output:
46    //
47    // Page 1
48    // ------
49    //
50    // Page 2
51    // ------
52    // Ruskin
53    // House.
54    // 156. Charing
55    // Cross Road.
56    // London
57    // George Allen.
58    //
59    // Page 3
60    // ------
61    //
62    // Page 4
63    // ------
64    // I
65    // Chapter I.
66    // T is a truth universally acknowledged, that a single man in possession of a good
67    // fortune must be in want of a wife.
68    // However little known the feelings or views of such a man may be on his first
69    // entering a neighbourhood, this truth is so well fixed in the minds of the surrounding
70    // families, that he is considered as the rightful property of some one or other of their
71    // daughters.
72    // “My dear Mr. Bennet,” said his lady to him one day, “have you heard that
73    // Netherfield Park is let at last?”
74    // ...
75
76    Ok(())
77}
78
79/// Demonstrates text search functionality within a PDF document
80pub fn example_search() -> PdfiumResult<()> {
81    // Load the PDF document to search within
82    let document = PdfiumDocument::new_from_path("resources/groningen.pdf", None)?;
83
84    // Get the first page (index 0) for searching
85    let page = document.page(0)?;
86
87    // Extract text objects from the page for searching
88    let text = page.text()?;
89
90    // Search for "amsterdam" with case-insensitive matching
91    // PdfiumSearchFlags::empty() means no special search flags (case-insensitive by default)
92    // The last parameter (0) is the starting position for the search
93    let search = text.find("amsterdam", PdfiumSearchFlags::empty(), 0);
94    println!("Found amsterdam {} times", search.count());
95
96    // Search for "groningen" with case-insensitive matching
97    let search = text.find("groningen", PdfiumSearchFlags::empty(), 0);
98    println!(
99        "Found groningen {} times (case insensitive)",
100        search.count()
101    );
102
103    // Search for "Groningen" with case-sensitive matching
104    // MATCH_CASE flag enforces exact case matching
105    let search = text.find("Groningen", PdfiumSearchFlags::MATCH_CASE, 0);
106    println!("Found Groningen {} times (case sensitive)", search.count());
107
108    // Perform another case-insensitive search to iterate through results
109    let search = text.find("groningen", PdfiumSearchFlags::empty(), 0);
110
111    // Iterate through each search result to extract detailed information
112    for result in search {
113        // Extract the text fragment at the found position
114        // result.index() gives the character position where the match starts
115        // result.count() gives the length of the matched text
116        let fragment = text.extract(result.index(), result.count());
117        println!(
118            "Found groningen (case insensitive) at {}, fragment = '{fragment}'",
119            result.index()
120        );
121    }
122
123    // Expected output:
124    //
125    // Found amsterdam 0 times
126    // Found groningen 5 times (case insensitive)
127    // Found Groningen 5 times (case sensitive)
128    // Found groningen (case insensitive) at 14, fragment = 'Groningen'
129    // Found groningen (case insensitive) at 232, fragment = 'Groningen'
130    // Found groningen (case insensitive) at 475, fragment = 'Groningen'
131    // Found groningen (case insensitive) at 920, fragment = 'Groningen'
132    // Found groningen (case insensitive) at 1050, fragment = 'Groningen'
133
134    Ok(())
135}

examples/export_pages.rs (line 39)

34pub fn example_export_pages_to_images() -> PdfiumResult<()> {
35    // Load the PDF document from the specified file path
36    // Parameters:
37    // - "resources/groningen.pdf": Path to the PDF file (relative to current working directory)
38    // - None: No password required for this PDF (use Some("password") if needed)
39    let document = PdfiumDocument::new_from_path("resources/groningen.pdf", None)?;
40
41    // Iterate through all pages in the document
42    // document.pages() returns an iterator over all pages
43    // enumerate() adds an index counter starting from 0
44    // This gives us both the page object and its 0-based index
45    for (index, page) in document.pages().enumerate() {
46        // Render the current page as a bitmap image
47        // This is where the PDF content gets converted to a raster image
48        //
49        // In the configuration we only specify the height in pixels. The width will be calculated
50        // automatically to maintain aspect ratio.
51        let config = PdfiumRenderConfig::new().with_height(1080);
52        let bitmap = page?.render(&config)?;
53
54        // Verify that the bitmap was rendered at the requested height
55        // This assertion ensures the rendering process worked as expected
56        // If this fails, it indicates a bug in the rendering logic
57        assert_eq!(bitmap.height(), 1080);
58
59        // Generate a unique filename for this page
60        // Format: "groningen-page-{page_number}.jpg"
61        // - index + 1 converts from 0-based index to 1-based page numbers
62        //   * Page 0 becomes "groningen-page-1.jpg"
63        //   * Page 1 becomes "groningen-page-2.jpg", etc.
64        // - The .jpg extension indicates JPEG format will be used
65        let filename = format!("groningen-page-{}.jpg", index + 1);
66
67        // Save the rendered bitmap to disk as a JPEG image
68        // Parameters:
69        // - &filename: Reference to the generated filename string
70        // - image::ImageFormat::Jpeg: Specifies JPEG compression format
71        //   * Alternative format: Png (lossless)
72        //   * JPEG provides good compression but is lossy (some quality loss)
73        //
74        // The save operation handles:
75        // - Converting from BGRA format to JPEG-compatible format
76        // - Applying JPEG compression
77        // - Writing the file to disk
78        bitmap.save(&filename, image::ImageFormat::Jpeg)?;
79
80        // Note: No explicit cleanup needed - Rust's ownership system automatically
81        // deallocates the bitmap memory when it goes out of scope at the end of this iteration
82    }
83
84    // Return success - all pages have been successfully exported
85    Ok(())
86}

Source

pub fn new_from_reader<R: Read + Seek + 'static>( reader: R, password: Option<&str>, ) -> PdfiumResult<Self>

Loads a PdfiumDocument from a reader that implements both Read and Seek, using the optional password.

Source

pub fn save_to_path<P: AsRef<Path>>( &self, path: P, version: Option<i32>, ) -> PdfiumResult<()>

Saves this PdfiumDocument to a file at the specified path.

This is a convenience method that creates a new file at the given path and writes the PDF document to it. The file will be created if it doesn’t exist, or truncated if it does exist.

§Arguments

path - A path-like type (String, &str, Path, PathBuf, etc.) that specifies where to save the PDF file. Uses AsRef<Path> for maximum flexibility.
version - Optional PDF version to save as. If None, saves as a copy of the original document preserving its version. If Some(version), converts the document to the specified PDF version (e.g., 14 for PDF 1.4).

§Returns

PdfiumResult<()> - Ok(()) on success, or an error if file creation fails or the PDF save operation encounters an issue.

§Examples

// Save to current directory preserving original PDF version
document.save_to_path("document.pdf", None)?;

// Save as PDF 1.4 to a specific path
document.save_to_path("document_v14.pdf", Some(14))?;

§Errors

This function can fail if:

The file cannot be created (permissions, invalid path, disk full, etc.)
The underlying PDF save operation fails (corrupt document, unsupported features, etc.)

Examples found in repository ?

examples/import_pages.rs (line 64)

40pub fn example_import_pages() -> PdfiumResult<()> {
41    // Create a new, empty PDF document that will serve as our destination
42    // This document starts with 0 pages and we'll add pages to it
43    let document = PdfiumDocument::new()?;
44
45    // Load the source PDF document from which we'll extract pages
46    // The second parameter (None) means we're not providing a password
47    let src_doc = PdfiumDocument::new_from_path("resources/pg1342-images-3.pdf", None)?;
48
49    // Import specific pages from the source document into our destination document
50    // Parameters breakdown:
51    // - &src_doc: Reference to the source document to import from
52    // - "12,14,30-34": String specifying which pages to import
53    //   * Page 12 (individual page)
54    //   * Page 14 (individual page)
55    //   * Pages 30-34 (range of 5 pages: 30, 31, 32, 33, 34)
56    //   * Total: 7 pages will be imported
57    // - 0: Index position where imported pages should be inserted
58    //   * 0 means insert at the beginning of the destination document
59    //   * If the destination had existing pages, imported pages would be inserted before them
60    document.pages().import(&src_doc, "12,14,30-34", 0)?;
61
62    // Save the destination document with imported pages to a new file
63    // The second parameter (None) indicates we're not specifying a version
64    document.save_to_path("pride-1.pdf", None)?;
65
66    // Verification step: reload the saved document to confirm the operation
67    let document = PdfiumDocument::new_from_path("pride-1.pdf", None)?;
68
69    // Get the total number of pages in the saved document
70    let page_count = document.page_count();
71
72    // Assert that we have exactly 7 pages as expected
73    // This confirms that all specified pages were imported correctly:
74    // 1 page (12) + 1 page (14) + 5 pages (30-34) = 7 pages total
75    assert_eq!(page_count, 7);
76
77    Ok(())
78}
79
80/// Demonstrates importing PDF pages using index-based page specification.
81///
82/// This function shows an alternative approach to page importing using
83/// explicit page indices rather than string specifications. This method
84/// provides more programmatic control and is useful when:
85/// - You have a dynamically generated list of page numbers
86/// - You need to import non-contiguous pages with complex patterns
87/// - You're working with 0-based indexing in your application logic
88///
89/// Key differences from string-based approach:
90/// - Uses 0-based indexing (first page is index 0)
91/// - Requires explicit specification of each page index
92/// - More verbose but offers precise control
93/// - Better for programmatic generation of page lists
94pub fn example_import_pages_by_index() -> PdfiumResult<()> {
95    // Create a new, empty PDF document to serve as our destination
96    let document = PdfiumDocument::new()?;
97
98    // Load the same source PDF document as in the previous example
99    // We're extracting the same pages but using a different method
100    let src_doc = PdfiumDocument::new_from_path("resources/pg1342-images-3.pdf", None)?;
101
102    // Import pages using explicit 0-based indices
103    // Parameters breakdown:
104    // - &src_doc: Reference to the source document
105    // - Some(&[11, 13, 29, 30, 31, 32, 33]): Slice of 0-based page indices
106    //   * Index 11 = Page 12 in human numbering (11 + 1 = 12)
107    //   * Index 13 = Page 14 in human numbering (13 + 1 = 14)
108    //   * Indices 29-33 = Pages 30-34 in human numbering
109    //   * Note: This matches exactly the same pages as "12,14,30-34" from the previous example
110    //   * The Some() wrapper indicates we're providing a specific list of indices
111    //   * Using None instead would import all pages from the source document
112    // - 0: Insertion position (beginning of destination document)
113    document
114        .pages()
115        .import_by_index(&src_doc, Some(&[11, 13, 29, 30, 31, 32, 33]), 0)?;
116
117    // Save the document with a different filename to distinguish from the first example
118    // Even though the content should be identical, using different names helps with testing
119    document.save_to_path("pride-2.pdf", None)?;
120
121    // Verification: reload the saved document to ensure it was created correctly
122    let document = PdfiumDocument::new_from_path("pride-2.pdf", None)?;
123
124    // Count the pages in the saved document
125    let page_count = document.page_count();
126
127    // Verify that we have the same 7 pages as the string-based method
128    // This confirms both methods produce identical results:
129    // - 1 page at index 11 (page 12)
130    // - 1 page at index 13 (page 14)
131    // - 5 pages at indices 29-33 (pages 30-34)
132    // Total: 7 pages
133    assert_eq!(page_count, 7);
134
135    Ok(())
136}

Source

pub fn save_to_bytes(&self, version: Option<i32>) -> PdfiumResult<Vec<u8>>

Saves this PdfiumDocument to a byte vector in memory.

This method is useful when you need the PDF data as bytes rather than writing directly to a file. Common use cases include:

Serving PDF content over HTTP without creating temporary files
Storing PDF data in a database as a BLOB
Further processing the PDF bytes (compression, encryption, etc.)
Testing scenarios where you want to verify PDF content

§Arguments

version - Optional PDF version to save as. If None, preserves the original document’s PDF version. If Some(version), converts to the specified version (e.g., 17 for PDF 1.7).

§Returns

PdfiumResult<Vec<u8>> - On success, returns Ok with a Vec<u8> containing the complete PDF file data. On failure, returns an Err describing what went wrong.

§Memory Considerations

The entire PDF is loaded into memory, so this method may use significant RAM for large documents. Consider save_to_writer() with a streaming writer for very large PDFs.

§Examples

// Get PDF bytes preserving original version
let pdf_bytes = document.save_to_bytes(None)?;

// Convert to PDF 1.5 and get bytes
let pdf_v15_bytes = document.save_to_bytes(Some(15))?;

// Use the bytes (e.g., send over HTTP)
response.set_body(pdf_bytes);

Source

pub fn save_to_writer<W: Write + 'static>( &self, writer: W, version: Option<i32>, ) -> PdfiumResult<Box<W>>

Writes this PdfiumDocument to the given writer.

This is the core implementation method that all other save methods delegate to. It accepts any type that implements the Write trait, providing maximum flexibility for different output destinations (files, network streams, in-memory buffers, etc.).

The method wraps the provided writer in a PdfiumWriter, which takes care of the low-level details of interfacing with the Pdfium C library. The PdfiumWriter:

Implements the callback interface expected by Pdfium’s C API
Handles buffering and error propagation
Manages the lifetime and ownership of the underlying writer

§Arguments

writer - Any type implementing Write + 'static. The 'static lifetime bound ensures the writer can be stored and moved around safely without lifetime issues. Common types include File, TcpStream, Cursor<Vec<u8>>, etc.
version - Optional PDF version specification:
- None: Save as copy preserving original document version and structure
- Some(version): Convert document to specified PDF version (10-20 typical range)

§Returns

PdfiumResult<Box<W>> - On success, returns the original writer wrapped in a Box. This allows you to continue using the writer after the save operation completes (e.g., to write additional data).

§PDF Version Notes

PDF versions are typically specified as integers:

10 = PDF 1.0, 11 = PDF 1.1, …, 17 = PDF 1.7, 20 = PDF 2.0
Converting to an older version may lose features not supported in that version
Converting to a newer version may enable additional features but reduce compatibility

§Examples

// Save to a file
let file = File::create("document.pdf")?;
let file = document.save_to_writer(file, None)?;

// Save to a network stream
let stream = TcpStream::connect("server:8080")?;
let stream = document.save_to_writer(stream, Some(17))?;

// Save to memory buffer
let buffer = Cursor::new(Vec::new());
let buffer = document.save_to_writer(buffer, None)?;

§Implementation Details

The method uses the Pdfium library’s C API functions:

FPDF_SaveWithVersion() - When a specific version is requested
FPDF_SaveAsCopy() - When preserving the original version

Both functions use a callback-based approach where Pdfium calls back into our PdfiumWriter to actually write the data chunks as they’re generated.

Source

pub fn page_count(&self) -> i32

Returns the number of pages in this PdfiumDocument.

Examples found in repository ?

examples/import_pages.rs (line 70)

40pub fn example_import_pages() -> PdfiumResult<()> {
41    // Create a new, empty PDF document that will serve as our destination
42    // This document starts with 0 pages and we'll add pages to it
43    let document = PdfiumDocument::new()?;
44
45    // Load the source PDF document from which we'll extract pages
46    // The second parameter (None) means we're not providing a password
47    let src_doc = PdfiumDocument::new_from_path("resources/pg1342-images-3.pdf", None)?;
48
49    // Import specific pages from the source document into our destination document
50    // Parameters breakdown:
51    // - &src_doc: Reference to the source document to import from
52    // - "12,14,30-34": String specifying which pages to import
53    //   * Page 12 (individual page)
54    //   * Page 14 (individual page)
55    //   * Pages 30-34 (range of 5 pages: 30, 31, 32, 33, 34)
56    //   * Total: 7 pages will be imported
57    // - 0: Index position where imported pages should be inserted
58    //   * 0 means insert at the beginning of the destination document
59    //   * If the destination had existing pages, imported pages would be inserted before them
60    document.pages().import(&src_doc, "12,14,30-34", 0)?;
61
62    // Save the destination document with imported pages to a new file
63    // The second parameter (None) indicates we're not specifying a version
64    document.save_to_path("pride-1.pdf", None)?;
65
66    // Verification step: reload the saved document to confirm the operation
67    let document = PdfiumDocument::new_from_path("pride-1.pdf", None)?;
68
69    // Get the total number of pages in the saved document
70    let page_count = document.page_count();
71
72    // Assert that we have exactly 7 pages as expected
73    // This confirms that all specified pages were imported correctly:
74    // 1 page (12) + 1 page (14) + 5 pages (30-34) = 7 pages total
75    assert_eq!(page_count, 7);
76
77    Ok(())
78}
79
80/// Demonstrates importing PDF pages using index-based page specification.
81///
82/// This function shows an alternative approach to page importing using
83/// explicit page indices rather than string specifications. This method
84/// provides more programmatic control and is useful when:
85/// - You have a dynamically generated list of page numbers
86/// - You need to import non-contiguous pages with complex patterns
87/// - You're working with 0-based indexing in your application logic
88///
89/// Key differences from string-based approach:
90/// - Uses 0-based indexing (first page is index 0)
91/// - Requires explicit specification of each page index
92/// - More verbose but offers precise control
93/// - Better for programmatic generation of page lists
94pub fn example_import_pages_by_index() -> PdfiumResult<()> {
95    // Create a new, empty PDF document to serve as our destination
96    let document = PdfiumDocument::new()?;
97
98    // Load the same source PDF document as in the previous example
99    // We're extracting the same pages but using a different method
100    let src_doc = PdfiumDocument::new_from_path("resources/pg1342-images-3.pdf", None)?;
101
102    // Import pages using explicit 0-based indices
103    // Parameters breakdown:
104    // - &src_doc: Reference to the source document
105    // - Some(&[11, 13, 29, 30, 31, 32, 33]): Vector of 0-based page indices
106    //   * Index 11 = Page 12 in human numbering (11 + 1 = 12)
107    //   * Index 13 = Page 14 in human numbering (13 + 1 = 14)
108    //   * Indices 29-33 = Pages 30-34 in human numbering
109    //   * Note: This matches exactly the same pages as "12,14,30-34" from the previous example
110    //   * The Some() wrapper indicates we're providing a specific list of indices
111    //   * Using None instead would import all pages from the source document
112    // - 0: Insertion position (beginning of destination document)
113    document
114        .pages()
115        .import_by_index(&src_doc, Some(&[11, 13, 29, 30, 31, 32, 33]), 0)?;
116
117    // Save the document with a different filename to distinguish from the first example
118    // Even though the content should be identical, using different names helps with testing
119    document.save_to_path("pride-2.pdf", None)?;
120
121    // Verification: reload the saved document to ensure it was created correctly
122    let document = PdfiumDocument::new_from_path("pride-2.pdf", None)?;
123
124    // Count the pages in the saved document
125    let page_count = document.page_count();
126
127    // Verify that we have the same 7 pages as the string-based method
128    // This confirms both methods produce identical results:
129    // - 1 page at index 11 (page 12)
130    // - 1 page at index 13 (page 14)
131    // - 5 pages at indices 29-33 (pages 30-34)
132    // Total: 7 pages
133    assert_eq!(page_count, 7);
134
135    Ok(())
136}

Source

pub fn page(&self, index: i32) -> PdfiumResult<PdfiumPage>

Returns the PdfiumPage indicated by index from this PdfiumDocument.

Examples found in repository ?

examples/text_extract_search.rs (line 85)

80pub fn example_search() -> PdfiumResult<()> {
81    // Load the PDF document to search within
82    let document = PdfiumDocument::new_from_path("resources/groningen.pdf", None)?;
83
84    // Get the first page (index 0) for searching
85    let page = document.page(0)?;
86
87    // Extract text objects from the page for searching
88    let text = page.text()?;
89
90    // Search for "amsterdam" with case-insensitive matching
91    // PdfiumSearchFlags::empty() means no special search flags (case-insensitive by default)
92    // The last parameter (0) is the starting position for the search
93    let search = text.find("amsterdam", PdfiumSearchFlags::empty(), 0);
94    println!("Found amsterdam {} times", search.count());
95
96    // Search for "groningen" with case-insensitive matching
97    let search = text.find("groningen", PdfiumSearchFlags::empty(), 0);
98    println!(
99        "Found groningen {} times (case insensitive)",
100        search.count()
101    );
102
103    // Search for "Groningen" with case-sensitive matching
104    // MATCH_CASE flag enforces exact case matching
105    let search = text.find("Groningen", PdfiumSearchFlags::MATCH_CASE, 0);
106    println!("Found Groningen {} times (case sensitive)", search.count());
107
108    // Perform another case-insensitive search to iterate through results
109    let search = text.find("groningen", PdfiumSearchFlags::empty(), 0);
110
111    // Iterate through each search result to extract detailed information
112    for result in search {
113        // Extract the text fragment at the found position
114        // result.index() gives the character position where the match starts
115        // result.count() gives the length of the matched text
116        let fragment = text.extract(result.index(), result.count());
117        println!(
118            "Found groningen (case insensitive) at {}, fragment = '{fragment}'",
119            result.index()
120        );
121    }
122
123    // Expected output:
124    //
125    // Found amsterdam 0 times
126    // Found groningen 5 times (case insensitive)
127    // Found Groningen 5 times (case sensitive)
128    // Found groningen (case insensitive) at 14, fragment = 'Groningen'
129    // Found groningen (case insensitive) at 232, fragment = 'Groningen'
130    // Found groningen (case insensitive) at 475, fragment = 'Groningen'
131    // Found groningen (case insensitive) at 920, fragment = 'Groningen'
132    // Found groningen (case insensitive) at 1050, fragment = 'Groningen'
133
134    Ok(())
135}

Source

pub fn pages(&self) -> PdfiumPages<'_>

Returns an iterator over the pages in this PdfiumDocument.

Examples found in repository ?

examples/import_pages.rs (line 60)

40pub fn example_import_pages() -> PdfiumResult<()> {
41    // Create a new, empty PDF document that will serve as our destination
42    // This document starts with 0 pages and we'll add pages to it
43    let document = PdfiumDocument::new()?;
44
45    // Load the source PDF document from which we'll extract pages
46    // The second parameter (None) means we're not providing a password
47    let src_doc = PdfiumDocument::new_from_path("resources/pg1342-images-3.pdf", None)?;
48
49    // Import specific pages from the source document into our destination document
50    // Parameters breakdown:
51    // - &src_doc: Reference to the source document to import from
52    // - "12,14,30-34": String specifying which pages to import
53    //   * Page 12 (individual page)
54    //   * Page 14 (individual page)
55    //   * Pages 30-34 (range of 5 pages: 30, 31, 32, 33, 34)
56    //   * Total: 7 pages will be imported
57    // - 0: Index position where imported pages should be inserted
58    //   * 0 means insert at the beginning of the destination document
59    //   * If the destination had existing pages, imported pages would be inserted before them
60    document.pages().import(&src_doc, "12,14,30-34", 0)?;
61
62    // Save the destination document with imported pages to a new file
63    // The second parameter (None) indicates we're not specifying a version
64    document.save_to_path("pride-1.pdf", None)?;
65
66    // Verification step: reload the saved document to confirm the operation
67    let document = PdfiumDocument::new_from_path("pride-1.pdf", None)?;
68
69    // Get the total number of pages in the saved document
70    let page_count = document.page_count();
71
72    // Assert that we have exactly 7 pages as expected
73    // This confirms that all specified pages were imported correctly:
74    // 1 page (12) + 1 page (14) + 5 pages (30-34) = 7 pages total
75    assert_eq!(page_count, 7);
76
77    Ok(())
78}
79
80/// Demonstrates importing PDF pages using index-based page specification.
81///
82/// This function shows an alternative approach to page importing using
83/// explicit page indices rather than string specifications. This method
84/// provides more programmatic control and is useful when:
85/// - You have a dynamically generated list of page numbers
86/// - You need to import non-contiguous pages with complex patterns
87/// - You're working with 0-based indexing in your application logic
88///
89/// Key differences from string-based approach:
90/// - Uses 0-based indexing (first page is index 0)
91/// - Requires explicit specification of each page index
92/// - More verbose but offers precise control
93/// - Better for programmatic generation of page lists
94pub fn example_import_pages_by_index() -> PdfiumResult<()> {
95    // Create a new, empty PDF document to serve as our destination
96    let document = PdfiumDocument::new()?;
97
98    // Load the same source PDF document as in the previous example
99    // We're extracting the same pages but using a different method
100    let src_doc = PdfiumDocument::new_from_path("resources/pg1342-images-3.pdf", None)?;
101
102    // Import pages using explicit 0-based indices
103    // Parameters breakdown:
104    // - &src_doc: Reference to the source document
105    // - Some(&[11, 13, 29, 30, 31, 32, 33]): Vector of 0-based page indices
106    //   * Index 11 = Page 12 in human numbering (11 + 1 = 12)
107    //   * Index 13 = Page 14 in human numbering (13 + 1 = 14)
108    //   * Indices 29-33 = Pages 30-34 in human numbering
109    //   * Note: This matches exactly the same pages as "12,14,30-34" from the previous example
110    //   * The Some() wrapper indicates we're providing a specific list of indices
111    //   * Using None instead would import all pages from the source document
112    // - 0: Insertion position (beginning of destination document)
113    document
114        .pages()
115        .import_by_index(&src_doc, Some(&[11, 13, 29, 30, 31, 32, 33]), 0)?;
116
117    // Save the document with a different filename to distinguish from the first example
118    // Even though the content should be identical, using different names helps with testing
119    document.save_to_path("pride-2.pdf", None)?;
120
121    // Verification: reload the saved document to ensure it was created correctly
122    let document = PdfiumDocument::new_from_path("pride-2.pdf", None)?;
123
124    // Count the pages in the saved document
125    let page_count = document.page_count();
126
127    // Verify that we have the same 7 pages as the string-based method
128    // This confirms both methods produce identical results:
129    // - 1 page at index 11 (page 12)
130    // - 1 page at index 13 (page 14)
131    // - 5 pages at indices 29-33 (pages 30-34)
132    // Total: 7 pages
133    assert_eq!(page_count, 7);
134
135    Ok(())
136}

More examples

Hide additional examples

examples/text_extract_search.rs (line 30)

23pub fn example_extract_text() -> PdfiumResult<()> {
24    // Load the PDF document from the specified file path
25    // The second parameter (None) indicates no password is required
26    let document = PdfiumDocument::new_from_path("resources/chapter1.pdf", None)?;
27
28    // Iterate through all pages in the document
29    // enumerate() provides both the index and the page object
30    for (index, page) in document.pages().enumerate() {
31        // Extract the full text content from the current page
32        // The ?. operators handle potential errors at each step:
33        // - page? ensures the page loaded successfully
34        // - .text()? extracts text objects from the page
35        // - .full() gets the complete text content as a string
36        let text = page?.text()?.full();
37
38        // Print formatted output for each page
39        println!("Page {}", index + 1); // Pages are 1-indexed for user display
40        println!("------");
41        println!("{text}");
42        println!() // Empty line for separation between pages
43    }
44
45    // Expected output:
46    //
47    // Page 1
48    // ------
49    //
50    // Page 2
51    // ------
52    // Ruskin
53    // House.
54    // 156. Charing
55    // Cross Road.
56    // London
57    // George Allen.
58    //
59    // Page 3
60    // ------
61    //
62    // Page 4
63    // ------
64    // I
65    // Chapter I.
66    // T is a truth universally acknowledged, that a single man in possession of a good
67    // fortune must be in want of a wife.
68    // However little known the feelings or views of such a man may be on his first
69    // entering a neighbourhood, this truth is so well fixed in the minds of the surrounding
70    // families, that he is considered as the rightful property of some one or other of their
71    // daughters.
72    // “My dear Mr. Bennet,” said his lady to him one day, “have you heard that
73    // Netherfield Park is let at last?”
74    // ...
75
76    Ok(())
77}

examples/export_pages.rs (line 45)

34pub fn example_export_pages_to_images() -> PdfiumResult<()> {
35    // Load the PDF document from the specified file path
36    // Parameters:
37    // - "resources/groningen.pdf": Path to the PDF file (relative to current working directory)
38    // - None: No password required for this PDF (use Some("password") if needed)
39    let document = PdfiumDocument::new_from_path("resources/groningen.pdf", None)?;
40
41    // Iterate through all pages in the document
42    // document.pages() returns an iterator over all pages
43    // enumerate() adds an index counter starting from 0
44    // This gives us both the page object and its 0-based index
45    for (index, page) in document.pages().enumerate() {
46        // Render the current page as a bitmap image
47        // This is where the PDF content gets converted to a raster image
48        //
49        // In the configuration we only specify the height in pixels. The width will be calculated
50        // automatically to maintain aspect ratio.
51        let config = PdfiumRenderConfig::new().with_height(1080);
52        let bitmap = page?.render(&config)?;
53
54        // Verify that the bitmap was rendered at the requested height
55        // This assertion ensures the rendering process worked as expected
56        // If this fails, it indicates a bug in the rendering logic
57        assert_eq!(bitmap.height(), 1080);
58
59        // Generate a unique filename for this page
60        // Format: "groningen-page-{page_number}.jpg"
61        // - index + 1 converts from 0-based index to 1-based page numbers
62        //   * Page 0 becomes "groningen-page-1.jpg"
63        //   * Page 1 becomes "groningen-page-2.jpg", etc.
64        // - The .jpg extension indicates JPEG format will be used
65        let filename = format!("groningen-page-{}.jpg", index + 1);
66
67        // Save the rendered bitmap to disk as a JPEG image
68        // Parameters:
69        // - &filename: Reference to the generated filename string
70        // - image::ImageFormat::Jpeg: Specifies JPEG compression format
71        //   * Alternative format: Png (lossless)
72        //   * JPEG provides good compression but is lossy (some quality loss)
73        //
74        // The save operation handles:
75        // - Converting from BGRA format to JPEG-compatible format
76        // - Applying JPEG compression
77        // - Writing the file to disk
78        bitmap.save(&filename, image::ImageFormat::Jpeg)?;
79
80        // Note: No explicit cleanup needed - Rust's ownership system automatically
81        // deallocates the bitmap memory when it goes out of scope at the end of this iteration
82    }
83
84    // Return success - all pages have been successfully exported
85    Ok(())
86}