manual_image_extraction/
manual_image_extraction.rs

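//! Manual image extraction example for the pptx-to-md crate
//!
//! This example demonstrates how to open a PPTX file, convert all slides to Markdown,
//! and handle the slide images manually: each base64-encoded image is decoded, saved
//! to the `extracted_images` directory, and embedded in `output.md` as a data URI.
//!
//! Run with: cargo run --example manual_image_extraction <path/to/your/presentation.pptx>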
6
7use pptx_to_md::{PptxContainer, Result, ParserConfig, ImageHandlingMode};
8use std::{env, fs};
9use std::fs::File;
10use std::io::Write;
11use std::path::Path;
12use base64::Engine;
13use base64::engine::general_purpose;
14
15fn main() -> Result<()> {
16    // Get the PPTX file path from command line arguments
17    let args: Vec<String> = env::args().collect();
18    let pptx_path = if args.len() > 1 {
19        &args[1]
20    } else {
21        eprintln!("Usage: cargo run --example manual_image_extraction <path/to/presentation.pptx>");
22        return Ok(());
23    };
24
25    println!("Processing PPTX file: {}", pptx_path);
26
27    // Use the config builder to build your config
28    let config = ParserConfig::builder()
29        .extract_images(true)
30        .compress_images(true)
31        .quality(75)
32        .image_handling_mode(ImageHandlingMode::Manually)
33        .build();
34
35    // Open the PPTX file
36    let mut container = PptxContainer::open(Path::new(pptx_path), config)?;
37
38    // Parse all slides
39    let slides = container.parse_all()?;
40
41    println!("Found {} slides", slides.len());
42
43    // create a new Markdown file
44    let mut md_file = File::create("output.md")?;
45
46    // Create output directory
47    let output_dir = "extracted_images";
48    fs::create_dir_all(output_dir)?;
49
50    // Process slides one by one using the iterator
51    let mut image_count = 1;
52
53    // Convert each slide to Markdown and save
54    for slide in slides {
55        if let Some(md_content) = slide.convert_to_md() {
56            writeln!(md_file, "{}", md_content).expect("Couldn't write to file");
57        }
58        
59        // Manually load the base64 encoded image strings from the slide
60        if let Some(images) = slide.load_images_manually() {
61            for image in images {
62                
63                // Decode the base64 strings back to raw image data
64                let image_data = general_purpose::STANDARD.decode(image.base64_content.clone()).unwrap();
65
66                // Extract image extension if the image is not compressed, otherwise its always `.jpg`
67                let ext = slide.config.compress_images
68                    .then(|| "jpg".to_string())
69                    .unwrap_or_else(|| slide.get_image_extension(&image.img_ref.target.clone()));
70
71                // Construct a unique file name
72                let file_name = format!("slide{}_image{}_{}", slide.slide_number, image_count, &image.img_ref.id);
73                
74                // Save the image
75                let output_path = format!(
76                    "{}/{}.{}",
77                    output_dir,
78                    &file_name,
79                    ext
80                );
81                fs::write(&output_path, image_data)?;
82                println!("Saved image to {}", output_path);
83
84                // Write the image data into the Markdown file
85                writeln!(md_file, "![{}](data:image/{};base64,{})", file_name, ext, image.base64_content).expect("Couldn't write to file");
86                
87                image_count += 1;
88            }
89        }
90    }
91
92    println!("All slides converted successfully!");
93
94    Ok(())
95}