ParserConfig

Struct ParserConfig 

Source
/// Configuration options for the PPTX parser.
///
/// Create an instance with `ParserConfig::builder()`; fields that are not set
/// explicitly fall back to their defaults.
pub struct ParserConfig {
    /// Whether images are extracted from slides. If `false`, images cannot be
    /// extracted manually either.
    pub extract_images: bool,
    /// Whether images are compressed before encoding. Also affects manually
    /// extracted images.
    pub compress_images: bool,
    /// Compression level (0-100); higher values retain more detail but
    /// increase file size.
    pub quality: u8,
    /// Determines how images are handled during content export.
    pub image_handling_mode: ImageHandlingMode,
    /// Output directory path for `ImageHandlingMode::Save` (mandatory for the
    /// saving mode).
    pub image_output_path: Option<PathBuf>,
    /// Whether the slide number comment (`<!-- Slide [n] -->`) is included.
    pub include_slide_comment: bool,
}
Expand description

Configuration options for the PPTX parser.

Use ParserConfig::builder() to create a configuration instance. This allows you to customize only the desired fields while falling back to sensible defaults for the rest.

§Configuration Options

| Parameter | Type | Default | Description |
| --- | --- | --- | --- |
| `extract_images` | `bool` | `true` | Whether images are extracted from slides. If `false`, images cannot be extracted manually either |
| `compress_images` | `bool` | `true` | Whether images are compressed before encoding. Also affects manually extracted images |
| `quality` | `u8` | `80` | Compression level (0-100); higher values retain more detail but increase file size |
| `image_handling_mode` | `ImageHandlingMode` | `InMarkdown` | Determines how images are handled during content export |
| `image_output_path` | `Option<PathBuf>` | `None` | Output directory path for `ImageHandlingMode::Save` (mandatory for the saving mode) |
| `include_slide_comment` | `bool` | `true` | Whether the slide number comment is included (`<!-- Slide [n] -->`) |

§Example

use std::path::PathBuf;
use pptx_to_md::{ImageHandlingMode, ParserConfig};

let config = ParserConfig::builder()
    .extract_images(true)
    .compress_images(true)
    .quality(75)
    .image_handling_mode(ImageHandlingMode::Save)
    .image_output_path(PathBuf::from("/path/to/output/dir/"))
    .build();

Fields§

§extract_images: bool§compress_images: bool§quality: u8§image_handling_mode: ImageHandlingMode§image_output_path: Option<PathBuf>§include_slide_comment: bool

Implementations§

Source§

impl ParserConfig

Source

pub fn builder() -> ParserConfigBuilder

Examples found in repository?
examples/save_images.rs (line 26)
13fn main() -> Result<()> {
14    // Get the PPTX file path from command line arguments and provide the mandatory output path
15    let args: Vec<String> = env::args().collect();
16    let pptx_path = if args.len() > 1 {
17        &args[1]
18    } else {
19        eprintln!("Usage: cargo run --example save_images <path/to/presentation.pptx>");
20        return Ok(());
21    };
22
23    println!("Processing PPTX file: {}", pptx_path);
24
25    // Use the config builder to build your config
26    let config = ParserConfig::builder()
27        .extract_images(true)
28        .compress_images(true)
29        .quality(75)
30        .image_handling_mode(ImageHandlingMode::Save)
31        .image_output_path(PathBuf::from("C:/Users/nilsk/Downloads/extracted_images"))
32        .build();
33
34    // Open the PPTX file
35    let mut container = PptxContainer::open(Path::new(pptx_path), config)?;
36
37    // Parse all slides
38    let slides = container.parse_all()?;
39
40    println!("Found {} slides", slides.len());
41
42    // create a new Markdown file
43    let mut md_file = File::create("output.md")?;
44
45    // Convert each slide to Markdown and save the images automatically
46    for slide in slides {
47        if let Some(md_content) = slide.convert_to_md() {
48            writeln!(md_file, "{}", md_content).expect("Couldn't write to file");
49        }
50    }
51
52    println!("All slides converted successfully!");
53
54    Ok(())
55}
More examples
Hide additional examples
examples/memory_efficient_streaming.rs (line 26)
13fn main() -> Result<()> {
14    // Get the PPTX file path from command line arguments
15    let args: Vec<String> = env::args().collect();
16    let pptx_path = if args.len() > 1 {
17        &args[1]
18    } else {
19        eprintln!("Usage: cargo run --example memory_efficient_streaming <path/to/presentation.pptx>");
20        return Ok(());
21    };
22
23    println!("Processing PPTX file: {}", pptx_path);
24
25    // Use the config builder to build your config
26    let config = ParserConfig::builder()
27        .extract_images(true)
28        .build();
29    
30    // Open the PPTX file with the streaming API
31    let mut streamer = PptxContainer::open(Path::new(pptx_path), config)?;
32    
33    // Create output directory
34    let output_dir = "output_streaming";
35    fs::create_dir_all(output_dir)?;
36
37    // Process slides one by one using the iterator
38    for slide_result in streamer.iter_slides() {
39        match slide_result {
40            Ok(slide) => {
41                println!("Processing slide {} ({} elements)", slide.slide_number, slide.elements.len());
42
43                if let Some(md_content) = slide.convert_to_md() {
44                    let output_path = format!("{}/slide_{}.md", output_dir, slide.slide_number);
45                    fs::write(&output_path, md_content)?;
46                    println!("Saved slide {} to {}", slide.slide_number, output_path);
47                }
48            },
49            Err(e) => {
50                eprintln!("Error processing slide: {:?}", e);
51            }
52        }
53    }
54
55    println!("All slides processed successfully!");
56
57    Ok(())
58}
examples/basic_usage.rs (line 33)
13fn main() -> Result<()> {
14    // Get the PPTX file path from command line arguments
15    let args: Vec<String> = env::args().collect();
16    let pptx_path = if args.len() > 1 {
17        &args[1]
18    } else {
19        eprintln!("Usage: cargo run --example basic_usage <path/to/presentation.pptx> <extract_images>\ncargo run --example basic_usage sample.pptx true");
20        return Ok(());
21    };
22    
23    // Read the optional extract_images flag: "false", "False" or "0" disables extraction; anything else (or no flag) enables it
24    let extract_images = if args.len() > 2 {
25        !(args[2] == "false" || args[2] == "False" || args[2] == "0")
26    } else {
27        true
28    };
29    
30    println!("Processing PPTX file: {}", pptx_path);
31
32    // Use the config builder to build your config
33    let config = ParserConfig::builder()
34        .extract_images(extract_images)
35        .compress_images(true)
36        .quality(75)
37        .image_handling_mode(ImageHandlingMode::InMarkdown)
38        .include_slide_comment(true)
39        .build();
40    
41    // Open the PPTX file
42    let mut container = PptxContainer::open(Path::new(pptx_path), config)?;
43
44    // Parse all slides
45    let slides = container.parse_all()?;
46
47    println!("Found {} slides", slides.len());
48
49    // create a new Markdown file
50    let mut md_file = File::create("output.md")?;
51
52    // Convert each slide to Markdown and save
53    for slide in slides {
54        if let Some(md_content) = slide.convert_to_md() {
55            println!("{}", md_content);
56            writeln!(md_file, "{}", md_content).expect("Couldn't write to file");
57        }
58    }
59
60    println!("All slides converted successfully!");
61
62    Ok(())
63}
examples/slide_elements.rs (line 25)
12fn main() -> Result<()> {
13    // Get the PPTX file path from command line arguments
14    let args: Vec<String> = env::args().collect();
15    let pptx_path = if args.len() > 1 {
16        &args[1]
17    } else {
18        eprintln!("Usage: cargo run --example slide_elements <path/to/presentation.pptx>");
19        return Ok(());
20    };
21
22    println!("Processing PPTX file: {}", pptx_path);
23
24    // Use the config builder to build your config
25    let config = ParserConfig::builder()
26        .extract_images(true)
27        .build();
28    
29    // Open the PPTX file with the streaming API
30    let mut streamer = PptxContainer::open(Path::new(pptx_path), config)?;
31
32    // Process slides one by one using the iterator
33    for slide_result in streamer.iter_slides() {
34        match slide_result {
35            Ok(slide) => {
36                println!("Processing slide {} ({} elements)", slide.slide_number, slide.elements.len());
37
38                // iterate over each slide element and match them to add custom logic
39                for element in &slide.elements {
40                    match element {
41                        SlideElement::Text(text, pos) => { println!("{:?}\t{:?}\n", text, pos) }
42                        SlideElement::Table(table, pos) => { println!("{:?}\t{:?}\n", table, pos) }
43                        SlideElement::Image(image, pos) => { println!("{:?}\t{:?}\n", image, pos) }
44                        SlideElement::List(list, pos) => { println!("{:?}\t{:?}\n", list, pos) }
45                        SlideElement::Unknown => { println!("An Unknown element was found.\n") }
46                    }
47                }
48            },
49            Err(e) => {
50                eprintln!("Error processing slide: {:?}", e);
51            }
52        }
53    }
54
55    println!("All slides processed successfully!");
56
57    Ok(())
58}
examples/manual_image_extraction.rs (line 28)
15fn main() -> Result<()> {
16    // Get the PPTX file path from command line arguments
17    let args: Vec<String> = env::args().collect();
18    let pptx_path = if args.len() > 1 {
19        &args[1]
20    } else {
21        eprintln!("Usage: cargo run --example manual_image_extraction <path/to/presentation.pptx>");
22        return Ok(());
23    };
24
25    println!("Processing PPTX file: {}", pptx_path);
26
27    // Use the config builder to build your config
28    let config = ParserConfig::builder()
29        .extract_images(true)
30        .compress_images(true)
31        .quality(75)
32        .image_handling_mode(ImageHandlingMode::Manually)
33        .build();
34
35    // Open the PPTX file
36    let mut container = PptxContainer::open(Path::new(pptx_path), config)?;
37
38    // Parse all slides
39    let slides = container.parse_all()?;
40
41    println!("Found {} slides", slides.len());
42
43    // create a new Markdown file
44    let mut md_file = File::create("output.md")?;
45
46    // Create output directory
47    let output_dir = "extracted_images";
48    fs::create_dir_all(output_dir)?;
49
50    // Running counter used to build a unique file name for each extracted image
51    let mut image_count = 1;
52
53    // Convert each slide to Markdown and save
54    for slide in slides {
55        if let Some(md_content) = slide.convert_to_md() {
56            writeln!(md_file, "{}", md_content).expect("Couldn't write to file");
57        }
58        
59        // Manually load the base64 encoded image strings from the slide
60        if let Some(images) = slide.load_images_manually() {
61            for image in images {
62                
63                // Decode the base64 strings back to raw image data
64                let image_data = general_purpose::STANDARD.decode(image.base64_content.clone()).unwrap();
65
66                // Use the image's own extension if the image is not compressed; otherwise it is always `.jpg`
67                let ext = slide.config.compress_images
68                    .then(|| "jpg".to_string())
69                    .unwrap_or_else(|| slide.get_image_extension(&image.img_ref.target.clone()));
70
71                // Construct a unique file name
72                let file_name = format!("slide{}_image{}_{}", slide.slide_number, image_count, &image.img_ref.id);
73                
74                // Save the image
75                let output_path = format!(
76                    "{}/{}.{}",
77                    output_dir,
78                    &file_name,
79                    ext
80                );
81                fs::write(&output_path, image_data)?;
82                println!("Saved image to {}", output_path);
83
84                // Write the image data into the Markdown file
85                writeln!(md_file, "![{}](data:image/{};base64,{})", file_name, ext, image.base64_content).expect("Couldn't write to file");
86                
87                image_count += 1;
88            }
89        }
90    }
91
92    println!("All slides converted successfully!");
93
94    Ok(())
95}
examples/performance_test.rs (line 100)
70fn main() -> Result<()> {
71    // Get the PPTX file path and optional iteration count from command line arguments
72    let args: Vec<String> = env::args().collect();
73    let pptx_path = if args.len() > 1 {
74        &args[1]
75    } else {
76        eprintln!("Usage: cargo run --example performance_test <path/to/presentation.pptx> [iterations]");
77        return Ok(());
78    };
79
80    let iterations = if args.len() > 2 {
81        args[2].parse().unwrap_or(5)
82    } else {
83        10 // Default to 10 iterations
84    };
85
86    println!("Performance testing with {} iterations on: {}", iterations, pptx_path);
87
88    
89    
90    // =========== Single-threaded Approach ===========
91    let mut single_thread_bench = Benchmark::new("Single-threaded parsing");
92
93    let mut total_slides = 0;
94
95    for i in 0..iterations {
96        println!("\nIteration {} (Single-threaded)", i + 1);
97
98        // Measure container creation
99        let mut container = single_thread_bench.measure(|| {
100            let config = ParserConfig::builder()
101                .extract_images(true)
102                .build();
103            PptxContainer::open(Path::new(pptx_path), config).expect("Failed to open PPTX")
104        });
105
106        println!("  Found {} slides in the presentation", container.slide_count);
107
108        // Measure parsing
109        let slides = single_thread_bench.measure(|| {
110            container.parse_all().expect("Failed to parse slides")
111        });
112
113        // Measure conversion
114        let _md_content = single_thread_bench.measure(|| {
115            slides.iter()
116                .filter_map(|slide| slide.convert_to_md())
117                .collect::<Vec<String>>()
118        });
119
120        total_slides += slides.len();
121    }
122
123    single_thread_bench.report();
124    println!("Average slides per presentation: {}", total_slides / iterations);
125
126
127
128    // =========== Single-threaded Streamed Approach ===========
129    let mut single_thread_streamed_bench = Benchmark::new("Single-threaded streamed parsing");
130
131    total_slides = 0;
132
133    for i in 0..iterations {
134        println!("\nIteration {} (Single-threaded streamed)", i + 1);
135
136        // Measure container creation
137        let mut container = single_thread_streamed_bench.measure(|| {
138            let config = ParserConfig::builder()
139                .extract_images(true)
140                .build();
141            PptxContainer::open(Path::new(pptx_path), config).expect("Failed to open PPTX")
142        });
143
144        println!("  Found {} slides in the presentation", container.slide_count);
145
146        // Count the slides in advance for the statistical evaluation
147        let expected_slides = container.slide_count;
148
149        // Measure slide processing (including parsing and conversion)
150        let slides_processed = single_thread_streamed_bench.measure(|| {
151            let mut processed = 0;
152
153            // Process slides one by one using the iterator
154            for slide_result in container.iter_slides() {
155                match slide_result {
156                    Ok(slide) => {
157                        // Convert the slide to Markdown
158                        let _md_content = slide.convert_to_md();
159                        processed += 1;
160                    },
161                    Err(e) => {
162                        eprintln!("Error processing slide: {:?}", e);
163                    }
164                }
165            }
166
167            processed
168        });
169
170        println!("  Processed {} out of {} slides", slides_processed, expected_slides);
171        total_slides += slides_processed;
172    }
173
174    single_thread_streamed_bench.report();
175    println!("Average slides per presentation: {}", total_slides / iterations);
176
177
178
179    // =========== Optimized Multi-threaded Approach ===========
180    let mut optimized_multi_thread_bench = Benchmark::new("Optimized Multi-threaded parsing");
181
182    total_slides = 0;
183
184    for i in 0..iterations {
185        println!("\nIteration {} (Optimized Multi-threaded)", i + 1);
186
187        // Open the container with the desired configuration
188        let mut container = optimized_multi_thread_bench.measure(|| {
189            let config = ParserConfig::builder()
190                .extract_images(true)
191                .build();
192            PptxContainer::open(Path::new(pptx_path), config).expect("Failed to open PPTX")
193        });
194
195        println!("  Found {} slides in the presentation", container.slide_count);
196
197        // Use the new optimized multi-threading method
198        let slides = optimized_multi_thread_bench.measure(|| {
199            container.parse_all_multi_threaded().expect("Failed to parse slides")
200        });
201
202        println!("  Successfully processed {} slides", slides.len());
203
204        // Convert to Markdown in parallel (remains unchanged)
205        let _md_content = optimized_multi_thread_bench.measure(|| {
206            slides.par_iter()
207                .filter_map(|slide| slide.convert_to_md())
208                .collect::<Vec<String>>()
209        });
210
211        total_slides += slides.len();
212    }
213
214    optimized_multi_thread_bench.report();
215    println!("Average slides per presentation: {}", total_slides / iterations);
216
217    // =========== Performance Comparison ===========
218    if !single_thread_bench.results.is_empty() &&
219        !single_thread_streamed_bench.results.is_empty() &&
220        !optimized_multi_thread_bench.results.is_empty() {
221
222        let single_avg: Duration = single_thread_bench.results.iter().sum::<Duration>() /
223            single_thread_bench.results.len() as u32;
224        let single_streamed_avg: Duration = single_thread_streamed_bench.results.iter().sum::<Duration>() /
225            single_thread_streamed_bench.results.len() as u32;
226        let optimized_multi_avg: Duration = optimized_multi_thread_bench.results.iter().sum::<Duration>() /
227            optimized_multi_thread_bench.results.len() as u32;
228
229        println!("\nPerformance Comparison");
230        println!("=====================");
231        println!("Single-threaded average: {:?}", single_avg);
232        println!("Single-threaded streaming average: {:?}", single_streamed_avg);
233        println!("Optimized multi-threaded average: {:?}", optimized_multi_avg);
234
235        // Compare single-threaded vs single-threaded streaming
236        if single_avg > single_streamed_avg {
237            let speedup = single_avg.as_secs_f64() / single_streamed_avg.as_secs_f64();
238            println!("Single-threaded streaming is {:.2}x faster than single-threaded", speedup);
239        } else {
240            let slowdown = single_streamed_avg.as_secs_f64() / single_avg.as_secs_f64();
241            println!("Single-threaded streaming is {:.2}x slower than single-threaded", slowdown);
242        }
243
244        // Compare single-threaded vs optimized multithreaded
245        if single_avg > optimized_multi_avg {
246            let speedup = single_avg.as_secs_f64() / optimized_multi_avg.as_secs_f64();
247            println!("Optimized multi-threaded is {:.2}x faster than single-threaded", speedup);
248        } else {
249            let slowdown = optimized_multi_avg.as_secs_f64() / single_avg.as_secs_f64();
250            println!("Optimized multi-threaded is {:.2}x slower than single-threaded", slowdown);
251        }
252
253        // Compare single-threaded streaming vs optimized multithreaded
254        if single_streamed_avg > optimized_multi_avg {
255            let speedup = single_streamed_avg.as_secs_f64() / optimized_multi_avg.as_secs_f64();
256            println!("Optimized multi-threaded is {:.2}x faster than single-threaded streaming", speedup);
257        } else {
258            let slowdown = optimized_multi_avg.as_secs_f64() / single_streamed_avg.as_secs_f64();
259            println!("Optimized multi-threaded is {:.2}x slower than single-threaded streaming", slowdown);
260        }
261
262        // Determine the overall fastest approach
263        let fastest_approach = if single_avg <= single_streamed_avg && single_avg <= optimized_multi_avg {
264            "Single-threaded"
265        } else if single_streamed_avg <= single_avg && single_streamed_avg <= optimized_multi_avg {
266            "Single-threaded streaming"
267        } else {
268            "Optimized multi-threaded"
269        };
270
271        println!("\nOverall result: {} approach is the fastest for this workload.", fastest_approach);
272    }
273
274    Ok(())
275}

Trait Implementations§

Source§

impl Clone for ParserConfig

Source§

fn clone(&self) -> ParserConfig

Returns a duplicate of the value. Read more
1.0.0 · Source§

fn clone_from(&mut self, source: &Self)

Performs copy-assignment from source. Read more
Source§

impl Debug for ParserConfig

Source§

fn fmt(&self, f: &mut Formatter<'_>) -> Result

Formats the value using the given formatter. Read more
Source§

impl Default for ParserConfig

Source§

fn default() -> Self

Returns the “default value” for a type. Read more

Auto Trait Implementations§

Blanket Implementations§

Source§

impl<T> Any for T
where T: 'static + ?Sized,

Source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
Source§

impl<T> Borrow<T> for T
where T: ?Sized,

Source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
Source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
Source§

impl<T> CloneToUninit for T
where T: Clone,

Source§

unsafe fn clone_to_uninit(&self, dest: *mut u8)

🔬This is a nightly-only experimental API. (clone_to_uninit)
Performs copy-assignment from self to dest. Read more
Source§

impl<T> From<T> for T

Source§

fn from(t: T) -> T

Returns the argument unchanged.

Source§

impl<T, U> Into<U> for T
where U: From<T>,

Source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source§

impl<T> IntoEither for T

Source§

fn into_either(self, into_left: bool) -> Either<Self, Self>

Converts self into a Left variant of Either<Self, Self> if into_left is true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
where F: FnOnce(&Self) -> bool,

Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§

impl<T> Pointable for T

Source§

const ALIGN: usize

The alignment of pointer.
Source§

type Init = T

The type for initializers.
Source§

unsafe fn init(init: <T as Pointable>::Init) -> usize

Initializes a with the given initializer. Read more
Source§

unsafe fn deref<'a>(ptr: usize) -> &'a T

Dereferences the given pointer. Read more
Source§

unsafe fn deref_mut<'a>(ptr: usize) -> &'a mut T

Mutably dereferences the given pointer. Read more
Source§

unsafe fn drop(ptr: usize)

Drops the object pointed to by the given pointer. Read more
Source§

impl<R, P> ReadPrimitive<R> for P
where R: Read + ReadEndian<P>, P: Default,

Source§

fn read_from_little_endian(read: &mut R) -> Result<Self, Error>

Read this value from the supplied reader. Same as ReadEndian::read_from_little_endian().
Source§

fn read_from_big_endian(read: &mut R) -> Result<Self, Error>

Read this value from the supplied reader. Same as ReadEndian::read_from_big_endian().
Source§

fn read_from_native_endian(read: &mut R) -> Result<Self, Error>

Read this value from the supplied reader. Same as ReadEndian::read_from_native_endian().
Source§

impl<T> Same for T

Source§

type Output = T

Should always be Self
Source§

impl<T> ToOwned for T
where T: Clone,

Source§

type Owned = T

The resulting type after obtaining ownership.
Source§

fn to_owned(&self) -> T

Creates owned data from borrowed data, usually by cloning. Read more
Source§

fn clone_into(&self, target: &mut T)

Uses borrowed data to replace owned data, usually by cloning. Read more
Source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source§

type Error = Infallible

The type returned in the event of a conversion error.
Source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
Source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
Source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.