pub struct ParserConfig {
pub extract_images: bool,
pub compress_images: bool,
pub quality: u8,
pub image_handling_mode: ImageHandlingMode,
pub image_output_path: Option<PathBuf>,
pub include_slide_comment: bool,
}Expand description
Configuration options for the PPTX parser.
Use ParserConfig::builder() to create a configuration instance.
This allows you to customize only the desired fields while falling back to sensible defaults for the rest.
§Configuration Options
| Parameter | Type | Default | Description |
|---|---|---|---|
| extract_images | bool | true | Whether images are extracted from slides or not. If false, images cannot be extracted manually either |
| compress_images | bool | true | Whether images are compressed before encoding or not. Affects manually extracted images too |
| quality | u8 | 80 | Compression level (0-100); higher values retain more detail but increase file size |
| image_handling_mode | ImageHandlingMode | InMarkdown | Determines how images are handled during content export |
| image_output_path | Option<PathBuf> | None | Output directory path for ImageHandlingMode::Save (mandatory for the saving mode) |
| include_slide_comment | bool | true | Whether the slide number comment is included or not (<!-- Slide [n] -->) |
§Example
use std::path::PathBuf;
use pptx_to_md::{ImageHandlingMode, ParserConfig};
let config = ParserConfig::builder()
.extract_images(true)
.compress_images(true)
.quality(75)
.image_handling_mode(ImageHandlingMode::Save)
.image_output_path(PathBuf::from("/path/to/output/dir/"))
.build();Fields§
§extract_images: bool§compress_images: bool§quality: u8§image_handling_mode: ImageHandlingMode§image_output_path: Option<PathBuf>§include_slide_comment: boolImplementations§
Source§impl ParserConfig
impl ParserConfig
Sourcepub fn builder() -> ParserConfigBuilder
pub fn builder() -> ParserConfigBuilder
Examples found in repository?
examples/save_images.rs (line 26)
13fn main() -> Result<()> {
14 // Get the PPTX file path from command line arguments and provide the mandatory output path
15 let args: Vec<String> = env::args().collect();
16 let pptx_path = if args.len() > 1 {
17 &args[1]
18 } else {
19 eprintln!("Usage: cargo run --example save_images <path/to/presentation.pptx>");
20 return Ok(());
21 };
22
23 println!("Processing PPTX file: {}", pptx_path);
24
25 // Use the config builder to build your config
26 let config = ParserConfig::builder()
27 .extract_images(true)
28 .compress_images(true)
29 .quality(75)
30 .image_handling_mode(ImageHandlingMode::Save)
31 .image_output_path(PathBuf::from("C:/Users/nilsk/Downloads/extracted_images"))
32 .build();
33
34 // Open the PPTX file
35 let mut container = PptxContainer::open(Path::new(pptx_path), config)?;
36
37 // Parse all slides
38 let slides = container.parse_all()?;
39
40 println!("Found {} slides", slides.len());
41
42 // create a new Markdown file
43 let mut md_file = File::create("output.md")?;
44
45 // Convert each slide to Markdown and save the images automatically
46 for slide in slides {
47 if let Some(md_content) = slide.convert_to_md() {
48 writeln!(md_file, "{}", md_content).expect("Couldn't write to file");
49 }
50 }
51
52 println!("All slides converted successfully!");
53
54 Ok(())
55}More examples
examples/memory_efficient_streaming.rs (line 26)
13fn main() -> Result<()> {
14 // Get the PPTX file path from command line arguments
15 let args: Vec<String> = env::args().collect();
16 let pptx_path = if args.len() > 1 {
17 &args[1]
18 } else {
19 eprintln!("Usage: cargo run --example memory_efficient_streaming <path/to/presentation.pptx>");
20 return Ok(());
21 };
22
23 println!("Processing PPTX file: {}", pptx_path);
24
25 // Use the config builder to build your config
26 let config = ParserConfig::builder()
27 .extract_images(true)
28 .build();
29
30 // Open the PPTX file with the streaming API
31 let mut streamer = PptxContainer::open(Path::new(pptx_path), config)?;
32
33 // Create output directory
34 let output_dir = "output_streaming";
35 fs::create_dir_all(output_dir)?;
36
37 // Process slides one by one using the iterator
38 for slide_result in streamer.iter_slides() {
39 match slide_result {
40 Ok(slide) => {
41 println!("Processing slide {} ({} elements)", slide.slide_number, slide.elements.len());
42
43 if let Some(md_content) = slide.convert_to_md() {
44 let output_path = format!("{}/slide_{}.md", output_dir, slide.slide_number);
45 fs::write(&output_path, md_content)?;
46 println!("Saved slide {} to {}", slide.slide_number, output_path);
47 }
48 },
49 Err(e) => {
50 eprintln!("Error processing slide: {:?}", e);
51 }
52 }
53 }
54
55 println!("All slides processed successfully!");
56
57 Ok(())
58}examples/basic_usage.rs (line 33)
13fn main() -> Result<()> {
14 // Get the PPTX file path from command line arguments
15 let args: Vec<String> = env::args().collect();
16 let pptx_path = if args.len() > 1 {
17 &args[1]
18 } else {
19 eprintln!("Usage: cargo run --example basic_usage <path/to/presentation.pptx> <extract_images>\ncargo run --example basic_usage sample.pptx true");
20 return Ok(());
21 };
22
23 // Read the optional extract_images flag; it defaults to true unless explicitly set to "false", "False" or "0"
24 let extract_images = if args.len() > 2 {
25 !(args[2] == "false" || args[2] == "False" || args[2] == "0")
26 } else {
27 true
28 };
29
30 println!("Processing PPTX file: {}", pptx_path);
31
32 // Use the config builder to build your config
33 let config = ParserConfig::builder()
34 .extract_images(extract_images)
35 .compress_images(true)
36 .quality(75)
37 .image_handling_mode(ImageHandlingMode::InMarkdown)
38 .include_slide_comment(true)
39 .build();
40
41 // Open the PPTX file
42 let mut container = PptxContainer::open(Path::new(pptx_path), config)?;
43
44 // Parse all slides
45 let slides = container.parse_all()?;
46
47 println!("Found {} slides", slides.len());
48
49 // create a new Markdown file
50 let mut md_file = File::create("output.md")?;
51
52 // Convert each slide to Markdown and save
53 for slide in slides {
54 if let Some(md_content) = slide.convert_to_md() {
55 println!("{}", md_content);
56 writeln!(md_file, "{}", md_content).expect("Couldn't write to file");
57 }
58 }
59
60 println!("All slides converted successfully!");
61
62 Ok(())
63}examples/slide_elements.rs (line 25)
12fn main() -> Result<()> {
13 // Get the PPTX file path from command line arguments
14 let args: Vec<String> = env::args().collect();
15 let pptx_path = if args.len() > 1 {
16 &args[1]
17 } else {
18 eprintln!("Usage: cargo run --example slide_elements <path/to/presentation.pptx>");
19 return Ok(());
20 };
21
22 println!("Processing PPTX file: {}", pptx_path);
23
24 // Use the config builder to build your config
25 let config = ParserConfig::builder()
26 .extract_images(true)
27 .build();
28
29 // Open the PPTX file with the streaming API
30 let mut streamer = PptxContainer::open(Path::new(pptx_path), config)?;
31
32 // Process slides one by one using the iterator
33 for slide_result in streamer.iter_slides() {
34 match slide_result {
35 Ok(slide) => {
36 println!("Processing slide {} ({} elements)", slide.slide_number, slide.elements.len());
37
38 // iterate over each slide element and match them to add custom logic
39 for element in &slide.elements {
40 match element {
41 SlideElement::Text(text, pos) => { println!("{:?}\t{:?}\n", text, pos) }
42 SlideElement::Table(table, pos) => { println!("{:?}\t{:?}\n", table, pos) }
43 SlideElement::Image(image, pos) => { println!("{:?}\t{:?}\n", image, pos) }
44 SlideElement::List(list, pos) => { println!("{:?}\t{:?}\n", list, pos) }
45 SlideElement::Unknown => { println!("An Unknown element was found.\n") }
46 }
47 }
48 },
49 Err(e) => {
50 eprintln!("Error processing slide: {:?}", e);
51 }
52 }
53 }
54
55 println!("All slides processed successfully!");
56
57 Ok(())
58}examples/manual_image_extraction.rs (line 28)
15fn main() -> Result<()> {
16 // Get the PPTX file path from command line arguments
17 let args: Vec<String> = env::args().collect();
18 let pptx_path = if args.len() > 1 {
19 &args[1]
20 } else {
21 eprintln!("Usage: cargo run --example manual_image_extraction <path/to/presentation.pptx>");
22 return Ok(());
23 };
24
25 println!("Processing PPTX file: {}", pptx_path);
26
27 // Use the config builder to build your config
28 let config = ParserConfig::builder()
29 .extract_images(true)
30 .compress_images(true)
31 .quality(75)
32 .image_handling_mode(ImageHandlingMode::Manually)
33 .build();
34
35 // Open the PPTX file
36 let mut container = PptxContainer::open(Path::new(pptx_path), config)?;
37
38 // Parse all slides
39 let slides = container.parse_all()?;
40
41 println!("Found {} slides", slides.len());
42
43 // create a new Markdown file
44 let mut md_file = File::create("output.md")?;
45
46 // Create output directory
47 let output_dir = "extracted_images";
48 fs::create_dir_all(output_dir)?;
49
50 // Process slides one by one using the iterator
51 let mut image_count = 1;
52
53 // Convert each slide to Markdown and save
54 for slide in slides {
55 if let Some(md_content) = slide.convert_to_md() {
56 writeln!(md_file, "{}", md_content).expect("Couldn't write to file");
57 }
58
59 // Manually load the base64 encoded image strings from the slide
60 if let Some(images) = slide.load_images_manually() {
61 for image in images {
62
63 // Decode the base64 strings back to raw image data
64 let image_data = general_purpose::STANDARD.decode(image.base64_content.clone()).unwrap();
65
66 // Extract the image extension if the image is not compressed; otherwise it's always `.jpg`
67 let ext = slide.config.compress_images
68 .then(|| "jpg".to_string())
69 .unwrap_or_else(|| slide.get_image_extension(&image.img_ref.target.clone()));
70
71 // Construct a unique file name
72 let file_name = format!("slide{}_image{}_{}", slide.slide_number, image_count, &image.img_ref.id);
73
74 // Save the image
75 let output_path = format!(
76 "{}/{}.{}",
77 output_dir,
78 &file_name,
79 ext
80 );
81 fs::write(&output_path, image_data)?;
82 println!("Saved image to {}", output_path);
83
84 // Write the image data into the Markdown file
85 writeln!(md_file, "![{}](data:image/{};base64,{})", file_name, ext, image.base64_content).expect("Couldn't write to file");
86
87 image_count += 1;
88 }
89 }
90 }
91
92 println!("All slides converted successfully!");
93
94 Ok(())
95}examples/performance_test.rs (line 100)
70fn main() -> Result<()> {
71 // Get the PPTX file path and optional iteration count from command line arguments
72 let args: Vec<String> = env::args().collect();
73 let pptx_path = if args.len() > 1 {
74 &args[1]
75 } else {
76 eprintln!("Usage: cargo run --example performance_test <path/to/presentation.pptx> [iterations]");
77 return Ok(());
78 };
79
80 let iterations = if args.len() > 2 {
81 args[2].parse().unwrap_or(5)
82 } else {
83 10 // Default to 10 iterations
84 };
85
86 println!("Performance testing with {} iterations on: {}", iterations, pptx_path);
87
88
89
90 // =========== Single-threaded Approach ===========
91 let mut single_thread_bench = Benchmark::new("Single-threaded parsing");
92
93 let mut total_slides = 0;
94
95 for i in 0..iterations {
96 println!("\nIteration {} (Single-threaded)", i + 1);
97
98 // Measure container creation
99 let mut container = single_thread_bench.measure(|| {
100 let config = ParserConfig::builder()
101 .extract_images(true)
102 .build();
103 PptxContainer::open(Path::new(pptx_path), config).expect("Failed to open PPTX")
104 });
105
106 println!(" Found {} slides in the presentation", container.slide_count);
107
108 // Measure parsing
109 let slides = single_thread_bench.measure(|| {
110 container.parse_all().expect("Failed to parse slides")
111 });
112
113 // Measure conversion
114 let _md_content = single_thread_bench.measure(|| {
115 slides.iter()
116 .filter_map(|slide| slide.convert_to_md())
117 .collect::<Vec<String>>()
118 });
119
120 total_slides += slides.len();
121 }
122
123 single_thread_bench.report();
124 println!("Average slides per presentation: {}", total_slides / iterations);
125
126
127
128 // =========== Single-threaded Streamed Approach ===========
129 let mut single_thread_streamed_bench = Benchmark::new("Single-threaded streamed parsing");
130
131 total_slides = 0;
132
133 for i in 0..iterations {
134 println!("\nIteration {} (Single-threaded streamed)", i + 1);
135
136 // Measure container creation
137 let mut container = single_thread_streamed_bench.measure(|| {
138 let config = ParserConfig::builder()
139 .extract_images(true)
140 .build();
141 PptxContainer::open(Path::new(pptx_path), config).expect("Failed to open PPTX")
142 });
143
144 println!(" Found {} slides in the presentation", container.slide_count);
145
146 // Count the slides in advance for the statistical evaluation
147 let expected_slides = container.slide_count;
148
149 // Measure slide processing (including parsing and conversion)
150 let slides_processed = single_thread_streamed_bench.measure(|| {
151 let mut processed = 0;
152
153 // Process slides one by one using the iterator
154 for slide_result in container.iter_slides() {
155 match slide_result {
156 Ok(slide) => {
157 // Convert the slide to Markdown
158 let _md_content = slide.convert_to_md();
159 processed += 1;
160 },
161 Err(e) => {
162 eprintln!("Error processing slide: {:?}", e);
163 }
164 }
165 }
166
167 processed
168 });
169
170 println!(" Processed {} out of {} slides", slides_processed, expected_slides);
171 total_slides += slides_processed;
172 }
173
174 single_thread_streamed_bench.report();
175 println!("Average slides per presentation: {}", total_slides / iterations);
176
177
178
179 // =========== Optimized Multi-threaded Approach ===========
180 let mut optimized_multi_thread_bench = Benchmark::new("Optimized Multi-threaded parsing");
181
182 total_slides = 0;
183
184 for i in 0..iterations {
185 println!("\nIteration {} (Optimized Multi-threaded)", i + 1);
186
187 // Open the container with the desired configuration
188 let mut container = optimized_multi_thread_bench.measure(|| {
189 let config = ParserConfig::builder()
190 .extract_images(true)
191 .build();
192 PptxContainer::open(Path::new(pptx_path), config).expect("Failed to open PPTX")
193 });
194
195 println!(" Found {} slides in the presentation", container.slide_count);
196
197 // Use the new optimized multi-threading method
198 let slides = optimized_multi_thread_bench.measure(|| {
199 container.parse_all_multi_threaded().expect("Failed to parse slides")
200 });
201
202 println!(" Successfully processed {} slides", slides.len());
203
204 // Convert to Markdown in parallel (remains unchanged)
205 let _md_content = optimized_multi_thread_bench.measure(|| {
206 slides.par_iter()
207 .filter_map(|slide| slide.convert_to_md())
208 .collect::<Vec<String>>()
209 });
210
211 total_slides += slides.len();
212 }
213
214 optimized_multi_thread_bench.report();
215 println!("Average slides per presentation: {}", total_slides / iterations);
216
217 // =========== Performance Comparison ===========
218 if !single_thread_bench.results.is_empty() &&
219 !single_thread_streamed_bench.results.is_empty() &&
220 !optimized_multi_thread_bench.results.is_empty() {
221
222 let single_avg: Duration = single_thread_bench.results.iter().sum::<Duration>() /
223 single_thread_bench.results.len() as u32;
224 let single_streamed_avg: Duration = single_thread_streamed_bench.results.iter().sum::<Duration>() /
225 single_thread_streamed_bench.results.len() as u32;
226 let optimized_multi_avg: Duration = optimized_multi_thread_bench.results.iter().sum::<Duration>() /
227 optimized_multi_thread_bench.results.len() as u32;
228
229 println!("\nPerformance Comparison");
230 println!("=====================");
231 println!("Single-threaded average: {:?}", single_avg);
232 println!("Single-threaded streaming average: {:?}", single_streamed_avg);
233 println!("Optimized multi-threaded average: {:?}", optimized_multi_avg);
234
235 // Compare single-threaded vs single-threaded streaming
236 if single_avg > single_streamed_avg {
237 let speedup = single_avg.as_secs_f64() / single_streamed_avg.as_secs_f64();
238 println!("Single-threaded streaming is {:.2}x faster than single-threaded", speedup);
239 } else {
240 let slowdown = single_streamed_avg.as_secs_f64() / single_avg.as_secs_f64();
241 println!("Single-threaded streaming is {:.2}x slower than single-threaded", slowdown);
242 }
243
244 // Compare single-threaded vs optimized multithreaded
245 if single_avg > optimized_multi_avg {
246 let speedup = single_avg.as_secs_f64() / optimized_multi_avg.as_secs_f64();
247 println!("Optimized multi-threaded is {:.2}x faster than single-threaded", speedup);
248 } else {
249 let slowdown = optimized_multi_avg.as_secs_f64() / single_avg.as_secs_f64();
250 println!("Optimized multi-threaded is {:.2}x slower than single-threaded", slowdown);
251 }
252
253 // Compare single-threaded streaming vs optimized multithreaded
254 if single_streamed_avg > optimized_multi_avg {
255 let speedup = single_streamed_avg.as_secs_f64() / optimized_multi_avg.as_secs_f64();
256 println!("Optimized multi-threaded is {:.2}x faster than single-threaded streaming", speedup);
257 } else {
258 let slowdown = optimized_multi_avg.as_secs_f64() / single_streamed_avg.as_secs_f64();
259 println!("Optimized multi-threaded is {:.2}x slower than single-threaded streaming", slowdown);
260 }
261
262 // Determine the overall fastest approach
263 let fastest_approach = if single_avg <= single_streamed_avg && single_avg <= optimized_multi_avg {
264 "Single-threaded"
265 } else if single_streamed_avg <= single_avg && single_streamed_avg <= optimized_multi_avg {
266 "Single-threaded streaming"
267 } else {
268 "Optimized multi-threaded"
269 };
270
271 println!("\nOverall result: {} approach is the fastest for this workload.", fastest_approach);
272 }
273
274 Ok(())
275}Trait Implementations§
Source§impl Clone for ParserConfig
impl Clone for ParserConfig
Source§fn clone(&self) -> ParserConfig
fn clone(&self) -> ParserConfig
Returns a duplicate of the value. Read more
1.0.0 · Source§fn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from source. Read more
Source§impl Debug for ParserConfig
impl Debug for ParserConfig
Auto Trait Implementations§
impl Freeze for ParserConfig
impl RefUnwindSafe for ParserConfig
impl Send for ParserConfig
impl Sync for ParserConfig
impl Unpin for ParserConfig
impl UnwindSafe for ParserConfig
Blanket Implementations§
Source§impl<T> BorrowMut<T> for Twhere
T: ?Sized,
impl<T> BorrowMut<T> for Twhere
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more
Source§impl<T> CloneToUninit for Twhere
T: Clone,
impl<T> CloneToUninit for Twhere
T: Clone,
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left is true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§impl<T> Pointable for T
impl<T> Pointable for T
Source§impl<R, P> ReadPrimitive<R> for P
impl<R, P> ReadPrimitive<R> for P
Source§fn read_from_little_endian(read: &mut R) -> Result<Self, Error>
fn read_from_little_endian(read: &mut R) -> Result<Self, Error>
Read this value from the supplied reader. Same as
ReadEndian::read_from_little_endian().