pub struct Slide {
pub rel_path: String,
pub slide_number: u32,
pub elements: Vec<SlideElement>,
pub images: Vec<ImageReference>,
pub image_data: HashMap<String, Vec<u8>>,
pub config: ParserConfig,
}
Expand description
Represents a single slide extracted from a PowerPoint (pptx) file.
Contains structured slide data including slide number, parsed content elements (text, tables, images, lists), and associated image references.
A Slide can be converted into other formats, such as Markdown, or its
contained images can be extracted in base64 representation.
Typically, you retrieve instances of Slide through [PptxContainer::parse()].
Fields§
§rel_path: String
§slide_number: u32
§elements: Vec<SlideElement>
§images: Vec<ImageReference>
§image_data: HashMap<String, Vec<u8>>
§config: ParserConfig
Implementations§
Source§impl Slide
impl Slide
pub fn new( rel_path: String, slide_number: u32, elements: Vec<SlideElement>, images: Vec<ImageReference>, image_data: HashMap<String, Vec<u8>>, config: ParserConfig, ) -> Self
Sourcepub fn convert_to_md(&self) -> Option<String>
pub fn convert_to_md(&self) -> Option<String>
Converts slide contents into a Markdown formatted string.
Translates internal slide elements (text, tables, lists, images) to valid and readable Markdown. Embedded images will be encoded as base64 inline images.
§Returns
Returns an Option<String>:
- Some(String): Markdown representation of the slide if conversion succeeds.
- None: If a conversion error occurs during image encoding.
Examples found in repository?
13fn main() -> Result<()> {
14 // Get the PPTX file path from command line arguments and provide the mandatory output path
15 let args: Vec<String> = env::args().collect();
16 let pptx_path = if args.len() > 1 {
17 &args[1]
18 } else {
19 eprintln!("Usage: cargo run --example save_images <path/to/presentation.pptx>");
20 return Ok(());
21 };
22
23 println!("Processing PPTX file: {}", pptx_path);
24
25 // Use the config builder to build your config
26 let config = ParserConfig::builder()
27 .extract_images(true)
28 .compress_images(true)
29 .quality(75)
30 .image_handling_mode(ImageHandlingMode::Save)
31 .image_output_path(PathBuf::from("C:/Users/nilsk/Downloads/extracted_images"))
32 .build();
33
34 // Open the PPTX file
35 let mut container = PptxContainer::open(Path::new(pptx_path), config)?;
36
37 // Parse all slides
38 let slides = container.parse_all()?;
39
40 println!("Found {} slides", slides.len());
41
42 // create a new Markdown file
43 let mut md_file = File::create("output.md")?;
44
45 // Convert each slide to Markdown and save the images automatically
46 for slide in slides {
47 if let Some(md_content) = slide.convert_to_md() {
48 writeln!(md_file, "{}", md_content).expect("Couldn't write to file");
49 }
50 }
51
52 println!("All slides converted successfully!");
53
54 Ok(())
55}More examples
13fn main() -> Result<()> {
14 // Get the PPTX file path from command line arguments
15 let args: Vec<String> = env::args().collect();
16 let pptx_path = if args.len() > 1 {
17 &args[1]
18 } else {
19 eprintln!("Usage: cargo run --example memory_efficient_streaming <path/to/presentation.pptx>");
20 return Ok(());
21 };
22
23 println!("Processing PPTX file: {}", pptx_path);
24
25 // Use the config builder to build your config
26 let config = ParserConfig::builder()
27 .extract_images(true)
28 .build();
29
30 // Open the PPTX file with the streaming API
31 let mut streamer = PptxContainer::open(Path::new(pptx_path), config)?;
32
33 // Create output directory
34 let output_dir = "output_streaming";
35 fs::create_dir_all(output_dir)?;
36
37 // Process slides one by one using the iterator
38 for slide_result in streamer.iter_slides() {
39 match slide_result {
40 Ok(slide) => {
41 println!("Processing slide {} ({} elements)", slide.slide_number, slide.elements.len());
42
43 if let Some(md_content) = slide.convert_to_md() {
44 let output_path = format!("{}/slide_{}.md", output_dir, slide.slide_number);
45 fs::write(&output_path, md_content)?;
46 println!("Saved slide {} to {}", slide.slide_number, output_path);
47 }
48 },
49 Err(e) => {
50 eprintln!("Error processing slide: {:?}", e);
51 }
52 }
53 }
54
55 println!("All slides processed successfully!");
56
57 Ok(())
58}13fn main() -> Result<()> {
14 // Get the PPTX file path from command line arguments
15 let args: Vec<String> = env::args().collect();
16 let pptx_path = if args.len() > 1 {
17 &args[1]
18 } else {
19 eprintln!("Usage: cargo run --example basic_usage <path/to/presentation.pptx> <extract_images>\ncargo run --example basic_usage sample.pptx true");
20 return Ok(());
21 };
22
23 // Tries to read if the extract_images flag is false else set to true
24 let extract_images = if args.len() > 2 {
25 !(args[2] == "false" || args[2] == "False" || args[2] == "0")
26 } else {
27 true
28 };
29
30 println!("Processing PPTX file: {}", pptx_path);
31
32 // Use the config builder to build your config
33 let config = ParserConfig::builder()
34 .extract_images(extract_images)
35 .compress_images(true)
36 .quality(75)
37 .image_handling_mode(ImageHandlingMode::InMarkdown)
38 .include_slide_comment(true)
39 .build();
40
41 // Open the PPTX file
42 let mut container = PptxContainer::open(Path::new(pptx_path), config)?;
43
44 // Parse all slides
45 let slides = container.parse_all()?;
46
47 println!("Found {} slides", slides.len());
48
49 // create a new Markdown file
50 let mut md_file = File::create("output.md")?;
51
52 // Convert each slide to Markdown and save
53 for slide in slides {
54 if let Some(md_content) = slide.convert_to_md() {
55 println!("{}", md_content);
56 writeln!(md_file, "{}", md_content).expect("Couldn't write to file");
57 }
58 }
59
60 println!("All slides converted successfully!");
61
62 Ok(())
63}15fn main() -> Result<()> {
16 // Get the PPTX file path from command line arguments
17 let args: Vec<String> = env::args().collect();
18 let pptx_path = if args.len() > 1 {
19 &args[1]
20 } else {
21 eprintln!("Usage: cargo run --example manual_image_extraction <path/to/presentation.pptx>");
22 return Ok(());
23 };
24
25 println!("Processing PPTX file: {}", pptx_path);
26
27 // Use the config builder to build your config
28 let config = ParserConfig::builder()
29 .extract_images(true)
30 .compress_images(true)
31 .quality(75)
32 .image_handling_mode(ImageHandlingMode::Manually)
33 .build();
34
35 // Open the PPTX file
36 let mut container = PptxContainer::open(Path::new(pptx_path), config)?;
37
38 // Parse all slides
39 let slides = container.parse_all()?;
40
41 println!("Found {} slides", slides.len());
42
43 // create a new Markdown file
44 let mut md_file = File::create("output.md")?;
45
46 // Create output directory
47 let output_dir = "extracted_images";
48 fs::create_dir_all(output_dir)?;
49
50 // Process slides one by one using the iterator
51 let mut image_count = 1;
52
53 // Convert each slide to Markdown and save
54 for slide in slides {
55 if let Some(md_content) = slide.convert_to_md() {
56 writeln!(md_file, "{}", md_content).expect("Couldn't write to file");
57 }
58
59 // Manually load the base64 encoded image strings from the slide
60 if let Some(images) = slide.load_images_manually() {
61 for image in images {
62
63 // Decode the base64 strings back to raw image data
64 let image_data = general_purpose::STANDARD.decode(image.base64_content.clone()).unwrap();
65
66 // Extract image extension if the image is not compressed, otherwise its always `.jpg`
67 let ext = slide.config.compress_images
68 .then(|| "jpg".to_string())
69 .unwrap_or_else(|| slide.get_image_extension(&image.img_ref.target.clone()));
70
71 // Construct a unique file name
72 let file_name = format!("slide{}_image{}_{}", slide.slide_number, image_count, &image.img_ref.id);
73
74 // Save the image
75 let output_path = format!(
76 "{}/{}.{}",
77 output_dir,
78 &file_name,
79 ext
80 );
81 fs::write(&output_path, image_data)?;
82 println!("Saved image to {}", output_path);
83
84 // Write the image data into the Markdown file
85 writeln!(md_file, "![{}](data:image/{};base64,{})", file_name, ext, image.base64_content).expect("Couldn't write to file");
86
87 image_count += 1;
88 }
89 }
90 }
91
92 println!("All slides converted successfully!");
93
94 Ok(())
95}70fn main() -> Result<()> {
71 // Get the PPTX file path and optional iteration count from command line arguments
72 let args: Vec<String> = env::args().collect();
73 let pptx_path = if args.len() > 1 {
74 &args[1]
75 } else {
76 eprintln!("Usage: cargo run --example performance_test <path/to/presentation.pptx> [iterations]");
77 return Ok(());
78 };
79
80 let iterations = if args.len() > 2 {
81 args[2].parse().unwrap_or(5)
82 } else {
83 10 // Default to 10 iterations
84 };
85
86 println!("Performance testing with {} iterations on: {}", iterations, pptx_path);
87
88
89
90 // =========== Single-threaded Approach ===========
91 let mut single_thread_bench = Benchmark::new("Single-threaded parsing");
92
93 let mut total_slides = 0;
94
95 for i in 0..iterations {
96 println!("\nIteration {} (Single-threaded)", i + 1);
97
98 // Measure container creation
99 let mut container = single_thread_bench.measure(|| {
100 let config = ParserConfig::builder()
101 .extract_images(true)
102 .build();
103 PptxContainer::open(Path::new(pptx_path), config).expect("Failed to open PPTX")
104 });
105
106 println!(" Found {} slides in the presentation", container.slide_count);
107
108 // Measure parsing
109 let slides = single_thread_bench.measure(|| {
110 container.parse_all().expect("Failed to parse slides")
111 });
112
113 // Measure conversion
114 let _md_content = single_thread_bench.measure(|| {
115 slides.iter()
116 .filter_map(|slide| slide.convert_to_md())
117 .collect::<Vec<String>>()
118 });
119
120 total_slides += slides.len();
121 }
122
123 single_thread_bench.report();
124 println!("Average slides per presentation: {}", total_slides / iterations);
125
126
127
128 // =========== Single-threaded Streamed Approach ===========
129 let mut single_thread_streamed_bench = Benchmark::new("Single-threaded streamed parsing");
130
131 total_slides = 0;
132
133 for i in 0..iterations {
134 println!("\nIteration {} (Single-threaded streamed)", i + 1);
135
136 // Measure container creation
137 let mut container = single_thread_streamed_bench.measure(|| {
138 let config = ParserConfig::builder()
139 .extract_images(true)
140 .build();
141 PptxContainer::open(Path::new(pptx_path), config).expect("Failed to open PPTX")
142 });
143
144 println!(" Found {} slides in the presentation", container.slide_count);
145
146 // Count the slides in advance for the statistical evaluation
147 let expected_slides = container.slide_count;
148
149 // Measure slide processing (including parsing and conversion)
150 let slides_processed = single_thread_streamed_bench.measure(|| {
151 let mut processed = 0;
152
153 // Process slides one by one using the iterator
154 for slide_result in container.iter_slides() {
155 match slide_result {
156 Ok(slide) => {
157 // Convert the slide to Markdown
158 let _md_content = slide.convert_to_md();
159 processed += 1;
160 },
161 Err(e) => {
162 eprintln!("Error processing slide: {:?}", e);
163 }
164 }
165 }
166
167 processed
168 });
169
170 println!(" Processed {} out of {} slides", slides_processed, expected_slides);
171 total_slides += slides_processed;
172 }
173
174 single_thread_streamed_bench.report();
175 println!("Average slides per presentation: {}", total_slides / iterations);
176
177
178
179 // =========== Optimized Multi-threaded Approach ===========
180 let mut optimized_multi_thread_bench = Benchmark::new("Optimized Multi-threaded parsing");
181
182 total_slides = 0;
183
184 for i in 0..iterations {
185 println!("\nIteration {} (Optimized Multi-threaded)", i + 1);
186
187 // Open the container with the desired configuration
188 let mut container = optimized_multi_thread_bench.measure(|| {
189 let config = ParserConfig::builder()
190 .extract_images(true)
191 .build();
192 PptxContainer::open(Path::new(pptx_path), config).expect("Failed to open PPTX")
193 });
194
195 println!(" Found {} slides in the presentation", container.slide_count);
196
197 // Use the new optimized multi-threading method
198 let slides = optimized_multi_thread_bench.measure(|| {
199 container.parse_all_multi_threaded().expect("Failed to parse slides")
200 });
201
202 println!(" Successfully processed {} slides", slides.len());
203
204 // Convert to Markdown in parallel (remains unchanged)
205 let _md_content = optimized_multi_thread_bench.measure(|| {
206 slides.par_iter()
207 .filter_map(|slide| slide.convert_to_md())
208 .collect::<Vec<String>>()
209 });
210
211 total_slides += slides.len();
212 }
213
214 optimized_multi_thread_bench.report();
215 println!("Average slides per presentation: {}", total_slides / iterations);
216
217 // =========== Performance Comparison ===========
218 if !single_thread_bench.results.is_empty() &&
219 !single_thread_streamed_bench.results.is_empty() &&
220 !optimized_multi_thread_bench.results.is_empty() {
221
222 let single_avg: Duration = single_thread_bench.results.iter().sum::<Duration>() /
223 single_thread_bench.results.len() as u32;
224 let single_streamed_avg: Duration = single_thread_streamed_bench.results.iter().sum::<Duration>() /
225 single_thread_streamed_bench.results.len() as u32;
226 let optimized_multi_avg: Duration = optimized_multi_thread_bench.results.iter().sum::<Duration>() /
227 optimized_multi_thread_bench.results.len() as u32;
228
229 println!("\nPerformance Comparison");
230 println!("=====================");
231 println!("Single-threaded average: {:?}", single_avg);
232 println!("Single-threaded streaming average: {:?}", single_streamed_avg);
233 println!("Optimized multi-threaded average: {:?}", optimized_multi_avg);
234
235 // Compare single-threaded vs single-threaded streaming
236 if single_avg > single_streamed_avg {
237 let speedup = single_avg.as_secs_f64() / single_streamed_avg.as_secs_f64();
238 println!("Single-threaded streaming is {:.2}x faster than single-threaded", speedup);
239 } else {
240 let slowdown = single_streamed_avg.as_secs_f64() / single_avg.as_secs_f64();
241 println!("Single-threaded streaming is {:.2}x slower than single-threaded", slowdown);
242 }
243
244 // Compare single-threaded vs optimized multithreaded
245 if single_avg > optimized_multi_avg {
246 let speedup = single_avg.as_secs_f64() / optimized_multi_avg.as_secs_f64();
247 println!("Optimized multi-threaded is {:.2}x faster than single-threaded", speedup);
248 } else {
249 let slowdown = optimized_multi_avg.as_secs_f64() / single_avg.as_secs_f64();
250 println!("Optimized multi-threaded is {:.2}x slower than single-threaded", slowdown);
251 }
252
253 // Compare single-threaded streaming vs optimized multithreaded
254 if single_streamed_avg > optimized_multi_avg {
255 let speedup = single_streamed_avg.as_secs_f64() / optimized_multi_avg.as_secs_f64();
256 println!("Optimized multi-threaded is {:.2}x faster than single-threaded streaming", speedup);
257 } else {
258 let slowdown = optimized_multi_avg.as_secs_f64() / single_streamed_avg.as_secs_f64();
259 println!("Optimized multi-threaded is {:.2}x slower than single-threaded streaming", slowdown);
260 }
261
262 // Determine the overall fastest approach
263 let fastest_approach = if single_avg <= single_streamed_avg && single_avg <= optimized_multi_avg {
264 "Single-threaded"
265 } else if single_streamed_avg <= single_avg && single_streamed_avg <= optimized_multi_avg {
266 "Single-threaded streaming"
267 } else {
268 "Optimized multi-threaded"
269 };
270
271 println!("\nOverall result: {} approach is the fastest for this workload.", fastest_approach);
272 }
273
274 Ok(())
275}Sourcepub fn extract_slide_number(path: &str) -> Option<u32>
pub fn extract_slide_number(path: &str) -> Option<u32>
Extracts the numeric slide identifier from a slide path.
Helper method to parse slide numbers from internal pptx
slide paths (e.g., “ppt/slides/slide1.xml” → 1).
Sourcepub fn link_images(&mut self)
pub fn link_images(&mut self)
Links slide image references with their corresponding targets.
Ensures that each image referenced by its ID is correctly linked to the actual internal resource path stored in the slide. This method is typically used internally after parsing a slide.
§Notes
Internally, image references hold the following values:
| Parameter | Example value |
|---|---|
| id | rId2 |
| target | ../media/image2.png |
Sourcepub fn get_image_extension(&self, path: &str) -> String
pub fn get_image_extension(&self, path: &str) -> String
Extracts the file extension from an image path.
Examples found in repository?
15fn main() -> Result<()> {
16 // Get the PPTX file path from command line arguments
17 let args: Vec<String> = env::args().collect();
18 let pptx_path = if args.len() > 1 {
19 &args[1]
20 } else {
21 eprintln!("Usage: cargo run --example manual_image_extraction <path/to/presentation.pptx>");
22 return Ok(());
23 };
24
25 println!("Processing PPTX file: {}", pptx_path);
26
27 // Use the config builder to build your config
28 let config = ParserConfig::builder()
29 .extract_images(true)
30 .compress_images(true)
31 .quality(75)
32 .image_handling_mode(ImageHandlingMode::Manually)
33 .build();
34
35 // Open the PPTX file
36 let mut container = PptxContainer::open(Path::new(pptx_path), config)?;
37
38 // Parse all slides
39 let slides = container.parse_all()?;
40
41 println!("Found {} slides", slides.len());
42
43 // create a new Markdown file
44 let mut md_file = File::create("output.md")?;
45
46 // Create output directory
47 let output_dir = "extracted_images";
48 fs::create_dir_all(output_dir)?;
49
50 // Process slides one by one using the iterator
51 let mut image_count = 1;
52
53 // Convert each slide to Markdown and save
54 for slide in slides {
55 if let Some(md_content) = slide.convert_to_md() {
56 writeln!(md_file, "{}", md_content).expect("Couldn't write to file");
57 }
58
59 // Manually load the base64 encoded image strings from the slide
60 if let Some(images) = slide.load_images_manually() {
61 for image in images {
62
63 // Decode the base64 strings back to raw image data
64 let image_data = general_purpose::STANDARD.decode(image.base64_content.clone()).unwrap();
65
66 // Extract image extension if the image is not compressed, otherwise its always `.jpg`
67 let ext = slide.config.compress_images
68 .then(|| "jpg".to_string())
69 .unwrap_or_else(|| slide.get_image_extension(&image.img_ref.target.clone()));
70
71 // Construct a unique file name
72 let file_name = format!("slide{}_image{}_{}", slide.slide_number, image_count, &image.img_ref.id);
73
74 // Save the image
75 let output_path = format!(
76 "{}/{}.{}",
77 output_dir,
78 &file_name,
79 ext
80 );
81 fs::write(&output_path, image_data)?;
82 println!("Saved image to {}", output_path);
83
84 // Write the image data into the Markdown file
85 writeln!(md_file, "![{}](data:image/{};base64,{})", file_name, ext, image.base64_content).expect("Couldn't write to file");
86
87 image_count += 1;
88 }
89 }
90 }
91
92 println!("All slides converted successfully!");
93
94 Ok(())
95}Sourcepub fn load_images_manually(&self) -> Option<Vec<ManualImage>>
pub fn load_images_manually(&self) -> Option<Vec<ManualImage>>
Examples found in repository?
15fn main() -> Result<()> {
16 // Get the PPTX file path from command line arguments
17 let args: Vec<String> = env::args().collect();
18 let pptx_path = if args.len() > 1 {
19 &args[1]
20 } else {
21 eprintln!("Usage: cargo run --example manual_image_extraction <path/to/presentation.pptx>");
22 return Ok(());
23 };
24
25 println!("Processing PPTX file: {}", pptx_path);
26
27 // Use the config builder to build your config
28 let config = ParserConfig::builder()
29 .extract_images(true)
30 .compress_images(true)
31 .quality(75)
32 .image_handling_mode(ImageHandlingMode::Manually)
33 .build();
34
35 // Open the PPTX file
36 let mut container = PptxContainer::open(Path::new(pptx_path), config)?;
37
38 // Parse all slides
39 let slides = container.parse_all()?;
40
41 println!("Found {} slides", slides.len());
42
43 // create a new Markdown file
44 let mut md_file = File::create("output.md")?;
45
46 // Create output directory
47 let output_dir = "extracted_images";
48 fs::create_dir_all(output_dir)?;
49
50 // Process slides one by one using the iterator
51 let mut image_count = 1;
52
53 // Convert each slide to Markdown and save
54 for slide in slides {
55 if let Some(md_content) = slide.convert_to_md() {
56 writeln!(md_file, "{}", md_content).expect("Couldn't write to file");
57 }
58
59 // Manually load the base64 encoded image strings from the slide
60 if let Some(images) = slide.load_images_manually() {
61 for image in images {
62
63 // Decode the base64 strings back to raw image data
64 let image_data = general_purpose::STANDARD.decode(image.base64_content.clone()).unwrap();
65
66 // Extract image extension if the image is not compressed, otherwise its always `.jpg`
67 let ext = slide.config.compress_images
68 .then(|| "jpg".to_string())
69 .unwrap_or_else(|| slide.get_image_extension(&image.img_ref.target.clone()));
70
71 // Construct a unique file name
72 let file_name = format!("slide{}_image{}_{}", slide.slide_number, image_count, &image.img_ref.id);
73
74 // Save the image
75 let output_path = format!(
76 "{}/{}.{}",
77 output_dir,
78 &file_name,
79 ext
80 );
81 fs::write(&output_path, image_data)?;
82 println!("Saved image to {}", output_path);
83
84 // Write the image data into the Markdown file
85 writeln!(md_file, "![{}](data:image/{};base64,{})", file_name, ext, image.base64_content).expect("Couldn't write to file");
86
87 image_count += 1;
88 }
89 }
90 }
91
92 println!("All slides converted successfully!");
93
94 Ok(())
95}Trait Implementations§
Auto Trait Implementations§
impl Freeze for Slide
impl RefUnwindSafe for Slide
impl Send for Slide
impl Sync for Slide
impl Unpin for Slide
impl UnwindSafe for Slide
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left is true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more