1use crate::parser_config::ImageHandlingMode;
2use crate::{ElementPosition, ImageReference, ParserConfig, SlideElement};
3use base64::{engine::general_purpose, Engine as _};
4use image::ImageOutputFormat;
5use std::collections::HashMap;
6use std::fs;
7use std::io::Cursor;
8use std::path::{Path, PathBuf};
9
10#[derive(Debug)]
12pub struct ManualImage {
13 pub base64_content: String,
14 pub img_ref: ImageReference,
15}
16
17impl ManualImage {
18 pub fn new(base64_content: String, img_ref: ImageReference) -> ManualImage {
19 Self {
20 base64_content,
21 img_ref,
22 }
23 }
24}
25
26#[derive(Debug)]
36pub struct Slide {
37 pub rel_path: String,
38 pub slide_number: u32,
39 pub elements: Vec<SlideElement>,
40 pub images: Vec<ImageReference>,
41 pub image_data: HashMap<String, Vec<u8>>,
42 pub config: ParserConfig
43}
44
45impl Slide {
46 pub fn new(
47 rel_path: String,
48 slide_number: u32,
49 elements: Vec<SlideElement>,
50 images: Vec<ImageReference>,
51 image_data: HashMap<String, Vec<u8>>,
52 config: ParserConfig,
53 ) -> Self {
54 Self {
55 rel_path,
56 slide_number,
57 elements,
58 images,
59 image_data,
60 config,
61 }
62 }
63
64 pub fn convert_to_md(&self) -> Option<String> {
75 let mut slide_txt = String::new();
76 if self.config.include_slide_comment { slide_txt.push_str(format!("<!-- Slide {} -->\n\n", self.slide_number).as_str()); }
77 let mut image_count = 0;
78
79 let mut sorted_elements = self.elements.clone();
80 sorted_elements.sort_by_key(|element| {
81 let ElementPosition { y, x } = element.position();
82 (y, x)
83 });
84
85 for element in sorted_elements {
86 match element {
87 SlideElement::Text(text, _pos) => {
88 for run in &text.runs {
89 slide_txt.push_str(&run.render_as_md());
90 }
91 slide_txt.push('\n');
92 },
93 SlideElement::Table(table, _pos) => {
94 let mut is_header = true;
95 for row in &table.rows {
96 let mut row_texts = Vec::new();
97 for cell in &row.cells {
98 let mut cell_text = String::new();
99 for run in &cell.runs {
100 cell_text.push_str(&run.extract());
101 }
102 row_texts.push(cell_text);
103 }
104
105 let row_line = format!("| {} |", row_texts.join(" | "));
106 slide_txt.push_str(&row_line);
107 slide_txt.push('\n');
108
109 if is_header {
110 let separator_line = format!("|{}|", row_texts.iter().map(|_| " --- ").collect::<Vec<_>>().join("|"));
111 slide_txt.push_str(&separator_line);
112 slide_txt.push('\n');
113 is_header = false;
114 }
115 }
116 slide_txt.push('\n');
117 },
118 SlideElement::Image(image_ref, _pos) => {
119 match self.config.image_handling_mode {
120 ImageHandlingMode::InMarkdown => {
121 if let Some(image_data) = self.image_data.get(&image_ref.id) {
122 let image_data = self.config.compress_images
123 .then(|| self.compress_image(image_data))
124 .unwrap_or_else(|| Option::from(image_data.clone()));
125
126 let base64_string = general_purpose::STANDARD.encode(image_data?);
127 let image_name = &image_ref.target.split('/').last()?;
128 let file_ext = &image_name.split('.').last()?;
129
130 slide_txt.push_str(format!("", image_name, file_ext, base64_string).as_str());
131 }
132 }
133 ImageHandlingMode::Save => {
134 if let Some(image_data) = self.image_data.get(&image_ref.id) {
135 let image_data = self.config.compress_images
136 .then(|| self.compress_image(image_data))
137 .unwrap_or_else(|| Option::from(image_data.clone()));
138
139 let ext = self.config.compress_images
140 .then(|| "jpg".to_string())
141 .unwrap_or_else(|| self.get_image_extension(&image_ref.target.clone()));
142
143 let output_dir = self.config
144 .image_output_path
145 .clone()
146 .unwrap_or_else(|| PathBuf::from("."));
147
148 let _ = fs::create_dir_all(&output_dir);
149
150 let mut image_path = output_dir.clone();
151 let file_name = format!("slide{}_image{}_{}.{}", self.slide_number, image_count + 1, &image_ref.id, ext);
152 image_path.push(&file_name);
153
154 let _ = fs::write(&image_path, image_data?);
155
156 let abs_file_url = self.path_to_file_url(&image_path);
157 let html_link = format!(r#"<a href={:?}>{file_name}</a>"#, abs_file_url?);
158 image_count += 1;
159 slide_txt.push_str(&html_link);
160 slide_txt.push('\n');
161 }
162 }
163 ImageHandlingMode::Manually => { slide_txt.push('\n'); continue; }
164 }
165 slide_txt.push('\n');
166 }
167 SlideElement::List(list_element, _pos) => {
168 let mut counters: Vec<usize> = Vec::new();
169 let mut previous_level = 0;
170
171 for item in &list_element.items {
172 let mut item_text = String::new();
173 for run in &item.runs {
174 item_text.push_str(&run.extract());
175 }
176
177 let level = item.level as usize;
178 if level >= counters.len() {
179 counters.resize(level + 1, 0);
180 }
181
182 match level.cmp(&previous_level) {
183 std::cmp::Ordering::Greater => counters[level] = 0,
184 std::cmp::Ordering::Less => counters.truncate(level + 1),
185 std::cmp::Ordering::Equal => {}
186 }
187
188 counters[level] += 1;
189 previous_level = level;
190
191 let indent = "\t".repeat(level);
192 let marker = if item.is_ordered {
193 format!("{}{}. ", indent, counters[level])
194 } else {
195 format!("{}- ", indent)
196 };
197
198 slide_txt.push_str(&format!("{}{}\n", marker, item_text));
199 }
200 },
201 _ => ()
202 }
203 }
204 Some(slide_txt)
205 }
206
/// Parses the slide number out of a slide part path.
///
/// Only the last `/`-separated segment is inspected. It must have the form
/// `slide<N>.xml`, where `<N>` parses as a `u32`.
///
/// Returns `None` for any other file name (for example a layout or a
/// relationship file) or when the number does not fit into a `u32`.
pub fn extract_slide_number(path: &str) -> Option<u32> {
    let file_name = path.rsplit('/').next()?;
    let digits = file_name.strip_prefix("slide")?.strip_suffix(".xml")?;
    digits.parse::<u32>().ok()
}
221
222 pub fn link_images(&mut self) {
238 let id_to_target: HashMap<String, String> = self.images
239 .iter()
240 .map(|img_ref| (img_ref.id.clone(), img_ref.target.clone()))
241 .collect();
242
243 for element in &mut self.elements {
244 if let SlideElement::Image(ref mut img_ref, _pos) = element {
245 if let Some(target) = id_to_target.get(&img_ref.id) {
246 img_ref.target = target.clone();
247 }
248 }
249 }
250 }
251
252 pub fn get_image_extension(&self, path: &str) -> String {
254 Path::new(path)
255 .extension()
256 .and_then(|ext| ext.to_str())
257 .unwrap_or("bin")
258 .to_string()
259 }
260
261 pub fn compress_image(&self, image_data: &[u8]) -> Option<Vec<u8>> {
275 let img = match image::load_from_memory(image_data) {
276 Ok(image) => image,
277 Err(_) => return None,
278 };
279
280 let mut output = Vec::new();
281 let quality = self.config.quality;
282
283 if img.write_to(&mut Cursor::new(&mut output), ImageOutputFormat::Jpeg(quality)).is_ok() {
284 Some(output)
285 } else {
286 None
287 }
288 }
289
290 pub fn load_images_manually(&self) -> Option<Vec<ManualImage>> {
291 let mut images: Vec<ManualImage> = Vec::new();
292
293 let image_refs: Vec<&ImageReference> = self.elements
294 .iter()
295 .filter_map(|element| match element {
296 SlideElement::Image(ref img, _pos) => Some(img),
297 _ => None,
298 })
299 .collect();
300
301 for image_ref in image_refs {
302 if let Some(image_data) = self.image_data.get(&image_ref.id) {
303 let image_data = self.config.compress_images
304 .then( | | self.compress_image(image_data))
305 .unwrap_or_else(|| Option::from(image_data.clone()));
306
307 let base64_str = general_purpose::STANDARD.encode(image_data?);
308
309 let image = ManualImage::new(
310 base64_str,
311 image_ref.clone(),
312 );
313 images.push(image);
314 }
315 }
316
317 Some(images)
318 }
319
320 fn path_to_file_url(&self, path: &Path) -> Option<String> {
321 let abs_path = path.canonicalize().ok()?;
322 let mut path_str = abs_path.to_string_lossy().replace('\\', "/");
323
324 if cfg!(windows) {
326 if let Some(stripped) = path_str.strip_prefix("//?/") {
327 path_str = stripped.to_string();
328 }
329 Some(format!("file:///{}", path_str))
330 } else {
331 Some(format!("file://{}", path_str))
332 }
333 }
334}
335
#[cfg(test)]
mod tests {
    use std::fs;
    use std::path::PathBuf;
    use crate::ElementPosition;
    use super::*;

    /// Builds an empty slide with the default parser configuration.
    fn mock_slide() -> Slide {
        Slide {
            rel_path: "ppt/slides/slide1.xml".to_string(),
            slide_number: 1,
            elements: vec![],
            images: vec![],
            image_data: HashMap::new(),
            config: ParserConfig::default(),
        }
    }

    /// Reads a fixture from `tests/test_data` below the crate root.
    fn load_image_data(filename: &str) -> Vec<u8> {
        let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
        path.push("tests");
        path.push("test_data");
        path.push(filename);
        fs::read(path).expect("Unable to read test data file")
    }

    #[test]
    fn test_extract_slide_number() {
        let input = "ppt/slides/slide5.xml";

        let actual = Slide::extract_slide_number(input).unwrap();
        let expected: u32 = 5;

        assert_eq!(actual, expected);
    }

    #[test]
    fn test_extract_slide_number_rejects_other_files() {
        assert_eq!(Slide::extract_slide_number("ppt/slides/slideLayout1.xml"), None);
        assert_eq!(Slide::extract_slide_number("ppt/slides/slide1.xml.rels"), None);
        assert_eq!(Slide::extract_slide_number(""), None);
    }

    #[test]
    fn test_get_image_extension() {
        let slide = mock_slide();
        let input = "../media/image1.png";

        let actual = slide.get_image_extension(input);
        let expected = "png";

        assert_eq!(actual, expected);
    }

    #[test]
    fn test_get_image_extension_falls_back_to_bin() {
        let slide = mock_slide();

        assert_eq!(slide.get_image_extension("../media/image1"), "bin");
    }

    #[test]
    fn test_link_images() {
        let mut slide = mock_slide();
        let position = ElementPosition::default();

        slide.images.push(ImageReference { id: "rId2".to_string(), target: "../media/image1.png".to_string() });
        slide.elements.push(SlideElement::Image(ImageReference { id: "rId2".to_string(), target: "".to_string() }, position));

        slide.link_images();

        // Fail loudly instead of passing silently when the element is not an image.
        match &slide.elements[0] {
            SlideElement::Image(img_ref, _) => assert_eq!(img_ref.target, "../media/image1.png"),
            _ => panic!("expected the first element to be an image"),
        }
    }

    #[test]
    fn test_image_compression_reduces_size() {
        let mut slide = mock_slide();
        slide.config.quality = 50;

        let raw_image = load_image_data("example-image.jpg");

        let compression_result = slide.compress_image(&raw_image).expect("Compression failed");
        assert!(compression_result.len() < raw_image.len());
    }

    #[test]
    fn test_compressed_image_is_valid_jpg() {
        let slide = mock_slide();
        let raw_image = load_image_data("example-image.jpg");

        let compression_result = slide.compress_image(&raw_image).expect("Compression failed");
        assert!(image::load_from_memory(&compression_result).is_ok());
    }
}
424}