Skip to main content

ppt_rs/opc/
compress.rs

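//! PPTX Compression Module
//!
//! Provides functionality to optimize and compress PPTX files:
//! - Remove unused media files
//! - Compress images to reduce file size (quality and size limits are exposed
//!   through `CompressionLevel`; the code in this module does not re-encode
//!   images itself)
//! - Remove document properties and revision history (currently this covers
//!   the `docProps/*` parts)
//! - Optimize XML (remove unnecessary whitespace)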
8
9use super::Package;
10use crate::exc::Result;
11use std::collections::HashSet;
12use std::path::Path;
13
/// Compression level options.
///
/// The level selects the image quality and size limits reported by
/// `image_quality`, `should_resize_images` and `max_image_dimension`.
/// The type is `Copy`, comparable and hashable, so it can be used as a
/// map/set key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum CompressionLevel {
    /// Light compression - remove unused parts only
    Light,
    /// Medium compression - compress images slightly
    Medium,
    /// Aggressive compression - maximize size reduction
    Aggressive,
    /// Custom compression with a caller-chosen JPEG quality (intended range 0-100)
    Custom(u8),
}
26
27impl Default for CompressionLevel {
28    fn default() -> Self {
29        CompressionLevel::Medium
30    }
31}
32
33impl CompressionLevel {
34    /// Get image quality for this level (for JPEG compression)
35    pub fn image_quality(&self) -> u8 {
36        match self {
37            CompressionLevel::Light => 95,
38            CompressionLevel::Medium => 85,
39            CompressionLevel::Aggressive => 70,
40            CompressionLevel::Custom(q) => *q,
41        }
42    }
43
44    /// Whether to resize large images
45    pub fn should_resize_images(&self) -> bool {
46        matches!(self, CompressionLevel::Aggressive | CompressionLevel::Custom(_))
47    }
48
49    /// Maximum image dimension for this level
50    pub fn max_image_dimension(&self) -> u32 {
51        match self {
52            CompressionLevel::Light => 2048,
53            CompressionLevel::Medium => 1600,
54            CompressionLevel::Aggressive => 1280,
55            CompressionLevel::Custom(_) => 1600,
56        }
57    }
58}
59
/// Compression options
///
/// Bundles the compression level with a set of on/off switches and an
/// optional size target. Build one with `CompressionOptions::new()` and the
/// `with_*` methods, or use one of the presets (`maximum`, `web`).
#[derive(Debug, Clone)]
pub struct CompressionOptions {
    /// Compression level
    // NOTE(review): `compress_pptx` in this file does not read this field.
    pub level: CompressionLevel,
    /// Remove unused media files
    pub remove_unused_media: bool,
    /// Remove document properties
    pub remove_properties: bool,
    /// Remove notes slides
    pub remove_notes: bool,
    /// Remove comments
    // NOTE(review): `compress_pptx` in this file does not read this flag, so
    // setting it currently has no effect there.
    pub remove_comments: bool,
    /// Optimize XML (remove whitespace)
    pub optimize_xml: bool,
    /// Target file size in bytes (0 = no target)
    // `compress_pptx` only reports whether the target was met
    // (`CompressionResult::target_achieved`); it does not retry to reach it.
    pub target_size: usize,
}
78
79impl Default for CompressionOptions {
80    fn default() -> Self {
81        Self {
82            level: CompressionLevel::Medium,
83            remove_unused_media: true,
84            remove_properties: false,
85            remove_notes: false,
86            remove_comments: true,
87            optimize_xml: true,
88            target_size: 0,
89        }
90    }
91}
92
93impl CompressionOptions {
94    /// Create new options with defaults
95    pub fn new() -> Self {
96        Self::default()
97    }
98
99    /// Set compression level
100    pub fn with_level(mut self, level: CompressionLevel) -> Self {
101        self.level = level;
102        self
103    }
104
105    /// Set unused media removal
106    pub fn with_unused_media_removal(mut self, remove: bool) -> Self {
107        self.remove_unused_media = remove;
108        self
109    }
110
111    /// Set properties removal
112    pub fn with_properties_removal(mut self, remove: bool) -> Self {
113        self.remove_properties = remove;
114        self
115    }
116
117    /// Set notes removal
118    pub fn with_notes_removal(mut self, remove: bool) -> Self {
119        self.remove_notes = remove;
120        self
121    }
122
123    /// Set comments removal
124    pub fn with_comments_removal(mut self, remove: bool) -> Self {
125        self.remove_comments = remove;
126        self
127    }
128
129    /// Set XML optimization
130    pub fn with_xml_optimization(mut self, optimize: bool) -> Self {
131        self.optimize_xml = optimize;
132        self
133    }
134
135    /// Set target file size
136    pub fn with_target_size(mut self, size: usize) -> Self {
137        self.target_size = size;
138        self
139    }
140
141    /// Preset for maximum compression
142    pub fn maximum() -> Self {
143        Self {
144            level: CompressionLevel::Aggressive,
145            remove_unused_media: true,
146            remove_properties: true,
147            remove_notes: true,
148            remove_comments: true,
149            optimize_xml: true,
150            target_size: 0,
151        }
152    }
153
154    /// Preset for web optimization
155    pub fn web() -> Self {
156        Self {
157            level: CompressionLevel::Medium,
158            remove_unused_media: true,
159            remove_properties: true,
160            remove_notes: false,
161            remove_comments: true,
162            optimize_xml: true,
163            target_size: 5 * 1024 * 1024, // 5MB target
164        }
165    }
166}
167
/// Compression result
///
/// Statistics returned by `compress_pptx` / `compress_pptx_in_memory`.
#[derive(Debug)]
pub struct CompressionResult {
    /// Original file size in bytes
    pub original_size: usize,
    /// Compressed file size in bytes
    pub compressed_size: usize,
    /// Reduction percentage, relative to `original_size`
    /// (`compress_pptx` reports 0.0 when the original size is 0)
    pub reduction_percent: f64,
    /// Number of unused media files removed
    pub unused_media_removed: usize,
    /// Number of images compressed
    // NOTE(review): `compress_pptx` in this file always reports 0 here.
    pub images_compressed: usize,
    /// Whether target size was achieved
    /// (`compress_pptx` also sets this to `true` when no target was set)
    pub target_achieved: bool,
}
184
185/// Compress a PPTX file
186///
187/// # Arguments
188/// * `input_path` - Path to input PPTX file
189/// * `output_path` - Path for compressed output
190/// * `options` - Compression options
191///
192/// # Returns
193/// Compression result with statistics
194///
195/// # Example
196/// ```no_run
197/// use ppt_rs::opc::compress::{compress_pptx, CompressionOptions, CompressionLevel};
198///
199/// let options = CompressionOptions::new()
200///     .with_level(CompressionLevel::Medium);
201///
202/// let result = compress_pptx("input.pptx", "output.pptx", &options).unwrap();
203/// println!("Reduced by {:.1}%", result.reduction_percent);
204/// ```
205pub fn compress_pptx<P: AsRef<Path>, Q: AsRef<Path>>(
206    input_path: P,
207    output_path: Q,
208    options: &CompressionOptions,
209) -> Result<CompressionResult> {
210    // Load package
211    let mut package = Package::open(input_path.as_ref())?;
212
213    let original_size = std::fs::metadata(input_path.as_ref())?.len() as usize;
214
215    let mut unused_media_removed = 0;
216    let images_compressed = 0;
217
218    // Remove unused media files
219    if options.remove_unused_media {
220        unused_media_removed = remove_unused_media(&mut package)?;
221    }
222
223    // Remove properties if requested
224    if options.remove_properties {
225        remove_document_properties(&mut package);
226    }
227
228    // Remove notes if requested
229    if options.remove_notes {
230        remove_notes_slides(&mut package)?;
231    }
232
233    // Optimize XML
234    if options.optimize_xml {
235        optimize_xml_content(&mut package)?;
236    }
237
238    // Save compressed package
239    package.save(output_path.as_ref())?;
240
241    let compressed_size = std::fs::metadata(output_path.as_ref())?.len() as usize;
242    let reduction_percent = if original_size > 0 {
243        ((original_size - compressed_size) as f64 / original_size as f64) * 100.0
244    } else {
245        0.0
246    };
247
248    let target_achieved = options.target_size == 0 || compressed_size <= options.target_size;
249
250    Ok(CompressionResult {
251        original_size,
252        compressed_size,
253        reduction_percent,
254        unused_media_removed,
255        images_compressed,
256        target_achieved,
257    })
258}
259
260/// Compress a PPTX in memory
261pub fn compress_pptx_in_memory(
262    data: &[u8],
263    options: &CompressionOptions,
264) -> Result<(Vec<u8>, CompressionResult)> {
265    // Write to temp file
266    let temp_dir = std::env::temp_dir();
267    let temp_input = temp_dir.join("compress_input.pptx");
268    let temp_output = temp_dir.join("compress_output.pptx");
269
270    std::fs::write(&temp_input, data)?;
271
272    let result = compress_pptx(&temp_input, &temp_output, options)?;
273    let output_data = std::fs::read(&temp_output)?;
274
275    // Cleanup
276    let _ = std::fs::remove_file(&temp_input);
277    let _ = std::fs::remove_file(&temp_output);
278
279    Ok((output_data, result))
280}
281
282/// Remove unused media files from package
283fn remove_unused_media(package: &mut Package) -> Result<usize> {
284    let media_paths: Vec<String> = package
285        .part_paths()
286        .iter()
287        .filter(|p| p.starts_with("ppt/media/"))
288        .map(|s| s.to_string())
289        .collect();
290
291    let mut referenced = HashSet::new();
292    let mut removed = 0;
293
294    // Find all media references in slide files
295    for path in package.part_paths() {
296        if path.starts_with("ppt/slides/slide") && path.ends_with(".xml") {
297            if let Some(content) = package.get_part_string(&path) {
298                // Look for media references like rId5, image1.png, etc.
299                for media_path in &media_paths {
300                    let filename = Path::new(media_path)
301                        .file_name()
302                        .and_then(|n| n.to_str())
303                        .unwrap_or("");
304                    if content.contains(filename) || content.contains(&media_path[4..]) {
305                        referenced.insert(media_path.clone());
306                    }
307                }
308            }
309        }
310    }
311
312    // Remove unreferenced media
313    for media_path in media_paths {
314        if !referenced.contains(&media_path) {
315            package.remove_part(&media_path);
316            removed += 1;
317        }
318    }
319
320    Ok(removed)
321}
322
323/// Remove document properties
324fn remove_document_properties(package: &mut Package) {
325    // Remove core properties
326    package.remove_part("docProps/core.xml");
327    // Remove app properties
328    package.remove_part("docProps/app.xml");
329    // Remove custom properties
330    package.remove_part("docProps/custom.xml");
331    // Remove thumbnail
332    package.remove_part("docProps/thumbnail.jpeg");
333}
334
335/// Remove notes slides
336fn remove_notes_slides(package: &mut Package) -> Result<()> {
337    let notes_paths: Vec<String> = package
338        .part_paths()
339        .iter()
340        .filter(|p| p.starts_with("ppt/notesSlides/"))
341        .map(|s| s.to_string())
342        .collect();
343
344    for path in notes_paths {
345        package.remove_part(&path);
346        // Also remove relationships
347        let rels_path = path.replace("notesSlides/", "notesSlides/_rels/") + ".rels";
348        package.remove_part(&rels_path);
349    }
350
351    Ok(())
352}
353
354/// Optimize XML content (minimize whitespace)
355fn optimize_xml_content(package: &mut Package) -> Result<()> {
356    let xml_paths: Vec<String> = package
357        .part_paths()
358        .iter()
359        .filter(|p| p.ends_with(".xml") || p.ends_with(".rels"))
360        .map(|s| s.to_string())
361        .collect();
362
363    for path in xml_paths {
364        if let Some(content) = package.get_part_string(&path) {
365            let optimized = minimize_xml(&content);
366            package.add_part(path, optimized.into_bytes());
367        }
368    }
369
370    Ok(())
371}
372
/// Minimize XML by removing unnecessary whitespace
///
/// Only whitespace-only text between two pieces of markup is removed (the
/// indentation and line breaks of pretty-printed XML). Everything else is
/// copied verbatim:
///
/// * text that contains any non-whitespace character keeps its leading,
///   trailing and inner whitespace (`<a:t>Hello </a:t>` is unchanged);
/// * whitespace-only text directly between a start tag and an end tag is
///   kept, because it may be the element's real content (`<a:t> </a:t>`);
/// * tags, comments, CDATA sections and processing instructions are never
///   modified, and `<` / `>` inside single- or double-quoted attribute
///   values do not end a tag.
///
/// Whitespace-only text between sibling elements is dropped, which assumes
/// element-only content there (no mixed content).
fn minimize_xml(xml: &str) -> String {
    /// Byte length of the markup construct at the start of `s` (which starts
    /// with `<`, or is empty). Unterminated markup runs to the end of `s`.
    fn markup_len(s: &str) -> usize {
        // End of a construct closed by a fixed terminator, searching after `skip` bytes.
        let through = |skip: usize, terminator: &str| {
            s[skip..]
                .find(terminator)
                .map_or(s.len(), |at| skip + at + terminator.len())
        };

        if s.starts_with("<!--") {
            return through(4, "-->");
        }
        if s.starts_with("<![CDATA[") {
            return through(9, "]]>");
        }
        if s.starts_with("<?") {
            return through(2, "?>");
        }

        // Ordinary tag: ends at the first '>' outside a quoted attribute value.
        // The bytes tested are ASCII, so the returned index is a char boundary.
        let mut quote: Option<u8> = None;
        for (idx, &byte) in s.as_bytes().iter().enumerate() {
            match quote {
                Some(open) => {
                    if byte == open {
                        quote = None;
                    }
                }
                None => match byte {
                    b'"' | b'\'' => quote = Some(byte),
                    b'>' => return idx + 1,
                    _ => {}
                },
            }
        }
        s.len()
    }

    let mut result = String::with_capacity(xml.len());
    let mut rest = xml;
    // True when the most recent markup was a start tag that is not self-closing.
    let mut after_start_tag = false;

    while !rest.is_empty() {
        // Split off the text run, then the markup construct that follows it.
        let text_len = rest.find('<').unwrap_or(rest.len());
        let (text, tail) = rest.split_at(text_len);
        let (markup, remainder) = tail.split_at(markup_len(tail));

        if !text.is_empty() {
            let whitespace_only = text
                .chars()
                .all(|c| matches!(c, ' ' | '\t' | '\n' | '\r'));
            // `<x>   </x>`: the whitespace is the element's own content.
            let leaf_content = after_start_tag && markup.starts_with("</");
            if !whitespace_only || leaf_content {
                result.push_str(text);
            }
        }

        if !markup.is_empty() {
            result.push_str(markup);
            after_start_tag = !(markup.starts_with("</")
                || markup.starts_with("<!")
                || markup.starts_with("<?")
                || markup.ends_with("/>"));
        }

        rest = remainder;
    }

    result
}
419
420/// Analyze PPTX file and return size breakdown
421pub fn analyze_pptx<P: AsRef<Path>>(path: P) -> Result<PptxAnalysis> {
422    let package = Package::open(path.as_ref())?;
423    let total_size = std::fs::metadata(path.as_ref())?.len() as usize;
424
425    let mut images_size = 0;
426    let mut xml_size = 0;
427    let mut other_size = 0;
428
429    let mut image_count = 0;
430    let mut slide_count = 0;
431    let mut media_count = 0;
432
433    for part_path in package.part_paths() {
434        if let Some(data) = package.get_part(part_path) {
435            let size = data.len();
436
437            if part_path.starts_with("ppt/media/") {
438                if part_path.ends_with(".png")
439                    || part_path.ends_with(".jpg")
440                    || part_path.ends_with(".jpeg")
441                {
442                    images_size += size;
443                    image_count += 1;
444                } else {
445                    media_count += 1;
446                    other_size += size;
447                }
448            } else if part_path.ends_with(".xml") || part_path.ends_with(".rels") {
449                xml_size += size;
450                if part_path.starts_with("ppt/slides/slide") && part_path.ends_with(".xml") {
451                    slide_count += 1;
452                }
453            } else {
454                other_size += size;
455            }
456        }
457    }
458
459    Ok(PptxAnalysis {
460        total_size,
461        images_size,
462        xml_size,
463        other_size,
464        image_count,
465        slide_count,
466        media_count,
467    })
468}
469
/// Analysis result for PPTX file
///
/// Produced by `analyze_pptx`; all sizes are in bytes.
#[derive(Debug)]
pub struct PptxAnalysis {
    /// Total file size in bytes
    pub total_size: usize,
    /// Size of image files
    pub images_size: usize,
    /// Size of XML files
    pub xml_size: usize,
    /// Size of other files
    // As filled in by `analyze_pptx`, this includes non-image media as well
    // as every part that is neither media nor XML.
    pub other_size: usize,
    /// Number of images
    pub image_count: usize,
    /// Number of slides
    pub slide_count: usize,
    /// Number of other media files
    pub media_count: usize,
}
488
489impl PptxAnalysis {
490    /// Get human-readable summary
491    pub fn summary(&self) -> String {
492        format!(
493            "PPTX Analysis:\n\
494            - Total size: {}\n\
495            - Images: {} ({} MB)\n\
496            - Slides: {}\n\
497            - XML data: {}\n\
498            - Other media: {} files ({})",
499            format_bytes(self.total_size),
500            self.image_count,
501            format_bytes(self.images_size),
502            self.slide_count,
503            format_bytes(self.xml_size),
504            self.media_count,
505            format_bytes(self.other_size)
506        )
507    }
508
509    /// Get images as percentage of total
510    pub fn images_percentage(&self) -> f64 {
511        if self.total_size > 0 {
512            (self.images_size as f64 / self.total_size as f64) * 100.0
513        } else {
514            0.0
515        }
516    }
517}
518
/// Render a byte count with a binary-scaled unit: whole bytes below 1 KB,
/// one decimal for KB and MB, two decimals for GB.
fn format_bytes(bytes: usize) -> String {
    const KB: usize = 1024;
    const MB: usize = KB * 1024;
    const GB: usize = MB * 1024;

    let amount = bytes as f64;
    match bytes {
        b if b < KB => format!("{} B", b),
        b if b < MB => format!("{:.1} KB", amount / KB as f64),
        b if b < GB => format!("{:.1} MB", amount / MB as f64),
        _ => format!("{:.2} GB", amount / GB as f64),
    }
}
530
// Unit tests for the pure parts of this module: level/option presets, XML
// minimization, byte formatting and the analysis/result structs. Nothing
// here opens a package or touches the file system.
#[cfg(test)]
mod tests {
    use super::*;

    // Each level maps to its fixed JPEG quality; `Custom` returns its payload.
    #[test]
    fn test_compression_level() {
        assert_eq!(CompressionLevel::Light.image_quality(), 95);
        assert_eq!(CompressionLevel::Medium.image_quality(), 85);
        assert_eq!(CompressionLevel::Aggressive.image_quality(), 70);
        assert_eq!(CompressionLevel::Custom(80).image_quality(), 80);
    }

    // Only `Aggressive` and `Custom` request image resizing.
    #[test]
    fn test_compression_level_resize() {
        assert!(!CompressionLevel::Light.should_resize_images());
        assert!(!CompressionLevel::Medium.should_resize_images());
        assert!(CompressionLevel::Aggressive.should_resize_images());
        assert!(CompressionLevel::Custom(80).should_resize_images());
    }

    // Maximum image dimension per built-in level (`Custom` is not covered here).
    #[test]
    fn test_compression_level_max_dimension() {
        assert_eq!(CompressionLevel::Light.max_image_dimension(), 2048);
        assert_eq!(CompressionLevel::Medium.max_image_dimension(), 1600);
        assert_eq!(CompressionLevel::Aggressive.max_image_dimension(), 1280);
    }

    // Every `with_*` builder sets the field it is named after.
    #[test]
    fn test_compression_options_builder() {
        let opts = CompressionOptions::new()
            .with_level(CompressionLevel::Aggressive)
            .with_unused_media_removal(true)
            .with_properties_removal(true)
            .with_notes_removal(true)
            .with_comments_removal(false)
            .with_xml_optimization(true)
            .with_target_size(10 * 1024 * 1024);

        assert_eq!(opts.level, CompressionLevel::Aggressive);
        assert!(opts.remove_unused_media);
        assert!(opts.remove_properties);
        assert!(opts.remove_notes);
        assert!(!opts.remove_comments);
        assert!(opts.optimize_xml);
        assert_eq!(opts.target_size, 10 * 1024 * 1024);
    }

    // The `maximum` preset turns every switch on at the aggressive level.
    #[test]
    fn test_maximum_preset() {
        let opts = CompressionOptions::maximum();
        assert!(matches!(opts.level, CompressionLevel::Aggressive));
        assert!(opts.remove_properties);
        assert!(opts.remove_notes);
        assert!(opts.remove_unused_media);
        assert!(opts.remove_comments);
        assert!(opts.optimize_xml);
    }

    // The `web` preset uses the medium level with a 5 MB size target.
    #[test]
    fn test_web_preset() {
        let opts = CompressionOptions::web();
        assert!(matches!(opts.level, CompressionLevel::Medium));
        assert_eq!(opts.target_size, 5 * 1024 * 1024);
        assert!(opts.remove_unused_media);
        assert!(opts.remove_properties);
    }

    // Pretty-printed XML loses its line breaks and indentation but keeps its tags.
    #[test]
    fn test_minimize_xml() {
        let input = r#"<?xml version="1.0"?>
<root>
    <element attr="value" />
</root>"#;

        let minimized = minimize_xml(input);
        assert!(!minimized.contains("\n"));
        assert!(!minimized.contains("    "));
        assert!(minimized.contains("<root>"));
        assert!(minimized.contains("<element"));
    }

    #[test]
    fn test_minimize_xml_preserves_content() {
        let input = r#"<a>  text  </a>"#;
        let minimized = minimize_xml(input);
        // Only checks that the text itself survives; the whitespace around
        // it is not asserted here.
        assert!(minimized.contains("text"));
    }

    // Exact unit boundaries: 1024 B is shown as KB, 1024 KB as MB, 1024 MB as GB.
    #[test]
    fn test_format_bytes() {
        assert_eq!(format_bytes(500), "500 B");
        assert_eq!(format_bytes(1024), "1.0 KB");
        assert_eq!(format_bytes(1024 * 1024), "1.0 MB");
        assert_eq!(format_bytes(1024 * 1024 * 1024), "1.00 GB");
    }

    // 500 of 1000 bytes in images is exactly 50%.
    #[test]
    fn test_pptx_analysis_images_percentage() {
        let analysis = PptxAnalysis {
            total_size: 1000,
            images_size: 500,
            xml_size: 300,
            other_size: 200,
            image_count: 5,
            slide_count: 10,
            media_count: 2,
        };

        assert_eq!(analysis.images_percentage(), 50.0);
    }

    // A zero total size must yield 0% rather than dividing by zero.
    #[test]
    fn test_pptx_analysis_images_percentage_zero() {
        let analysis = PptxAnalysis {
            total_size: 0,
            images_size: 0,
            xml_size: 0,
            other_size: 0,
            image_count: 0,
            slide_count: 0,
            media_count: 0,
        };

        assert_eq!(analysis.images_percentage(), 0.0);
    }

    // Loose smoke test: the summary mentions the header, the total size and the counts.
    #[test]
    fn test_pptx_analysis_summary() {
        let analysis = PptxAnalysis {
            total_size: 1024 * 1024,
            images_size: 512 * 1024,
            xml_size: 256 * 1024,
            other_size: 256 * 1024,
            image_count: 3,
            slide_count: 5,
            media_count: 1,
        };

        let summary = analysis.summary();
        assert!(summary.contains("PPTX Analysis"));
        assert!(summary.contains("1.0 MB"));
        assert!(summary.contains("3"));
        assert!(summary.contains("5"));
    }

    // The result struct is plain data: fields read back exactly as written.
    #[test]
    fn test_compression_result_fields() {
        let result = CompressionResult {
            original_size: 1000,
            compressed_size: 800,
            reduction_percent: 20.0,
            unused_media_removed: 2,
            images_compressed: 3,
            target_achieved: true,
        };

        assert_eq!(result.original_size, 1000);
        assert_eq!(result.compressed_size, 800);
        assert_eq!(result.reduction_percent, 20.0);
        assert_eq!(result.unused_media_removed, 2);
        assert_eq!(result.images_compressed, 3);
        assert!(result.target_achieved);
    }
}