// torsh_package/optimization.rs

//! Package optimization utilities
//!
//! This module provides tools for optimizing package size and performance,
//! including resource deduplication, compression analysis, and optimization recommendations.

use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use torsh_core::error::Result;

use crate::package::Package;
use crate::resources::ResourceType;
use crate::utils::format_file_size;
14/// Estimate compression ratio based on resource type
15fn estimate_compression_ratio_by_type(resource_type: &ResourceType) -> f64 {
16    match resource_type {
17        ResourceType::Model => 0.7, // Model weights often compress well (30% reduction)
18        ResourceType::Source => 0.5, // Source code compresses very well (50% reduction)
19        ResourceType::Data => 0.6,  // General data compresses moderately (40% reduction)
20        ResourceType::Config => 0.4, // Config files compress very well (60% reduction)
21        ResourceType::Documentation => 0.3, // Text compresses very well (70% reduction)
22        ResourceType::Text => 0.3,  // Text compresses very well (70% reduction)
23        ResourceType::Binary => 0.9, // Binary data may be pre-compressed (10% reduction)
24        ResourceType::License => 0.4, // License text compresses well (60% reduction)
25        ResourceType::Metadata => 0.4, // JSON/metadata compresses well (60% reduction)
26    }
27}
28
29/// Package optimization report
30#[derive(Debug, Clone, Serialize, Deserialize)]
31pub struct OptimizationReport {
32    /// Original package size in bytes
33    pub original_size: u64,
34    /// Estimated optimized size in bytes
35    pub optimized_size: u64,
36    /// Potential size savings in bytes
37    pub savings: u64,
38    /// Savings percentage
39    pub savings_percent: f64,
40    /// List of optimization opportunities
41    pub opportunities: Vec<OptimizationOpportunity>,
42    /// Resource deduplication analysis
43    pub deduplication: DeduplicationAnalysis,
44    /// Compression analysis
45    pub compression: CompressionAnalysis,
46}
47
48/// A single optimization opportunity
49#[derive(Debug, Clone, Serialize, Deserialize)]
50pub struct OptimizationOpportunity {
51    /// Type of optimization
52    pub optimization_type: OptimizationType,
53    /// Description of the opportunity
54    pub description: String,
55    /// Potential size savings in bytes
56    pub potential_savings: u64,
57    /// Priority level (1-5, 5 being highest)
58    pub priority: u8,
59    /// Resources affected
60    pub affected_resources: Vec<String>,
61}
62
63/// Types of optimizations
64#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
65pub enum OptimizationType {
66    /// Resource deduplication
67    Deduplication,
68    /// Better compression algorithm selection
69    CompressionUpgrade,
70    /// Remove unused resources
71    RemoveUnused,
72    /// Compress uncompressed resources
73    AddCompression,
74    /// Merge small resources
75    MergeSmall,
76    /// Split large resources
77    SplitLarge,
78}
79
80/// Resource deduplication analysis
81#[derive(Debug, Clone, Serialize, Deserialize)]
82pub struct DeduplicationAnalysis {
83    /// Total number of resources
84    pub total_resources: usize,
85    /// Number of unique resources (by hash)
86    pub unique_resources: usize,
87    /// Number of duplicate resources
88    pub duplicate_count: usize,
89    /// Duplicate groups (hash -> list of resource names)
90    pub duplicate_groups: HashMap<String, Vec<String>>,
91    /// Potential savings from deduplication in bytes
92    pub potential_savings: u64,
93}
94
95/// Compression analysis
96#[derive(Debug, Clone, Serialize, Deserialize)]
97pub struct CompressionAnalysis {
98    /// Resources that would benefit from compression
99    pub compressible_resources: Vec<CompressibleResource>,
100    /// Resources already well-compressed
101    pub well_compressed_count: usize,
102    /// Total potential compression savings in bytes
103    pub potential_savings: u64,
104}
105
106/// A resource that could benefit from compression
107#[derive(Debug, Clone, Serialize, Deserialize)]
108pub struct CompressibleResource {
109    /// Resource name
110    pub name: String,
111    /// Current size in bytes
112    pub current_size: u64,
113    /// Estimated compressed size in bytes
114    pub estimated_compressed_size: u64,
115    /// Potential savings in bytes
116    pub potential_savings: u64,
117    /// Estimated compression ratio
118    pub compression_ratio: f64,
119}
120
/// Package optimizer
///
/// Tunable thresholds controlling which optimization opportunities are
/// detected and reported.
pub struct PackageOptimizer {
    /// Minimum size threshold for compression (bytes); smaller resources are skipped
    pub min_compression_size: u64,
    /// Maximum estimated compression ratio for a resource to be recommended
    /// (e.g. 0.7 means at least 30% savings is required)
    pub min_compression_ratio: f64,
    /// Enable resource deduplication
    pub enable_deduplication: bool,
}

131impl PackageOptimizer {
132    /// Create a new package optimizer with default settings
133    pub fn new() -> Self {
134        Self {
135            min_compression_size: 1024, // 1KB
136            min_compression_ratio: 0.7, // 30% savings
137            enable_deduplication: true,
138        }
139    }
140
141    /// Analyze package and generate optimization report
142    pub fn analyze(&self, package: &Package) -> Result<OptimizationReport> {
143        let original_size = self.calculate_package_size(package);
144
145        // Perform various analyses
146        let deduplication = if self.enable_deduplication {
147            self.analyze_deduplication(package)
148        } else {
149            DeduplicationAnalysis {
150                total_resources: 0,
151                unique_resources: 0,
152                duplicate_count: 0,
153                duplicate_groups: HashMap::new(),
154                potential_savings: 0,
155            }
156        };
157
158        let compression = self.analyze_compression(package)?;
159
160        // Generate optimization opportunities
161        let mut opportunities = Vec::new();
162
163        // Add deduplication opportunities
164        if deduplication.duplicate_count > 0 {
165            opportunities.push(OptimizationOpportunity {
166                optimization_type: OptimizationType::Deduplication,
167                description: format!(
168                    "Found {} duplicate resources that could be deduplicated",
169                    deduplication.duplicate_count
170                ),
171                potential_savings: deduplication.potential_savings,
172                priority: 5,
173                affected_resources: deduplication
174                    .duplicate_groups
175                    .values()
176                    .flatten()
177                    .cloned()
178                    .collect(),
179            });
180        }
181
182        // Add compression opportunities
183        for resource in &compression.compressible_resources {
184            if resource.potential_savings > self.min_compression_size {
185                opportunities.push(OptimizationOpportunity {
186                    optimization_type: OptimizationType::AddCompression,
187                    description: format!(
188                        "Resource '{}' could be compressed to save {}",
189                        resource.name,
190                        format_file_size(resource.potential_savings)
191                    ),
192                    potential_savings: resource.potential_savings,
193                    priority: if resource.compression_ratio < 0.5 {
194                        4
195                    } else {
196                        3
197                    },
198                    affected_resources: vec![resource.name.clone()],
199                });
200            }
201        }
202
203        // Calculate total potential savings
204        let total_savings = deduplication.potential_savings + compression.potential_savings;
205        let optimized_size = original_size.saturating_sub(total_savings);
206        let savings_percent = if original_size > 0 {
207            (total_savings as f64 / original_size as f64) * 100.0
208        } else {
209            0.0
210        };
211
212        // Sort opportunities by priority
213        opportunities.sort_by(|a, b| b.priority.cmp(&a.priority));
214
215        Ok(OptimizationReport {
216            original_size,
217            optimized_size,
218            savings: total_savings,
219            savings_percent,
220            opportunities,
221            deduplication,
222            compression,
223        })
224    }
225
226    /// Calculate total package size
227    fn calculate_package_size(&self, package: &Package) -> u64 {
228        package.resources().values().map(|r| r.size() as u64).sum()
229    }
230
231    /// Analyze resource deduplication opportunities
232    fn analyze_deduplication(&self, package: &Package) -> DeduplicationAnalysis {
233        let mut hash_to_resources: HashMap<String, Vec<String>> = HashMap::new();
234        let mut hash_to_size: HashMap<String, u64> = HashMap::new();
235
236        // Group resources by hash
237        for (name, resource) in package.resources() {
238            let hash = resource.sha256();
239            let size = resource.size() as u64;
240
241            hash_to_resources
242                .entry(hash.clone())
243                .or_insert_with(Vec::new)
244                .push(name.clone());
245
246            hash_to_size.insert(hash, size);
247        }
248
249        // Find duplicate groups
250        let duplicate_groups: HashMap<String, Vec<String>> = hash_to_resources
251            .iter()
252            .filter(|(_, resources)| resources.len() > 1)
253            .map(|(hash, resources)| (hash.clone(), resources.clone()))
254            .collect();
255
256        let duplicate_count: usize = duplicate_groups
257            .values()
258            .map(|v| v.len() - 1) // -1 because we keep one copy
259            .sum();
260
261        // Calculate potential savings
262        let duplicate_savings: u64 = duplicate_groups
263            .iter()
264            .map(|(hash, resources)| {
265                let size = hash_to_size.get(hash).copied().unwrap_or(0);
266                size * (resources.len() as u64 - 1) // Save size for each duplicate copy
267            })
268            .sum();
269
270        let total_resources = package.resources().len();
271        let unique_resources = hash_to_resources.len();
272
273        DeduplicationAnalysis {
274            total_resources,
275            unique_resources,
276            duplicate_count,
277            duplicate_groups,
278            potential_savings: duplicate_savings,
279        }
280    }
281
282    /// Analyze compression opportunities
283    fn analyze_compression(&self, package: &Package) -> Result<CompressionAnalysis> {
284        let mut compressible_resources = Vec::new();
285        let mut well_compressed_count = 0;
286        let mut total_savings = 0u64;
287
288        // Analyze each resource
289        for (name, resource) in package.resources() {
290            let size = resource.size() as u64;
291
292            // Skip resources smaller than minimum size
293            if size < self.min_compression_size {
294                continue;
295            }
296
297            // Skip resources that are already compressed
298            if resource.is_compressed() {
299                well_compressed_count += 1;
300                continue;
301            }
302
303            // Estimate compression ratio based on resource type
304            let compression_ratio = estimate_compression_ratio_by_type(&resource.resource_type);
305
306            if compression_ratio <= self.min_compression_ratio {
307                let estimated_compressed = (size as f64 * compression_ratio) as u64;
308                let savings = size.saturating_sub(estimated_compressed);
309
310                compressible_resources.push(CompressibleResource {
311                    name: name.clone(),
312                    current_size: size,
313                    estimated_compressed_size: estimated_compressed,
314                    potential_savings: savings,
315                    compression_ratio,
316                });
317
318                total_savings += savings;
319            } else {
320                well_compressed_count += 1;
321            }
322        }
323
324        Ok(CompressionAnalysis {
325            compressible_resources,
326            well_compressed_count,
327            potential_savings: total_savings,
328        })
329    }
330
331    /// Apply optimizations to a package
332    pub fn optimize(&self, package: &mut Package) -> Result<OptimizationReport> {
333        let report = self.analyze(package)?;
334
335        // Apply deduplication
336        if self.enable_deduplication && !report.deduplication.duplicate_groups.is_empty() {
337            self.apply_deduplication(package, &report.deduplication)?;
338        }
339
340        // Apply compression improvements
341        // (Would need to implement compression application logic)
342
343        Ok(report)
344    }
345
346    /// Apply deduplication optimizations
347    fn apply_deduplication(
348        &self,
349        package: &mut Package,
350        analysis: &DeduplicationAnalysis,
351    ) -> Result<()> {
352        // For each group of duplicates, keep the first one and remove others
353        for (_hash, resource_names) in &analysis.duplicate_groups {
354            if resource_names.len() > 1 {
355                // Keep the first resource, remove the rest
356                for name in &resource_names[1..] {
357                    package.resources_mut().remove(name);
358                }
359            }
360        }
361
362        Ok(())
363    }
364}
365
366impl Default for PackageOptimizer {
367    fn default() -> Self {
368        Self::new()
369    }
370}
371
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_optimizer_creation() {
        let optimizer = PackageOptimizer::new();
        assert_eq!(optimizer.min_compression_size, 1024);
        assert_eq!(optimizer.min_compression_ratio, 0.7);
        assert!(optimizer.enable_deduplication);
    }

    #[test]
    fn test_optimization_type() {
        let opt_type = OptimizationType::Deduplication;
        assert_eq!(opt_type, OptimizationType::Deduplication);
    }

    #[test]
    fn test_default_optimizer() {
        let optimizer = PackageOptimizer::default();
        assert_eq!(optimizer.min_compression_size, 1024);
    }

    #[test]
    fn test_deduplication_analysis() {
        let analysis = DeduplicationAnalysis {
            total_resources: 10,
            unique_resources: 7,
            duplicate_count: 3,
            duplicate_groups: HashMap::new(),
            potential_savings: 1024,
        };

        assert_eq!(analysis.total_resources, 10);
        assert_eq!(analysis.duplicate_count, 3);
        assert_eq!(analysis.potential_savings, 1024);
    }

    #[test]
    fn test_compressible_resource() {
        let resource = CompressibleResource {
            name: "test.txt".to_string(),
            current_size: 10000,
            estimated_compressed_size: 3000,
            potential_savings: 7000,
            compression_ratio: 0.3,
        };

        assert_eq!(resource.current_size, 10000);
        assert_eq!(resource.potential_savings, 7000);
        assert!(resource.compression_ratio < 0.5);
    }
}